From 7f81ed08e2552eff2135cc754cd37d8ae6a574aa Mon Sep 17 00:00:00 2001 From: Michael Tupek Date: Wed, 29 Apr 2026 12:00:56 -0600 Subject: [PATCH 01/27] Stub out new nonlinear PCG solver. --- src/smith/numerics/equation_solver.cpp | 150 ++++++++++++++++++++++++- src/smith/numerics/solver_config.hpp | 49 ++++++++ 2 files changed, 198 insertions(+), 1 deletion(-) diff --git a/src/smith/numerics/equation_solver.cpp b/src/smith/numerics/equation_solver.cpp index 0024cbafae..6edefb9328 100644 --- a/src/smith/numerics/equation_solver.cpp +++ b/src/smith/numerics/equation_solver.cpp @@ -889,6 +889,144 @@ class TrustRegion : public mfem::NewtonSolver { } }; +/** + * @brief Skeleton for a nonlinear preconditioned conjugate-gradient block solver. + * + * The full algorithm is added in a follow-on chunk. This class establishes the Smith/MFEM integration points used by + * that implementation: residual evaluation, Jacobian assembly, Hessian-vector products, preconditioning, counters, and + * standard nonlinear convergence bookkeeping. 
+ */ +class PcgBlockSolver : public mfem::NewtonSolver { + protected: + /// Trial solution vector + mutable mfem::Vector x_trial; + /// Trial residual vector + mutable mfem::Vector r_trial; + /// Scratch vector + mutable mfem::Vector scratch; + + /// Nonlinear solution options + NonlinearSolverOptions nonlinear_options; + + /// Preconditioner used by the PCG-block recurrence + Solver& pcg_precond; + + /// Reconstructed Smith print level + mutable size_t print_level = 0; + + public: + /// Internal counter for hess-vecs + mutable size_t num_hess_vecs = 0; + /// Internal counter for preconditions + mutable size_t num_preconds = 0; + /// Internal counter for residuals + mutable size_t num_residuals = 0; + /// Internal counter for matrix assembles + mutable size_t num_jacobian_assembles = 0; + +#ifdef MFEM_USE_MPI + /// Constructor + PcgBlockSolver(MPI_Comm comm_, const NonlinearSolverOptions& nonlinear_opts, Solver& preconditioner) + : mfem::NewtonSolver(comm_), nonlinear_options(nonlinear_opts), pcg_precond(preconditioner) + { + } +#endif + + /// Assemble the Jacobian at x. + void assembleJacobian(const mfem::Vector& x) const + { + SMITH_MARK_FUNCTION; + ++num_jacobian_assembles; + grad = &oper->GetGradient(x); + if (nonlinear_options.force_monolithic) { + auto* grad_blocked = dynamic_cast(grad); + if (grad_blocked) grad = buildMonolithicMatrix(*grad_blocked).release(); + } + } + + /// Evaluate the nonlinear residual. + mfem::real_t computeResidual(const mfem::Vector& x, mfem::Vector& residual) const + { + SMITH_MARK_FUNCTION; + ++num_residuals; + oper->Mult(x, residual); + return Norm(residual); + } + + /// Apply the assembled Jacobian to a vector. + void hessVec(const mfem::Vector& x, mfem::Vector& v) const + { + SMITH_MARK_FUNCTION; + ++num_hess_vecs; + grad->Mult(x, v); + } + + /// Apply the configured nonlinear PCG preconditioner. 
+ void precond(const mfem::Vector& x, mfem::Vector& v) const + { + SMITH_MARK_FUNCTION; + ++num_preconds; + pcg_precond.Mult(x, v); + } + + /// @overload + void Mult(const mfem::Vector&, mfem::Vector& X) const + { + MFEM_ASSERT(oper != NULL, "the Operator is not set (use SetOperator)."); + MFEM_ASSERT(prec != NULL, "the Solver is not set (use SetSolver)."); + + print_level = print_options.iterations ? 1 : print_level; + print_level = print_options.summary ? 2 : print_level; + + num_hess_vecs = 0; + num_preconds = 0; + num_residuals = 0; + num_jacobian_assembles = 0; + + mfem::real_t norm = 0.0; + norm = initial_norm = computeResidual(X, r); + if (norm == 0.0) { + converged = true; + final_iter = 0; + final_norm = norm; + return; + } + + const mfem::real_t norm_goal = std::max(rel_tol * initial_norm, abs_tol); + + if (print_level == 1) { + mfem::out << "PcgBlock iteration " << std::setw(3) << 0 << " : ||r|| = " << std::setw(13) << norm << "\n"; + } + + pcg_precond.iterative_mode = false; + + x_trial.SetSize(X.Size()); + x_trial = 0.0; + r_trial.SetSize(X.Size()); + r_trial = 0.0; + scratch.SetSize(X.Size()); + scratch = 0.0; + + if (print_level >= 2) { + mfem::out << "PcgBlock iteration " << std::setw(3) << 0 << " : ||r|| = " << std::setw(13) << norm + << ", norm goal = " << std::setw(13) << norm_goal << '\n'; + } + + if (norm <= norm_goal && nonlinear_options.min_iterations == 0) { + converged = true; + } else { + converged = false; + } + + final_iter = 0; + final_norm = norm; + + if (!converged && print_level >= 1) { + mfem::out << "PcgBlock: No convergence! 
Algorithm implementation pending.\n"; + } + } +}; + EquationSolver::EquationSolver(NonlinearSolverOptions nonlinear_opts, LinearSolverOptions lin_opts, MPI_Comm comm) { auto [lin_solver, preconditioner] = buildLinearSolverAndPreconditioner(lin_opts, comm); @@ -1041,6 +1179,8 @@ std::unique_ptr buildNonlinearSolver(NonlinearSolverOptions nonlinear_solver = std::make_unique(comm, nonlinear_opts); } else if (nonlinear_opts.nonlin_solver == NonlinearSolver::TrustRegion) { nonlinear_solver = std::make_unique(comm, nonlinear_opts, linear_opts, prec); + } else if (nonlinear_opts.nonlin_solver == NonlinearSolver::PcgBlock) { + nonlinear_solver = std::make_unique(comm, nonlinear_opts, prec); #ifdef SMITH_USE_PETSC } else if (nonlinear_opts.nonlin_solver == NonlinearSolver::PetscNewton) { nonlinear_solver = std::make_unique(comm, nonlinear_opts); @@ -1298,7 +1438,9 @@ void EquationSolver::defineInputFileSchema(axom::inlet::Container& container) nonlinear_container.addDouble("abs_tol", "Absolute tolerance for the Newton solve.").defaultValue(1.0e-4); nonlinear_container.addInt("max_iter", "Maximum iterations for the Newton solve.").defaultValue(500); nonlinear_container.addInt("print_level", "Nonlinear print level.").defaultValue(0); - nonlinear_container.addString("solver_type", "Solver type (Newton|KINFullStep|KINLineSearch)").defaultValue("Newton"); + nonlinear_container + .addString("solver_type", "Solver type (Newton|NewtonLineSearch|TrustRegion|PcgBlock|KINFullStep|KINLineSearch)") + .defaultValue("Newton"); } } // namespace smith @@ -1373,6 +1515,12 @@ smith::NonlinearSolverOptions FromInlet::operator const std::string solver_type = base["solver_type"]; if (solver_type == "Newton") { options.nonlin_solver = smith::NonlinearSolver::Newton; + } else if (solver_type == "NewtonLineSearch") { + options.nonlin_solver = smith::NonlinearSolver::NewtonLineSearch; + } else if (solver_type == "TrustRegion") { + options.nonlin_solver = smith::NonlinearSolver::TrustRegion; + } else 
if (solver_type == "PcgBlock") { + options.nonlin_solver = smith::NonlinearSolver::PcgBlock; } else if (solver_type == "KINFullStep") { options.nonlin_solver = smith::NonlinearSolver::KINFullStep; } else if (solver_type == "KINLineSearch") { diff --git a/src/smith/numerics/solver_config.hpp b/src/smith/numerics/solver_config.hpp index aebf795305..e7c26bda35 100644 --- a/src/smith/numerics/solver_config.hpp +++ b/src/smith/numerics/solver_config.hpp @@ -152,6 +152,7 @@ enum class NonlinearSolver LBFGS, /**< MFEM-native Limited memory BFGS */ NewtonLineSearch, /**< Custom solver using preconditioned earch direction with backtracking line search */ TrustRegion, /**< Custom solver using a trust region solver */ + PcgBlock, /**< Custom nonlinear preconditioned conjugate-gradient block solver */ KINFullStep, /**< KINSOL Full Newton (Sundials must be enabled) */ KINBacktrackingLineSearch, /**< KINSOL Newton with Backtracking Line Search (Sundials must be enabled) */ KINPicard, /**< KINSOL Picard (Sundials must be enabled) */ @@ -174,6 +175,8 @@ inline std::string nonlinearName(const NonlinearSolver& s) return "NewtonLineSearch"; case NonlinearSolver::TrustRegion: return "TrustRegion"; + case NonlinearSolver::PcgBlock: + return "PcgBlock"; case NonlinearSolver::KINFullStep: return "KINFullStep"; case NonlinearSolver::KINBacktrackingLineSearch: @@ -202,6 +205,7 @@ inline std::map nonlinearSolverMap = { {"LBFGS", NonlinearSolver::LBFGS}, {"NewtonLineSearch", NonlinearSolver::NewtonLineSearch}, {"TrustRegion", NonlinearSolver::TrustRegion}, + {"PcgBlock", NonlinearSolver::PcgBlock}, {"KINFullStep", NonlinearSolver::KINFullStep}, {"KINBacktrackingLineSearch", NonlinearSolver::KINBacktrackingLineSearch}, {"KINPicard", NonlinearSolver::KINPicard}, @@ -469,6 +473,51 @@ struct NonlinearSolverOptions { /// Should the gradient be converted to a monolithic matrix bool force_monolithic = false; + + /// Number of speculative nonlinear PCG steps per accepted/rejected block + int 
pcg_block_len = 10; + + /// Powell restart threshold for nonlinear PCG residual orthogonality + double pcg_powell_eta = 0.2; + + /// Trust-ratio threshold below which the PCG-block trust scale shrinks + double pcg_trust_eta_bad = 0.1; + + /// Trust-ratio threshold above which the PCG-block trust scale grows + double pcg_trust_eta_good = 0.75; + + /// PCG-block trust-scale shrink factor + double pcg_shrink = 0.5; + + /// PCG-block trust-scale growth factor + double pcg_growth = 1.25; + + /// Initial PCG-block trust scale + double pcg_h_scale_init = 1.0; + + /// Minimum PCG-block trust scale before declaring failure + double pcg_min_h_scale = 1e-8; + + /// Maximum number of rejected PCG blocks before declaring failure + int pcg_max_block_retries = 20; + + /// Nonmonotone cumulative gradient-work acceptance window + int pcg_window = 5; + + /// Armijo coefficient for PCG-block inner step backtracking + double pcg_ls_armijo_c = 1e-4; + + /// Maximum number of PCG-block inner step backtracks + int pcg_ls_max_backtracks = 8; + + /// PCG-block inner step backtracking shrink factor + double pcg_ls_shrink = 0.5; + + /// Descent and model denominator tolerance for PCG-block guards + double pcg_eps_descent = 1e-12; + + /// Running-mean window for successful PCG-block trust-radius reference steps + int pcg_delta_avg_window = 5; }; // _nonlinear_options_end From 70c55fd75e3ea79d23ae6d7b881d512b7a953afd Mon Sep 17 00:00:00 2001 From: Michael Tupek Date: Wed, 29 Apr 2026 12:19:37 -0600 Subject: [PATCH 02/27] Initial implementation of nonlinear pcg. 
--- src/smith/numerics/equation_solver.cpp | 292 ++++++++++++++++++++++++- 1 file changed, 280 insertions(+), 12 deletions(-) diff --git a/src/smith/numerics/equation_solver.cpp b/src/smith/numerics/equation_solver.cpp index 6edefb9328..03eff72240 100644 --- a/src/smith/numerics/equation_solver.cpp +++ b/src/smith/numerics/equation_solver.cpp @@ -983,8 +983,13 @@ class PcgBlockSolver : public mfem::NewtonSolver { num_residuals = 0; num_jacobian_assembles = 0; - mfem::real_t norm = 0.0; - norm = initial_norm = computeResidual(X, r); + SLIC_ERROR_ROOT_IF(nonlinear_options.pcg_block_len <= 0, "PcgBlock requires pcg_block_len > 0"); + SLIC_ERROR_ROOT_IF(nonlinear_options.pcg_window <= 0, "PcgBlock requires pcg_window > 0"); + SLIC_ERROR_ROOT_IF(nonlinear_options.pcg_ls_max_backtracks < 0, "PcgBlock requires pcg_ls_max_backtracks >= 0"); + SLIC_ERROR_ROOT_IF(nonlinear_options.pcg_delta_avg_window <= 0, "PcgBlock requires pcg_delta_avg_window > 0"); + + mfem::real_t norm = computeResidual(X, r); + initial_norm = norm; if (norm == 0.0) { converged = true; final_iter = 0; @@ -1007,22 +1012,285 @@ class PcgBlockSolver : public mfem::NewtonSolver { scratch.SetSize(X.Size()); scratch = 0.0; - if (print_level >= 2) { - mfem::out << "PcgBlock iteration " << std::setw(3) << 0 << " : ||r|| = " << std::setw(13) << norm - << ", norm goal = " << std::setw(13) << norm_goal << '\n'; - } + mfem::Vector r_block(X.Size()); + mfem::Vector r_candidate(X.Size()); + mfem::Vector force(X.Size()); + mfem::Vector z(X.Size()); + mfem::Vector z_old(X.Size()); + mfem::Vector p(X.Size()); + mfem::Vector p_old(X.Size()); + mfem::Vector Hp(X.Size()); + mfem::Vector step(X.Size()); + mfem::Vector x_candidate(X.Size()); + + bool have_momentum = false; + double rho_old = 0.0; + double h_scale = nonlinear_options.pcg_h_scale_init; + int retries_remaining = nonlinear_options.pcg_max_block_retries; + int it = 0; + double cumulative_work = 0.0; + std::vector work_history{cumulative_work}; + std::vector 
accepted_step_norms; + + auto reset_momentum = [&]() { + have_momentum = false; + rho_old = 0.0; + p_old = 0.0; + z_old = 0.0; + }; + + auto window_max = [&](const std::vector& history) { + const int window = nonlinear_options.pcg_window; + const auto begin = history.size() > static_cast(window) ? history.end() - window : history.begin(); + return *std::max_element(begin, history.end()); + }; + + auto current_delta_ref = [&]() { + if (accepted_step_norms.empty()) { + return 0.0; + } + const int window = nonlinear_options.pcg_delta_avg_window; + const auto begin = accepted_step_norms.size() > static_cast(window) ? accepted_step_norms.end() - window + : accepted_step_norms.begin(); + double sum = 0.0; + for (auto iter = begin; iter != accepted_step_norms.end(); ++iter) { + sum += *iter; + } + return sum / static_cast(accepted_step_norms.end() - begin); + }; - if (norm <= norm_goal && nonlinear_options.min_iterations == 0) { - converged = true; - } else { - converged = false; + for (; true;) { + MFEM_ASSERT(mfem::IsFinite(norm), "norm = " << norm); + if (print_level >= 2) { + mfem::out << "PcgBlock iteration " << std::setw(3) << it << " : ||r|| = " << std::setw(13) << norm; + if (it > 0) { + mfem::out << ", ||r||/||r_0|| = " << std::setw(13) << (initial_norm != 0.0 ? norm / initial_norm : norm); + } else { + mfem::out << ", norm goal = " << std::setw(13) << norm_goal; + } + mfem::out << '\n'; + } + + if (print_level >= 1 && (norm != norm)) { + mfem::out << "Initial residual for PCG-block iteration is undefined/nan." 
<< std::endl; + mfem::out << "PcgBlock: No convergence!\n"; + converged = false; + break; + } + + if (norm <= norm_goal && it >= nonlinear_options.min_iterations) { + converged = true; + break; + } else if (it >= max_iter) { + converged = false; + break; + } else if (retries_remaining <= 0 || h_scale < nonlinear_options.pcg_min_h_scale) { + converged = false; + break; + } + + assembleJacobian(X); + pcg_precond.SetOperator(*grad); + + r_block = r; + const double norm_block = norm; + bool block_finished = false; + + while (!block_finished) { + x_trial = X; + r = r_block; + norm = norm_block; + + double block_predicted = 0.0; + double block_actual = 0.0; + double trial_cumulative_work = cumulative_work; + int trial_steps = 0; + bool trial_failed = false; + std::vector trial_step_norms; + auto trial_work_history = work_history; + + for (int block_it = 0; block_it < nonlinear_options.pcg_block_len && it + trial_steps < max_iter; ++block_it) { + force = r; + force *= -1.0; + precond(force, z); + + const double rho = Dot(force, z); + if (!mfem::IsFinite(rho) || rho <= nonlinear_options.pcg_eps_descent) { + trial_failed = true; + break; + } + + double beta = 0.0; + if (have_momentum) { + const double force_dot_z_old = Dot(force, z_old); + beta = std::max(0.0, (rho - force_dot_z_old) / rho_old); + if (std::abs(force_dot_z_old) > nonlinear_options.pcg_powell_eta * rho) { + beta = 0.0; + } + } + + p = z; + if (have_momentum && beta != 0.0) { + p.Add(beta, p_old); + } + + double force_dot_p = Dot(force, p); + if (force_dot_p <= nonlinear_options.pcg_eps_descent * rho) { + beta = 0.0; + p = z; + force_dot_p = rho; + } + + hessVec(p, Hp); + const double pHp = Dot(p, Hp); + + double alpha = 0.0; + double alpha_quad = std::numeric_limits::quiet_NaN(); + const bool positive_curvature = pHp > nonlinear_options.pcg_eps_descent && mfem::IsFinite(pHp); + if (positive_curvature) { + alpha_quad = force_dot_p / pHp; + alpha = alpha_quad; + } + + const double p_norm = Norm(p); + double 
delta_ref = current_delta_ref(); + if (delta_ref <= 0.0 && alpha > 0.0 && mfem::IsFinite(alpha) && p_norm > 0.0) { + delta_ref = alpha * p_norm; + } else if (delta_ref <= 0.0) { + delta_ref = 1.0; + } + + const bool apply_trust_cap = !positive_curvature || h_scale < nonlinear_options.pcg_h_scale_init; + if (apply_trust_cap && p_norm > 0.0) { + const double alpha_cap = h_scale * delta_ref / p_norm; + if (alpha > 0.0 && mfem::IsFinite(alpha)) { + alpha = std::min(alpha, alpha_cap); + } else { + alpha = alpha_cap; + } + } + + if (!(alpha > 0.0) || !mfem::IsFinite(alpha)) { + trial_failed = true; + break; + } + + bool accepted_step = false; + double accepted_work = 0.0; + double accepted_predicted = 0.0; + double accepted_step_norm = 0.0; + + for (int ls = 0; ls <= nonlinear_options.pcg_ls_max_backtracks; ++ls) { + step = p; + step *= alpha; + add(x_trial, step, x_candidate); + + const double norm_candidate = computeResidual(x_candidate, r_candidate); + const double work = -0.5 * Dot(r, step) - 0.5 * Dot(r_candidate, step); + const double cumulative_candidate = trial_cumulative_work + work; + const double work_ref = window_max(trial_work_history); + const bool finite_candidate = mfem::IsFinite(norm_candidate) && mfem::IsFinite(work); + const bool sufficient_work = + cumulative_candidate >= work_ref - nonlinear_options.pcg_ls_armijo_c * alpha * force_dot_p; + + if (finite_candidate && (sufficient_work || norm_candidate <= norm_goal)) { + const double predicted = alpha * force_dot_p - 0.5 * alpha * alpha * pHp; + accepted_predicted = std::max(predicted, 0.0); + accepted_work = work; + accepted_step_norm = Norm(step); + norm = norm_candidate; + accepted_step = true; + break; + } + + alpha *= nonlinear_options.pcg_ls_shrink; + } + + if (!accepted_step) { + trial_failed = true; + break; + } + + x_trial = x_candidate; + r = r_candidate; + trial_cumulative_work += accepted_work; + trial_work_history.push_back(trial_cumulative_work); + 
trial_step_norms.push_back(accepted_step_norm); + block_predicted += accepted_predicted; + block_actual += accepted_work; + + p_old = p; + z_old = z; + rho_old = rho; + have_momentum = true; + ++trial_steps; + + if (norm <= norm_goal && it + trial_steps >= nonlinear_options.min_iterations) { + break; + } + } + + double trust_ratio = 1.0; + if (block_predicted > nonlinear_options.pcg_eps_descent) { + trust_ratio = block_actual / block_predicted; + } else if (block_actual < 0.0) { + trust_ratio = -std::numeric_limits::infinity(); + } + + const bool block_converged = norm <= norm_goal && it + trial_steps >= nonlinear_options.min_iterations; + const bool accept_block = + trial_steps > 0 && !trial_failed && + (block_converged || (block_actual >= 0.0 && trust_ratio >= nonlinear_options.pcg_trust_eta_bad)); + + if (accept_block) { + X = x_trial; + cumulative_work = trial_cumulative_work; + work_history = std::move(trial_work_history); + accepted_step_norms.insert(accepted_step_norms.end(), trial_step_norms.begin(), trial_step_norms.end()); + it += trial_steps; + + if (trust_ratio < nonlinear_options.pcg_trust_eta_bad) { + h_scale = std::max(h_scale * nonlinear_options.pcg_shrink, nonlinear_options.pcg_min_h_scale); + reset_momentum(); + } else if (trust_ratio >= nonlinear_options.pcg_trust_eta_good) { + h_scale = std::min(h_scale * nonlinear_options.pcg_growth, nonlinear_options.pcg_h_scale_init); + } + + if (print_level >= 2) { + mfem::out << "PcgBlock block accepted: steps = " << trial_steps << ", rho = " << std::setw(13) + << trust_ratio << ", h_scale = " << std::setw(13) << h_scale << '\n'; + } + + block_finished = true; + } else { + r = r_block; + norm = norm_block; + h_scale *= nonlinear_options.pcg_shrink; + reset_momentum(); + --retries_remaining; + + if (print_level >= 2) { + mfem::out << "PcgBlock block rejected: steps = " << trial_steps << ", rho = " << std::setw(13) + << trust_ratio << ", h_scale = " << std::setw(13) << h_scale + << ", retries left = " << 
retries_remaining << '\n'; + } + + if (retries_remaining <= 0 || h_scale < nonlinear_options.pcg_min_h_scale) { + block_finished = true; + } + } + } } - final_iter = 0; + final_iter = it; final_norm = norm; + if (print_level == 1) { + mfem::out << "PcgBlock iteration " << std::setw(3) << final_iter << " : ||r|| = " << std::setw(13) << norm + << '\n'; + } if (!converged && print_level >= 1) { - mfem::out << "PcgBlock: No convergence! Algorithm implementation pending.\n"; + mfem::out << "PcgBlock: No convergence!\n"; } } }; From 079054cb82eec6c8a53bdcfe2325dda105751202 Mon Sep 17 00:00:00 2001 From: Michael Tupek Date: Wed, 29 Apr 2026 16:30:56 -0600 Subject: [PATCH 03/27] Start implement a new potential solver. --- src/smith/numerics/equation_solver.cpp | 158 ++++++++++++++++-- src/smith/numerics/equation_solver.hpp | 33 ++++ src/smith/physics/solid_mechanics.hpp | 6 + src/smith/physics/tests/CMakeLists.txt | 1 + .../physics/tests/shallow_arch_buckling.cpp | 128 ++++++++++++++ src/smith/physics/tests/solid.cpp | 60 +++++++ .../physics/tests/solid_statics_patch.cpp | 79 +++++++++ 7 files changed, 448 insertions(+), 17 deletions(-) create mode 100644 src/smith/physics/tests/shallow_arch_buckling.cpp diff --git a/src/smith/numerics/equation_solver.cpp b/src/smith/numerics/equation_solver.cpp index 03eff72240..4840b0a1ef 100644 --- a/src/smith/numerics/equation_solver.cpp +++ b/src/smith/numerics/equation_solver.cpp @@ -92,8 +92,9 @@ class NewtonSolver : public mfem::NewtonSolver { MFEM_ASSERT(oper != NULL, "the Operator is not set (use SetOperator)."); MFEM_ASSERT(prec != NULL, "the Solver is not set (use SetSolver)."); - print_level = print_options.iterations ? 1 : print_level; - print_level = print_options.summary ? 2 : print_level; + print_level = static_cast(std::max(nonlinear_options.print_level, 0)); + print_level = print_options.iterations ? std::max(1, print_level) : print_level; + print_level = print_options.summary ? 
std::max(2, print_level) : print_level; using real_t = mfem::real_t; @@ -636,8 +637,9 @@ class TrustRegion : public mfem::NewtonSolver { MFEM_ASSERT(oper != NULL, "the Operator is not set (use SetOperator)."); MFEM_ASSERT(prec != NULL, "the Solver is not set (use SetSolver)."); - print_level = print_options.iterations ? 1 : print_level; - print_level = print_options.summary ? 2 : print_level; + print_level = static_cast(std::max(nonlinear_options.print_level, 0)); + print_level = print_options.iterations ? std::max(1, print_level) : print_level; + print_level = print_options.summary ? std::max(2, print_level) : print_level; using real_t = mfem::real_t; @@ -923,6 +925,28 @@ class PcgBlockSolver : public mfem::NewtonSolver { mutable size_t num_residuals = 0; /// Internal counter for matrix assembles mutable size_t num_jacobian_assembles = 0; + /// Internal counter for accepted blocks + mutable size_t num_blocks = 0; + /// Internal counter for rejected blocks + mutable size_t num_block_rejects = 0; + /// Internal counter for Powell restarts + mutable size_t num_powell_restarts = 0; + /// Internal counter for descent-guard restarts + mutable size_t num_descent_restarts = 0; + /// Internal counter for non-positive curvature directions + mutable size_t num_negative_curvature = 0; + /// Internal counter for line-search backtracks + mutable size_t num_line_search_backtracks = 0; + /// Internal counter for positive-curvature steps capped by the trust radius + mutable size_t num_trust_capped_steps = 0; + /// Internal counter for accepted inner PCG steps + mutable size_t num_accepted_steps = 0; + /// Internal counter for trial inner PCG steps + mutable size_t num_trial_steps = 0; + /// Last trust scale used by the solver + mutable double final_h_scale = 1.0; + /// Last accepted block trust ratio + mutable double last_trust_ratio = 0.0; #ifdef MFEM_USE_MPI /// Constructor @@ -969,19 +993,47 @@ class PcgBlockSolver : public mfem::NewtonSolver { pcg_precond.Mult(x, v); } + /// 
Return solver diagnostic counters. + PcgBlockDiagnostics diagnostics() const + { + return {.num_blocks = num_blocks, + .num_block_rejects = num_block_rejects, + .num_powell_restarts = num_powell_restarts, + .num_descent_restarts = num_descent_restarts, + .num_negative_curvature = num_negative_curvature, + .num_line_search_backtracks = num_line_search_backtracks, + .num_trust_capped_steps = num_trust_capped_steps, + .num_accepted_steps = num_accepted_steps, + .num_trial_steps = num_trial_steps, + .final_h_scale = final_h_scale, + .last_trust_ratio = last_trust_ratio}; + } + /// @overload void Mult(const mfem::Vector&, mfem::Vector& X) const { MFEM_ASSERT(oper != NULL, "the Operator is not set (use SetOperator)."); MFEM_ASSERT(prec != NULL, "the Solver is not set (use SetSolver)."); - print_level = print_options.iterations ? 1 : print_level; - print_level = print_options.summary ? 2 : print_level; + print_level = static_cast(std::max(nonlinear_options.print_level, 0)); + print_level = print_options.iterations ? std::max(1, print_level) : print_level; + print_level = print_options.summary ? 
std::max(2, print_level) : print_level; num_hess_vecs = 0; num_preconds = 0; num_residuals = 0; num_jacobian_assembles = 0; + num_blocks = 0; + num_block_rejects = 0; + num_powell_restarts = 0; + num_descent_restarts = 0; + num_negative_curvature = 0; + num_line_search_backtracks = 0; + num_trust_capped_steps = 0; + num_accepted_steps = 0; + num_trial_steps = 0; + final_h_scale = nonlinear_options.pcg_h_scale_init; + last_trust_ratio = 0.0; SLIC_ERROR_ROOT_IF(nonlinear_options.pcg_block_len <= 0, "PcgBlock requires pcg_block_len > 0"); SLIC_ERROR_ROOT_IF(nonlinear_options.pcg_window <= 0, "PcgBlock requires pcg_window > 0"); @@ -1032,6 +1084,15 @@ class PcgBlockSolver : public mfem::NewtonSolver { std::vector work_history{cumulative_work}; std::vector accepted_step_norms; + auto append_bounded = [](std::vector& history, double value, int max_size) { + history.push_back(value); + const auto bound = static_cast(max_size); + if (history.size() > bound) { + const auto num_to_remove = static_cast::difference_type>(history.size() - bound); + history.erase(history.begin(), history.begin() + num_to_remove); + } + }; + auto reset_momentum = [&]() { have_momentum = false; rho_old = 0.0; @@ -1103,9 +1164,12 @@ class PcgBlockSolver : public mfem::NewtonSolver { double block_predicted = 0.0; double block_actual = 0.0; + double block_delta_ref = current_delta_ref(); + double block_trust_size = h_scale * (block_delta_ref > 0.0 ? 
block_delta_ref : 1.0); double trial_cumulative_work = cumulative_work; int trial_steps = 0; bool trial_failed = false; + bool trial_ended_after_inner_failure = false; std::vector trial_step_norms; auto trial_work_history = work_history; @@ -1113,10 +1177,12 @@ class PcgBlockSolver : public mfem::NewtonSolver { force = r; force *= -1.0; precond(force, z); + ++num_trial_steps; const double rho = Dot(force, z); - if (!mfem::IsFinite(rho) || rho <= nonlinear_options.pcg_eps_descent) { - trial_failed = true; + if (!mfem::IsFinite(rho) || rho <= 0.0) { + trial_ended_after_inner_failure = trial_steps > 0; + trial_failed = trial_steps == 0; break; } @@ -1126,6 +1192,7 @@ class PcgBlockSolver : public mfem::NewtonSolver { beta = std::max(0.0, (rho - force_dot_z_old) / rho_old); if (std::abs(force_dot_z_old) > nonlinear_options.pcg_powell_eta * rho) { beta = 0.0; + ++num_powell_restarts; } } @@ -1139,6 +1206,7 @@ class PcgBlockSolver : public mfem::NewtonSolver { beta = 0.0; p = z; force_dot_p = rho; + ++num_descent_restarts; } hessVec(p, Hp); @@ -1146,10 +1214,12 @@ class PcgBlockSolver : public mfem::NewtonSolver { double alpha = 0.0; double alpha_quad = std::numeric_limits::quiet_NaN(); - const bool positive_curvature = pHp > nonlinear_options.pcg_eps_descent && mfem::IsFinite(pHp); + const bool positive_curvature = pHp > 0.0 && mfem::IsFinite(pHp); if (positive_curvature) { alpha_quad = force_dot_p / pHp; alpha = alpha_quad; + } else { + ++num_negative_curvature; } const double p_norm = Norm(p); @@ -1159,19 +1229,28 @@ class PcgBlockSolver : public mfem::NewtonSolver { } else if (delta_ref <= 0.0) { delta_ref = 1.0; } + block_delta_ref = delta_ref; + block_trust_size = h_scale * delta_ref; const bool apply_trust_cap = !positive_curvature || h_scale < nonlinear_options.pcg_h_scale_init; + bool trust_capped = false; if (apply_trust_cap && p_norm > 0.0) { const double alpha_cap = h_scale * delta_ref / p_norm; if (alpha > 0.0 && mfem::IsFinite(alpha)) { + if (alpha_cap < 
alpha) { + ++num_trust_capped_steps; + trust_capped = true; + } alpha = std::min(alpha, alpha_cap); } else { alpha = alpha_cap; + trust_capped = true; } } if (!(alpha > 0.0) || !mfem::IsFinite(alpha)) { - trial_failed = true; + trial_ended_after_inner_failure = trial_steps > 0; + trial_failed = trial_steps == 0; break; } @@ -1179,6 +1258,7 @@ class PcgBlockSolver : public mfem::NewtonSolver { double accepted_work = 0.0; double accepted_predicted = 0.0; double accepted_step_norm = 0.0; + int accepted_ls_count = 0; for (int ls = 0; ls <= nonlinear_options.pcg_ls_max_backtracks; ++ls) { step = p; @@ -1198,6 +1278,7 @@ class PcgBlockSolver : public mfem::NewtonSolver { accepted_predicted = std::max(predicted, 0.0); accepted_work = work; accepted_step_norm = Norm(step); + accepted_ls_count = ls; norm = norm_candidate; accepted_step = true; break; @@ -1207,23 +1288,33 @@ class PcgBlockSolver : public mfem::NewtonSolver { } if (!accepted_step) { - trial_failed = true; + trial_ended_after_inner_failure = trial_steps > 0; + trial_failed = trial_steps == 0; break; } x_trial = x_candidate; r = r_candidate; trial_cumulative_work += accepted_work; - trial_work_history.push_back(trial_cumulative_work); - trial_step_norms.push_back(accepted_step_norm); + append_bounded(trial_work_history, trial_cumulative_work, nonlinear_options.pcg_window); + append_bounded(trial_step_norms, accepted_step_norm, nonlinear_options.pcg_delta_avg_window); block_predicted += accepted_predicted; block_actual += accepted_work; + num_line_search_backtracks += static_cast(accepted_ls_count); + + if (print_level >= 2) { + mfem::out << " PcgBlock step " << std::setw(3) << (it + trial_steps + 1) << " : alpha = " << std::setw(13) + << alpha << ", approx work = " << std::setw(13) << accepted_predicted + << ", achieved work = " << std::setw(13) << accepted_work << ", trust size = " << std::setw(13) + << block_trust_size << ", capped = " << trust_capped << ", ls = " << accepted_ls_count << '\n'; + } p_old = p; 
z_old = z; rho_old = rho; have_momentum = true; ++trial_steps; + ++num_accepted_steps; if (norm <= norm_goal && it + trial_steps >= nonlinear_options.min_iterations) { break; @@ -1242,24 +1333,42 @@ class PcgBlockSolver : public mfem::NewtonSolver { trial_steps > 0 && !trial_failed && (block_converged || (block_actual >= 0.0 && trust_ratio >= nonlinear_options.pcg_trust_eta_bad)); + const double old_h_scale = h_scale; + const bool prefix_accept = accept_block && trial_ended_after_inner_failure; + bool reset_next_momentum = false; if (accept_block) { X = x_trial; cumulative_work = trial_cumulative_work; work_history = std::move(trial_work_history); accepted_step_norms.insert(accepted_step_norms.end(), trial_step_norms.begin(), trial_step_norms.end()); + if (accepted_step_norms.size() > static_cast(nonlinear_options.pcg_delta_avg_window)) { + accepted_step_norms.erase(accepted_step_norms.begin(), + accepted_step_norms.end() - nonlinear_options.pcg_delta_avg_window); + } it += trial_steps; + ++num_blocks; if (trust_ratio < nonlinear_options.pcg_trust_eta_bad) { h_scale = std::max(h_scale * nonlinear_options.pcg_shrink, nonlinear_options.pcg_min_h_scale); reset_momentum(); + reset_next_momentum = true; + } else if (trial_ended_after_inner_failure) { + reset_momentum(); + reset_next_momentum = true; } else if (trust_ratio >= nonlinear_options.pcg_trust_eta_good) { h_scale = std::min(h_scale * nonlinear_options.pcg_growth, nonlinear_options.pcg_h_scale_init); } + const double next_trust_size = h_scale * block_delta_ref; if (print_level >= 2) { - mfem::out << "PcgBlock block accepted: steps = " << trial_steps << ", rho = " << std::setw(13) - << trust_ratio << ", h_scale = " << std::setw(13) << h_scale << '\n'; + mfem::out << "PcgBlock block accepted: steps = " << std::setw(3) << trial_steps + << ", prefix = " << prefix_accept << ", approx work = " << std::setw(13) << block_predicted + << ", achieved work = " << std::setw(13) << block_actual << ", rho = " << std::setw(13) 
+ << trust_ratio << ", h_scale = " << std::setw(13) << old_h_scale << " -> " << std::setw(13) + << h_scale << ", trust size = " << std::setw(13) << block_trust_size << " -> " << std::setw(13) + << next_trust_size << ", reset momentum = " << reset_next_momentum << '\n'; } + last_trust_ratio = trust_ratio; block_finished = true; } else { @@ -1268,10 +1377,15 @@ class PcgBlockSolver : public mfem::NewtonSolver { h_scale *= nonlinear_options.pcg_shrink; reset_momentum(); --retries_remaining; + ++num_block_rejects; + const double next_trust_size = h_scale * block_delta_ref; if (print_level >= 2) { - mfem::out << "PcgBlock block rejected: steps = " << trial_steps << ", rho = " << std::setw(13) - << trust_ratio << ", h_scale = " << std::setw(13) << h_scale + mfem::out << "PcgBlock block rejected: steps = " << std::setw(3) << trial_steps + << ", approx work = " << std::setw(13) << block_predicted << ", achieved work = " << std::setw(13) + << block_actual << ", rho = " << std::setw(13) << trust_ratio << ", h_scale = " << std::setw(13) + << old_h_scale << " -> " << std::setw(13) << h_scale << ", trust size = " << std::setw(13) + << block_trust_size << " -> " << std::setw(13) << next_trust_size << ", reset momentum = 1" << ", retries left = " << retries_remaining << '\n'; } @@ -1284,6 +1398,7 @@ class PcgBlockSolver : public mfem::NewtonSolver { final_iter = it; final_norm = norm; + final_h_scale = h_scale; if (print_level == 1) { mfem::out << "PcgBlock iteration " << std::setw(3) << final_iter << " : ||r|| = " << std::setw(13) << norm @@ -1336,6 +1451,15 @@ void EquationSolver::solve(mfem::Vector& x) const nonlin_solver_->Mult(zero, x); } +std::optional EquationSolver::pcgBlockDiagnostics() const +{ + auto* pcg_block = dynamic_cast(nonlin_solver_.get()); + if (!pcg_block) { + return std::nullopt; + } + return pcg_block->diagnostics(); +} + void SuperLUSolver::Mult(const mfem::Vector& input, mfem::Vector& output) const { SLIC_ERROR_ROOT_IF(!superlu_mat_, "Operator must be set 
prior to solving with SuperLU"); diff --git a/src/smith/numerics/equation_solver.hpp b/src/smith/numerics/equation_solver.hpp index 33dcc42621..3700cad532 100644 --- a/src/smith/numerics/equation_solver.hpp +++ b/src/smith/numerics/equation_solver.hpp @@ -12,6 +12,7 @@ #pragma once +#include #include #include #include @@ -26,6 +27,32 @@ namespace smith { +/// Diagnostic counters for the nonlinear PCG-block solver +struct PcgBlockDiagnostics { + /// Number of accepted blocks + size_t num_blocks = 0; + /// Number of rejected blocks + size_t num_block_rejects = 0; + /// Number of Powell restarts + size_t num_powell_restarts = 0; + /// Number of descent-guard restarts + size_t num_descent_restarts = 0; + /// Number of non-positive curvature directions + size_t num_negative_curvature = 0; + /// Number of line-search backtracks + size_t num_line_search_backtracks = 0; + /// Number of positive-curvature steps capped by the trust radius + size_t num_trust_capped_steps = 0; + /// Number of accepted inner PCG steps + size_t num_accepted_steps = 0; + /// Number of trial inner PCG steps + size_t num_trial_steps = 0; + /// Last trust scale used by the solver + double final_h_scale = 1.0; + /// Last accepted block trust ratio + double last_trust_ratio = 0.0; +}; + /** * @brief This class manages the objects typically required to solve a nonlinear set of equations arising from * discretization of a PDE of the form F(x) = 0. Specifically, it has @@ -94,6 +121,12 @@ class EquationSolver { */ const mfem::NewtonSolver& nonlinearSolver() const { return *nonlin_solver_; } + /** + * Returns diagnostic counters when the nonlinear solver is PcgBlock. 
+ * @return Optional PCG-block diagnostics; empty for other nonlinear solvers + */ + std::optional pcgBlockDiagnostics() const; + /** * Returns the underlying linear solver object * @return A non-owning reference to the underlying linear solver diff --git a/src/smith/physics/solid_mechanics.hpp b/src/smith/physics/solid_mechanics.hpp index f6867ee72b..e717272274 100644 --- a/src/smith/physics/solid_mechanics.hpp +++ b/src/smith/physics/solid_mechanics.hpp @@ -1383,6 +1383,12 @@ class SolidMechanics, std::integer_se /// @brief getter for nodal forces (before zeroing-out essential dofs) const smith::FiniteElementDual& reactions() const { return reactions_; }; + /// @brief Get the equation solver used by this physics module + smith::EquationSolver& equationSolver() { return *nonlin_solver_; } + + /// @overload + const smith::EquationSolver& equationSolver() const { return *nonlin_solver_; } + protected: /// The compile-time finite element trial space for displacement and velocity (H1 of order p) using trial = H1; diff --git a/src/smith/physics/tests/CMakeLists.txt b/src/smith/physics/tests/CMakeLists.txt index a0ba90546e..292b9140fa 100644 --- a/src/smith/physics/tests/CMakeLists.txt +++ b/src/smith/physics/tests/CMakeLists.txt @@ -80,6 +80,7 @@ set(physics_parallel_test_sources dynamic_thermal_adjoint.cpp solid_reaction_adjoint.cpp thermal_nonlinear_solve.cpp + shallow_arch_buckling.cpp ) set(physics_parallel_tribol_test_sources contact_patch.cpp diff --git a/src/smith/physics/tests/shallow_arch_buckling.cpp b/src/smith/physics/tests/shallow_arch_buckling.cpp new file mode 100644 index 0000000000..3c36ec1a66 --- /dev/null +++ b/src/smith/physics/tests/shallow_arch_buckling.cpp @@ -0,0 +1,128 @@ +// Copyright (c) Lawrence Livermore National Security, LLC and +// other Smith Project Developers. See the top-level LICENSE file for +// details. 
+// +// SPDX-License-Identifier: (BSD-3-Clause) + +#include +#include +#include +#include + +#include "gtest/gtest.h" +#include "mpi.h" +#include "mfem.hpp" + +#include "smith/infrastructure/application_manager.hpp" +#include "smith/numerics/functional/domain.hpp" +#include "smith/numerics/functional/tensor.hpp" +#include "smith/numerics/solver_config.hpp" +#include "smith/physics/materials/solid_material.hpp" +#include "smith/physics/mesh.hpp" +#include "smith/physics/solid_mechanics.hpp" +#include "smith/physics/state/state_manager.hpp" + +namespace smith { +namespace { + +constexpr double length = 10.0; +constexpr double thickness = 0.25; +constexpr double rise = 0.75; +constexpr double end_tol = 1.0e-8; + +void warpToShallowArch(smith::Mesh& mesh) +{ + auto& mfem_mesh = mesh.mfemParMesh(); + for (int i = 0; i < mfem_mesh.GetNV(); ++i) { + auto* vertex = mfem_mesh.GetVertex(i); + const double xi = 2.0 * vertex[0] / length - 1.0; + vertex[1] += rise * (1.0 - xi * xi); + } + + mesh.mfemParMesh().DeleteGeometricFactors(); + auto* nodes = mesh.mfemParMesh().GetNodes(); + auto* coords = nodes->ReadWrite(); + const int vdim = nodes->VectorDim(); + const int scalar_size = nodes->Size() / vdim; + + for (int i = 0; i < scalar_size; ++i) { + const double x = coords[i]; + const double y = coords[i + scalar_size]; + const double xi = 2.0 * x / length - 1.0; + coords[i + scalar_size] = y + rise * (1.0 - xi * xi); + } +} + +} // namespace + +TEST(ShallowArchBuckling, NeoHookeanTractionControlled) +{ + MPI_Barrier(MPI_COMM_WORLD); + + constexpr int p = 1; + constexpr int dim = 2; + constexpr int nx = 48; + constexpr int ny = 4; + + axom::sidre::DataStore datastore; + smith::StateManager::initialize(datastore, "shallow_arch_buckling"); + + auto mesh = std::make_shared( + mfem::Mesh::MakeCartesian2D(nx, ny, mfem::Element::QUADRILATERAL, true, length, thickness), "arch_mesh", 0, 0); + warpToShallowArch(*mesh); + + mesh->addDomainOfBoundaryElements("left_end", + [](std::vector 
vertices, int) { return average(vertices)[0] < end_tol; }); + mesh->addDomainOfBoundaryElements( + "right_end", [](std::vector vertices, int) { return average(vertices)[0] > length - end_tol; }); + mesh->addDomainOfBoundaryElements("top_face", [](std::vector, int attr) { return attr == 3; }); + EXPECT_GT(mesh->domain("top_face").total_elements(), 0); + + smith::LinearSolverOptions linear_options{.linear_solver = LinearSolver::CG, + .preconditioner = Preconditioner::HypreAMG, + .relative_tol = 1.0e-8, + .absolute_tol = 1.0e-14, + .max_iterations = 500, + .print_level = 0}; + + smith::NonlinearSolverOptions nonlinear_options{.nonlin_solver = NonlinearSolver::PcgBlock, + .relative_tol = 1.0e-8, + .absolute_tol = 1.0e-10, + .max_iterations = 500, + .print_level = 2, + .pcg_block_len = 10, + .pcg_max_block_retries = 40}; + + SolidMechanics solid(nonlinear_options, linear_options, solid_mechanics::default_quasistatic_options, + "shallow_arch", mesh); + + solid_mechanics::NeoHookean mat{.density = 1.0, .K = 100.0, .G = 10.0}; + solid.setMaterial(mat, mesh->entireBody()); + solid.setFixedBCs(mesh->domain("left_end")); + solid.setFixedBCs(mesh->domain("right_end")); + + constexpr double final_traction = 0.2; + solid.setTraction([](auto, auto, double t) { return vec2{{0.0, -final_traction * t}}; }, mesh->domain("top_face")); + + solid.completeSetup(); + solid.outputStateToDisk("shallow_arch_buckling"); + + constexpr int num_steps = 40; + for (int step = 0; step < num_steps; ++step) { + EXPECT_NO_THROW(solid.advanceTimestep(1.0 / num_steps)); + solid.outputStateToDisk("shallow_arch_buckling"); + } + + const auto diagnostics = solid.equationSolver().pcgBlockDiagnostics(); + ASSERT_TRUE(diagnostics.has_value()); + EXPECT_GT(diagnostics->num_accepted_steps, 0); +} + +} // namespace smith + +int main(int argc, char* argv[]) +{ + ::testing::InitGoogleTest(&argc, argv); + smith::ApplicationManager applicationManager(argc, argv); + return RUN_ALL_TESTS(); +} diff --git 
a/src/smith/physics/tests/solid.cpp b/src/smith/physics/tests/solid.cpp index e48bed601f..44a68c4240 100644 --- a/src/smith/physics/tests/solid.cpp +++ b/src/smith/physics/tests/solid.cpp @@ -236,6 +236,66 @@ TEST(SolidMechanics, 2DQuadParameterizedStatic) { functional_parameterized_solid TEST(SolidMechanics, 3DQuadStaticJ2) { functional_solid_test_static_J2(); } +TEST(SolidMechanics, PcgBlockLinearElasticity) +{ + MPI_Barrier(MPI_COMM_WORLD); + + constexpr int p = 1; + constexpr int dim = 2; + constexpr int serial_refinement = 1; + constexpr int parallel_refinement = 0; + + axom::sidre::DataStore datastore; + smith::StateManager::initialize(datastore, "pcg_block_linear_elasticity"); + + std::string filename = SMITH_REPO_DIR "/data/meshes/square.mesh"; + auto mesh = + std::make_shared(buildMeshFromFile(filename), "mesh", serial_refinement, parallel_refinement); + mesh->addDomainOfBoundaryElements("fixed", by_attr(1)); + + smith::LinearSolverOptions linear_options{.linear_solver = LinearSolver::CG, + .preconditioner = Preconditioner::HypreL1Jacobi, + .relative_tol = 1.0e-14, + .absolute_tol = 1.0e-16, + .max_iterations = 500, + .print_level = 0}; + + smith::NonlinearSolverOptions nonlinear_options{.nonlin_solver = NonlinearSolver::PcgBlock, + .relative_tol = 1.0e-12, + .absolute_tol = 1.0e-14, + .max_iterations = 200, + .print_level = 0, + .pcg_block_len = 10}; + + SolidMechanics solid(nonlinear_options, linear_options, solid_mechanics::default_quasistatic_options, + "pcg_block_solid", mesh); + + solid_mechanics::LinearIsotropic mat{.density = 1.0, .K = 0.5, .G = 1.0}; + solid.setMaterial(mat, mesh->entireBody()); + solid.setFixedBCs(mesh->domain("fixed")); + + tensor constant_force{}; + constant_force[0] = 0.1; + constant_force[1] = -0.05; + solid_mechanics::ConstantBodyForce force{constant_force}; + solid.addBodyForce(force, mesh->entireBody()); + + solid.completeSetup(); + solid.advanceTimestep(1.0); + + const auto& nonlinear_solver = 
solid.equationSolver().nonlinearSolver(); + const auto diagnostics = solid.equationSolver().pcgBlockDiagnostics(); + + ASSERT_TRUE(diagnostics.has_value()); + EXPECT_TRUE(nonlinear_solver.GetConverged()); + EXPECT_LE(nonlinear_solver.GetNumIterations(), solid.displacement().space().GlobalTrueVSize()); + EXPECT_LT(nonlinear_solver.GetFinalRelNorm(), 1.0e-10); + EXPECT_EQ(diagnostics->num_block_rejects, 0u); + EXPECT_EQ(diagnostics->num_powell_restarts, 0u); + EXPECT_EQ(diagnostics->num_negative_curvature, 0u); + EXPECT_EQ(diagnostics->num_line_search_backtracks, 0u); +} + TEST(SolidMechanics, TDofBoundaryCondition) { /* diff --git a/src/smith/physics/tests/solid_statics_patch.cpp b/src/smith/physics/tests/solid_statics_patch.cpp index 9ed9daa247..2d09ab2cff 100644 --- a/src/smith/physics/tests/solid_statics_patch.cpp +++ b/src/smith/physics/tests/solid_statics_patch.cpp @@ -241,6 +241,78 @@ double solution_error(PatchBoundaryCondition bc) return computeL2Error(solid.displacement(), exact_solution_coef); } +template +double pcg_block_solution_error(PatchBoundaryCondition bc) +{ + MPI_Barrier(MPI_COMM_WORLD); + + axom::sidre::DataStore datastore; + smith::StateManager::initialize(datastore, "solid_static_pcg_block_solve"); + + constexpr int p = element_type::order; + constexpr int dim = dimension_of(element_type::geometry); + + static_assert(dim == 2 || dim == 3, "Dimension must be 2 or 3 for solid test"); + + AffineSolution exact_displacement; + + std::string meshdir = std::string(SMITH_REPO_DIR) + "/data/meshes/"; + std::string filename; + switch (element_type::geometry) { + case mfem::Geometry::TRIANGLE: + filename = meshdir + "patch2D_tris.mesh"; + break; + case mfem::Geometry::SQUARE: + filename = meshdir + "patch2D_quads.mesh"; + break; + case mfem::Geometry::TETRAHEDRON: + filename = meshdir + "patch3D_tets.mesh"; + break; + case mfem::Geometry::CUBE: + filename = meshdir + "patch3D_hexes.mesh"; + break; + default: + SLIC_ERROR_ROOT("unsupported element type 
for patch test"); + break; + } + + auto mesh = std::make_shared(buildMeshFromFile(filename), "mesh_tag"); + + smith::NonlinearSolverOptions nonlin_solver_options{.nonlin_solver = NonlinearSolver::PcgBlock, + .relative_tol = 0.0, + .absolute_tol = 5.0e-14, + .max_iterations = 200, + .print_level = 0, + .pcg_block_len = 10, + .pcg_ls_max_backtracks = 8}; + + auto equation_solver = std::make_unique( + nonlin_solver_options, smith::solid_mechanics::default_linear_options, mesh->getComm()); + + SolidMechanics solid(std::move(equation_solver), solid_mechanics::default_quasistatic_options, "solid", mesh); + + solid_mechanics::NeoHookean mat{.density = 1.0, .K = 1.0, .G = 1.0}; + solid.setMaterial(mat, mesh->entireBody()); + + mesh->addDomainOfBoundaryElements("essential_boundary", by_attr(essentialBoundaryAttributes(bc))); + exact_displacement.applyLoads(mat, solid, mesh->domain("essential_boundary")); + + solid.completeSetup(); + solid.advanceTimestep(1.0); + + const auto& nonlinear_solver = solid.equationSolver().nonlinearSolver(); + const auto diagnostics = solid.equationSolver().pcgBlockDiagnostics(); + EXPECT_TRUE(nonlinear_solver.GetConverged()); + EXPECT_LT(nonlinear_solver.GetFinalRelNorm(), 1.0e-10); + EXPECT_TRUE(diagnostics.has_value()); + if (diagnostics.has_value()) { + EXPECT_GT(diagnostics->num_blocks, 0u); + } + + mfem::VectorFunctionCoefficient exact_solution_coef(dim, exact_displacement); + return computeL2Error(solid.displacement(), exact_solution_coef); +} + /** * @brief Solve pressure-driven problem with 10% uniaxial strain and compare numerical solution to exact answer * @@ -464,6 +536,13 @@ TEST(SolidMechanics, PatchTest2dQ1EssentialAndNaturalBcs) EXPECT_LT(quad_error, tol); } +TEST(SolidMechanics, PcgBlockPatchTest2dQ1EssentialAndNaturalBcs) +{ + using quadrilateral = finite_element >; + double quad_error = pcg_block_solution_error(PatchBoundaryCondition::EssentialAndNatural); + EXPECT_LT(quad_error, 1.0e-6); +} + TEST(SolidMechanics, 
PatchTest3dQ1EssentialAndNaturalBcs) { using tetrahedron = finite_element >; From 3eda3a3c51d28745a395befdc57c80ccaaa8ffe7 Mon Sep 17 00:00:00 2001 From: Michael Tupek Date: Thu, 30 Apr 2026 10:35:27 -0600 Subject: [PATCH 04/27] Start implementing a more efficient interface with matrix-free operations that we can use with our new matrix-free nonlinear solver. --- src/smith/numerics/equation_solver.cpp | 66 +++++++- src/smith/numerics/equation_solver.hpp | 37 +++++ src/smith/numerics/functional/functional.hpp | 66 +++++++- .../tests/functional_comparisons.cpp | 20 +++ src/smith/numerics/solver_config.hpp | 2 +- .../numerics/tests/test_equationsolver.cpp | 81 ++++++++++ src/smith/physics/solid_mechanics.hpp | 23 +++ .../physics/tests/shallow_arch_buckling.cpp | 150 +++++++++++++----- 8 files changed, 398 insertions(+), 47 deletions(-) diff --git a/src/smith/numerics/equation_solver.cpp b/src/smith/numerics/equation_solver.cpp index 4840b0a1ef..d6ccbe5b36 100644 --- a/src/smith/numerics/equation_solver.cpp +++ b/src/smith/numerics/equation_solver.cpp @@ -7,6 +7,7 @@ #include "smith/numerics/equation_solver.hpp" #include +#include #include #include #include @@ -925,6 +926,16 @@ class PcgBlockSolver : public mfem::NewtonSolver { mutable size_t num_residuals = 0; /// Internal counter for matrix assembles mutable size_t num_jacobian_assembles = 0; + /// Internal counter for preconditioner operator updates + mutable size_t num_preconditioner_updates = 0; + /// Internal counter for accepted prefix blocks + mutable size_t num_prefix_accepts = 0; + /// Internal counter for momentum resets + mutable size_t num_momentum_resets = 0; + /// Internal counter for nonzero PCG beta values + mutable size_t num_nonzero_beta = 0; + /// Internal counter for zero PCG beta values + mutable size_t num_zero_beta = 0; /// Internal counter for accepted blocks mutable size_t num_blocks = 0; /// Internal counter for rejected blocks @@ -948,6 +959,9 @@ class PcgBlockSolver : public 
mfem::NewtonSolver { /// Last accepted block trust ratio mutable double last_trust_ratio = 0.0; + /// Optional matrix-free tangent action, y = J(x) dx + MatrixFreeTangentAction matrix_free_tangent_action; + #ifdef MFEM_USE_MPI /// Constructor PcgBlockSolver(MPI_Comm comm_, const NonlinearSolverOptions& nonlinear_opts, Solver& preconditioner) @@ -977,12 +991,22 @@ class PcgBlockSolver : public mfem::NewtonSolver { return Norm(residual); } - /// Apply the assembled Jacobian to a vector. - void hessVec(const mfem::Vector& x, mfem::Vector& v) const + /// Set an optional matrix-free tangent action. + void setMatrixFreeTangentAction(MatrixFreeTangentAction tangent_action) + { + matrix_free_tangent_action = std::move(tangent_action); + } + + /// Apply the tangent at x to dx. + void hessVec(const mfem::Vector& x, const mfem::Vector& dx, mfem::Vector& y) const { SMITH_MARK_FUNCTION; ++num_hess_vecs; - grad->Mult(x, v); + if (matrix_free_tangent_action) { + matrix_free_tangent_action(x, dx, y); + } else { + grad->Mult(dx, y); + } } /// Apply the configured nonlinear PCG preconditioner. @@ -996,7 +1020,16 @@ class PcgBlockSolver : public mfem::NewtonSolver { /// Return solver diagnostic counters. 
PcgBlockDiagnostics diagnostics() const { - return {.num_blocks = num_blocks, + return {.num_residuals = num_residuals, + .num_hess_vecs = num_hess_vecs, + .num_preconds = num_preconds, + .num_jacobian_assembles = num_jacobian_assembles, + .num_preconditioner_updates = num_preconditioner_updates, + .num_prefix_accepts = num_prefix_accepts, + .num_momentum_resets = num_momentum_resets, + .num_nonzero_beta = num_nonzero_beta, + .num_zero_beta = num_zero_beta, + .num_blocks = num_blocks, .num_block_rejects = num_block_rejects, .num_powell_restarts = num_powell_restarts, .num_descent_restarts = num_descent_restarts, @@ -1023,6 +1056,11 @@ class PcgBlockSolver : public mfem::NewtonSolver { num_preconds = 0; num_residuals = 0; num_jacobian_assembles = 0; + num_preconditioner_updates = 0; + num_prefix_accepts = 0; + num_momentum_resets = 0; + num_nonzero_beta = 0; + num_zero_beta = 0; num_blocks = 0; num_block_rejects = 0; num_powell_restarts = 0; @@ -1098,6 +1136,7 @@ class PcgBlockSolver : public mfem::NewtonSolver { rho_old = 0.0; p_old = 0.0; z_old = 0.0; + ++num_momentum_resets; }; auto window_max = [&](const std::vector& history) { @@ -1151,6 +1190,7 @@ class PcgBlockSolver : public mfem::NewtonSolver { } assembleJacobian(X); + ++num_preconditioner_updates; pcg_precond.SetOperator(*grad); r_block = r; @@ -1208,8 +1248,13 @@ class PcgBlockSolver : public mfem::NewtonSolver { force_dot_p = rho; ++num_descent_restarts; } + if (beta == 0.0) { + ++num_zero_beta; + } else { + ++num_nonzero_beta; + } - hessVec(p, Hp); + hessVec(X, p, Hp); const double pHp = Dot(p, Hp); double alpha = 0.0; @@ -1337,6 +1382,9 @@ class PcgBlockSolver : public mfem::NewtonSolver { const bool prefix_accept = accept_block && trial_ended_after_inner_failure; bool reset_next_momentum = false; if (accept_block) { + if (prefix_accept) { + ++num_prefix_accepts; + } X = x_trial; cumulative_work = trial_cumulative_work; work_history = std::move(trial_work_history); @@ -1442,6 +1490,14 @@ void 
EquationSolver::setOperator(const mfem::Operator& op) } } +void EquationSolver::setMatrixFreeTangentAction(MatrixFreeTangentAction tangent_action) +{ + auto* pcg_block = dynamic_cast(nonlin_solver_.get()); + if (pcg_block) { + pcg_block->setMatrixFreeTangentAction(std::move(tangent_action)); + } +} + void EquationSolver::solve(mfem::Vector& x) const { mfem::Vector zero(x); diff --git a/src/smith/numerics/equation_solver.hpp b/src/smith/numerics/equation_solver.hpp index 3700cad532..7e5882a74a 100644 --- a/src/smith/numerics/equation_solver.hpp +++ b/src/smith/numerics/equation_solver.hpp @@ -13,6 +13,7 @@ #pragma once #include +#include #include #include #include @@ -27,8 +28,34 @@ namespace smith { +/** + * @brief Matrix-free tangent action callback. + * + * The callback evaluates y = J(x) dx for the current nonlinear state x + * without requiring EquationSolver to assemble J. + */ +using MatrixFreeTangentAction = std::function; + /// Diagnostic counters for the nonlinear PCG-block solver struct PcgBlockDiagnostics { + /// Number of nonlinear residual evaluations + size_t num_residuals = 0; + /// Number of assembled Jacobian-vector products + size_t num_hess_vecs = 0; + /// Number of preconditioner applications + size_t num_preconds = 0; + /// Number of assembled Jacobians + size_t num_jacobian_assembles = 0; + /// Number of preconditioner operator updates + size_t num_preconditioner_updates = 0; + /// Number of accepted prefix blocks + size_t num_prefix_accepts = 0; + /// Number of momentum resets + size_t num_momentum_resets = 0; + /// Number of steps with nonzero PCG beta + size_t num_nonzero_beta = 0; + /// Number of steps with zero PCG beta + size_t num_zero_beta = 0; /// Number of accepted blocks size_t num_blocks = 0; /// Number of rejected blocks @@ -103,6 +130,16 @@ class EquationSolver { */ void setOperator(const mfem::Operator& op); + /** + * @brief Sets an optional matrix-free tangent action for nonlinear solvers that can use J(x) dx directly. 
+ * + * Solvers that do not support matrix-free tangent actions ignore this callback. Supported solvers retain their + * assembled-gradient fallback when no callback is set. + * + * @param[in] tangent_action Callback evaluating y = J(x) dx. + */ + void setMatrixFreeTangentAction(MatrixFreeTangentAction tangent_action); + /** * Solves the system F(x) = 0 * @param[in,out] x Solution to the system of nonlinear equations diff --git a/src/smith/numerics/functional/functional.hpp b/src/smith/numerics/functional/functional.hpp index f5a40d8259..730d4570d6 100644 --- a/src/smith/numerics/functional/functional.hpp +++ b/src/smith/numerics/functional/functional.hpp @@ -828,7 +828,7 @@ class Functional { } }; - uint64_t max_buffer_size() + uint64_t max_buffer_size() const { uint64_t max_entries = 0; for (auto& integral : form_.integrals_) { @@ -849,6 +849,69 @@ class Functional { return max_entries; } + void AssembleDiagonal(mfem::Vector& diag) const override + { + SLIC_ERROR_ROOT_IF(form_.test_function_space_.family != Family::H1 || + form_.trial_function_spaces_[which_argument].family != Family::H1, + "Functional gradient diagonal assembly currently supports H1 test/trial spaces only."); + SLIC_ERROR_ROOT_IF(test_space_ != trial_space_, + "Functional gradient diagonal assembly currently requires the same test/trial FE space."); + SLIC_ERROR_ROOT_IF(form_.output_L_.Size() != form_.input_L_[which_argument].Size(), + "Functional gradient diagonal assembly requires square local operators."); + + mfem::Vector local_diag(form_.output_L_.Size(), form_.mem_type); + local_diag = 0.0; + + std::vector K_elem_buffer(max_buffer_size()); + + for (auto& integral : form_.integrals_) { + // if this integral's derivative isn't identically zero + if (integral.functional_to_integral_index_.count(which_argument) > 0) { + Domain& dom = integral.domain_; + + uint32_t id = integral.functional_to_integral_index_.at(which_argument); + const auto& G_test = 
dom.get_restriction(form_.test_function_space_); + const auto& G_trial = dom.get_restriction(form_.trial_function_spaces_[which_argument]); + for (const auto& [geom, calculate_element_matrices_func] : integral.element_gradient_[id]) { + const auto& test_restriction = G_test.restrictions.at(geom); + const auto& trial_restriction = G_trial.restrictions.at(geom); + + CPUArrayView K_e(K_elem_buffer.data(), test_restriction.num_elements, + trial_restriction.nodes_per_elem * trial_restriction.components, + test_restriction.nodes_per_elem * test_restriction.components); + detail::zero_out(K_e); + + calculate_element_matrices_func(K_e); + + uint32_t rows_per_elem = uint32_t(test_restriction.nodes_per_elem * test_restriction.components); + uint32_t cols_per_elem = uint32_t(trial_restriction.nodes_per_elem * trial_restriction.components); + + std::vector test_vdofs(rows_per_elem); + std::vector trial_vdofs(cols_per_elem); + + for (uint32_t e = 0; e < test_restriction.num_elements; e++) { + test_restriction.GetElementVDofs(int(e), test_vdofs); + trial_restriction.GetElementVDofs(int(e), trial_vdofs); + + for (uint32_t i = 0; i < cols_per_elem; i++) { + int col = int(trial_vdofs[i].index()); + + for (uint32_t j = 0; j < rows_per_elem; j++) { + int row = int(test_vdofs[j].index()); + if (row == col) { + local_diag(row) += K_e(e, i, j); + } + } + } + } + } + } + } + + diag.SetSize(Height(), form_.mem_type); + form_.P_test_->MultTranspose(local_diag, diag); + } + std::unique_ptr assemble() { if (row_ptr.empty()) { @@ -977,6 +1040,7 @@ class Functional { }; friend auto assemble(Gradient& g) { return g.assemble(); } + friend void assemble_diagonal(const Gradient& g, mfem::Vector& diag) { g.AssembleDiagonal(diag); } private: /// @brief The "parent" @p Functional to calculate gradients with diff --git a/src/smith/numerics/functional/tests/functional_comparisons.cpp b/src/smith/numerics/functional/tests/functional_comparisons.cpp index 12314e45d0..48a8638fb6 100644 --- 
a/src/smith/numerics/functional/tests/functional_comparisons.cpp +++ b/src/smith/numerics/functional/tests/functional_comparisons.cpp @@ -184,6 +184,15 @@ void functional_test(mfem::ParMesh& mesh, H1

test, H1

trial, Dimension J_func = assemble(drdU); + mfem::Vector diag_direct(U.Size()); + drdU.AssembleDiagonal(diag_direct); + + mfem::Vector diag_assembled(U.Size()); + J_func->GetDiag(diag_assembled); + + mfem::Vector diag_diff(U.Size()); + subtract(diag_direct, diag_assembled, diag_diff); + // Compute the gradient action using standard MFEM and functional // mfem::Vector g1 = (*J_mfem) * U; mfem::Vector g1(U.Size()); @@ -209,6 +218,7 @@ void functional_test(mfem::ParMesh& mesh, H1

test, H1

trial, Dimension test, H1 trial, Dim std::unique_ptr J_func = assemble(drdU); + mfem::Vector diag_direct(U.Size()); + drdU.AssembleDiagonal(diag_direct); + + mfem::Vector diag_assembled(U.Size()); + J_func->GetDiag(diag_assembled); + + mfem::Vector diag_diff(U.Size()); + subtract(diag_direct, diag_assembled, diag_diff); + // mfem::Vector g1 = (*J_mfem) * U; mfem::Vector g1(U.Size()); J_mfem->Mult(U, g1); @@ -325,6 +344,7 @@ void functional_test(mfem::ParMesh& mesh, H1 test, H1 trial, Dim std::cout << "||g1-g3||/||g1||: " << diff2.Norml2() / g1.Norml2() << std::endl; } + EXPECT_NEAR(0., diag_diff.Norml2() / diag_assembled.Norml2(), 1.e-14); EXPECT_NEAR(0., diff1.Norml2() / g1.Norml2(), 1.e-14); EXPECT_NEAR(0., diff2.Norml2() / g1.Norml2(), 1.e-14); } diff --git a/src/smith/numerics/solver_config.hpp b/src/smith/numerics/solver_config.hpp index e7c26bda35..ecbfde4cd9 100644 --- a/src/smith/numerics/solver_config.hpp +++ b/src/smith/numerics/solver_config.hpp @@ -478,7 +478,7 @@ struct NonlinearSolverOptions { int pcg_block_len = 10; /// Powell restart threshold for nonlinear PCG residual orthogonality - double pcg_powell_eta = 0.2; + double pcg_powell_eta = 0.005; /// Trust-ratio threshold below which the PCG-block trust scale shrinks double pcg_trust_eta_bad = 0.1; diff --git a/src/smith/numerics/tests/test_equationsolver.cpp b/src/smith/numerics/tests/test_equationsolver.cpp index edab4fd012..6ee649716c 100644 --- a/src/smith/numerics/tests/test_equationsolver.cpp +++ b/src/smith/numerics/tests/test_equationsolver.cpp @@ -124,6 +124,87 @@ TEST_P(EquationSolverSuite, All) } } +TEST(EquationSolver, PcgBlockUsesMatrixFreeTangentAction) +{ + auto mesh = mfem::Mesh::MakeCartesian2D(1, 1, mfem::Element::QUADRILATERAL); + auto pmesh = mfem::ParMesh(MPI_COMM_WORLD, mesh); + + pmesh.EnsureNodes(); + pmesh.ExchangeFaceNbrData(); + + constexpr int p = 1; + constexpr int dim = 2; + using test_space = H1

; + using trial_space = H1

; + + auto [fes, fec] = smith::generateParFiniteElementSpace(&pmesh); + + mfem::HypreParVector x_exact(fes.get()); + mfem::HypreParVector x_computed(fes.get()); + x_exact.Randomize(0); + x_computed = 0.0; + + std::unique_ptr J; + + Functional residual(fes.get(), {fes.get()}); + Domain domain = EntireDomain(pmesh); + residual.AddDomainIntegral( + Dimension{}, DependsOn<0>{}, + [](double /*t*/, auto, auto scalar) { + auto [u, du_dx] = scalar; + return smith::tuple{u, du_dx}; + }, + domain); + + StdFunctionOperator residual_opr( + fes->TrueVSize(), + [&x_exact, &residual](const mfem::Vector& x, mfem::Vector& r) { + constexpr double time = 0.0; + r = residual(time, x); + r -= residual(time, x_exact); + }, + [&residual, &J](const mfem::Vector& x) -> mfem::Operator& { + constexpr double time = 0.0; + auto [val, grad] = residual(time, differentiate_wrt(x)); + J = assemble(grad); + return *J; + }); + + const LinearSolverOptions lin_opts = {.linear_solver = LinearSolver::CG, + .preconditioner = Preconditioner::HypreJacobi, + .relative_tol = 1.0e-12, + .absolute_tol = 1.0e-14, + .max_iterations = 500, + .print_level = 0}; + + const NonlinearSolverOptions nonlin_opts = {.nonlin_solver = NonlinearSolver::PcgBlock, + .relative_tol = 1.0e-12, + .absolute_tol = 1.0e-14, + .max_iterations = 500, + .print_level = 0}; + + EquationSolver eq_solver(nonlin_opts, lin_opts); + eq_solver.setOperator(residual_opr); + + int num_tangent_actions = 0; + eq_solver.setMatrixFreeTangentAction( + [&residual, &num_tangent_actions](const mfem::Vector& x, const mfem::Vector& dx, mfem::Vector& y) { + constexpr double time = 0.0; + auto [val, grad] = residual(time, differentiate_wrt(x)); + grad.Mult(dx, y); + ++num_tangent_actions; + }); + + eq_solver.solve(x_computed); + + const auto diagnostics = eq_solver.pcgBlockDiagnostics(); + ASSERT_TRUE(diagnostics.has_value()); + EXPECT_GT(num_tangent_actions, 0); + EXPECT_EQ(diagnostics->num_hess_vecs, static_cast(num_tangent_actions)); + 
EXPECT_TRUE(eq_solver.nonlinearSolver().GetConverged()); + EXPECT_LT(x_computed.DistanceTo(x_exact.GetData()), 1.0e-10); +} + /** * @brief Nonlinear solvers to test. Always includes NonlinearSolver::Newton and NonlinearSolver::LBFGS * If SMITH_USE_SUNDIALS is set, adds: NonlinearSolver::KINFullStep, NonlinearSolver::KINBacktrackingLineSearch, and diff --git a/src/smith/physics/solid_mechanics.hpp b/src/smith/physics/solid_mechanics.hpp index e717272274..c635f67f89 100644 --- a/src/smith/physics/solid_mechanics.hpp +++ b/src/smith/physics/solid_mechanics.hpp @@ -1061,6 +1061,25 @@ class SolidMechanics, std::integer_se }); } + /// @brief Matrix-free action of the quasistatic tangent with essential boundary conditions applied. + void quasistaticTangentAction(const mfem::Vector& u, const mfem::Vector& du, mfem::Vector& dr) const + { + SMITH_MARK_FUNCTION; + + mfem::Vector du_interior(du); + du_interior.SetSubVector(bcs_.allEssentialTrueDofs(), 0.0); + + auto [r, drdu] = (*residual_)(time_, shapeDisplacement(), differentiate_wrt(u), acceleration_, + *parameters_[parameter_indices].state...); + drdu.Mult(du_interior, dr); + + const auto& constrained_dofs = bcs_.allEssentialTrueDofs(); + for (int i = 0; i < constrained_dofs.Size(); ++i) { + const int dof = constrained_dofs[i]; + dr[dof] = du[dof]; + } + } + /** * @brief Return the assembled stiffness matrix * @@ -1139,6 +1158,10 @@ class SolidMechanics, std::integer_se #endif nonlin_solver_->setOperator(*residual_with_bcs_); + if (is_quasistatic_) { + nonlin_solver_->setMatrixFreeTangentAction([this](const mfem::Vector& u, const mfem::Vector& du, + mfem::Vector& dr) { quasistaticTangentAction(u, du, dr); }); + } if (checkpoint_to_disk_) { outputStateToDisk(); diff --git a/src/smith/physics/tests/shallow_arch_buckling.cpp b/src/smith/physics/tests/shallow_arch_buckling.cpp index 3c36ec1a66..b9514b74ed 100644 --- a/src/smith/physics/tests/shallow_arch_buckling.cpp +++ b/src/smith/physics/tests/shallow_arch_buckling.cpp @@ 
-4,8 +4,8 @@ // // SPDX-License-Identifier: (BSD-3-Clause) -#include #include +#include #include #include @@ -27,101 +27,171 @@ namespace { constexpr double length = 10.0; constexpr double thickness = 0.25; -constexpr double rise = 0.75; constexpr double end_tol = 1.0e-8; - -void warpToShallowArch(smith::Mesh& mesh) +constexpr double top_tol = 1.0e-8; +std::string solver_name = "TrustRegion"; +int print_level = 2; +int pcg_block_len = 10; +double pcg_powell_eta = 0.005; +int nonlinear_max_iterations = 30000; + +NonlinearSolver selectedNonlinearSolver() { - auto& mfem_mesh = mesh.mfemParMesh(); - for (int i = 0; i < mfem_mesh.GetNV(); ++i) { - auto* vertex = mfem_mesh.GetVertex(i); - const double xi = 2.0 * vertex[0] / length - 1.0; - vertex[1] += rise * (1.0 - xi * xi); + if (solver_name == "NewtonLineSearch") { + return NonlinearSolver::NewtonLineSearch; + } + if (solver_name == "TrustRegion") { + return NonlinearSolver::TrustRegion; } + if (solver_name == "PcgBlock") { + return NonlinearSolver::PcgBlock; + } + + throw std::runtime_error("Unknown --solver value '" + solver_name + + "'. 
Use NewtonLineSearch, TrustRegion, or PcgBlock."); +} - mesh.mfemParMesh().DeleteGeometricFactors(); - auto* nodes = mesh.mfemParMesh().GetNodes(); - auto* coords = nodes->ReadWrite(); - const int vdim = nodes->VectorDim(); - const int scalar_size = nodes->Size() / vdim; - - for (int i = 0; i < scalar_size; ++i) { - const double x = coords[i]; - const double y = coords[i + scalar_size]; - const double xi = 2.0 * x / length - 1.0; - coords[i + scalar_size] = y + rise * (1.0 - xi * xi); +void parseCommandLine(int& argc, char** argv) +{ + int write_arg = 1; + for (int read_arg = 1; read_arg < argc; ++read_arg) { + const std::string arg = argv[read_arg]; + if (arg.rfind("--solver=", 0) == 0) { + solver_name = arg.substr(std::string("--solver=").size()); + } else if (arg.rfind("--print-level=", 0) == 0) { + print_level = std::stoi(arg.substr(std::string("--print-level=").size())); + } else if (arg.rfind("--pcg-block-len=", 0) == 0) { + pcg_block_len = std::stoi(arg.substr(std::string("--pcg-block-len=").size())); + } else if (arg.rfind("--pcg-powell-eta=", 0) == 0) { + pcg_powell_eta = std::stod(arg.substr(std::string("--pcg-powell-eta=").size())); + } else if (arg.rfind("--nonlinear-max-iterations=", 0) == 0) { + nonlinear_max_iterations = std::stoi(arg.substr(std::string("--nonlinear-max-iterations=").size())); + } else { + argv[write_arg] = argv[read_arg]; + ++write_arg; + } } + argc = write_arg; } } // namespace -TEST(ShallowArchBuckling, NeoHookeanTractionControlled) +TEST(ShallowArchBuckling, CompressedThinBeamSnapThrough) { MPI_Barrier(MPI_COMM_WORLD); constexpr int p = 1; constexpr int dim = 2; - constexpr int nx = 48; + constexpr int nx = 96; constexpr int ny = 4; axom::sidre::DataStore datastore; smith::StateManager::initialize(datastore, "shallow_arch_buckling"); auto mesh = std::make_shared( - mfem::Mesh::MakeCartesian2D(nx, ny, mfem::Element::QUADRILATERAL, true, length, thickness), "arch_mesh", 0, 0); - warpToShallowArch(*mesh); + 
mfem::Mesh::MakeCartesian2D(nx, ny, mfem::Element::QUADRILATERAL, true, length, thickness), + "compressed_beam_mesh", 0, 0); mesh->addDomainOfBoundaryElements("left_end", [](std::vector vertices, int) { return average(vertices)[0] < end_tol; }); mesh->addDomainOfBoundaryElements( "right_end", [](std::vector vertices, int) { return average(vertices)[0] > length - end_tol; }); - mesh->addDomainOfBoundaryElements("top_face", [](std::vector, int attr) { return attr == 3; }); + mesh->addDomainOfBoundaryElements( + "top_face", [](std::vector vertices, int) { return average(vertices)[1] > thickness - top_tol; }); + EXPECT_GT(mesh->domain("left_end").total_elements(), 0); + EXPECT_GT(mesh->domain("right_end").total_elements(), 0); EXPECT_GT(mesh->domain("top_face").total_elements(), 0); smith::LinearSolverOptions linear_options{.linear_solver = LinearSolver::CG, - .preconditioner = Preconditioner::HypreAMG, + .preconditioner = Preconditioner::HypreJacobi, .relative_tol = 1.0e-8, .absolute_tol = 1.0e-14, - .max_iterations = 500, + .max_iterations = 10000, .print_level = 0}; - smith::NonlinearSolverOptions nonlinear_options{.nonlin_solver = NonlinearSolver::PcgBlock, + smith::NonlinearSolverOptions nonlinear_options{.nonlin_solver = selectedNonlinearSolver(), .relative_tol = 1.0e-8, .absolute_tol = 1.0e-10, - .max_iterations = 500, - .print_level = 2, - .pcg_block_len = 10, + .max_iterations = nonlinear_max_iterations, + .print_level = print_level, + .pcg_block_len = pcg_block_len, + .pcg_powell_eta = pcg_powell_eta, .pcg_max_block_retries = 40}; SolidMechanics solid(nonlinear_options, linear_options, solid_mechanics::default_quasistatic_options, - "shallow_arch", mesh); + "compressed_beam", mesh); solid_mechanics::NeoHookean mat{.density = 1.0, .K = 100.0, .G = 10.0}; solid.setMaterial(mat, mesh->entireBody()); solid.setFixedBCs(mesh->domain("left_end")); - solid.setFixedBCs(mesh->domain("right_end")); - constexpr double final_traction = 0.2; - solid.setTraction([](auto, 
auto, double t) { return vec2{{0.0, -final_traction * t}}; }, mesh->domain("top_face")); + constexpr double final_compression = 0.2; + constexpr double seed_down_traction = 1.0e-5; + constexpr double final_snap_up_traction = 0.02; + solid.setDisplacementBCs([](auto, double t) { return vec2{{-final_compression * t, 0.0}}; }, + mesh->domain("right_end"), Component::X); + solid.setFixedBCs(mesh->domain("right_end"), Component::Y); + solid.setTraction( + [](auto, auto, double t) { + if (t < 0.5) { + return vec2{{0.0, -seed_down_traction * (t / 0.5)}}; + } + const double snap_ramp = (t - 0.5) / 0.5; + return vec2{{0.0, -seed_down_traction * (1.0 - snap_ramp) + final_snap_up_traction * snap_ramp}}; + }, + mesh->domain("top_face")); solid.completeSetup(); solid.outputStateToDisk("shallow_arch_buckling"); - constexpr int num_steps = 40; + mfem::out << "Compressed thin beam snap-through run: solver = " << solver_name << '\n'; + + constexpr int num_steps = 20; + int num_converged_steps = 0; for (int step = 0; step < num_steps; ++step) { - EXPECT_NO_THROW(solid.advanceTimestep(1.0 / num_steps)); + solid.advanceTimestep(1.0 / num_steps); + const auto& nonlinear_solver = solid.equationSolver().nonlinearSolver(); + if (nonlinear_solver.GetConverged()) { + ++num_converged_steps; + } + mfem::out << "Load step " << step + 1 << "/" << num_steps << ": converged = " << nonlinear_solver.GetConverged() + << ", nonlinear iterations = " << nonlinear_solver.GetNumIterations() + << ", final relative residual = " << nonlinear_solver.GetFinalRelNorm() << '\n'; solid.outputStateToDisk("shallow_arch_buckling"); + if (const auto diagnostics = solid.equationSolver().pcgBlockDiagnostics()) { + mfem::out << " PCG diagnostics: residuals = " << diagnostics->num_residuals + << ", hess-vecs = " << diagnostics->num_hess_vecs + << ", preconditioner applications = " << diagnostics->num_preconds + << ", Jacobian assemblies = " << diagnostics->num_jacobian_assembles + << ", preconditioner updates = " << 
diagnostics->num_preconditioner_updates + << ", accepted blocks = " << diagnostics->num_blocks + << ", accepted steps = " << diagnostics->num_accepted_steps + << ", block rejects = " << diagnostics->num_block_rejects + << ", prefix accepts = " << diagnostics->num_prefix_accepts + << ", momentum resets = " << diagnostics->num_momentum_resets + << ", nonzero beta = " << diagnostics->num_nonzero_beta + << ", zero beta = " << diagnostics->num_zero_beta + << ", Powell restarts = " << diagnostics->num_powell_restarts + << ", descent restarts = " << diagnostics->num_descent_restarts + << ", negative curvature = " << diagnostics->num_negative_curvature + << ", trust capped steps = " << diagnostics->num_trust_capped_steps + << ", line-search backtracks = " << diagnostics->num_line_search_backtracks + << ", final h_scale = " << diagnostics->final_h_scale + << ", last trust ratio = " << diagnostics->last_trust_ratio << '\n'; + } + if (!nonlinear_solver.GetConverged()) { + throw std::runtime_error("Nonlinear solve failed to converge at load step " + std::to_string(step + 1)); + } } - const auto diagnostics = solid.equationSolver().pcgBlockDiagnostics(); - ASSERT_TRUE(diagnostics.has_value()); - EXPECT_GT(diagnostics->num_accepted_steps, 0); + mfem::out << "Converged load steps: " << num_converged_steps << "/" << num_steps << '\n'; } } // namespace smith int main(int argc, char* argv[]) { + smith::parseCommandLine(argc, argv); ::testing::InitGoogleTest(&argc, argv); smith::ApplicationManager applicationManager(argc, argv); return RUN_ALL_TESTS(); From 0f438f0a6ffcd1803dd3d619a1b964e70187cfb9 Mon Sep 17 00:00:00 2001 From: Michael Tupek Date: Thu, 30 Apr 2026 12:33:42 -0600 Subject: [PATCH 05/27] Trying to implement JacobianOperator abstraction. 
--- src/smith/numerics/equation_solver.cpp | 31 ++++- src/smith/numerics/equation_solver.hpp | 72 +++++++++++ src/smith/numerics/functional/functional.hpp | 3 +- .../tests/functional_comparisons.cpp | 100 ++++++++++++++- .../numerics/tests/test_equationsolver.cpp | 116 ++++++++++++++++++ 5 files changed, 317 insertions(+), 5 deletions(-) diff --git a/src/smith/numerics/equation_solver.cpp b/src/smith/numerics/equation_solver.cpp index d6ccbe5b36..923f94c182 100644 --- a/src/smith/numerics/equation_solver.cpp +++ b/src/smith/numerics/equation_solver.cpp @@ -926,6 +926,10 @@ class PcgBlockSolver : public mfem::NewtonSolver { mutable size_t num_residuals = 0; /// Internal counter for matrix assembles mutable size_t num_jacobian_assembles = 0; + /// Internal counter for JacobianOperator evaluations + mutable size_t num_jacobian_operator_evals = 0; + /// Internal counter for direct diagonal assemblies + mutable size_t num_diagonal_assembles = 0; /// Internal counter for preconditioner operator updates mutable size_t num_preconditioner_updates = 0; /// Internal counter for accepted prefix blocks @@ -961,6 +965,8 @@ class PcgBlockSolver : public mfem::NewtonSolver { /// Optional matrix-free tangent action, y = J(x) dx MatrixFreeTangentAction matrix_free_tangent_action; + /// Optional JacobianOperator factory + JacobianOperatorFactory jacobian_operator_factory; #ifdef MFEM_USE_MPI /// Constructor @@ -997,12 +1003,23 @@ class PcgBlockSolver : public mfem::NewtonSolver { matrix_free_tangent_action = std::move(tangent_action); } + /// Set an optional JacobianOperator factory. + void setJacobianOperator(JacobianOperatorFactory jacobian_operator) + { + jacobian_operator_factory = std::move(jacobian_operator); + } + /// Apply the tangent at x to dx. 
void hessVec(const mfem::Vector& x, const mfem::Vector& dx, mfem::Vector& y) const { SMITH_MARK_FUNCTION; ++num_hess_vecs; - if (matrix_free_tangent_action) { + if (jacobian_operator_factory) { + ++num_jacobian_operator_evals; + std::unique_ptr jacobian_operator = jacobian_operator_factory(x); + SLIC_ERROR_ROOT_IF(!jacobian_operator, "JacobianOperator factory returned a null operator."); + jacobian_operator->Mult(dx, y); + } else if (matrix_free_tangent_action) { matrix_free_tangent_action(x, dx, y); } else { grad->Mult(dx, y); @@ -1024,6 +1041,8 @@ class PcgBlockSolver : public mfem::NewtonSolver { .num_hess_vecs = num_hess_vecs, .num_preconds = num_preconds, .num_jacobian_assembles = num_jacobian_assembles, + .num_jacobian_operator_evals = num_jacobian_operator_evals, + .num_diagonal_assembles = num_diagonal_assembles, .num_preconditioner_updates = num_preconditioner_updates, .num_prefix_accepts = num_prefix_accepts, .num_momentum_resets = num_momentum_resets, @@ -1056,6 +1075,8 @@ class PcgBlockSolver : public mfem::NewtonSolver { num_preconds = 0; num_residuals = 0; num_jacobian_assembles = 0; + num_jacobian_operator_evals = 0; + num_diagonal_assembles = 0; num_preconditioner_updates = 0; num_prefix_accepts = 0; num_momentum_resets = 0; @@ -1498,6 +1519,14 @@ void EquationSolver::setMatrixFreeTangentAction(MatrixFreeTangentAction tangent_ } } +void EquationSolver::setJacobianOperator(JacobianOperatorFactory jacobian_operator) +{ + auto* pcg_block = dynamic_cast(nonlin_solver_.get()); + if (pcg_block) { + pcg_block->setJacobianOperator(std::move(jacobian_operator)); + } +} + void EquationSolver::solve(mfem::Vector& x) const { mfem::Vector zero(x); diff --git a/src/smith/numerics/equation_solver.hpp b/src/smith/numerics/equation_solver.hpp index 7e5882a74a..febc40754b 100644 --- a/src/smith/numerics/equation_solver.hpp +++ b/src/smith/numerics/equation_solver.hpp @@ -23,11 +23,63 @@ #include "mfem.hpp" #include "smith/infrastructure/input.hpp" +#include 
"smith/infrastructure/logger.hpp" #include "smith/numerics/solver_config.hpp" #include "smith/numerics/petsc_solvers.hpp" namespace smith { +/** + * @brief Solver-facing interface for Jacobian operations. + * + * A JacobianOperator represents the operations available on J(x) after differentiating a residual but before + * necessarily assembling a sparse matrix. Concrete implementations may support matrix-free products, sparse assembly, + * diagonal extraction, or all of them. Unsupported operations should throw. + */ +class JacobianOperator : public mfem::Operator { + public: + using mfem::Operator::Operator; + + /// Assemble the sparse Jacobian representation. + virtual std::unique_ptr assemble() + { + SLIC_ERROR("This JacobianOperator does not support sparse assembly."); + return nullptr; + } + + /// Assemble the scalar true-dof diagonal of the Jacobian. + virtual void assembleDiagonal(mfem::Vector&) const + { + SLIC_ERROR("This JacobianOperator does not support diagonal assembly."); + } +}; + +/** + * @brief Adapter from a smith::functional Gradient object to the solver-facing JacobianOperator interface. + */ +template +class FunctionalJacobianOperator : public JacobianOperator { + public: + explicit FunctionalJacobianOperator(Gradient& gradient) + : JacobianOperator(gradient.Height(), gradient.Width()), gradient_(gradient) + { + } + + void Mult(const mfem::Vector& dx, mfem::Vector& y) const override { gradient_.Mult(dx, y); } + + void AddMult(const mfem::Vector& dx, mfem::Vector& y, const double a = 1.0) const override + { + gradient_.AddMult(dx, y, a); + } + + std::unique_ptr assemble() override { return gradient_.assemble(); } + + void assembleDiagonal(mfem::Vector& diag) const override { gradient_.assembleDiagonal(diag); } + + private: + Gradient& gradient_; +}; + /** * @brief Matrix-free tangent action callback. 
* @@ -36,6 +88,11 @@ namespace smith { */ using MatrixFreeTangentAction = std::function; +/** + * @brief Callback that evaluates and returns a JacobianOperator at the supplied nonlinear state. + */ +using JacobianOperatorFactory = std::function(const mfem::Vector& x)>; + /// Diagnostic counters for the nonlinear PCG-block solver struct PcgBlockDiagnostics { /// Number of nonlinear residual evaluations @@ -46,6 +103,10 @@ struct PcgBlockDiagnostics { size_t num_preconds = 0; /// Number of assembled Jacobians size_t num_jacobian_assembles = 0; + /// Number of solver-facing JacobianOperator evaluations + size_t num_jacobian_operator_evals = 0; + /// Number of direct diagonal assemblies + size_t num_diagonal_assembles = 0; /// Number of preconditioner operator updates size_t num_preconditioner_updates = 0; /// Number of accepted prefix blocks @@ -140,6 +201,17 @@ class EquationSolver { */ void setMatrixFreeTangentAction(MatrixFreeTangentAction tangent_action); + /** + * @brief Sets an optional JacobianOperator factory for nonlinear solvers that can use matrix-free Jacobian products. + * + * This is the preferred replacement for the narrower matrix-free tangent-action callback. During migration, + * PCG-block uses this callback first when it is registered and otherwise falls back to MatrixFreeTangentAction or + * assembled gradients. + * + * @param[in] jacobian_operator Callback evaluating and returning J(x). 
+ */ + void setJacobianOperator(JacobianOperatorFactory jacobian_operator); + /** * Solves the system F(x) = 0 * @param[in,out] x Solution to the system of nonlinear equations diff --git a/src/smith/numerics/functional/functional.hpp b/src/smith/numerics/functional/functional.hpp index 730d4570d6..7e611182b8 100644 --- a/src/smith/numerics/functional/functional.hpp +++ b/src/smith/numerics/functional/functional.hpp @@ -912,6 +912,8 @@ class Functional { form_.P_test_->MultTranspose(local_diag, diag); } + void assembleDiagonal(mfem::Vector& diag) const { AssembleDiagonal(diag); } + std::unique_ptr assemble() { if (row_ptr.empty()) { @@ -1040,7 +1042,6 @@ class Functional { }; friend auto assemble(Gradient& g) { return g.assemble(); } - friend void assemble_diagonal(const Gradient& g, mfem::Vector& diag) { g.AssembleDiagonal(diag); } private: /// @brief The "parent" @p Functional to calculate gradients with diff --git a/src/smith/numerics/functional/tests/functional_comparisons.cpp b/src/smith/numerics/functional/tests/functional_comparisons.cpp index 48a8638fb6..95a7d95d42 100644 --- a/src/smith/numerics/functional/tests/functional_comparisons.cpp +++ b/src/smith/numerics/functional/tests/functional_comparisons.cpp @@ -4,9 +4,10 @@ // // SPDX-License-Identifier: (BSD-3-Clause) +#include +#include #include #include -#include #include #include #include @@ -32,6 +33,8 @@ using namespace smith; int nsamples = 1; // because mfem doesn't take in unsigned int +bool run_diagonal_benchmark = false; +int diagonal_benchmark_samples = 5; constexpr bool verbose = false; std::unique_ptr mesh2D; @@ -185,7 +188,7 @@ void functional_test(mfem::ParMesh& mesh, H1<p> test, H1<p> trial, Dimension<dim std::unique_ptr<mfem::HypreParMatrix> J_func = assemble(drdU); mfem::Vector diag_direct(U.Size()); - drdU.AssembleDiagonal(diag_direct); + drdU.assembleDiagonal(diag_direct); mfem::Vector diag_assembled(U.Size()); J_func->GetDiag(diag_assembled); @@ -311,7 +314,7 @@ void functional_test(mfem::ParMesh& mesh, H1<p, dim> test, H1<p, dim> trial, Dim std::unique_ptr<mfem::HypreParMatrix> J_func = assemble(drdU); mfem::Vector diag_direct(U.Size()); - drdU.AssembleDiagonal(diag_direct); + drdU.assembleDiagonal(diag_direct); mfem::Vector diag_assembled(U.Size()); J_func->GetDiag(diag_assembled); @@ -479,6 +482,93 @@ TEST(Elasticity, 3DLinear) { functional_test(*mesh3D, H1<1, 3>{}, H1<1, 3>{}, Di TEST(Elasticity, 3DQuadratic) { functional_test(*mesh3D, H1<2, 3>{}, H1<2, 3>{}, Dimension<3>{}); } TEST(Elasticity, 3DCubic) { functional_test(*mesh3D, H1<3, 3>{}, H1<3, 3>{}, Dimension<3>{}); } +namespace { + +template +double time_on_slowest_rank(Function&& function) +{ + auto [num_ranks, rank] = smith::getMPIInfo(); + (void)rank; + if (num_ranks > 1) { + MPI_Barrier(MPI_COMM_WORLD); + } + + auto start = std::chrono::steady_clock::now(); + function(); + auto stop = std::chrono::steady_clock::now(); + + double elapsed = + std::chrono::duration_cast>(stop - start).count(); + double max_elapsed = elapsed; + if (num_ranks > 1) { + MPI_Allreduce(&elapsed, &max_elapsed, 1, MPI_DOUBLE, MPI_MAX, MPI_COMM_WORLD); + } + return max_elapsed; +} + +} // namespace + +TEST(Elasticity, DiagonalAssemblyBenchmark) +{ + if (!run_diagonal_benchmark) { + GTEST_SKIP() << "Set --run-diagonal-benchmark to time direct diagonal assembly."; + } + + static constexpr int dim = 3; + using test_space = H1<2, dim>; + using trial_space = H1<2, dim>; + + auto [fespace, fec] = smith::generateParFiniteElementSpace(mesh3D.get()); + (void)fec; + + mfem::ParGridFunction u_global(fespace.get()); + int seed = 9; + u_global.Randomize(seed); + + mfem::Vector U(fespace->TrueVSize()); + u_global.GetTrueDofs(U); + + Functional residual(fespace.get(), {fespace.get()}); + Domain
domain = EntireDomain(*mesh3D); + residual.AddDomainIntegral(Dimension{}, DependsOn<0>{}, StressFunctor{}, domain); + + auto [r, drdU] = residual(0.0, differentiate_wrt(U)); + + mfem::Vector diag_direct(U.Size()); + mfem::Vector diag_assembled(U.Size()); + drdU.assembleDiagonal(diag_direct); + std::unique_ptr J_warmup = assemble(drdU); + J_warmup->GetDiag(diag_assembled); + + const int samples = std::max(diagonal_benchmark_samples, 1); + double direct_time = time_on_slowest_rank([&]() { + for (int sample = 0; sample < samples; sample++) { + drdU.assembleDiagonal(diag_direct); + } + }); + + double sparse_time = time_on_slowest_rank([&]() { + for (int sample = 0; sample < samples; sample++) { + std::unique_ptr J = assemble(drdU); + J->GetDiag(diag_assembled); + } + }); + + mfem::Vector diag_diff(U.Size()); + subtract(diag_direct, diag_assembled, diag_diff); + EXPECT_NEAR(0.0, diag_diff.Norml2() / diag_assembled.Norml2(), 1.e-14); + + auto [num_ranks, rank] = smith::getMPIInfo(); + (void)num_ranks; + if (rank == 0) { + std::cout << "DiagonalAssemblyBenchmark direct_seconds=" << direct_time / samples + << " sparse_getdiag_seconds=" << sparse_time / samples + << " speedup=" << sparse_time / direct_time << std::endl; + } + + EXPECT_GT(sparse_time / direct_time, 5.0); +} + // TODO: reenable these once hcurl implements of simplex elements is finished // TEST(Hcurl, 2DLinear) { functional_test(*mesh2D, Hcurl<1>{}, Hcurl<1>{}, Dimension<2>{}); } // TEST(Hcurl, 2DQuadratic) { functional_test(*mesh2D, Hcurl<2>{}, Hcurl<2>{}, Dimension<2>{}); } @@ -501,6 +591,10 @@ int main(int argc, char* argv[]) args.AddOption(&serial_refinement, "-r", "--ref", ""); args.AddOption(&parallel_refinement, "-pr", "--pref", ""); args.AddOption(&nsamples, "-n", "--n-samples", "Samples per test"); + args.AddOption(&run_diagonal_benchmark, "-rdb", "--run-diagonal-benchmark", "-sdb", "--skip-diagonal-benchmark", + "Run direct diagonal vs sparse assemble+GetDiag timing benchmark."); +
args.AddOption(&diagonal_benchmark_samples, "-dbs", "--diagonal-benchmark-samples", + "Samples for the diagonal assembly benchmark."); args.Parse(); if (!args.Good()) { diff --git a/src/smith/numerics/tests/test_equationsolver.cpp b/src/smith/numerics/tests/test_equationsolver.cpp index 6ee649716c..f0b73bf53e 100644 --- a/src/smith/numerics/tests/test_equationsolver.cpp +++ b/src/smith/numerics/tests/test_equationsolver.cpp @@ -138,6 +138,7 @@ TEST(EquationSolver, PcgBlockUsesMatrixFreeTangentAction) using trial_space = H1<p>; auto [fes, fec] = smith::generateParFiniteElementSpace(&pmesh); + (void)fec; mfem::HypreParVector x_exact(fes.get()); mfem::HypreParVector x_computed(fes.get()); @@ -205,6 +206,121 @@ TEST(EquationSolver, PcgBlockUsesMatrixFreeTangentAction) EXPECT_LT(x_computed.DistanceTo(x_exact.GetData()), 1.0e-10); } +TEST(EquationSolver, PcgBlockUsesJacobianOperator) +{ + class MatrixJacobianOperator : public JacobianOperator { + public: + explicit MatrixJacobianOperator(std::unique_ptr matrix) + : JacobianOperator(matrix->Height(), matrix->Width()), matrix_(std::move(matrix)) + { + } + + void Mult(const mfem::Vector& dx, mfem::Vector& y) const override { matrix_->Mult(dx, y); } + + std::unique_ptr assemble() override { return std::move(matrix_); } + + void assembleDiagonal(mfem::Vector& diag) const override { matrix_->GetDiag(diag); } + + private: + std::unique_ptr matrix_; + }; + + auto mesh = mfem::Mesh::MakeCartesian2D(1, 1, mfem::Element::QUADRILATERAL); + auto pmesh = mfem::ParMesh(MPI_COMM_WORLD, mesh); + + pmesh.EnsureNodes(); + pmesh.ExchangeFaceNbrData(); + + constexpr int p = 1; + constexpr int dim = 2; + using test_space = H1<p>; + using trial_space = H1<p>; + + auto [fes, fec] = smith::generateParFiniteElementSpace(&pmesh); + (void)fec; + + mfem::HypreParVector x_exact(fes.get()); + mfem::HypreParVector x_computed(fes.get()); + x_exact.Randomize(0); + x_computed = 0.0; + + std::unique_ptr J; + + Functional residual(fes.get(), {fes.get()}); + Domain domain = EntireDomain(pmesh); + residual.AddDomainIntegral( + Dimension{}, DependsOn<0>{}, + [](double /*t*/, auto, auto scalar) { + auto [u, du_dx] = scalar; + return smith::tuple{u, du_dx}; + }, + domain); + + { + constexpr double time = 0.0; + auto [val, grad] = residual(time, differentiate_wrt(x_exact)); + FunctionalJacobianOperator jacobian_operator(grad); + + mfem::Vector dx(x_exact.Size()); + mfem::Vector y_grad(x_exact.Size()); + mfem::Vector y_operator(x_exact.Size()); + dx.Randomize(1); + grad.Mult(dx, y_grad); + jacobian_operator.Mult(dx, y_operator); + + EXPECT_LT(y_operator.DistanceTo(y_grad.GetData()), 1.0e-14); + } + + StdFunctionOperator residual_opr( + fes->TrueVSize(), + [&x_exact, &residual](const mfem::Vector& x, mfem::Vector& r) { + constexpr double time = 0.0; + r = residual(time, x); + r -= residual(time, x_exact); + }, + [&residual, &J](const mfem::Vector& x) -> mfem::Operator& { + constexpr double time = 0.0; + auto [val, grad] = residual(time, differentiate_wrt(x)); + J = assemble(grad); + return *J; + }); + + const LinearSolverOptions lin_opts = {.linear_solver = LinearSolver::CG, + .preconditioner = Preconditioner::HypreJacobi, + .relative_tol = 1.0e-12, + .absolute_tol = 1.0e-14, + .max_iterations = 500, + .print_level = 0}; + + const NonlinearSolverOptions nonlin_opts = {.nonlin_solver = NonlinearSolver::PcgBlock, + .relative_tol = 1.0e-12, + .absolute_tol = 1.0e-14, + .max_iterations = 500, + .print_level = 0}; + + EquationSolver eq_solver(nonlin_opts, lin_opts); + eq_solver.setOperator(residual_opr); + + int num_operator_evals = 0; + eq_solver.setJacobianOperator([&residual, &num_operator_evals](const mfem::Vector& x) { + constexpr double
time = 0.0; + auto [val, grad] = residual(time, differentiate_wrt(x)); + ++num_operator_evals; + return std::make_unique(assemble(grad)); + }); + + eq_solver.solve(x_computed); + + const auto diagnostics = eq_solver.pcgBlockDiagnostics(); + ASSERT_TRUE(diagnostics.has_value()); + EXPECT_GT(num_operator_evals, 0); + EXPECT_EQ(diagnostics->num_hess_vecs, static_cast(num_operator_evals)); + EXPECT_EQ(diagnostics->num_jacobian_operator_evals, static_cast(num_operator_evals)); + EXPECT_EQ(diagnostics->num_diagonal_assembles, 0u); + EXPECT_TRUE(eq_solver.nonlinearSolver().GetConverged()); + EXPECT_LT(x_computed.DistanceTo(x_exact.GetData()), 1.0e-10); +} + /** * @brief Nonlinear solvers to test. Always includes NonlinearSolver::Newton and NonlinearSolver::LBFGS * If SMITH_USE_SUNDIALS is set, adds: NonlinearSolver::KINFullStep, NonlinearSolver::KINBacktrackingLineSearch, and From d94f4b3d1e1d9c276f856bcc60f3b8ec49f09bb8 Mon Sep 17 00:00:00 2001 From: Michael Tupek Date: Thu, 30 Apr 2026 12:53:07 -0600 Subject: [PATCH 06/27] Implement JacobianOperator for weak form. 
--- src/smith/numerics/equation_solver.hpp | 25 +++++++++++---- src/smith/physics/dfem_weak_form.hpp | 12 +++++++ src/smith/physics/functional_weak_form.hpp | 21 ++++++++++++ .../tests/test_functional_weak_form.cpp | 32 +++++++++++++++++++ src/smith/physics/weak_form.hpp | 18 +++++++++++ 5 files changed, 101 insertions(+), 7 deletions(-) diff --git a/src/smith/numerics/equation_solver.hpp b/src/smith/numerics/equation_solver.hpp index febc40754b..8d67cc64a5 100644 --- a/src/smith/numerics/equation_solver.hpp +++ b/src/smith/numerics/equation_solver.hpp @@ -16,6 +16,7 @@ #include #include #include +#include #include #include @@ -59,25 +60,35 @@ class JacobianOperator : public mfem::Operator { */ template class FunctionalJacobianOperator : public JacobianOperator { + using GradientT = std::remove_reference_t; + public: - explicit FunctionalJacobianOperator(Gradient& gradient) - : JacobianOperator(gradient.Height(), gradient.Width()), gradient_(gradient) + explicit FunctionalJacobianOperator(GradientT& gradient) + : JacobianOperator(gradient.Height(), gradient.Width()), gradient_(&gradient) + { + } + + explicit FunctionalJacobianOperator(GradientT&& gradient) + : JacobianOperator(gradient.Height(), gradient.Width()), + owned_gradient_(std::make_unique(std::move(gradient))), + gradient_(owned_gradient_.get()) { } - void Mult(const mfem::Vector& dx, mfem::Vector& y) const override { gradient_.Mult(dx, y); } + void Mult(const mfem::Vector& dx, mfem::Vector& y) const override { gradient_->Mult(dx, y); } void AddMult(const mfem::Vector& dx, mfem::Vector& y, const double a = 1.0) const override { - gradient_.AddMult(dx, y, a); + gradient_->AddMult(dx, y, a); } - std::unique_ptr assemble() override { return gradient_.assemble(); } + std::unique_ptr assemble() override { return gradient_->assemble(); } - void assembleDiagonal(mfem::Vector& diag) const override { gradient_.assembleDiagonal(diag); } + void assembleDiagonal(mfem::Vector& diag) const override { 
gradient_->assembleDiagonal(diag); } private: - Gradient& gradient_; + std::unique_ptr owned_gradient_; + GradientT* gradient_; }; /** diff --git a/src/smith/physics/dfem_weak_form.hpp b/src/smith/physics/dfem_weak_form.hpp index 83a55d6ddd..f55598039c 100644 --- a/src/smith/physics/dfem_weak_form.hpp +++ b/src/smith/physics/dfem_weak_form.hpp @@ -213,6 +213,18 @@ class DfemWeakForm : public WeakForm { return std::make_unique(); } + /// @overload + std::unique_ptr jacobianOperator( + TimeInfo time_info, ConstFieldPtr /*shape_disp*/, const std::vector& /*fields*/, + size_t /*input_col*/, const std::vector& /*quad_fields*/ = {}) const override + { + SLIC_ERROR_ROOT("DfemWeakForm does not support JacobianOperator construction"); + dt_ = time_info.dt(); + cycle_ = time_info.cycle(); + + return nullptr; + } + /// @overload void jvp(TimeInfo time_info, ConstFieldPtr /*shape_disp*/, const std::vector& /*fields*/, const std::vector& /*quad_fields*/, ConstFieldPtr /*v_shape_disp*/, diff --git a/src/smith/physics/functional_weak_form.hpp b/src/smith/physics/functional_weak_form.hpp index 18c292dcab..8e99e71afc 100644 --- a/src/smith/physics/functional_weak_form.hpp +++ b/src/smith/physics/functional_weak_form.hpp @@ -15,6 +15,7 @@ #include "smith/physics/weak_form.hpp" #include "smith/physics/mesh.hpp" +#include "smith/numerics/equation_solver.hpp" #include "smith/numerics/functional/shape_aware_functional.hpp" #include "smith/physics/state/finite_element_state.hpp" #include "smith/physics/state/finite_element_dual.hpp" @@ -331,6 +332,26 @@ class FunctionalWeakForm, return J; } + /// @overload + std::unique_ptr jacobianOperator( + TimeInfo time_info, ConstFieldPtr shape_disp, const std::vector& fields, size_t input_col, + [[maybe_unused]] const std::vector& quad_fields = {}) const override + { + SLIC_ERROR_IF(input_col >= fields.size(), "Invalid JacobianOperator input column."); + + dt_ = time_info.dt(); + cycle_ = time_info.cycle(); + + auto jacs = 
jacobianFunctions(std::make_integer_sequence{}, time_info.time(), + shape_disp, fields); + auto K = smith::get(jacs[input_col](time_info.time(), shape_disp, fields)); + + SLIC_ERROR_IF(K.Height() != K.Width(), + "WeakForm::jacobianOperator currently supports square one-field derivatives only."); + + return std::make_unique>(std::move(K)); + } + /// @overload void jvp(TimeInfo time_info, ConstFieldPtr shape_disp, const std::vector& fields, [[maybe_unused]] const std::vector& quad_fields, diff --git a/src/smith/physics/tests/test_functional_weak_form.cpp b/src/smith/physics/tests/test_functional_weak_form.cpp index 61ea04e68d..0dc318ab82 100644 --- a/src/smith/physics/tests/test_functional_weak_form.cpp +++ b/src/smith/physics/tests/test_functional_weak_form.cpp @@ -247,6 +247,38 @@ TEST_F(WeakFormFixture, JvpConsistency) } } +TEST_F(WeakFormFixture, JacobianOperatorConsistency) +{ + auto input_fields = getConstFieldPointers(states, params); + auto field_tangents = getConstFieldPointers(state_tangents, param_tangents); + + std::vector jacobian_weights(input_fields.size()); + jacobian_weights[DISP] = 1.0; + + auto J = weak_form->jacobian(time_info, shape_disp.get(), input_fields, jacobian_weights); + auto J_op = weak_form->jacobianOperator(time_info, shape_disp.get(), input_fields, DISP); + + smith::FiniteElementDual jvp_slow(states[DISP].space(), "jvp_slow"); + smith::FiniteElementDual jvp_op(states[DISP].space(), "jvp_op"); + J->Mult(*field_tangents[DISP], jvp_slow); + J_op->Mult(*field_tangents[DISP], jvp_op); + EXPECT_NEAR(jvp_slow.Norml2(), jvp_op.Norml2(), 1e-12); + + std::unique_ptr J_op_assembled = J_op->assemble(); + smith::FiniteElementDual jvp_op_assembled(states[DISP].space(), "jvp_op_assembled"); + J_op_assembled->Mult(*field_tangents[DISP], jvp_op_assembled); + EXPECT_NEAR(jvp_slow.Norml2(), jvp_op_assembled.Norml2(), 1e-12); + + mfem::Vector diag_direct(J_op->Height()); + mfem::Vector diag_assembled(J->Height()); + J_op->assembleDiagonal(diag_direct); + 
J->GetDiag(diag_assembled); + + mfem::Vector diag_diff(diag_direct.Size()); + subtract(diag_direct, diag_assembled, diag_diff); + EXPECT_NEAR(0.0, diag_diff.Norml2() / diag_assembled.Norml2(), 1.e-14); +} + int main(int argc, char* argv[]) { ::testing::InitGoogleTest(&argc, argv); diff --git a/src/smith/physics/weak_form.hpp b/src/smith/physics/weak_form.hpp index 2e766de4b3..1675545ba2 100644 --- a/src/smith/physics/weak_form.hpp +++ b/src/smith/physics/weak_form.hpp @@ -12,6 +12,7 @@ #pragma once +#include #include #include #include @@ -25,6 +26,7 @@ class HypreParMatrix; namespace smith { +class JacobianOperator; class FiniteElementState; class FiniteElementDual; @@ -69,6 +71,22 @@ class WeakForm { const std::vector& field_argument_tangents, const std::vector& quad_fields = {}) const = 0; + /** @brief Derivative of the residual with respect to one field argument as a solver-facing JacobianOperator. + * + * The returned operator represents one derivative column, d{r}/d{fields}_field_argument_index. The first supported + * use case is the square solved-field derivative used by PCG-block tangent products and diagonal extraction. 
+ * + * @param time_info time and timestep information + * @param shape_disp smith::FiniteElementState*, change in model coordinates relative to the initially read in mesh + * @param fields vector of smith::FiniteElementState* + * @param field_argument_index field argument to differentiate with respect to + * @param quad_fields vector of ConstQuadratureFieldPtr + * @return std::unique_ptr returns d{r}/d{fields}_field_argument_index + */ + virtual std::unique_ptr jacobianOperator( + TimeInfo time_info, ConstFieldPtr shape_disp, const std::vector& fields, + size_t field_argument_index, const std::vector& quad_fields = {}) const = 0; + /** * @brief Jacobian-vector product, will overwrite any existing values in jvp_reactions * @param time_info time and timestep information From 3b6a6adf2b066d72b93f0d8bf2940455c3d4740c Mon Sep 17 00:00:00 2001 From: Michael Tupek Date: Thu, 30 Apr 2026 12:54:01 -0600 Subject: [PATCH 07/27] style. --- .../numerics/functional/tests/functional_comparisons.cpp | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/src/smith/numerics/functional/tests/functional_comparisons.cpp b/src/smith/numerics/functional/tests/functional_comparisons.cpp index 95a7d95d42..031bb56ee5 100644 --- a/src/smith/numerics/functional/tests/functional_comparisons.cpp +++ b/src/smith/numerics/functional/tests/functional_comparisons.cpp @@ -497,8 +497,7 @@ double time_on_slowest_rank(Function&& function) function(); auto stop = std::chrono::steady_clock::now(); - double elapsed = - std::chrono::duration_cast>(stop - start).count(); + double elapsed = std::chrono::duration_cast>(stop - start).count(); double max_elapsed = elapsed; if (num_ranks > 1) { MPI_Allreduce(&elapsed, &max_elapsed, 1, MPI_DOUBLE, MPI_MAX, MPI_COMM_WORLD); @@ -562,8 +561,8 @@ TEST(Elasticity, DiagonalAssemblyBenchmark) (void)num_ranks; if (rank == 0) { std::cout << "DiagonalAssemblyBenchmark direct_seconds=" << direct_time / samples - << " sparse_getdiag_seconds=" << 
sparse_time / samples - << " speedup=" << sparse_time / direct_time << std::endl; + << " sparse_getdiag_seconds=" << sparse_time / samples << " speedup=" << sparse_time / direct_time + << std::endl; } EXPECT_GT(sparse_time / direct_time, 5.0); From 538cc489e10f9e8148a96f67288d015cdafc2175 Mon Sep 17 00:00:00 2001 From: Michael Tupek Date: Fri, 8 May 2026 09:13:08 -0600 Subject: [PATCH 08/27] Working on various potential improvements to the trust-region solver. --- src/smith/numerics/CMakeLists.txt | 4 +- src/smith/numerics/equation_solver.cpp | 1205 +++++++++++++++-- src/smith/numerics/equation_solver.hpp | 186 +++ .../numerics/mfem_trust_region_subspace.cpp | 589 ++++++++ ...er.cpp => petsc_trust_region_subspace.cpp} | 150 +- src/smith/numerics/solver_config.hpp | 24 + src/smith/numerics/tests/CMakeLists.txt | 3 +- .../numerics/tests/test_equationsolver.cpp | 7 +- .../tests/test_trust_region_solver_mfem.cpp | 500 +++++++ ...cpp => test_trust_region_solver_petsc.cpp} | 78 +- .../numerics/trust_region_cubic_subspace.cpp | 461 +++++++ src/smith/numerics/trust_region_solver.hpp | 71 +- src/smith/physics/solid_mechanics.hpp | 115 ++ .../physics/tests/shallow_arch_buckling.cpp | 253 +++- 14 files changed, 3344 insertions(+), 302 deletions(-) create mode 100644 src/smith/numerics/mfem_trust_region_subspace.cpp rename src/smith/numerics/{trust_region_solver.cpp => petsc_trust_region_subspace.cpp} (64%) create mode 100644 src/smith/numerics/tests/test_trust_region_solver_mfem.cpp rename src/smith/numerics/tests/{test_trust_region_solver.cpp => test_trust_region_solver_petsc.cpp} (62%) create mode 100644 src/smith/numerics/trust_region_cubic_subspace.cpp diff --git a/src/smith/numerics/CMakeLists.txt b/src/smith/numerics/CMakeLists.txt index 6df0f7eb12..e8c767394d 100644 --- a/src/smith/numerics/CMakeLists.txt +++ b/src/smith/numerics/CMakeLists.txt @@ -19,7 +19,9 @@ set(numerics_headers set(numerics_sources equation_solver.cpp - trust_region_solver.cpp + 
petsc_trust_region_subspace.cpp + mfem_trust_region_subspace.cpp + trust_region_cubic_subspace.cpp odes.cpp petsc_solvers.cpp block_preconditioner.cpp diff --git a/src/smith/numerics/equation_solver.cpp b/src/smith/numerics/equation_solver.cpp index 923f94c182..29d9af1e3f 100644 --- a/src/smith/numerics/equation_solver.cpp +++ b/src/smith/numerics/equation_solver.cpp @@ -6,6 +6,7 @@ #include "smith/numerics/equation_solver.hpp" +#include #include #include #include @@ -16,6 +17,7 @@ #include #include #include +#include #include "smith/smith_config.hpp" #include "smith/infrastructure/profiling.hpp" @@ -24,6 +26,17 @@ namespace smith { +namespace { + +using Clock = std::chrono::steady_clock; + +double secondsSince(Clock::time_point start) +{ + return std::chrono::duration_cast>(Clock::now() - start).count(); +} + +} // namespace + /// Newton solver with a 2-way line-search. Reverts to regular Newton if max_line_search_iterations is set to 0. class NewtonSolver : public mfem::NewtonSolver { protected: @@ -251,11 +264,22 @@ struct TrustRegionResults { H_z.SetSize(size); d_old.SetSize(size); H_d_old.SetSize(size); + H_d_old_at_accept.SetSize(size); d.SetSize(size); H_d.SetSize(size); Pr.SetSize(size); cauchy_point.SetSize(size); H_cauchy_point.SetSize(size); + z = 0.0; + H_z = 0.0; + d_old = 0.0; + H_d_old = 0.0; + H_d_old_at_accept = 0.0; + d = 0.0; + H_d = 0.0; + Pr = 0.0; + cauchy_point = 0.0; + H_cauchy_point = 0.0; } /// resets trust region results for a new outer iteration @@ -282,6 +306,10 @@ struct TrustRegionResults { mfem::Vector d_old; /// action of hessian on previous step z_old mfem::Vector H_d_old; + /// action of previous accepted hessian on previous step z_old + mfem::Vector H_d_old_at_accept; + /// true after at least one accepted line-search step has populated d_old + bool has_d_old = false; /// incrementalCG direction mfem::Vector d; /// action of hessian on direction d @@ -299,9 +327,9 @@ struct TrustRegionResults { }; /// trust region printing 
utility function -void printTrustRegionInfo(double realObjective, double modelObjective, size_t cgIters, double trSize, bool willAccept) +void printTrustRegionInfo(double realWork, double modelObjective, size_t cgIters, double trSize, bool willAccept) { - mfem::out << "real energy = " << std::setw(13) << realObjective << ", model energy = " << std::setw(13) + mfem::out << "real work = " << std::setw(13) << realWork << ", model energy = " << std::setw(13) << modelObjective << ", cg iter = " << std::setw(7) << cgIters << ", next tr size = " << std::setw(8) << trSize << ", accepting = " << willAccept << std::endl; } @@ -327,6 +355,14 @@ class TrustRegion : public mfem::NewtonSolver { mutable std::vector> left_mosts; /// the action of the stiffness/hessian (H) on the left most eigenvectors mutable std::vector> H_left_mosts; + /// previous accepted-iteration Hessian actions on the retained left most eigenvectors + mutable std::vector> previous_H_left_mosts; + /// accepted TrustRegion steps, newest first + mutable std::vector> accepted_step_history; + /// initial state for this nonlinear solve, used as an optional history direction + mutable mfem::Vector solve_start_x; + mutable mfem::Vector min_residual_x; + mutable double min_residual_norm = -1.0; /// nonlinear solution options NonlinearSolverOptions nonlinear_options; @@ -343,14 +379,148 @@ class TrustRegion : public mfem::NewtonSolver { public: /// internal counter for hess-vecs mutable size_t num_hess_vecs = 0; + /// internal counter for model CG hess-vecs + mutable size_t num_model_hess_vecs = 0; + /// internal counter for Cauchy-point hess-vecs + mutable size_t num_cauchy_hess_vecs = 0; + /// internal counter for line-search hess-vecs + mutable size_t num_line_search_hess_vecs = 0; /// internal counter for preconditions mutable size_t num_preconds = 0; /// internal counter for residuals mutable size_t num_residuals = 0; /// internal counter for subspace solves mutable size_t num_subspace_solves = 0; + /// internal 
counter for retained-leftmost Hessian-vector products used by subspace solves + mutable size_t num_subspace_leftmost_hess_vecs = 0; + /// internal counter for batched Hessian-vector groups used by subspace solves + mutable size_t num_subspace_hess_vec_batches = 0; + /// internal counter for Hessian-vector products inside subspace batches + mutable size_t num_subspace_batched_hess_vecs = 0; + /// internal counter for accepted-step history vectors added to subspace solves + mutable size_t num_subspace_past_step_vectors = 0; + /// internal counter for accepted-step history Hessian-vector products + mutable size_t num_subspace_past_step_hess_vecs = 0; + /// internal counter for nonlinear-solve-start directions added to subspace solves + mutable size_t num_subspace_solve_start_vectors = 0; + /// internal counter for nonlinear-solve-start Hessian-vector products + mutable size_t num_subspace_solve_start_hess_vecs = 0; + /// internal counter for quadratic subspace backend solves + mutable size_t num_quadratic_subspace_solves = 0; + /// internal counter for cubic subspace backend attempts + mutable size_t num_cubic_subspace_attempts = 0; + /// internal counter for cubic subspace candidates used + mutable size_t num_cubic_subspace_uses = 0; + /// internal counter for cubic attempts that returned quadratic candidate + mutable size_t num_cubic_subspace_quadratic_fallbacks = 0; /// internal counter for matrix assembles mutable size_t num_jacobian_assembles = 0; + /// internal counter for JacobianOperator evaluations + mutable size_t num_jacobian_operator_evals = 0; + /// internal counter for direct diagonal assemblies + mutable size_t num_diagonal_assembles = 0; + /// internal counter for model CG iterations + mutable size_t num_cg_iterations = 0; + /// internal counter for preconditioner operator updates + mutable size_t num_preconditioner_updates = 0; + /// internal counter for nonmonotone accepted steps + mutable size_t num_nonmonotone_work_accepts = 0; + /// internal 
counter for accepted steps that monotone acceptance would reject + mutable size_t num_monotone_work_would_reject = 0; + /// time spent evaluating residuals + mutable double residual_seconds = 0.0; + /// time spent applying Hessian-vector products + mutable double hess_vec_seconds = 0.0; + /// time spent applying model CG Hessian-vector products + mutable double model_hess_vec_seconds = 0.0; + /// time spent applying Cauchy-point Hessian-vector products + mutable double cauchy_hess_vec_seconds = 0.0; + /// time spent applying line-search Hessian-vector products + mutable double line_search_hess_vec_seconds = 0.0; + /// time spent applying JacobianOperator Hessian-vector products + mutable double jacobian_operator_hess_vec_seconds = 0.0; + /// time spent evaluating JacobianOperator factories + mutable double jacobian_operator_eval_seconds = 0.0; + /// time spent directly assembling diagonals + mutable double diagonal_assembly_seconds = 0.0; + /// time spent inverting direct diagonals + mutable double diagonal_invert_seconds = 0.0; + /// time spent applying preconditioners + mutable double preconditioner_seconds = 0.0; + /// total time spent in the nonlinear solve + mutable double total_seconds = 0.0; + /// time spent solving trust-region model problems + mutable double model_solve_seconds = 0.0; + /// total time spent in trust-region subspace solves + mutable double subspace_seconds = 0.0; + /// time spent building retained leftmost subspace directions + mutable double subspace_leftmost_seconds = 0.0; + /// time spent in subspace Hessian-vector batches + mutable double subspace_hess_vec_batch_seconds = 0.0; + /// time spent removing dependent directions for subspace solves + mutable double subspace_filter_seconds = 0.0; + /// time spent in dense subspace backend assembly/solve work + mutable double subspace_backend_seconds = 0.0; + /// time spent in subspace postprocessing and model-energy comparison + mutable double subspace_finalize_seconds = 0.0; + /// time spent 
building the Cauchy point + mutable double cauchy_point_seconds = 0.0; + /// time spent constructing dogleg steps + mutable double dogleg_seconds = 0.0; + /// time spent in line-search and trust-radius acceptance logic + mutable double line_search_seconds = 0.0; + /// time spent in dot products + mutable double dot_seconds = 0.0; + /// number of dot products + mutable size_t num_dot_products = 0; + /// number of dot product batches/reductions + mutable size_t num_dot_reductions = 0; + /// number of dot products in trust-region model solves + mutable size_t num_model_dot_products = 0; + /// number of dot products in Cauchy-point construction + mutable size_t num_cauchy_dot_products = 0; + /// number of dot products in dogleg construction + mutable size_t num_dogleg_dot_products = 0; + /// number of dot products in line-search and acceptance logic + mutable size_t num_line_search_dot_products = 0; + /// number of setup dot products outside the main per-step kernels + mutable size_t num_setup_dot_products = 0; + /// time spent in trust-region model-solve dot products + mutable double model_dot_seconds = 0.0; + /// time spent in Cauchy-point dot products + mutable double cauchy_dot_seconds = 0.0; + /// time spent in dogleg dot products + mutable double dogleg_dot_seconds = 0.0; + /// time spent in line-search dot products + mutable double line_search_dot_seconds = 0.0; + /// time spent in setup dot products + mutable double setup_dot_seconds = 0.0; + /// time spent in vector add/update operations + mutable double vector_update_seconds = 0.0; + /// time spent in vector copies and scaling operations + mutable double vector_copy_scale_seconds = 0.0; + /// time spent in boundary projection operations + mutable double projection_seconds = 0.0; + /// time spent assembling Jacobians + mutable double jacobian_assembly_seconds = 0.0; + /// time spent refreshing preconditioners + mutable double preconditioner_update_seconds = 0.0; + /// time spent in preconditioner SetOperator 
calls + mutable double preconditioner_setup_seconds = 0.0; + /// current accumulated actual work-surrogate level for nonmonotone acceptance + mutable double current_work_objective = 0.0; + /// last nonmonotone reference work surrogate + mutable double last_nonmonotone_work_reference = 0.0; + /// Optional JacobianOperator factory + JacobianOperatorFactory jacobian_operator_factory; + /// Cached JacobianOperator for current TrustRegion iteration + mutable std::unique_ptr current_jacobian_operator; + /// Inverted scalar diagonal preconditioner for JacobianOperator mode + mutable mfem::Vector inverse_diagonal_preconditioner; + /// Current assembled Hessian clone used to preserve a valid previous Hessian + mutable std::unique_ptr current_hessian; + /// Previous assembled Hessian used for cubic finite-difference subspace models + mutable std::unique_ptr previous_hessian; #ifdef MFEM_USE_MPI /// constructor @@ -361,15 +531,211 @@ class TrustRegion : public mfem::NewtonSolver { } #endif + /// Timed dot product with global and grouped accounting. + double timedDot(const mfem::Vector& a, const mfem::Vector& b, size_t& group_count, double& group_seconds) const + { + auto start = Clock::now(); + const double value = Dot(a, b); + const double seconds = secondsSince(start); + ++num_dot_products; + ++num_dot_reductions; + ++group_count; + dot_seconds += seconds; + group_seconds += seconds; + return value; + } + + /// Timed pair of dot products with one local vector pass and one MPI reduction when possible. 
+ std::pair timedDot2(const mfem::Vector& a0, const mfem::Vector& b0, const mfem::Vector& a1, + const mfem::Vector& b1, size_t& group_count, double& group_seconds) const + { + if (dot_oper) { + return {timedDot(a0, b0, group_count, group_seconds), timedDot(a1, b1, group_count, group_seconds)}; + } + + MFEM_ASSERT(a0.Size() == b0.Size(), "Incompatible vector sizes."); + MFEM_ASSERT(a1.Size() == b1.Size(), "Incompatible vector sizes."); + + auto start = Clock::now(); + mfem::real_t products[2] = {0.0, 0.0}; + if (a0.Size() == a1.Size()) { + for (int i = 0; i < a0.Size(); ++i) { + products[0] += a0[i] * b0[i]; + products[1] += a1[i] * b1[i]; + } + } else { + for (int i = 0; i < a0.Size(); ++i) { + products[0] += a0[i] * b0[i]; + } + for (int i = 0; i < a1.Size(); ++i) { + products[1] += a1[i] * b1[i]; + } + } + +#ifdef MFEM_USE_MPI + const MPI_Comm dot_comm = GetComm(); + if (dot_comm != MPI_COMM_NULL) { + mfem::real_t global_products[2] = {0.0, 0.0}; + MPI_Allreduce(products, global_products, 2, MFEM_MPI_REAL_T, MPI_SUM, dot_comm); + products[0] = global_products[0]; + products[1] = global_products[1]; + } +#endif + + const double seconds = secondsSince(start); + num_dot_products += 2; + ++num_dot_reductions; + group_count += 2; + dot_seconds += seconds; + group_seconds += seconds; + return {products[0], products[1]}; + } + + struct Dot4Result { + double v0 = 0.0; + double v1 = 0.0; + double v2 = 0.0; + double v3 = 0.0; + }; + + /// Timed four-dot batch with one local vector pass and one MPI reduction when possible. 
+ Dot4Result timedDot4(const mfem::Vector& a0, const mfem::Vector& b0, const mfem::Vector& a1, const mfem::Vector& b1, + const mfem::Vector& a2, const mfem::Vector& b2, const mfem::Vector& a3, + const mfem::Vector& b3, size_t& group_count, double& group_seconds) const + { + if (dot_oper) { + return {.v0 = timedDot(a0, b0, group_count, group_seconds), + .v1 = timedDot(a1, b1, group_count, group_seconds), + .v2 = timedDot(a2, b2, group_count, group_seconds), + .v3 = timedDot(a3, b3, group_count, group_seconds)}; + } + + MFEM_ASSERT(a0.Size() == b0.Size(), "Incompatible vector sizes."); + MFEM_ASSERT(a1.Size() == b1.Size(), "Incompatible vector sizes."); + MFEM_ASSERT(a2.Size() == b2.Size(), "Incompatible vector sizes."); + MFEM_ASSERT(a3.Size() == b3.Size(), "Incompatible vector sizes."); + MFEM_ASSERT(a0.Size() == a1.Size() && a0.Size() == a2.Size() && a0.Size() == a3.Size(), + "timedDot4 currently requires equal vector sizes."); + + auto start = Clock::now(); + mfem::real_t products[4] = {0.0, 0.0, 0.0, 0.0}; + for (int i = 0; i < a0.Size(); ++i) { + products[0] += a0[i] * b0[i]; + products[1] += a1[i] * b1[i]; + products[2] += a2[i] * b2[i]; + products[3] += a3[i] * b3[i]; + } + +#ifdef MFEM_USE_MPI + const MPI_Comm dot_comm = GetComm(); + if (dot_comm != MPI_COMM_NULL) { + mfem::real_t global_products[4] = {0.0, 0.0, 0.0, 0.0}; + MPI_Allreduce(products, global_products, 4, MFEM_MPI_REAL_T, MPI_SUM, dot_comm); + for (int i = 0; i < 4; ++i) { + products[i] = global_products[i]; + } + } +#endif + + const double seconds = secondsSince(start); + num_dot_products += 4; + ++num_dot_reductions; + group_count += 4; + dot_seconds += seconds; + group_seconds += seconds; + return {.v0 = products[0], .v1 = products[1], .v2 = products[2], .v3 = products[3]}; + } + + template + void batchedSubspaceHessVec(HessVecFunc hess_vec_func, const std::vector& inputs, + const std::vector& outputs) const + { + MFEM_VERIFY(inputs.size() == outputs.size(), "Subspace Hessian-vector batch 
input/output size mismatch"); + if (inputs.empty()) { + return; + } + + auto start = Clock::now(); + ++num_subspace_hess_vec_batches; + num_subspace_batched_hess_vecs += inputs.size(); + for (size_t i = 0; i < inputs.size(); ++i) { + hess_vec_func(*inputs[i], *outputs[i]); + } + subspace_hess_vec_batch_seconds += secondsSince(start); + } + + template + void timedModelHessVec(HessVecFunc hess_vec_func, const mfem::Vector& input, mfem::Vector& output) const + { + auto start = Clock::now(); + hess_vec_func(input, output); + model_hess_vec_seconds += secondsSince(start); + ++num_model_hess_vecs; + } + + template + void timedCauchyHessVec(HessVecFunc hess_vec_func, const mfem::Vector& input, mfem::Vector& output) const + { + auto start = Clock::now(); + hess_vec_func(input, output); + cauchy_hess_vec_seconds += secondsSince(start); + ++num_cauchy_hess_vecs; + } + + template + void timedLineSearchHessVec(HessVecFunc hess_vec_func, const mfem::Vector& input, mfem::Vector& output) const + { + auto start = Clock::now(); + hess_vec_func(input, output); + line_search_hess_vec_seconds += secondsSince(start); + ++num_line_search_hess_vecs; + } + + double nonmonotoneWorkReference(const std::vector& work_objective_history) const + { + if (work_objective_history.empty()) { + return current_work_objective; + } + return *std::max_element(work_objective_history.begin(), work_objective_history.end()); + } + + void pushWorkObjectiveHistory(std::vector& work_objective_history, double objective) const + { + const int window = nonlinear_options.trust_nonmonotone_window; + if (window <= 0) { + return; + } + work_objective_history.push_back(objective); + while (work_objective_history.size() > static_cast(window)) { + work_objective_history.erase(work_objective_history.begin()); + } + } + + void pushAcceptedStepHistory(const mfem::Vector& step) const + { + if (nonlinear_options.trust_num_past_steps <= 0) { + accepted_step_history.clear(); + return; + } + + 
accepted_step_history.insert(accepted_step_history.begin(), std::make_shared(step)); + const size_t max_size = static_cast(nonlinear_options.trust_num_past_steps) + 1; + while (accepted_step_history.size() > max_size) { + accepted_step_history.pop_back(); + } + } + /// finds tau s.t. (z + tau*d)^2 = trSize^2 void projectToBoundaryWithCoefs(mfem::Vector& z, const mfem::Vector& d, double delta, double zz, double zd, double dd) const { + auto start = Clock::now(); // find z + tau d double deltadelta_m_zz = delta * delta - zz; if (deltadelta_m_zz == 0) return; // already on boundary double tau = (std::sqrt(deltadelta_m_zz * dd + zd * zd) - zd) / dd; z.Add(tau, d); + projection_seconds += secondsSince(start); } /// solve the exact trust-region subspace problem with directions ds, and the leftmosts @@ -378,10 +744,14 @@ class TrustRegion : public mfem::NewtonSolver { [[maybe_unused]] const std::vector ds, [[maybe_unused]] const std::vector Hds, [[maybe_unused]] const mfem::Vector& g, [[maybe_unused]] double delta, - [[maybe_unused]] int num_leftmost) const + [[maybe_unused]] int num_leftmost, + [[maybe_unused]] std::vector>& candidate_left_mosts, + [[maybe_unused]] const mfem::Vector& previous_step, + [[maybe_unused]] const mfem::Vector* previous_H_previous_step, + [[maybe_unused]] bool allow_cubic_subspace) const { -#ifdef SMITH_USE_SLEPC SMITH_MARK_FUNCTION; + auto subspace_start = Clock::now(); ++num_subspace_solves; std::vector directions; @@ -400,15 +770,6 @@ class TrustRegion : public mfem::NewtonSolver { H_directions.emplace_back(H_left.get()); } - try { - std::tie(directions, H_directions) = removeDependentDirections(directions, H_directions); - } catch (const std::exception& e) { - if (print_level >= 2) { - mfem::out << "remove dependent directions failed with " << e.what() << std::endl; - } - return; - } - mfem::Vector b(g); b *= -1; @@ -418,18 +779,45 @@ class TrustRegion : public mfem::NewtonSolver { double energy_change; try { - std::tie(sol, leftvecs, 
leftvals, energy_change) = - solveSubspaceProblem(directions, H_directions, b, delta, num_leftmost); + auto backend_start = Clock::now(); + if (nonlinear_options.trust_use_cubic_subspace && allow_cubic_subspace && previous_hessian) { + std::vector previous_H_vectors; + std::vector previous_H_directions; + previous_H_vectors.reserve(directions.size()); + previous_H_directions.reserve(directions.size()); + for (const auto* direction : directions) { + previous_H_vectors.emplace_back(direction->Size()); + previous_hessian->Mult(*direction, previous_H_vectors.back()); + previous_H_directions.emplace_back(&previous_H_vectors.back()); + } + ++num_cubic_subspace_attempts; + bool used_cubic = false; + std::tie(sol, leftvecs, leftvals, energy_change) = solveCubicSubspaceProblemMfem( + directions, H_directions, previous_H_directions, previous_step, b, delta, num_leftmost, &used_cubic); + if (used_cubic) { + ++num_cubic_subspace_uses; + } else { + ++num_cubic_subspace_quadratic_fallbacks; + ++num_quadratic_subspace_solves; + } + } else { + ++num_quadratic_subspace_solves; + std::tie(sol, leftvecs, leftvals, energy_change) = + solveSubspaceProblem(directions, H_directions, b, delta, num_leftmost); + } + subspace_backend_seconds += secondsSince(backend_start); } catch (const std::exception& e) { - if (print_level == 1) { + if (print_level >= 1) { mfem::out << "subspace solve failed with " << e.what() << std::endl; } + subspace_seconds += secondsSince(subspace_start); return; } - left_mosts.clear(); + auto finalize_start = Clock::now(); + candidate_left_mosts.clear(); for (auto& lv : leftvecs) { - left_mosts.emplace_back(std::move(lv)); + candidate_left_mosts.emplace_back(std::move(lv)); } double base_energy = computeEnergy(g, hess_vec_func, z); @@ -444,43 +832,54 @@ class TrustRegion : public mfem::NewtonSolver { if (subspace_energy < base_energy) { z = sol; } -#endif + subspace_finalize_seconds += secondsSince(finalize_start); + subspace_seconds += secondsSince(subspace_start); 
} /// finds tau s.t. (z + tau*(y-z))^2 = trSize^2 void projectToBoundaryBetweenWithCoefs(mfem::Vector& z, const mfem::Vector& y, double trSize, double zz, double zy, double yy) const { + auto start = Clock::now(); double dd = yy - 2 * zy + zz; double zd = zy - zz; double tau = (std::sqrt((trSize * trSize - zz) * dd + zd * zd) - zd) / dd; z.Add(-tau, z); z.Add(tau, y); + projection_seconds += secondsSince(start); } /// take a dogleg step in direction s, solution norm must be within trSize void doglegStep(const mfem::Vector& cp, const mfem::Vector& newtonP, double trSize, mfem::Vector& s) const { SMITH_MARK_FUNCTION; - // MRT, could optimize some of these eventually, compute on the outside and save - double cc = Dot(cp, cp); - double nn = Dot(newtonP, newtonP); + auto [cc, nn] = timedDot2(cp, cp, newtonP, newtonP, num_dogleg_dot_products, dogleg_dot_seconds); double tt = trSize * trSize; + auto update_start = Clock::now(); s = 0.0; + vector_copy_scale_seconds += secondsSince(update_start); if (cc >= tt) { + update_start = Clock::now(); add(s, std::sqrt(tt / cc), cp, s); + vector_update_seconds += secondsSince(update_start); } else if (cc > nn) { if (print_level >= 2) { mfem::out << "cp outside newton, preconditioner likely inaccurate\n"; } + update_start = Clock::now(); add(s, 1.0, cp, s); + vector_update_seconds += secondsSince(update_start); } else if (nn > tt) { // on the dogleg (we have nn >= cc, and tt >= cc) + update_start = Clock::now(); add(s, 1.0, cp, s); - double cn = Dot(cp, newtonP); + vector_update_seconds += secondsSince(update_start); + double cn = timedDot(cp, newtonP, num_dogleg_dot_products, dogleg_dot_seconds); projectToBoundaryBetweenWithCoefs(s, newtonP, trSize, cc, cn, nn); } else { + update_start = Clock::now(); s = newtonP; + vector_copy_scale_seconds += secondsSince(update_start); } } @@ -489,18 +888,18 @@ class TrustRegion : public mfem::NewtonSolver { double computeEnergy(const mfem::Vector& r_local, const HessVecFunc& H, const 
mfem::Vector& z) const { SMITH_MARK_FUNCTION; - double rz = Dot(r_local, z); + double rz = timedDot(r_local, z, num_line_search_dot_products, line_search_dot_seconds); mfem::Vector tmp(r_local); tmp = 0.0; H(z, tmp); - return rz + 0.5 * Dot(z, tmp); + return rz + 0.5 * timedDot(z, tmp, num_line_search_dot_products, line_search_dot_seconds); } /// Minimize quadratic sub-problem given residual vector, the action of the stiffness and a preconditioner template void solveTrustRegionModelProblem(const mfem::Vector& r0, mfem::Vector& rCurrent, HessVecFunc hess_vec_func, PrecondFunc precond, const TrustRegionSettings& settings, double& trSize, - TrustRegionResults& results) const + TrustRegionResults& results, double r0_norm_squared) const { SMITH_MARK_FUNCTION; // minimize r0@z + 0.5*z@J@z @@ -515,7 +914,7 @@ class TrustRegion : public mfem::NewtonSolver { const double cg_tol_squared = settings.cg_tol * settings.cg_tol; - if (Dot(r0, r0) <= cg_tol_squared && settings.min_cg_iterations == 0) { + if (r0_norm_squared <= cg_tol_squared && settings.min_cg_iterations == 0) { if (print_level >= 2) { mfem::out << "Trust region solution state within tolerance on first iteration." 
<< "\n"; @@ -523,37 +922,43 @@ class TrustRegion : public mfem::NewtonSolver { return; } + auto copy_start = Clock::now(); rCurrent = r0; + vector_copy_scale_seconds += secondsSince(copy_start); precond(rCurrent, Pr); // d = -Pr + copy_start = Clock::now(); d = Pr; d *= -1.0; z = 0.0; + vector_copy_scale_seconds += secondsSince(copy_start); double zz = 0.; - double rPr = Dot(rCurrent, Pr); - double zd = 0.0; - double dd = Dot(d, d); + double rPr = timedDot(rCurrent, Pr, num_model_dot_products, model_dot_seconds); // std::cout << "initial energy = " << computeEnergy(r0, hess_vec_func, z) << std::endl; for (cgIter = 1; cgIter <= settings.max_cg_iterations; ++cgIter) { - // check if this is a descent direction - if (Dot(d, rCurrent) > 0) { + hess_vec_func(d, Hd); + const auto dots = timedDot4(d, rCurrent, d, Hd, z, d, d, d, num_model_dot_products, model_dot_seconds); + double descent_check = dots.v0; + double curvature = dots.v1; + double zd = dots.v2; + double dd = dots.v3; + if (descent_check > 0) { + copy_start = Clock::now(); d *= -1; + Hd *= -1; + vector_copy_scale_seconds += secondsSince(copy_start); results.interior_status = TrustRegionResults::Status::NonDescentDirection; + descent_check *= -1.0; + curvature *= -1.0; + zd *= -1.0; } - hess_vec_func(d, Hd); - const double curvature = Dot(d, Hd); const double alphaCg = curvature != 0.0 ? rPr / curvature : 0.0; - - auto& zPred = Pr; // re-use Pr memory. - // This predicted step will no longer be used by the time Pr is, so we can avoid an extra - // vector floating around - add(z, alphaCg, d, zPred); - double zzNp1 = Dot(zPred, zPred); + const double zzNp1 = zz + 2.0 * alphaCg * zd + alphaCg * alphaCg * dd; const bool go_to_boundary = curvature <= 0 || zzNp1 >= trSize * trSize; if (go_to_boundary) { @@ -566,7 +971,16 @@ class TrustRegion : public mfem::NewtonSolver { return; } + auto& zPred = Pr; // re-use Pr memory. 
+ // This predicted step will no longer be used by the time Pr is, so we can avoid an extra + // vector floating around + auto update_start = Clock::now(); + add(z, alphaCg, d, zPred); + vector_update_seconds += secondsSince(update_start); + + copy_start = Clock::now(); z = zPred; + vector_copy_scale_seconds += secondsSince(copy_start); if (results.interior_status == TrustRegionResults::Status::NonDescentDirection) { if (print_level >= 2) { @@ -575,68 +989,241 @@ class TrustRegion : public mfem::NewtonSolver { return; } + update_start = Clock::now(); add(rCurrent, alphaCg, Hd, rCurrent); + vector_update_seconds += secondsSince(update_start); precond(rCurrent, Pr); - double rPrNp1 = Dot(rCurrent, Pr); - - if (Dot(rCurrent, rCurrent) <= cg_tol_squared && cgIter >= settings.min_cg_iterations) { + auto [rPrNp1, r_current_norm_squared] = + timedDot2(rCurrent, Pr, rCurrent, rCurrent, num_model_dot_products, model_dot_seconds); + if (r_current_norm_squared <= cg_tol_squared && cgIter >= settings.min_cg_iterations) { return; } double beta = rPrNp1 / rPr; rPr = rPrNp1; + update_start = Clock::now(); add(-1.0, Pr, beta, d, d); + vector_update_seconds += secondsSince(update_start); zz = zzNp1; - zd = Dot(z, d); - dd = Dot(d, d); } cgIter--; // if all cg iterations are taken, correct for output } + std::unique_ptr cloneAssembledOperator(const mfem::Operator& op) const + { + if (const auto* hypre_matrix = dynamic_cast(&op)) { + return std::make_unique(*hypre_matrix); + } + if (const auto* sparse_matrix = dynamic_cast(&op)) { + return std::make_unique(*sparse_matrix); + } + if (const auto* block_operator = dynamic_cast(&op)) { + return buildMonolithicMatrix(*block_operator); + } + return nullptr; + } + /// assemble the jacobian void assembleJacobian(const mfem::Vector& x) const { SMITH_MARK_FUNCTION; + auto start = Clock::now(); ++num_jacobian_assembles; + if (nonlinear_options.trust_use_cubic_subspace) { + previous_hessian = std::move(current_hessian); + } grad = 
&oper->GetGradient(x); if (nonlinear_options.force_monolithic) { auto* grad_blocked = dynamic_cast(grad); if (grad_blocked) grad = buildMonolithicMatrix(*grad_blocked).release(); } + if (nonlinear_options.trust_use_cubic_subspace) { + current_hessian = cloneAssembledOperator(*grad); + } + jacobian_assembly_seconds += secondsSince(start); + } + + /// Set an optional JacobianOperator factory. + void setJacobianOperator(JacobianOperatorFactory jacobian_operator) + { + jacobian_operator_factory = std::move(jacobian_operator); + } + + /// Evaluate and cache the JacobianOperator at x. + void updateJacobianOperator(const mfem::Vector& x) const + { + SMITH_MARK_FUNCTION; + SLIC_ERROR_ROOT_IF(!jacobian_operator_factory, "No JacobianOperator factory is registered."); + auto start = Clock::now(); + ++num_jacobian_operator_evals; + current_jacobian_operator = jacobian_operator_factory(x); + SLIC_ERROR_ROOT_IF(!current_jacobian_operator, "JacobianOperator factory returned a null operator."); + jacobian_operator_eval_seconds += secondsSince(start); + } + + /// Assemble and invert the scalar diagonal preconditioner from the current JacobianOperator. 
+ void updateDiagonalPreconditioner() const + { + SMITH_MARK_FUNCTION; + SLIC_ERROR_ROOT_IF(!current_jacobian_operator, "Cannot build diagonal preconditioner without a JacobianOperator."); + + auto diagonal_start = Clock::now(); + current_jacobian_operator->assembleDiagonal(inverse_diagonal_preconditioner); + diagonal_assembly_seconds += secondsSince(diagonal_start); + ++num_diagonal_assembles; + + auto invert_start = Clock::now(); + double max_abs_diag = 0.0; + for (int i = 0; i < inverse_diagonal_preconditioner.Size(); ++i) { + max_abs_diag = std::max(max_abs_diag, std::abs(inverse_diagonal_preconditioner[i])); + } + + const double floor = nonlinear_options.pcg_diagonal_floor * max_abs_diag; + SLIC_ERROR_ROOT_IF(!(floor > 0.0), "Cannot invert a zero Jacobian diagonal for TrustRegion preconditioning."); + for (int i = 0; i < inverse_diagonal_preconditioner.Size(); ++i) { + inverse_diagonal_preconditioner[i] = 1.0 / std::max(std::abs(inverse_diagonal_preconditioner[i]), floor); + } + diagonal_invert_seconds += secondsSince(invert_start); } /// evaluate the nonlinear residual mfem::real_t computeResidual(const mfem::Vector& x_, mfem::Vector& r_) const { SMITH_MARK_FUNCTION; + auto start = Clock::now(); ++num_residuals; oper->Mult(x_, r_); - return Norm(r_); + const auto norm = Norm(r_); + residual_seconds += secondsSince(start); + return norm; } - /// apply the action of the assembled Jacobian matrix to a vector + /// apply the action of the current Jacobian representation to a vector void hessVec(const mfem::Vector& x_, mfem::Vector& v_) const { SMITH_MARK_FUNCTION; + auto start = Clock::now(); ++num_hess_vecs; - grad->Mult(x_, v_); + if (nonlinear_options.trust_use_jacobian_operator) { + SLIC_ERROR_ROOT_IF(!current_jacobian_operator, "TrustRegion JacobianOperator mode has no current operator."); + current_jacobian_operator->Mult(x_, v_); + const double seconds = secondsSince(start); + hess_vec_seconds += seconds; + jacobian_operator_hess_vec_seconds += seconds; + 
} else { + grad->Mult(x_, v_); + hess_vec_seconds += secondsSince(start); + } } /// apply trust region specific preconditioner void precond(const mfem::Vector& x_, mfem::Vector& v_) const { SMITH_MARK_FUNCTION; + auto start = Clock::now(); ++num_preconds; - tr_precond.Mult(x_, v_); + if (nonlinear_options.trust_use_jacobian_operator) { + SLIC_ERROR_ROOT_IF(inverse_diagonal_preconditioner.Size() != x_.Size(), + "TrustRegion JacobianOperator diagonal preconditioner is not initialized."); + v_.SetSize(x_.Size()); + for (int i = 0; i < x_.Size(); ++i) { + v_[i] = inverse_diagonal_preconditioner[i] * x_[i]; + } + } else { + tr_precond.Mult(x_, v_); + } + preconditioner_seconds += secondsSince(start); }; + /// Return solver diagnostic counters. + TrustRegionDiagnostics diagnostics() const + { + return {.num_residuals = num_residuals, + .num_hess_vecs = num_hess_vecs, + .num_model_hess_vecs = num_model_hess_vecs, + .num_cauchy_hess_vecs = num_cauchy_hess_vecs, + .num_line_search_hess_vecs = num_line_search_hess_vecs, + .num_preconds = num_preconds, + .num_jacobian_assembles = num_jacobian_assembles, + .num_jacobian_operator_evals = num_jacobian_operator_evals, + .num_diagonal_assembles = num_diagonal_assembles, + .num_cg_iterations = num_cg_iterations, + .num_subspace_solves = num_subspace_solves, + .num_subspace_leftmost_hess_vecs = num_subspace_leftmost_hess_vecs, + .num_subspace_hess_vec_batches = num_subspace_hess_vec_batches, + .num_subspace_batched_hess_vecs = num_subspace_batched_hess_vecs, + .num_subspace_past_step_vectors = num_subspace_past_step_vectors, + .num_subspace_past_step_hess_vecs = num_subspace_past_step_hess_vecs, + .num_subspace_solve_start_vectors = num_subspace_solve_start_vectors, + .num_subspace_solve_start_hess_vecs = num_subspace_solve_start_hess_vecs, + .num_quadratic_subspace_solves = num_quadratic_subspace_solves, + .num_cubic_subspace_attempts = num_cubic_subspace_attempts, + .num_cubic_subspace_uses = num_cubic_subspace_uses, + 
.num_cubic_subspace_quadratic_fallbacks = num_cubic_subspace_quadratic_fallbacks, + .num_preconditioner_updates = num_preconditioner_updates, + .num_nonmonotone_work_accepts = num_nonmonotone_work_accepts, + .num_monotone_work_would_reject = num_monotone_work_would_reject, + .residual_seconds = residual_seconds, + .hess_vec_seconds = hess_vec_seconds, + .model_hess_vec_seconds = model_hess_vec_seconds, + .cauchy_hess_vec_seconds = cauchy_hess_vec_seconds, + .line_search_hess_vec_seconds = line_search_hess_vec_seconds, + .jacobian_operator_hess_vec_seconds = jacobian_operator_hess_vec_seconds, + .jacobian_operator_eval_seconds = jacobian_operator_eval_seconds, + .diagonal_assembly_seconds = diagonal_assembly_seconds, + .diagonal_invert_seconds = diagonal_invert_seconds, + .preconditioner_seconds = preconditioner_seconds, + .total_seconds = total_seconds, + .model_solve_seconds = model_solve_seconds, + .subspace_seconds = subspace_seconds, + .subspace_leftmost_seconds = subspace_leftmost_seconds, + .subspace_hess_vec_batch_seconds = subspace_hess_vec_batch_seconds, + .subspace_filter_seconds = subspace_filter_seconds, + .subspace_backend_seconds = subspace_backend_seconds, + .subspace_project_A_seconds = trustRegionSubspaceTimings().project_A_seconds, + .subspace_project_gram_seconds = trustRegionSubspaceTimings().project_gram_seconds, + .subspace_project_b_seconds = trustRegionSubspaceTimings().project_b_seconds, + .subspace_basis_seconds = trustRegionSubspaceTimings().basis_seconds, + .subspace_reduced_A_seconds = trustRegionSubspaceTimings().reduced_A_seconds, + .subspace_dense_eigensystem_seconds = trustRegionSubspaceTimings().dense_eigensystem_seconds, + .subspace_dense_trust_solve_seconds = trustRegionSubspaceTimings().dense_trust_solve_seconds, + .subspace_reconstruct_solution_seconds = trustRegionSubspaceTimings().reconstruct_solution_seconds, + .subspace_reconstruct_leftmost_seconds = trustRegionSubspaceTimings().reconstruct_leftmost_seconds, + 
.subspace_finalize_seconds = subspace_finalize_seconds, + .cauchy_point_seconds = cauchy_point_seconds, + .dogleg_seconds = dogleg_seconds, + .line_search_seconds = line_search_seconds, + .dot_seconds = dot_seconds, + .num_dot_products = num_dot_products, + .num_dot_reductions = num_dot_reductions, + .num_model_dot_products = num_model_dot_products, + .num_cauchy_dot_products = num_cauchy_dot_products, + .num_dogleg_dot_products = num_dogleg_dot_products, + .num_line_search_dot_products = num_line_search_dot_products, + .num_setup_dot_products = num_setup_dot_products, + .model_dot_seconds = model_dot_seconds, + .cauchy_dot_seconds = cauchy_dot_seconds, + .dogleg_dot_seconds = dogleg_dot_seconds, + .line_search_dot_seconds = line_search_dot_seconds, + .setup_dot_seconds = setup_dot_seconds, + .vector_update_seconds = vector_update_seconds, + .vector_copy_scale_seconds = vector_copy_scale_seconds, + .projection_seconds = projection_seconds, + .jacobian_assembly_seconds = jacobian_assembly_seconds, + .preconditioner_update_seconds = preconditioner_update_seconds, + .preconditioner_setup_seconds = preconditioner_setup_seconds, + .last_work_objective = current_work_objective, + .last_nonmonotone_work_reference = last_nonmonotone_work_reference}; + } + /// @overload void Mult(const mfem::Vector&, mfem::Vector& X) const { MFEM_ASSERT(oper != NULL, "the Operator is not set (use SetOperator)."); MFEM_ASSERT(prec != NULL, "the Solver is not set (use SetSolver)."); + auto total_start = Clock::now(); print_level = static_cast(std::max(nonlinear_options.print_level, 0)); print_level = print_options.iterations ? 
std::max(1, print_level) : print_level; @@ -645,13 +1232,87 @@ class TrustRegion : public mfem::NewtonSolver { using real_t = mfem::real_t; num_hess_vecs = 0; + num_model_hess_vecs = 0; + num_cauchy_hess_vecs = 0; + num_line_search_hess_vecs = 0; num_preconds = 0; num_residuals = 0; num_subspace_solves = 0; + num_subspace_leftmost_hess_vecs = 0; + num_subspace_hess_vec_batches = 0; + num_subspace_batched_hess_vecs = 0; + num_subspace_past_step_vectors = 0; + num_subspace_past_step_hess_vecs = 0; + num_subspace_solve_start_vectors = 0; + num_subspace_solve_start_hess_vecs = 0; + num_quadratic_subspace_solves = 0; + num_cubic_subspace_attempts = 0; + num_cubic_subspace_uses = 0; + num_cubic_subspace_quadratic_fallbacks = 0; num_jacobian_assembles = 0; + num_jacobian_operator_evals = 0; + num_diagonal_assembles = 0; + num_cg_iterations = 0; + num_preconditioner_updates = 0; + num_nonmonotone_work_accepts = 0; + num_monotone_work_would_reject = 0; + residual_seconds = 0.0; + hess_vec_seconds = 0.0; + model_hess_vec_seconds = 0.0; + cauchy_hess_vec_seconds = 0.0; + line_search_hess_vec_seconds = 0.0; + jacobian_operator_hess_vec_seconds = 0.0; + jacobian_operator_eval_seconds = 0.0; + diagonal_assembly_seconds = 0.0; + diagonal_invert_seconds = 0.0; + preconditioner_seconds = 0.0; + total_seconds = 0.0; + model_solve_seconds = 0.0; + subspace_seconds = 0.0; + subspace_leftmost_seconds = 0.0; + subspace_hess_vec_batch_seconds = 0.0; + subspace_filter_seconds = 0.0; + subspace_backend_seconds = 0.0; + subspace_finalize_seconds = 0.0; + cauchy_point_seconds = 0.0; + dogleg_seconds = 0.0; + line_search_seconds = 0.0; + dot_seconds = 0.0; + num_dot_products = 0; + num_dot_reductions = 0; + num_model_dot_products = 0; + num_cauchy_dot_products = 0; + num_dogleg_dot_products = 0; + num_line_search_dot_products = 0; + num_setup_dot_products = 0; + model_dot_seconds = 0.0; + cauchy_dot_seconds = 0.0; + dogleg_dot_seconds = 0.0; + line_search_dot_seconds = 0.0; + 
setup_dot_seconds = 0.0; + vector_update_seconds = 0.0; + vector_copy_scale_seconds = 0.0; + projection_seconds = 0.0; + jacobian_assembly_seconds = 0.0; + preconditioner_update_seconds = 0.0; + preconditioner_setup_seconds = 0.0; + current_work_objective = 0.0; + last_nonmonotone_work_reference = 0.0; + accepted_step_history.clear(); + resetTrustRegionSubspaceTimings(); + solve_start_x.SetSize(X.Size()); + solve_start_x = X; + min_residual_x.SetSize(X.Size()); + min_residual_x = X; + current_jacobian_operator.reset(); + inverse_diagonal_preconditioner.SetSize(0); + previous_H_left_mosts.clear(); + current_hessian.reset(); + previous_hessian.reset(); real_t norm, norm_goal = 0.0; norm = initial_norm = computeResidual(X, r); + min_residual_norm = initial_norm; if (norm == 0.0) return; norm_goal = std::max(rel_tol * initial_norm, abs_tol); @@ -660,6 +1321,11 @@ class TrustRegion : public mfem::NewtonSolver { mfem::out << "TrustRegion iteration " << std::setw(3) << 0 << " : ||r|| = " << std::setw(13) << norm << "\n"; } + SLIC_ERROR_ROOT_IF(nonlinear_options.trust_nonmonotone_window < 0, + "TrustRegion requires trust_nonmonotone_window >= 0"); + std::vector work_objective_history; + pushWorkObjectiveHistory(work_objective_history, current_work_objective); + prec->iterative_mode = false; tr_precond.iterative_mode = false; @@ -680,8 +1346,11 @@ class TrustRegion : public mfem::NewtonSolver { int subspace_option = nonlinear_options.subspace_option; int num_leftmost = nonlinear_options.num_leftmost; + auto copy_start = Clock::now(); scratch = 1.0; - double tr_size = nonlinear_options.trust_region_scaling * std::sqrt(Dot(scratch, scratch)); + vector_copy_scale_seconds += secondsSince(copy_start); + double tr_size = nonlinear_options.trust_region_scaling * + std::sqrt(timedDot(scratch, scratch, num_setup_dot_products, setup_dot_seconds)); size_t cumulative_cg_iters_from_last_precond_update = 0; int it = 0; @@ -712,12 +1381,26 @@ class TrustRegion : public mfem::NewtonSolver 
{ break; } - assembleJacobian(X); - - if (it == 0 || (trResults.cg_iterations_count >= settings.max_cg_iterations || - cumulative_cg_iters_from_last_precond_update >= settings.max_cumulative_iteration)) { - tr_precond.SetOperator(*grad); + if (nonlinear_options.trust_use_jacobian_operator) { + SLIC_ERROR_ROOT_IF(!jacobian_operator_factory, + "TrustRegion JacobianOperator mode requires a registered JacobianOperator factory."); + updateJacobianOperator(X); + updateDiagonalPreconditioner(); + ++num_preconditioner_updates; cumulative_cg_iters_from_last_precond_update = 0; + } else { + assembleJacobian(X); + + if (it == 0 || (trResults.cg_iterations_count >= settings.max_cg_iterations || + cumulative_cg_iters_from_last_precond_update >= settings.max_cumulative_iteration)) { + auto preconditioner_update_start = Clock::now(); + auto preconditioner_setup_start = Clock::now(); + tr_precond.SetOperator(*grad); + preconditioner_setup_seconds += secondsSince(preconditioner_setup_start); + preconditioner_update_seconds += secondsSince(preconditioner_update_start); + ++num_preconditioner_updates; + cumulative_cg_iters_from_last_precond_update = 0; + } } auto hess_vec_func = [&](const mfem::Vector& x_, mfem::Vector& v_) { hessVec(x_, v_); }; @@ -726,19 +1409,29 @@ class TrustRegion : public mfem::NewtonSolver { double cauchyPointNormSquared = tr_size * tr_size; trResults.reset(); - hess_vec_func(r, trResults.H_d); - const double gKg = Dot(r, trResults.H_d); - if (gKg > 0) { - const double alphaCp = -Dot(r, r) / gKg; - add(trResults.cauchy_point, alphaCp, r, trResults.cauchy_point); - cauchyPointNormSquared = Dot(trResults.cauchy_point, trResults.cauchy_point); - } else { - const double alphaTr = -tr_size / std::sqrt(Dot(r, r)); - add(trResults.cauchy_point, alphaTr, r, trResults.cauchy_point); - if (print_level >= 2) { - mfem::out << "Negative curvature un-preconditioned cauchy point direction found." 
- << "\n"; + { + auto cauchy_start = Clock::now(); + timedCauchyHessVec(hess_vec_func, r, trResults.H_d); + const double gKg = timedDot(r, trResults.H_d, num_cauchy_dot_products, cauchy_dot_seconds); + const double residual_norm_squared = norm * norm; + if (gKg > 0) { + const double alphaCp = -residual_norm_squared / gKg; + auto update_start = Clock::now(); + add(trResults.cauchy_point, alphaCp, r, trResults.cauchy_point); + vector_update_seconds += secondsSince(update_start); + cauchyPointNormSquared = + timedDot(trResults.cauchy_point, trResults.cauchy_point, num_cauchy_dot_products, cauchy_dot_seconds); + } else { + const double alphaTr = -tr_size / norm; + auto update_start = Clock::now(); + add(trResults.cauchy_point, alphaTr, r, trResults.cauchy_point); + vector_update_seconds += secondsSince(update_start); + if (print_level >= 2) { + mfem::out << "Negative curvature un-preconditioned cauchy point direction found." + << "\n"; + } } + cauchy_point_seconds += secondsSince(cauchy_start); } if (cauchyPointNormSquared >= tr_size * tr_size) { @@ -753,68 +1446,193 @@ class TrustRegion : public mfem::NewtonSolver { trResults.interior_status = TrustRegionResults::Status::OnBoundary; } else { settings.cg_tol = std::max(0.5 * norm_goal, 5e-5 * norm); - solveTrustRegionModelProblem(r, scratch, hess_vec_func, precond_func, settings, tr_size, trResults); + auto model_start = Clock::now(); + auto model_hess_vec_func = [&](const mfem::Vector& x_, mfem::Vector& v_) { + timedModelHessVec(hess_vec_func, x_, v_); + }; + solveTrustRegionModelProblem(r, scratch, model_hess_vec_func, precond_func, settings, tr_size, trResults, + norm * norm); + model_solve_seconds += secondsSince(model_start); } cumulative_cg_iters_from_last_precond_update += trResults.cg_iterations_count; + num_cg_iterations += trResults.cg_iterations_count; bool have_computed_Hvs = false; + bool have_computed_H_left_mosts = false; + std::vector> candidate_left_mosts; int lineSearchIter = 0; while (lineSearchIter 
<= nonlinear_options.max_line_search_iterations) { + auto line_search_start = Clock::now(); ++lineSearchIter; + auto dogleg_start = Clock::now(); doglegStep(trResults.cauchy_point, trResults.z, tr_size, trResults.d); + dogleg_seconds += secondsSince(dogleg_start); + const bool check_subspace_boundary = subspace_option >= 1; + const double d_norm = + check_subspace_boundary + ? std::sqrt(timedDot(trResults.d, trResults.d, num_line_search_dot_products, line_search_dot_seconds)) + : 0.0; bool use_with_option1 = (subspace_option >= 1) && (trResults.interior_status == TrustRegionResults::Status::NonDescentDirection || trResults.interior_status == TrustRegionResults::Status::NegativeCurvature || - ((Norm(trResults.d) > (1.0 - 1.0e-6) * tr_size) && lineSearchIter > 1)); - bool use_with_option2 = (subspace_option >= 2) && (Norm(trResults.d) > (1.0 - 1.0e-6) * tr_size); + ((d_norm > (1.0 - 1.0e-6) * tr_size) && lineSearchIter > 1)); + bool use_with_option2 = (subspace_option >= 2) && (d_norm > (1.0 - 1.0e-6) * tr_size); bool use_with_option3 = (subspace_option >= 3); + const bool allow_cubic_subspace = + trResults.interior_status == TrustRegionResults::Status::NegativeCurvature || use_with_option2; if (use_with_option1 || use_with_option2 || use_with_option3) { if (!have_computed_Hvs) { have_computed_Hvs = true; - hess_vec_func(trResults.z, trResults.H_z); - hess_vec_func(trResults.d_old, trResults.H_d_old); - hess_vec_func(trResults.cauchy_point, trResults.H_cauchy_point); + + std::vector subspace_hess_inputs{&trResults.z, &trResults.cauchy_point}; + std::vector subspace_hess_outputs{&trResults.H_z, &trResults.H_cauchy_point}; + if (trResults.has_d_old) { + subspace_hess_inputs.push_back(&trResults.d_old); + subspace_hess_outputs.push_back(&trResults.H_d_old); + } + + batchedSubspaceHessVec(hess_vec_func, subspace_hess_inputs, subspace_hess_outputs); } - H_left_mosts.clear(); - for (auto& left : left_mosts) { - H_left_mosts.emplace_back(std::make_shared(*left)); - 
hess_vec_func(*left, *H_left_mosts.back()); + if (!have_computed_H_left_mosts) { + have_computed_H_left_mosts = true; + auto leftmost_start = Clock::now(); + previous_H_left_mosts = H_left_mosts; + H_left_mosts.clear(); + std::vector leftmost_inputs; + std::vector leftmost_outputs; + for (auto& left : left_mosts) { + H_left_mosts.emplace_back(std::make_shared(*left)); + leftmost_inputs.push_back(left.get()); + leftmost_outputs.push_back(H_left_mosts.back().get()); + ++num_subspace_leftmost_hess_vecs; + } + subspace_leftmost_seconds += secondsSince(leftmost_start); + batchedSubspaceHessVec(hess_vec_func, leftmost_inputs, leftmost_outputs); } - std::vector ds{&trResults.z, &trResults.d_old, &trResults.cauchy_point}; - std::vector H_ds{&trResults.H_z, &trResults.H_d_old, &trResults.H_cauchy_point}; - solveTheSubspaceProblem(trResults.d, hess_vec_func, ds, H_ds, r, tr_size, num_leftmost); + std::vector ds{&trResults.z, &trResults.cauchy_point}; + std::vector H_ds{&trResults.H_z, &trResults.H_cauchy_point}; + if (trResults.has_d_old) { + ds.push_back(&trResults.d_old); + H_ds.push_back(&trResults.H_d_old); + } + + std::vector H_past_steps; + std::vector past_step_inputs; + std::vector past_step_outputs; + const size_t max_past_steps = static_cast(std::max(nonlinear_options.trust_num_past_steps, 0)); + const size_t num_past_steps = + accepted_step_history.size() > 1 ? 
std::min(max_past_steps, accepted_step_history.size() - 1) : 0; + H_past_steps.reserve(num_past_steps); + past_step_inputs.reserve(num_past_steps); + past_step_outputs.reserve(num_past_steps); + for (size_t i = 0; i < num_past_steps; ++i) { + const auto& past_step = accepted_step_history[i + 1]; + H_past_steps.emplace_back(past_step->Size()); + past_step_inputs.push_back(past_step.get()); + past_step_outputs.push_back(&H_past_steps.back()); + } + if (!past_step_inputs.empty()) { + num_subspace_past_step_vectors += past_step_inputs.size(); + num_subspace_past_step_hess_vecs += past_step_inputs.size(); + batchedSubspaceHessVec(hess_vec_func, past_step_inputs, past_step_outputs); + for (size_t i = 0; i < past_step_inputs.size(); ++i) { + ds.push_back(past_step_inputs[i]); + H_ds.push_back(past_step_outputs[i]); + } + } + + mfem::Vector solve_start_direction; + mfem::Vector H_solve_start_direction; + if (nonlinear_options.trust_use_solve_start_direction && solve_start_x.Size() == X.Size()) { + solve_start_direction.SetSize(X.Size()); + subtract(solve_start_x, X, solve_start_direction); + if (solve_start_direction.Norml2() > 0.0) { + H_solve_start_direction.SetSize(X.Size()); + std::vector solve_start_inputs{&solve_start_direction}; + std::vector solve_start_outputs{&H_solve_start_direction}; + ++num_subspace_solve_start_vectors; + ++num_subspace_solve_start_hess_vecs; + batchedSubspaceHessVec(hess_vec_func, solve_start_inputs, solve_start_outputs); + ds.push_back(&solve_start_direction); + H_ds.push_back(&H_solve_start_direction); + } + } + + mfem::Vector min_residual_direction; + mfem::Vector H_min_residual_direction; + if (nonlinear_options.trust_use_min_residual_direction && min_residual_x.Size() == X.Size()) { + min_residual_direction.SetSize(X.Size()); + subtract(min_residual_x, X, min_residual_direction); + if (min_residual_direction.Norml2() > 0.0) { + H_min_residual_direction.SetSize(X.Size()); + std::vector min_res_inputs{&min_residual_direction}; + 
std::vector min_res_outputs{&H_min_residual_direction}; + // Reusing solve_start counters for now + ++num_subspace_solve_start_vectors; + ++num_subspace_solve_start_hess_vecs; + batchedSubspaceHessVec(hess_vec_func, min_res_inputs, min_res_outputs); + ds.push_back(&min_residual_direction); + H_ds.push_back(&H_min_residual_direction); + } + } + solveTheSubspaceProblem(trResults.d, hess_vec_func, ds, H_ds, r, tr_size, num_leftmost, candidate_left_mosts, + trResults.d_old, + trResults.has_d_old ? &trResults.H_d_old_at_accept : nullptr, allow_cubic_subspace); } static constexpr double roundOffTol = 0.0; // 1e-14; - hess_vec_func(trResults.d, trResults.H_d); - double dHd = Dot(trResults.d, trResults.H_d); - double modelObjective = Dot(r, trResults.d) + 0.5 * dHd - roundOffTol; + timedLineSearchHessVec(hess_vec_func, trResults.d, trResults.H_d); + const auto [dHd, rd] = timedDot2(trResults.d, trResults.H_d, r, trResults.d, num_line_search_dot_products, + line_search_dot_seconds); + double modelObjective = rd + 0.5 * dHd - roundOffTol; + auto update_start = Clock::now(); add(X, trResults.d, x_pred); + vector_update_seconds += secondsSince(update_start); double realObjective = std::numeric_limits::max(); double normPred = std::numeric_limits::max(); try { normPred = computeResidual(x_pred, r_pred); - double obj1 = 0.5 * (Dot(r, trResults.d) + Dot(r_pred, trResults.d)) - roundOffTol; + if (normPred < min_residual_norm) { + min_residual_norm = normPred; + min_residual_x = x_pred; + } + double obj1 = + 0.5 * (rd + timedDot(r_pred, trResults.d, num_line_search_dot_products, line_search_dot_seconds)) - + roundOffTol; realObjective = obj1; } catch (const std::exception&) { realObjective = std::numeric_limits::max(); normPred = std::numeric_limits::max(); } + const double trial_work_objective = current_work_objective + realObjective; + last_nonmonotone_work_reference = nonmonotoneWorkReference(work_objective_history); + if (normPred <= norm_goal) { trResults.d_old = trResults.d; 
+ trResults.H_d_old_at_accept = trResults.H_d; + trResults.has_d_old = true; + pushAcceptedStepHistory(trResults.d); + if (!candidate_left_mosts.empty()) { + left_mosts = std::move(candidate_left_mosts); + } + copy_start = Clock::now(); X = x_pred; r = r_pred; + vector_copy_scale_seconds += secondsSince(copy_start); norm = normPred; + current_work_objective = trial_work_objective; + pushWorkObjectiveHistory(work_objective_history, current_work_objective); + line_search_seconds += secondsSince(line_search_start); if (print_level >= 2) { printTrustRegionInfo(realObjective, modelObjective, trResults.cg_iterations_count, tr_size, true); trResults.cg_iterations_count = @@ -853,7 +1671,11 @@ class TrustRegion : public mfem::NewtonSolver { // modelRes = g + Jd // modelResNorm = np.linalg.norm(modelRes) // realResNorm = np.linalg.norm(gy) - bool willAccept = rho >= settings.eta1 && rho <= settings.eta4; // or (rho >= -0 and realResNorm <= gNorm) + const bool monotoneAccept = rho >= settings.eta1 && rho <= settings.eta4; + const bool nonmonotoneAccept = + nonlinear_options.trust_nonmonotone_window > 0 && modelObjective < 0.0 && rho <= settings.eta4 && + trial_work_objective <= last_nonmonotone_work_reference + settings.eta1 * modelObjective; + bool willAccept = monotoneAccept || nonmonotoneAccept; // or (rho >= -0 and realResNorm <= gNorm) if (print_level >= 2) { printTrustRegionInfo(realObjective, modelObjective, trResults.cg_iterations_count, tr_size, willAccept); @@ -863,11 +1685,27 @@ class TrustRegion : public mfem::NewtonSolver { if (willAccept) { trResults.d_old = trResults.d; + trResults.H_d_old_at_accept = trResults.H_d; + trResults.has_d_old = true; + pushAcceptedStepHistory(trResults.d); + if (!candidate_left_mosts.empty()) { + left_mosts = std::move(candidate_left_mosts); + } + if (nonmonotoneAccept && !monotoneAccept) { + ++num_nonmonotone_work_accepts; + ++num_monotone_work_would_reject; + } + copy_start = Clock::now(); X = x_pred; r = r_pred; + 
vector_copy_scale_seconds += secondsSince(copy_start); norm = normPred; + current_work_objective = trial_work_objective; + pushWorkObjectiveHistory(work_objective_history, current_work_objective); + line_search_seconds += secondsSince(line_search_start); break; } + line_search_seconds += secondsSince(line_search_start); } } @@ -889,6 +1727,7 @@ class TrustRegion : public mfem::NewtonSolver { mfem::out << "num subspace solves = " << num_subspace_solves << "\n"; mfem::out << "num jacobian_assembles = " << num_jacobian_assembles << "\n"; } + total_seconds = secondsSince(total_start); } }; @@ -962,11 +1801,43 @@ class PcgBlockSolver : public mfem::NewtonSolver { mutable double final_h_scale = 1.0; /// Last accepted block trust ratio mutable double last_trust_ratio = 0.0; + /// Time spent evaluating residuals + mutable double residual_seconds = 0.0; + /// Time spent applying all Hessian-vector products + mutable double hess_vec_seconds = 0.0; + /// Time spent applying JacobianOperator Hessian-vector products + mutable double jacobian_operator_hess_vec_seconds = 0.0; + /// Time spent applying assembled Hessian-vector products + mutable double assembled_hess_vec_seconds = 0.0; + /// Time spent applying legacy matrix-free tangent products + mutable double matrix_free_hess_vec_seconds = 0.0; + /// Time spent applying preconditioners + mutable double preconditioner_seconds = 0.0; + /// Time spent evaluating JacobianOperator factories + mutable double jacobian_operator_eval_seconds = 0.0; + /// Time spent assembling sparse Jacobians + mutable double jacobian_assembly_seconds = 0.0; + /// Time spent directly assembling diagonals + mutable double diagonal_assembly_seconds = 0.0; + /// Time spent inverting direct diagonals + mutable double diagonal_invert_seconds = 0.0; + /// Time spent refreshing preconditioner data + mutable double preconditioner_update_seconds = 0.0; + /// Time spent in preconditioner SetOperator calls + mutable double preconditioner_setup_seconds = 0.0; /// 
Optional matrix-free tangent action, y = J(x) dx MatrixFreeTangentAction matrix_free_tangent_action; /// Optional JacobianOperator factory JacobianOperatorFactory jacobian_operator_factory; + /// Cached JacobianOperator for the current PCG block + mutable std::unique_ptr current_jacobian_operator; + /// Owned sparse Jacobian assembled through the JacobianOperator fallback path + mutable std::unique_ptr assembled_jacobian_from_operator; + /// Inverted scalar diagonal preconditioner for the current PCG block + mutable mfem::Vector inverse_diagonal_preconditioner; + /// Whether the current PCG block should use the scalar diagonal preconditioner + mutable bool use_inverse_diagonal_preconditioner = false; #ifdef MFEM_USE_MPI /// Constructor @@ -980,21 +1851,26 @@ class PcgBlockSolver : public mfem::NewtonSolver { void assembleJacobian(const mfem::Vector& x) const { SMITH_MARK_FUNCTION; + auto start = Clock::now(); ++num_jacobian_assembles; grad = &oper->GetGradient(x); if (nonlinear_options.force_monolithic) { auto* grad_blocked = dynamic_cast(grad); if (grad_blocked) grad = buildMonolithicMatrix(*grad_blocked).release(); } + jacobian_assembly_seconds += secondsSince(start); } /// Evaluate the nonlinear residual. mfem::real_t computeResidual(const mfem::Vector& x, mfem::Vector& residual) const { SMITH_MARK_FUNCTION; + auto start = Clock::now(); ++num_residuals; oper->Mult(x, residual); - return Norm(residual); + const auto norm = Norm(residual); + residual_seconds += secondsSince(start); + return norm; } /// Set an optional matrix-free tangent action. @@ -1009,20 +1885,106 @@ class PcgBlockSolver : public mfem::NewtonSolver { jacobian_operator_factory = std::move(jacobian_operator); } + /// Evaluate and cache the JacobianOperator at x. 
+ void updateJacobianOperator(const mfem::Vector& x) const + { + SMITH_MARK_FUNCTION; + SLIC_ERROR_ROOT_IF(!jacobian_operator_factory, "No JacobianOperator factory is registered."); + auto start = Clock::now(); + ++num_jacobian_operator_evals; + current_jacobian_operator = jacobian_operator_factory(x); + SLIC_ERROR_ROOT_IF(!current_jacobian_operator, "JacobianOperator factory returned a null operator."); + jacobian_operator_eval_seconds += secondsSince(start); + } + + /// Assemble and invert the scalar diagonal preconditioner from the current JacobianOperator. + void updateDiagonalPreconditioner() const + { + SMITH_MARK_FUNCTION; + SLIC_ERROR_ROOT_IF(!current_jacobian_operator, "Cannot build diagonal preconditioner without a JacobianOperator."); + + auto diagonal_start = Clock::now(); + current_jacobian_operator->assembleDiagonal(inverse_diagonal_preconditioner); + diagonal_assembly_seconds += secondsSince(diagonal_start); + ++num_diagonal_assembles; + + auto invert_start = Clock::now(); + double max_abs_diag = 0.0; + for (int i = 0; i < inverse_diagonal_preconditioner.Size(); ++i) { + max_abs_diag = std::max(max_abs_diag, std::abs(inverse_diagonal_preconditioner[i])); + } + + const double floor = nonlinear_options.pcg_diagonal_floor * max_abs_diag; + SLIC_ERROR_ROOT_IF(!(floor > 0.0), "Cannot invert a zero Jacobian diagonal for PCG-block preconditioning."); + for (int i = 0; i < inverse_diagonal_preconditioner.Size(); ++i) { + inverse_diagonal_preconditioner[i] = 1.0 / std::max(std::abs(inverse_diagonal_preconditioner[i]), floor); + } + diagonal_invert_seconds += secondsSince(invert_start); + + use_inverse_diagonal_preconditioner = true; + } + + /// Refresh the tangent and preconditioner used by the next PCG block attempt. 
+ void refreshBlockOperators(const mfem::Vector& x) const + { + auto refresh_start = Clock::now(); + if (jacobian_operator_factory) { + updateJacobianOperator(x); + ++num_preconditioner_updates; + if (nonlinear_options.pcg_use_jacobian_diagonal_preconditioner) { + updateDiagonalPreconditioner(); + } else { + use_inverse_diagonal_preconditioner = false; + auto assembly_start = Clock::now(); + ++num_jacobian_assembles; + assembled_jacobian_from_operator = current_jacobian_operator->assemble(); + jacobian_assembly_seconds += secondsSince(assembly_start); + grad = assembled_jacobian_from_operator.get(); + auto setup_start = Clock::now(); + pcg_precond.SetOperator(*grad); + preconditioner_setup_seconds += secondsSince(setup_start); + } + } else { + SLIC_ERROR_ROOT_IF(nonlinear_options.pcg_use_jacobian_diagonal_preconditioner, + "PCG-block diagonal preconditioning requires a registered JacobianOperator."); + current_jacobian_operator.reset(); + use_inverse_diagonal_preconditioner = false; + assembleJacobian(x); + ++num_preconditioner_updates; + auto setup_start = Clock::now(); + pcg_precond.SetOperator(*grad); + preconditioner_setup_seconds += secondsSince(setup_start); + } + preconditioner_update_seconds += secondsSince(refresh_start); + } + /// Apply the tangent at x to dx. 
void hessVec(const mfem::Vector& x, const mfem::Vector& dx, mfem::Vector& y) const { SMITH_MARK_FUNCTION; + auto start = Clock::now(); ++num_hess_vecs; - if (jacobian_operator_factory) { - ++num_jacobian_operator_evals; - std::unique_ptr jacobian_operator = jacobian_operator_factory(x); - SLIC_ERROR_ROOT_IF(!jacobian_operator, "JacobianOperator factory returned a null operator."); - jacobian_operator->Mult(dx, y); + if (current_jacobian_operator) { + current_jacobian_operator->Mult(dx, y); + const double seconds = secondsSince(start); + hess_vec_seconds += seconds; + jacobian_operator_hess_vec_seconds += seconds; + } else if (jacobian_operator_factory) { + updateJacobianOperator(x); + current_jacobian_operator->Mult(dx, y); + const double seconds = secondsSince(start); + hess_vec_seconds += seconds; + jacobian_operator_hess_vec_seconds += seconds; } else if (matrix_free_tangent_action) { matrix_free_tangent_action(x, dx, y); + const double seconds = secondsSince(start); + hess_vec_seconds += seconds; + matrix_free_hess_vec_seconds += seconds; } else { grad->Mult(dx, y); + const double seconds = secondsSince(start); + hess_vec_seconds += seconds; + assembled_hess_vec_seconds += seconds; } } @@ -1030,8 +1992,19 @@ class PcgBlockSolver : public mfem::NewtonSolver { void precond(const mfem::Vector& x, mfem::Vector& v) const { SMITH_MARK_FUNCTION; + auto start = Clock::now(); ++num_preconds; - pcg_precond.Mult(x, v); + if (use_inverse_diagonal_preconditioner) { + SLIC_ERROR_ROOT_IF(inverse_diagonal_preconditioner.Size() != x.Size(), + "PCG-block diagonal preconditioner size does not match the residual vector."); + v.SetSize(x.Size()); + for (int i = 0; i < x.Size(); ++i) { + v[i] = inverse_diagonal_preconditioner[i] * x[i]; + } + } else { + pcg_precond.Mult(x, v); + } + preconditioner_seconds += secondsSince(start); } /// Return solver diagnostic counters. 
@@ -1057,6 +2030,18 @@ class PcgBlockSolver : public mfem::NewtonSolver { .num_trust_capped_steps = num_trust_capped_steps, .num_accepted_steps = num_accepted_steps, .num_trial_steps = num_trial_steps, + .residual_seconds = residual_seconds, + .hess_vec_seconds = hess_vec_seconds, + .jacobian_operator_hess_vec_seconds = jacobian_operator_hess_vec_seconds, + .assembled_hess_vec_seconds = assembled_hess_vec_seconds, + .matrix_free_hess_vec_seconds = matrix_free_hess_vec_seconds, + .preconditioner_seconds = preconditioner_seconds, + .jacobian_operator_eval_seconds = jacobian_operator_eval_seconds, + .jacobian_assembly_seconds = jacobian_assembly_seconds, + .diagonal_assembly_seconds = diagonal_assembly_seconds, + .diagonal_invert_seconds = diagonal_invert_seconds, + .preconditioner_update_seconds = preconditioner_update_seconds, + .preconditioner_setup_seconds = preconditioner_setup_seconds, .final_h_scale = final_h_scale, .last_trust_ratio = last_trust_ratio}; } @@ -1093,6 +2078,22 @@ class PcgBlockSolver : public mfem::NewtonSolver { num_trial_steps = 0; final_h_scale = nonlinear_options.pcg_h_scale_init; last_trust_ratio = 0.0; + residual_seconds = 0.0; + hess_vec_seconds = 0.0; + jacobian_operator_hess_vec_seconds = 0.0; + assembled_hess_vec_seconds = 0.0; + matrix_free_hess_vec_seconds = 0.0; + preconditioner_seconds = 0.0; + jacobian_operator_eval_seconds = 0.0; + jacobian_assembly_seconds = 0.0; + diagonal_assembly_seconds = 0.0; + diagonal_invert_seconds = 0.0; + preconditioner_update_seconds = 0.0; + preconditioner_setup_seconds = 0.0; + current_jacobian_operator.reset(); + assembled_jacobian_from_operator.reset(); + inverse_diagonal_preconditioner.SetSize(0); + use_inverse_diagonal_preconditioner = false; SLIC_ERROR_ROOT_IF(nonlinear_options.pcg_block_len <= 0, "PcgBlock requires pcg_block_len > 0"); SLIC_ERROR_ROOT_IF(nonlinear_options.pcg_window <= 0, "PcgBlock requires pcg_window > 0"); @@ -1210,9 +2211,7 @@ class PcgBlockSolver : public 
mfem::NewtonSolver { break; } - assembleJacobian(X); - ++num_preconditioner_updates; - pcg_precond.SetOperator(*grad); + refreshBlockOperators(X); r_block = r; const double norm_block = norm; @@ -1460,6 +2459,8 @@ class PcgBlockSolver : public mfem::NewtonSolver { if (retries_remaining <= 0 || h_scale < nonlinear_options.pcg_min_h_scale) { block_finished = true; + } else { + refreshBlockOperators(X); } } } @@ -1524,6 +2525,11 @@ void EquationSolver::setJacobianOperator(JacobianOperatorFactory jacobian_operat auto* pcg_block = dynamic_cast(nonlin_solver_.get()); if (pcg_block) { pcg_block->setJacobianOperator(std::move(jacobian_operator)); + return; + } + auto* trust_region = dynamic_cast(nonlin_solver_.get()); + if (trust_region) { + trust_region->setJacobianOperator(std::move(jacobian_operator)); } } @@ -1545,6 +2551,15 @@ std::optional EquationSolver::pcgBlockDiagnostics() const return pcg_block->diagnostics(); } +std::optional EquationSolver::trustRegionDiagnostics() const +{ + auto* trust_region = dynamic_cast(nonlin_solver_.get()); + if (!trust_region) { + return std::nullopt; + } + return trust_region->diagnostics(); +} + void SuperLUSolver::Mult(const mfem::Vector& input, mfem::Vector& output) const { SLIC_ERROR_ROOT_IF(!superlu_mat_, "Operator must be set prior to solving with SuperLU"); diff --git a/src/smith/numerics/equation_solver.hpp b/src/smith/numerics/equation_solver.hpp index 8d67cc64a5..6100fad73f 100644 --- a/src/smith/numerics/equation_solver.hpp +++ b/src/smith/numerics/equation_solver.hpp @@ -146,12 +146,192 @@ struct PcgBlockDiagnostics { size_t num_accepted_steps = 0; /// Number of trial inner PCG steps size_t num_trial_steps = 0; + /// Time spent evaluating nonlinear residuals + double residual_seconds = 0.0; + /// Time spent applying Jacobian-vector products + double hess_vec_seconds = 0.0; + /// Time spent applying JacobianOperator products + double jacobian_operator_hess_vec_seconds = 0.0; + /// Time spent applying assembled Jacobian 
products + double assembled_hess_vec_seconds = 0.0; + /// Time spent applying legacy matrix-free tangent products + double matrix_free_hess_vec_seconds = 0.0; + /// Time spent applying preconditioners + double preconditioner_seconds = 0.0; + /// Time spent evaluating JacobianOperator factories + double jacobian_operator_eval_seconds = 0.0; + /// Time spent assembling sparse Jacobians + double jacobian_assembly_seconds = 0.0; + /// Time spent directly assembling diagonals + double diagonal_assembly_seconds = 0.0; + /// Time spent inverting direct diagonals + double diagonal_invert_seconds = 0.0; + /// Time spent refreshing preconditioner data + double preconditioner_update_seconds = 0.0; + /// Time spent in preconditioner SetOperator calls + double preconditioner_setup_seconds = 0.0; /// Last trust scale used by the solver double final_h_scale = 1.0; /// Last accepted block trust ratio double last_trust_ratio = 0.0; }; +/// Diagnostic counters for the TrustRegion nonlinear solver +struct TrustRegionDiagnostics { + /// Number of nonlinear residual evaluations + size_t num_residuals = 0; + /// Number of Jacobian-vector products + size_t num_hess_vecs = 0; + /// Number of Hessian-vector products in model CG solves + size_t num_model_hess_vecs = 0; + /// Number of Hessian-vector products in Cauchy-point construction + size_t num_cauchy_hess_vecs = 0; + /// Number of Hessian-vector products in line-search model checks + size_t num_line_search_hess_vecs = 0; + /// Number of preconditioner applications + size_t num_preconds = 0; + /// Number of assembled Jacobians + size_t num_jacobian_assembles = 0; + /// Number of solver-facing JacobianOperator evaluations + size_t num_jacobian_operator_evals = 0; + /// Number of direct diagonal assemblies + size_t num_diagonal_assembles = 0; + /// Number of trust-region model CG iterations + size_t num_cg_iterations = 0; + /// Number of subspace solves + size_t num_subspace_solves = 0; + /// Number of retained-leftmost Hessian-vector 
products for subspace solves + size_t num_subspace_leftmost_hess_vecs = 0; + /// Number of batched Hessian-vector groups used for subspace solves + size_t num_subspace_hess_vec_batches = 0; + /// Number of Hessian-vector products inside subspace batches + size_t num_subspace_batched_hess_vecs = 0; + /// Number of accepted-step history vectors added to subspace solves + size_t num_subspace_past_step_vectors = 0; + /// Number of Hessian-vector products for accepted-step history vectors + size_t num_subspace_past_step_hess_vecs = 0; + /// Number of nonlinear-solve-start directions added to subspace solves + size_t num_subspace_solve_start_vectors = 0; + /// Number of Hessian-vector products for nonlinear-solve-start directions + size_t num_subspace_solve_start_hess_vecs = 0; + /// Number of quadratic subspace backend solves + size_t num_quadratic_subspace_solves = 0; + /// Number of cubic subspace backend attempts + size_t num_cubic_subspace_attempts = 0; + /// Number of cubic subspace attempts that used the cubic candidate + size_t num_cubic_subspace_uses = 0; + /// Number of cubic subspace attempts that fell back to the quadratic candidate + size_t num_cubic_subspace_quadratic_fallbacks = 0; + /// Number of preconditioner operator updates + size_t num_preconditioner_updates = 0; + /// Number of nonmonotone accepted TrustRegion steps based on work surrogate + size_t num_nonmonotone_work_accepts = 0; + /// Number of accepted TrustRegion work-surrogate steps that monotone acceptance would have rejected + size_t num_monotone_work_would_reject = 0; + /// Time spent evaluating nonlinear residuals + double residual_seconds = 0.0; + /// Time spent applying Jacobian-vector products + double hess_vec_seconds = 0.0; + /// Time spent applying Hessian-vector products in model CG solves + double model_hess_vec_seconds = 0.0; + /// Time spent applying Hessian-vector products in Cauchy-point construction + double cauchy_hess_vec_seconds = 0.0; + /// Time spent applying 
Hessian-vector products in line-search model checks + double line_search_hess_vec_seconds = 0.0; + /// Time spent applying JacobianOperator products + double jacobian_operator_hess_vec_seconds = 0.0; + /// Time spent evaluating JacobianOperator factories + double jacobian_operator_eval_seconds = 0.0; + /// Time spent directly assembling diagonals + double diagonal_assembly_seconds = 0.0; + /// Time spent inverting direct diagonals + double diagonal_invert_seconds = 0.0; + /// Time spent applying preconditioners + double preconditioner_seconds = 0.0; + /// Total time spent in the nonlinear solve + double total_seconds = 0.0; + /// Time spent solving trust-region model problems + double model_solve_seconds = 0.0; + /// Total time spent in trust-region subspace solves + double subspace_seconds = 0.0; + /// Time spent building/applying retained leftmost directions for subspace solves + double subspace_leftmost_seconds = 0.0; + /// Time spent in subspace Hessian-vector batches + double subspace_hess_vec_batch_seconds = 0.0; + /// Time spent removing dependent directions before subspace solves + double subspace_filter_seconds = 0.0; + /// Time spent in dense subspace backend assembly/solve work + double subspace_backend_seconds = 0.0; + /// Time spent projecting dense subspace Hessian + double subspace_project_A_seconds = 0.0; + /// Time spent projecting dense subspace Gram matrix + double subspace_project_gram_seconds = 0.0; + /// Time spent projecting dense subspace gradient + double subspace_project_b_seconds = 0.0; + /// Time spent building dense subspace orthonormal basis + double subspace_basis_seconds = 0.0; + /// Time spent forming reduced dense Hessian + double subspace_reduced_A_seconds = 0.0; + /// Time spent in dense subspace eigensystems + double subspace_dense_eigensystem_seconds = 0.0; + /// Time spent in dense trust-region solve outside eigensystems + double subspace_dense_trust_solve_seconds = 0.0; + /// Time spent reconstructing full-space subspace 
solution + double subspace_reconstruct_solution_seconds = 0.0; + /// Time spent reconstructing retained leftmost vectors + double subspace_reconstruct_leftmost_seconds = 0.0; + /// Time spent in subspace postprocessing and model-energy comparison + double subspace_finalize_seconds = 0.0; + /// Time spent building the Cauchy point + double cauchy_point_seconds = 0.0; + /// Time spent in dogleg step construction + double dogleg_seconds = 0.0; + /// Time spent in line-search and trust-radius acceptance logic + double line_search_seconds = 0.0; + /// Time spent in TrustRegion dot products + double dot_seconds = 0.0; + /// Number of TrustRegion dot products + size_t num_dot_products = 0; + /// Number of TrustRegion dot batches/reductions + size_t num_dot_reductions = 0; + /// Number of dot products in trust-region model solves + size_t num_model_dot_products = 0; + /// Number of dot products in Cauchy-point construction + size_t num_cauchy_dot_products = 0; + /// Number of dot products in dogleg construction + size_t num_dogleg_dot_products = 0; + /// Number of dot products in line-search and acceptance logic + size_t num_line_search_dot_products = 0; + /// Number of setup dot products outside the main per-step kernels + size_t num_setup_dot_products = 0; + /// Time spent in trust-region model-solve dot products + double model_dot_seconds = 0.0; + /// Time spent in Cauchy-point dot products + double cauchy_dot_seconds = 0.0; + /// Time spent in dogleg dot products + double dogleg_dot_seconds = 0.0; + /// Time spent in line-search dot products + double line_search_dot_seconds = 0.0; + /// Time spent in setup dot products + double setup_dot_seconds = 0.0; + /// Time spent in TrustRegion vector add/update operations + double vector_update_seconds = 0.0; + /// Time spent in TrustRegion vector copies and scaling operations + double vector_copy_scale_seconds = 0.0; + /// Time spent in TrustRegion boundary projection operations + double projection_seconds = 0.0; + /// Time 
spent assembling sparse Jacobians + double jacobian_assembly_seconds = 0.0; + /// Time spent refreshing preconditioner data + double preconditioner_update_seconds = 0.0; + /// Time spent in preconditioner SetOperator calls + double preconditioner_setup_seconds = 0.0; + /// Last TrustRegion accumulated work-surrogate level used by nonmonotone acceptance + double last_work_objective = 0.0; + /// Last nonmonotone reference work-surrogate level + double last_nonmonotone_work_reference = 0.0; +}; + /** * @brief This class manages the objects typically required to solve a nonlinear set of equations arising from * discretization of a PDE of the form F(x) = 0. Specifically, it has @@ -247,6 +427,12 @@ class EquationSolver { */ std::optional pcgBlockDiagnostics() const; + /** + * Returns diagnostic counters when the nonlinear solver is TrustRegion. + * @return Optional TrustRegion diagnostics; empty for other nonlinear solvers + */ + std::optional trustRegionDiagnostics() const; + /** * Returns the underlying linear solver object * @return A non-owning reference to the underlying linear solver diff --git a/src/smith/numerics/mfem_trust_region_subspace.cpp b/src/smith/numerics/mfem_trust_region_subspace.cpp new file mode 100644 index 0000000000..454cb81d2d --- /dev/null +++ b/src/smith/numerics/mfem_trust_region_subspace.cpp @@ -0,0 +1,589 @@ +// Copyright (c) Lawrence Livermore National Security, LLC and +// other Smith Project Developers. See the top-level LICENSE file for +// details. 
+// +// SPDX-License-Identifier: (BSD-3-Clause) + +#include "smith/numerics/trust_region_solver.hpp" + +#include +#include +#include +#include + +#include "smith/infrastructure/profiling.hpp" + +namespace smith { + +namespace { + +using Clock = std::chrono::steady_clock; + +double secondsSince(Clock::time_point start) +{ + return std::chrono::duration_cast>(Clock::now() - start).count(); +} + +TrustRegionSubspaceTimings& mutableTrustRegionSubspaceTimings() +{ + static TrustRegionSubspaceTimings timings; + return timings; +} + +} // namespace + +void resetTrustRegionSubspaceTimings() +{ + mutableTrustRegionSubspaceTimings() = TrustRegionSubspaceTimings {}; +} + +TrustRegionSubspaceTimings trustRegionSubspaceTimings() +{ + return mutableTrustRegionSubspaceTimings(); +} + +int globalSize(const mfem::Vector& parallel_v, const MPI_Comm& comm) +{ + int local_size = parallel_v.Size(); + int global_size; + MPI_Allreduce(&local_size, &global_size, 1, MPI_INT, MPI_SUM, comm); + return global_size; +} + +double innerProduct(const mfem::Vector& a, const mfem::Vector& b, const MPI_Comm& comm) +{ + return mfem::InnerProduct(comm, a, b); +} + +std::pair, std::vector> removeDependentDirections( + std::vector directions, std::vector A_directions) +{ + SMITH_MARK_FUNCTION; + std::vector norms; + size_t num_dirs = directions.size(); + + for (size_t i = 0; i < num_dirs; ++i) { + norms.push_back(std::sqrt(mfem::InnerProduct(MPI_COMM_WORLD, *directions[i], *directions[i]))); + } + + std::vector> kepts; + for (size_t i = 0; i < num_dirs; ++i) { + bool keepi = true; + if (norms[i] == 0) keepi = false; + for (auto&& kept_and_j : kepts) { + size_t j = kept_and_j.second; + double dot_ij = mfem::InnerProduct(MPI_COMM_WORLD, *directions[i], *kept_and_j.first); + if (dot_ij > 0.999 * norms[i] * norms[j]) { + keepi = false; + } + } + if (keepi) { + kepts.emplace_back(std::make_pair(directions[i], i)); + } + } + + std::vector directions_new; + std::vector A_directions_new; + + for (auto 
kept_and_j : kepts) { + directions_new.push_back(directions[kept_and_j.second]); + A_directions_new.push_back(A_directions[kept_and_j.second]); + } + + return std::make_pair(directions_new, A_directions_new); +} + +std::tuple, std::vector, std::vector> +removeDependentDirectionTriples(std::vector directions, + std::vector A_directions, + std::vector previous_A_directions) +{ + SMITH_MARK_FUNCTION; + MFEM_VERIFY(directions.size() == A_directions.size() && directions.size() == previous_A_directions.size(), + "Direction triple lists must have matching sizes."); + + std::vector norms; + size_t num_dirs = directions.size(); + + for (size_t i = 0; i < num_dirs; ++i) { + norms.push_back(std::sqrt(mfem::InnerProduct(MPI_COMM_WORLD, *directions[i], *directions[i]))); + } + + std::vector> kepts; + for (size_t i = 0; i < num_dirs; ++i) { + bool keepi = norms[i] != 0.0; + for (auto&& kept_and_j : kepts) { + size_t j = kept_and_j.second; + double dot_ij = mfem::InnerProduct(MPI_COMM_WORLD, *directions[i], *kept_and_j.first); + if (dot_ij > 0.999 * norms[i] * norms[j]) { + keepi = false; + } + } + if (keepi) { + kepts.emplace_back(std::make_pair(directions[i], i)); + } + } + + std::vector directions_new; + std::vector A_directions_new; + std::vector previous_A_directions_new; + + for (auto kept_and_j : kepts) { + directions_new.push_back(directions[kept_and_j.second]); + A_directions_new.push_back(A_directions[kept_and_j.second]); + previous_A_directions_new.push_back(previous_A_directions[kept_and_j.second]); + } + + return std::make_tuple(directions_new, A_directions_new, previous_A_directions_new); +} + +#ifdef MFEM_USE_LAPACK + +TrustRegionSubspaceResult solveSubspaceProblem(const std::vector& directions, + const std::vector& A_directions, + const mfem::Vector& b, double delta, int num_leftmost) +{ + return solveSubspaceProblemMfem(directions, A_directions, b, delta, num_leftmost); +} + +namespace { + +double dot(const mfem::Vector& a, const mfem::Vector& b) +{ + return a * 
b; +} + +double norm(const mfem::Vector& x) +{ + return x.Norml2(); +} + +mfem::Vector operator+(const mfem::Vector& x, double value) +{ + mfem::Vector out(x); + for (int i = 0; i < out.Size(); ++i) { + out[i] += value; + } + return out; +} + +mfem::Vector pointwiseMultiply(const mfem::Vector& a, const mfem::Vector& b) +{ + mfem::Vector out(a.Size()); + for (int i = 0; i < a.Size(); ++i) { + out[i] = a[i] * b[i]; + } + return out; +} + +mfem::Vector pointwiseDivide(const mfem::Vector& a, const mfem::Vector& b) +{ + mfem::Vector out(a.Size()); + for (int i = 0; i < a.Size(); ++i) { + out[i] = a[i] / b[i]; + } + return out; +} + +double sumAbs(const mfem::Vector& x) +{ + double total = 0.0; + for (int i = 0; i < x.Size(); ++i) { + total += std::abs(x[i]); + } + return total; +} + +double sum(const mfem::Vector& x) +{ + double total = 0.0; + for (int i = 0; i < x.Size(); ++i) { + total += x[i]; + } + return total; +} + +void symmetrize(mfem::DenseMatrix& A) +{ + MFEM_VERIFY(A.Height() == A.Width(), "symmetrize requires square matrix"); + for (int i = 0; i < A.Height(); ++i) { + for (int j = 0; j < i; ++j) { + const double value = 0.5 * (A(i, j) + A(j, i)); + A(i, j) = value; + A(j, i) = value; + } + } +} + +struct SubspaceProjections { + mfem::DenseMatrix sAs; + mfem::DenseMatrix ss; + mfem::Vector sb; +}; + +SubspaceProjections denseSubspaceProjections(const std::vector& states, + const std::vector& Astates, const mfem::Vector& b) +{ + MFEM_VERIFY(states.size() == Astates.size(), + "Search directions and their linear operator result must have same number of columns"); + MFEM_VERIFY(!states.empty(), "Subspace projections require at least one direction."); + + const int n = static_cast(states.size()); + const int vector_size = states[0]->Size(); + for (int j = 0; j < n; ++j) { + MFEM_VERIFY(states[size_t(j)]->Size() == vector_size, "Subspace direction sizes differ."); + MFEM_VERIFY(Astates[size_t(j)]->Size() == vector_size, "Subspace Hessian-vector sizes differ."); + } 
+ MFEM_VERIFY(b.Size() == vector_size, "Subspace right-hand-side size differs."); + + const int triangular_size = n * (n + 1) / 2; + const auto triangular_index = [n](int i, int j) { + return i * n - (i * (i - 1)) / 2 + (j - i); + }; + const int sAs_offset = 0; + const int ss_offset = triangular_size; + const int sb_offset = 2 * triangular_size; + const int buffer_size = 2 * triangular_size + n; + std::vector local(size_t(buffer_size), 0.0); + std::vector global(size_t(buffer_size), 0.0); + + for (int k = 0; k < vector_size; ++k) { + const double b_k = b[k]; + for (int i = 0; i < n; ++i) { + const double s_i = (*states[size_t(i)])[k]; + local[size_t(sb_offset + i)] += s_i * b_k; + for (int j = i; j < n; ++j) { + const size_t ij = size_t(triangular_index(i, j)); + local[size_t(sAs_offset) + ij] += s_i * (*Astates[size_t(j)])[k]; + local[size_t(ss_offset) + ij] += s_i * (*states[size_t(j)])[k]; + } + } + } + + MPI_Allreduce(local.data(), global.data(), buffer_size, MFEM_MPI_REAL_T, MPI_SUM, MPI_COMM_WORLD); + + SubspaceProjections projections{mfem::DenseMatrix(n), mfem::DenseMatrix(n), mfem::Vector(n)}; + for (int i = 0; i < n; ++i) { + projections.sb[i] = global[size_t(sb_offset + i)]; + for (int j = i; j < n; ++j) { + const size_t ij = size_t(triangular_index(i, j)); + projections.sAs(i, j) = global[size_t(sAs_offset) + ij]; + projections.sAs(j, i) = projections.sAs(i, j); + projections.ss(i, j) = global[size_t(ss_offset) + ij]; + projections.ss(j, i) = projections.ss(i, j); + } + } + + return projections; +} + +mfem::Vector solveDense(const mfem::DenseMatrix& A, const mfem::Vector& b) +{ + mfem::DenseMatrix A_copy(A); + mfem::DenseMatrixInverse inv(A_copy); + mfem::Vector x(b.Size()); + inv.Mult(b, x); + return x; +} + +double quadraticEnergy(const mfem::DenseMatrix& A, const mfem::Vector& b, const mfem::Vector& x) +{ + mfem::Vector Ax(x.Size()); + A.Mult(x, Ax); + return 0.5 * dot(x, Ax) - dot(x, b); +} + +double pnormSquared(const mfem::Vector& bvv, const 
mfem::Vector& sig) +{ + return sum(pointwiseDivide(bvv, pointwiseMultiply(sig, sig))); +} + +double qnormSquared(const mfem::Vector& bvv, const mfem::Vector& sig) +{ + mfem::Vector sig_sq = pointwiseMultiply(sig, sig); + mfem::Vector sig_cu = pointwiseMultiply(sig_sq, sig); + return sum(pointwiseDivide(bvv, sig_cu)); +} + +mfem::Vector matrixColumn(const mfem::DenseMatrix& A, int j) +{ + mfem::Vector col(A.Height()); + for (int i = 0; i < A.Height(); ++i) { + col[i] = A(i, j); + } + return col; +} + +mfem::DenseMatrix columnsToMatrix(const std::vector& cols) +{ + mfem::DenseMatrix A(cols.empty() ? 0 : cols[0].Size(), static_cast(cols.size())); + for (int j = 0; j < A.Width(); ++j) { + for (int i = 0; i < A.Height(); ++i) { + A(i, j) = cols[size_t(j)][i]; + } + } + return A; +} + +std::tuple, std::vector, bool> exactTrustRegionSolve( + mfem::DenseMatrix A, const mfem::Vector& b, double delta, int num_leftmost) +{ + auto dense_solve_start = Clock::now(); + if (A.Height() != A.Width()) { + throw PetscException("Exact trust region solver requires square matrices"); + } + if (A.Height() != b.Size()) { + throw PetscException("The right hand size for exact trust region solve must be consistent with the input matrix size"); + } + + mfem::Vector sigs; + mfem::DenseMatrix V; + auto eig_start = Clock::now(); + A.Eigensystem(sigs, V); + mutableTrustRegionSubspaceTimings().dense_eigensystem_seconds += secondsSince(eig_start); + + std::vector leftmosts; + std::vector minsigs; + const int num_leftmost_possible = std::min(num_leftmost, sigs.Size()); + for (int i = 0; i < num_leftmost_possible; ++i) { + leftmosts.emplace_back(matrixColumn(V, i)); + minsigs.emplace_back(sigs[i]); + } + + const mfem::Vector leftMost = matrixColumn(V, 0); + const double minSig = sigs[0]; + + mfem::Vector bv(sigs.Size()); + for (int i = 0; i < sigs.Size(); ++i) { + const mfem::Vector vi = matrixColumn(V, i); + bv[i] = dot(vi, b); + } + + mfem::Vector bvOverSigs = pointwiseDivide(bv, sigs); + const 
double sigScale = sumAbs(sigs) / sigs.Size(); + const double eps = 1e-12 * sigScale; + + if ((minSig >= eps) && (norm(bvOverSigs) <= delta)) { + mutableTrustRegionSubspaceTimings().dense_trust_solve_seconds += secondsSince(dense_solve_start); + return std::make_tuple(solveDense(A, b), leftmosts, minsigs, true); + } + + double lam = minSig < eps ? -minSig + eps : 0.0; + mfem::Vector sigsPlusLam = sigs + lam; + bvOverSigs = pointwiseDivide(bv, sigsPlusLam); + + if ((minSig < eps) && (norm(bvOverSigs) < delta)) { + mfem::Vector p(b.Size()); + p = 0.0; + for (int i = 0; i < b.Size(); ++i) { + const mfem::Vector vi = matrixColumn(V, i); + p.Add(bv[i], vi); + } + + const double pz = dot(p, leftMost); + const double pp = dot(p, p); + const double ddmpp = std::max(delta * delta - pp, 0.0); + + const double tau1 = -pz + std::sqrt(pz * pz + ddmpp); + const double tau2 = -pz - std::sqrt(pz * pz + ddmpp); + + mfem::Vector x1(p); + mfem::Vector x2(p); + x1.Add(tau1, leftMost); + x2.Add(tau2, leftMost); + + const double e1 = quadraticEnergy(A, b, x1); + const double e2 = quadraticEnergy(A, b, x2); + + mutableTrustRegionSubspaceTimings().dense_trust_solve_seconds += secondsSince(dense_solve_start); + return std::make_tuple(e1 < e2 ? 
x1 : x2, leftmosts, minsigs, true); + } + + const mfem::Vector bvbv = pointwiseMultiply(bv, bv); + sigsPlusLam = sigs + lam; + + double pNormSq = pnormSquared(bvbv, sigsPlusLam); + double pNorm = std::sqrt(pNormSq); + double bError = (pNorm - delta) / delta; + + size_t iters = 0; + constexpr size_t maxIters = 30; + while ((std::abs(bError) > 1e-9) && (iters++ < maxIters)) { + const double qNormSq = qnormSquared(bvbv, sigsPlusLam); + lam += (pNormSq / qNormSq) * bError; + sigsPlusLam = sigs + lam; + pNormSq = pnormSquared(bvbv, sigsPlusLam); + pNorm = std::sqrt(pNormSq); + bError = (pNorm - delta) / delta; + } + + const bool success = iters < maxIters; + + bvOverSigs = pointwiseDivide(bv, sigsPlusLam); + + mfem::Vector x(b.Size()); + x = 0.0; + for (int i = 0; i < b.Size(); ++i) { + const mfem::Vector vi = matrixColumn(V, i); + x.Add(bvOverSigs[i], vi); + } + + const double e1 = quadraticEnergy(A, b, x); + mfem::Vector neg_x(x); + neg_x *= -1.0; + const double e2 = quadraticEnergy(A, b, neg_x); + + x *= (e2 < e1 ? 
-delta : delta) / norm(x); + + mutableTrustRegionSubspaceTimings().dense_trust_solve_seconds += secondsSince(dense_solve_start); + return std::make_tuple(x, leftmosts, minsigs, success); +} + +mfem::DenseMatrix orthonormalBasisTransform(const mfem::DenseMatrix& gram, double& trace_mag) +{ + mfem::DenseMatrix gram_copy(gram); + mfem::Vector evals; + mfem::DenseMatrix evecs; + gram_copy.Eigensystem(evals, evecs); + + trace_mag = 0.0; + for (int i = 0; i < evals.Size(); ++i) { + trace_mag += std::abs(evals[i]); + } + + std::vector kept_columns; + for (int i = 0; i < evals.Size(); ++i) { + if (evals[i] > 1e-9 * trace_mag) { + mfem::Vector col = matrixColumn(evecs, i); + col /= std::sqrt(evals[i]); + kept_columns.emplace_back(std::move(col)); + } + } + + return columnsToMatrix(kept_columns); +} + +mfem::DenseMatrix tripleProduct(const mfem::DenseMatrix& L, const mfem::DenseMatrix& A, const mfem::DenseMatrix& R) +{ + mfem::DenseMatrix tmp(A.Height(), R.Width()); + mfem::Mult(A, R, tmp); + mfem::DenseMatrix out(L.Width(), R.Width()); + mfem::MultAtB(L, tmp, out); + return out; +} + +mfem::Vector projectWithTranspose(const mfem::DenseMatrix& A, const mfem::Vector& x) +{ + mfem::Vector out(A.Width()); + A.MultTranspose(x, out); + return out; +} + +mfem::Vector combineDirections(const std::vector& states, const mfem::Vector& coeffs) +{ + mfem::Vector out(*states[0]); + out = 0.0; + for (int i = 0; i < coeffs.Size(); ++i) { + out.Add(coeffs[i], *states[size_t(i)]); + } + return out; +} + +} // namespace + +TrustRegionSubspaceResult solveSubspaceProblemMfem(const std::vector& states, + const std::vector& Astates, + const mfem::Vector& b, double delta, int num_leftmost) +{ + SMITH_MARK_FUNCTION; + auto& timings = mutableTrustRegionSubspaceTimings(); + ++timings.num_solves; + timings.total_input_dim += states.size(); + timings.max_input_dim = std::max(timings.max_input_dim, states.size()); + + auto project_A_start = Clock::now(); + SubspaceProjections projections = 
denseSubspaceProjections(states, Astates, b); + mfem::DenseMatrix& sAs = projections.sAs; + timings.project_A_seconds += secondsSince(project_A_start); + symmetrize(sAs); + + for (int i = 0; i < sAs.Height(); ++i) { + for (int j = 0; j < sAs.Width(); ++j) { + if (std::isnan(sAs(i, j))) { + throw PetscException("States in subspace solve contain NaNs."); + } + } + } + + auto project_gram_start = Clock::now(); + mfem::DenseMatrix& ss = projections.ss; + timings.project_gram_seconds += secondsSince(project_gram_start); + symmetrize(ss); + + double trace_mag = 0.0; + auto basis_start = Clock::now(); + mfem::DenseMatrix T = orthonormalBasisTransform(ss, trace_mag); + timings.basis_seconds += secondsSince(basis_start); + if (T.Width() == 0) { + throw PetscException("No independent directions in MFEM subspace solve."); + } + timings.total_reduced_dim += static_cast(T.Width()); + timings.max_reduced_dim = std::max(timings.max_reduced_dim, static_cast(T.Width())); + + auto reduced_A_start = Clock::now(); + mfem::DenseMatrix pAp = tripleProduct(T, sAs, T); + timings.reduced_A_seconds += secondsSince(reduced_A_start); + symmetrize(pAp); + + auto project_b_start = Clock::now(); + const mfem::Vector& sb = projections.sb; + timings.project_b_seconds += secondsSince(project_b_start); + const mfem::Vector pb = projectWithTranspose(T, sb); + + auto [reduced_x, leftvecs, leftvals, success] = exactTrustRegionSolve(pAp, pb, delta, num_leftmost); + (void)success; + const double energy = quadraticEnergy(pAp, pb, reduced_x); + + auto reconstruct_solution_start = Clock::now(); + mfem::Vector coeffs(T.Height()); + T.Mult(reduced_x, coeffs); + mfem::Vector sol = combineDirections(states, coeffs); + timings.reconstruct_solution_seconds += secondsSince(reconstruct_solution_start); + + auto reconstruct_leftmost_start = Clock::now(); + std::vector> leftmosts; + for (const auto& leftvec : leftvecs) { + mfem::Vector left_coeffs(T.Height()); + T.Mult(leftvec, left_coeffs); + 
leftmosts.emplace_back(std::make_shared(combineDirections(states, left_coeffs))); + } + timings.reconstruct_leftmost_seconds += secondsSince(reconstruct_leftmost_start); + + return std::make_tuple(sol, leftmosts, leftvals, energy); +} + +#else + +TrustRegionSubspaceResult solveSubspaceProblem(const std::vector& directions, + const std::vector& A_directions, + const mfem::Vector& b, double delta, int num_leftmost) +{ +#ifdef SMITH_USE_SLEPC + return solveSubspaceProblemPetsc(directions, A_directions, b, delta, num_leftmost); +#else + throw PetscException("MFEM trust-region subspace solve requires MFEM LAPACK support."); + return std::make_tuple(b, std::vector> {}, std::vector {}, 0.0); +#endif +} + +TrustRegionSubspaceResult solveSubspaceProblemMfem(const std::vector&, + const std::vector&, const mfem::Vector& b, + double, int) +{ + throw PetscException("MFEM trust-region subspace solve requires MFEM LAPACK support."); + return std::make_tuple(b, std::vector> {}, std::vector {}, 0.0); +} + +#endif // MFEM_USE_LAPACK + +} // namespace smith diff --git a/src/smith/numerics/trust_region_solver.cpp b/src/smith/numerics/petsc_trust_region_subspace.cpp similarity index 64% rename from src/smith/numerics/trust_region_solver.cpp rename to src/smith/numerics/petsc_trust_region_subspace.cpp index 8d8d04a9cc..aac63c7cd1 100644 --- a/src/smith/numerics/trust_region_solver.cpp +++ b/src/smith/numerics/petsc_trust_region_subspace.cpp @@ -14,26 +14,10 @@ #include "smith/numerics/dense_petsc.hpp" namespace smith { - -/** - * @brief Get the global size of a mfem vector - * @param parallel_v Vector to check global size - * @param comm Parallel communicator - */ -int globalSize(const mfem::Vector& parallel_v, const MPI_Comm& comm) -{ - int local_size = parallel_v.Size(); - int global_size; - MPI_Allreduce(&local_size, &global_size, 1, MPI_INT, MPI_SUM, comm); - return global_size; -} +namespace { /// @brief struct which aids in moving between mfem::Vector and petsc BV struct 
BasisVectors { - /** - * @brief Construct with a representative state to set sizes - * @param state The state which is used to set sizes for basis vectors - */ BasisVectors(const mfem::Vector& state) : local_rows(state.Size()), global_rows(globalSize(state, PETSC_COMM_WORLD)) { VecCreateMPI(PETSC_COMM_WORLD, local_rows, global_rows, &v); @@ -47,15 +31,8 @@ struct BasisVectors { } } - /** - * @brief Destructor - */ ~BasisVectors() { VecDestroy(&v); } - /** - * @brief Construct petsc BV from vector of mfem::Vector - * @param states The states used to construct basis vectors - */ BV constructBases(const std::vector& states) const { size_t num_cols = states.size(); @@ -81,10 +58,6 @@ struct BasisVectors { Vec v; }; -/** - * @brief Create a petsc vector from a mfem::Vector - * @param state The state used to create an mfem::Vector - */ Vec petscVec(const mfem::Vector& state) { const int local_rows = state.Size(); @@ -110,11 +83,6 @@ Vec petscVec(const mfem::Vector& state) return v; } -/** - * @brief Copy a petsc vector to an mfem::Vector - * @param v The petsc vector - * @param s The mfem vector - */ void copy(const Vec& v, mfem::Vector& s) { const int local_rows = s.Size(); @@ -133,11 +101,6 @@ void copy(const Vec& v, mfem::Vector& s) VecGetValues(v, local_rows, &col_indices[0], &s[0]); } -/** - * @brief The reduced matrix in the space of {s} - * @param s The vector of mfem::Vector of directions - * @param As The vector of mfem::Vector of a global matrix A operated on directions - */ Mat dot(const std::vector& s, const std::vector& As) { SLIC_ERROR_IF(s.size() != As.size(), @@ -157,11 +120,6 @@ Mat dot(const std::vector& s, const std::vector& s, const mfem::Vector& b) { size_t num_cols = s.size(); @@ -173,11 +131,6 @@ Vec dot(const std::vector& s, const mfem::Vector& b) return sb; } -/** - * @brief The qr decomposition of the state vectors - * @param states The vector of mfem::vectors of directions - * @return Pair of BV Q and DenseMat R - */ auto qr(const std::vector& 
states) { BasisVectors bvs(*states[0]); @@ -193,13 +146,6 @@ auto qr(const std::vector& states) return std::make_pair(Q, DenseMat(R)); } -/** - * @brief compute the quadratic energy from small dense matrices and vectors - * @param A The stiffness matrix - * @param b The rhs vector - * @param x The current solution vector - * @return The quadratic, linearized energy approximation - */ double quadraticEnergy(const DenseMat& A, const DenseVec& b, const DenseVec& x) { DenseVec Ax = A * x; @@ -208,47 +154,20 @@ double quadraticEnergy(const DenseMat& A, const DenseVec& b, const DenseVec& x) return 0.5 * xAx - xb; } -/** - * @brief compute the pnorm_squared - * @param bvv input vector - * @param sig eigenvectors - */ double pnorm_squared(const DenseVec& bvv, const DenseVec& sig) { auto bvv_div_sig_squared = bvv / (sig * sig); return sum(bvv_div_sig_squared); } -/** - * @brief compute the qnorm_squared - * @param bvv input vector - * @param sig eigenvectors - */ double qnorm_squared(const DenseVec& bvv, const DenseVec& sig) { auto bvv_div_sig_cubed = bvv / (sig * sig * sig); return sum(bvv_div_sig_cubed); - // return bvv.dot((1.0 / (sig * sig * sig)).matrix()); } -// returns: -// minimum energy solution within delta -// N leftmost eigenvectors -// N smallest eigenvalue -// success status - -/** - * @brief solve the trust region problem exactly using a variant of the Moore Sorensen algorithm - * @param A matrix - * @param b rhs - * @param delta trust region radius - * @param num_leftmost the number of leftmost eigenvector/values to output - * returns the solution vector, a std::vector of leftmost vectors - * a std::vector of leftmost eigenvalues and the energy change (relative to x=0) - */ auto exactTrustRegionSolve(DenseMat A, const DenseVec& b, double delta, int num_leftmost) { - // minimize 1/2 x^T A x - b^T x, s.t. 
norm(x) <= delta auto [isize, jsize] = A.size(); auto isize2 = b.size(); SLIC_ERROR_IF(isize != jsize, "Exact trust region solver requires square matrices"); @@ -267,7 +186,6 @@ auto exactTrustRegionSolve(DenseMat A, const DenseVec& b, double delta, int num_ const auto& leftMost = V[0]; double minSig = sigs[0]; - // bv = V.T b, V has columns which are eigenvectors DenseVec bv(isize); for (size_t i = 0; i < size_t(isize); ++i) { bv.setValue(i, dot(V[i], b)); @@ -277,22 +195,16 @@ auto exactTrustRegionSolve(DenseMat A, const DenseVec& b, double delta, int num_ double sigScale = sum(abs(sigs)) / isize; double eps = 1e-12 * sigScale; - // Check if solution is inside the trust region if ((minSig >= eps) && (norm(bvOverSigs) <= delta)) { return std::make_tuple(A.solve(b), leftmosts, minsigs, true); } - // if we get here, the solution must be on the tr boundary - // consider bounding the initial guess, see More' Sorenson paper double lam = minSig < eps ? -minSig + eps : 0.0; - // try to solve this for lam: - // (A + lam I)p = b, such that norm(p) = Delta DenseVec sigsPlusLam = sigs + lam; bvOverSigs = bv / sigsPlusLam; - // Check for the hard case if ((minSig < eps) && (norm(bvOverSigs) < delta)) { DenseVec p(isize); p = 0.0; @@ -327,7 +239,6 @@ auto exactTrustRegionSolve(DenseMat A, const DenseVec& b, double delta, int num_ double pNorm = std::sqrt(pNormSq); double bError = (pNorm - delta) / delta; - // consider an out if it doesn't converge, or use a better initial guess, or bound the lam from below and above. 
size_t iters = 0; size_t maxIters = 30; while ((std::abs(bError) > 1e-9) && (iters++ < maxIters)) { @@ -364,7 +275,6 @@ auto exactTrustRegionSolve(DenseMat A, const DenseVec& b, double delta, int num_ return std::make_tuple(x, leftmosts, minsigs, success); } -/// @brief remove the vector at location j and return what is left std::vector remove_at(const std::vector& a, size_t j) { std::vector b; @@ -376,11 +286,11 @@ std::vector remove_at(const std::vector>, std::vector, double> solveSubspaceProblem( - const std::vector& states, const std::vector& Astates, - const mfem::Vector& b, double delta, int num_leftmost) +} // namespace + +TrustRegionSubspaceResult solveSubspaceProblemPetsc(const std::vector& states, + const std::vector& Astates, + const mfem::Vector& b, double delta, int num_leftmost) { SMITH_MARK_FUNCTION; DenseMat sAs1 = dot(states, Astates); @@ -388,14 +298,12 @@ std::tuple>, std::vector if (sAs.hasNan()) { throw PetscException("States in subspace solve contain NaNs."); - return std::make_tuple(b, std::vector>{}, std::vector{}, 0); } auto [Q_parallel, R] = qr(states); if (R.hasNan()) { throw PetscException("R from qr returning with a NaN."); - return std::make_tuple(b, std::vector>{}, std::vector{}, 0); } auto [rows, cols] = R.size(); @@ -406,13 +314,11 @@ std::tuple>, std::vector trace_mag += std::abs(R(i, i)); } - // remove any nearly colinear state for (int i = 0; i < rows; ++i) { if (R(i, i) < 1e-9 * trace_mag) { - // printf("removing after QR state number %d\n", i); auto statesNew = remove_at(states, size_t(i)); auto AstatesNew = remove_at(Astates, size_t(i)); - return solveSubspaceProblem(statesNew, AstatesNew, b, delta, num_leftmost); + return solveSubspaceProblemPetsc(statesNew, AstatesNew, b, delta, num_leftmost); } } @@ -425,6 +331,7 @@ std::tuple>, std::vector DenseVec pb(pb_vec); auto [reduced_x, leftvecs, leftvals, success] = exactTrustRegionSolve(pAp, pb, delta, num_leftmost); + (void)success; double energy = quadraticEnergy(pAp, pb, 
reduced_x); @@ -450,47 +357,6 @@ std::tuple>, std::vector return std::make_tuple(sol, leftmosts, leftvals, energy); } -/// @brief Remove any obvious dependent directions, namely ones which are scaled version of previous directions -/// The case where they are linear combinations of previous direction will be handled in the QR solver -std::pair, std::vector> removeDependentDirections( - std::vector directions, std::vector A_directions) -{ - SMITH_MARK_FUNCTION; - std::vector norms; - size_t num_dirs = directions.size(); - - for (size_t i = 0; i < num_dirs; ++i) { - norms.push_back(std::sqrt(mfem::InnerProduct(PETSC_COMM_WORLD, *directions[i], *directions[i]))); - } - - std::vector> kepts; - for (size_t i = 0; i < num_dirs; ++i) { - bool keepi = true; - if (norms[i] == 0) keepi = false; - for (auto&& kept_and_j : kepts) { - size_t j = kept_and_j.second; - double dot_ij = mfem::InnerProduct(PETSC_COMM_WORLD, *directions[i], *kept_and_j.first); - if (dot_ij > 0.999 * norms[i] * norms[j]) { - keepi = false; - } - } - // if (!keepi) printf("not keeping %zu\n",i); - if (keepi) { - kepts.emplace_back(std::make_pair(directions[i], i)); - } - } - - std::vector directions_new; - std::vector A_directions_new; - - for (auto kept_and_j : kepts) { - directions_new.push_back(directions[kept_and_j.second]); - A_directions_new.push_back(A_directions[kept_and_j.second]); - } - - return std::make_pair(directions_new, A_directions_new); -} - } // namespace smith #endif // SMITH_USE_SLEPC diff --git a/src/smith/numerics/solver_config.hpp b/src/smith/numerics/solver_config.hpp index ecbfde4cd9..27635aeda3 100644 --- a/src/smith/numerics/solver_config.hpp +++ b/src/smith/numerics/solver_config.hpp @@ -465,12 +465,30 @@ struct NonlinearSolverOptions { /// Scaling for the initial trust region size double trust_region_scaling = 0.1; + /// Nonmonotone TrustRegion acceptance window. Zero preserves monotone acceptance. 
+ int trust_nonmonotone_window = 0; + + /// Use JacobianOperator products and diagonal preconditioning in TrustRegion instead of assembled sparse products. + bool trust_use_jacobian_operator = false; + + /// Use a dense cubic subspace model built from retained Hessian-vector changes. + bool trust_use_cubic_subspace = false; + /// Option for how when the subspace solver should be utilized within trust-region solver SubSpaceOptions subspace_option = SubSpaceOptions::NEVER; /// Number of extra leftmost eigenvector to be stored between solves int num_leftmost = 1; + /// Number of additional older accepted TrustRegion steps to include in subspace solves. + int trust_num_past_steps = 0; + + /// Include the displacement from current nonlinear-solve state back to the nonlinear-solve initial state. + bool trust_use_solve_start_direction = false; + + /// Include the displacement from current nonlinear-solve state to the state with the minimum residual seen so far in this nonlinear solve. + bool trust_use_min_residual_direction = false; + /// Should the gradient be converted to a monolithic matrix bool force_monolithic = false; @@ -518,6 +536,12 @@ struct NonlinearSolverOptions { /// Running-mean window for successful PCG-block trust-radius reference steps int pcg_delta_avg_window = 5; + + /// Use a direct scalar diagonal extracted from the JacobianOperator as the PCG-block preconditioner + bool pcg_use_jacobian_diagonal_preconditioner = false; + + /// Relative floor used when inverting the absolute Jacobian diagonal for PCG-block diagonal preconditioning + double pcg_diagonal_floor = 1e-14; }; // _nonlinear_options_end diff --git a/src/smith/numerics/tests/CMakeLists.txt b/src/smith/numerics/tests/CMakeLists.txt index 10e693b21a..a2577051e2 100644 --- a/src/smith/numerics/tests/CMakeLists.txt +++ b/src/smith/numerics/tests/CMakeLists.txt @@ -13,6 +13,7 @@ set(numerics_serial_test_sources test_block_preconditioner.cpp test_block_preconditioner_backend.cpp 
test_block_preconditioner_custom_operators.cpp + test_trust_region_solver_mfem.cpp ) smith_add_tests( SOURCES ${numerics_serial_test_sources} @@ -30,7 +31,7 @@ if(PETSC_FOUND) if(SLEPC_FOUND) set(slepc_solver_tests test_eigensolver.cpp - test_trust_region_solver.cpp + test_trust_region_solver_petsc.cpp ) smith_add_tests(SOURCES ${slepc_solver_tests} DEPENDS_ON ${numerics_test_dependencies} diff --git a/src/smith/numerics/tests/test_equationsolver.cpp b/src/smith/numerics/tests/test_equationsolver.cpp index f0b73bf53e..a534acd8f1 100644 --- a/src/smith/numerics/tests/test_equationsolver.cpp +++ b/src/smith/numerics/tests/test_equationsolver.cpp @@ -217,7 +217,10 @@ TEST(EquationSolver, PcgBlockUsesJacobianOperator) void Mult(const mfem::Vector& dx, mfem::Vector& y) const override { matrix_->Mult(dx, y); } - std::unique_ptr assemble() override { return std::move(matrix_); } + std::unique_ptr assemble() override + { + return std::make_unique(*matrix_); + } void assembleDiagonal(mfem::Vector& diag) const override { matrix_->GetDiag(diag); } @@ -314,8 +317,8 @@ TEST(EquationSolver, PcgBlockUsesJacobianOperator) const auto diagnostics = eq_solver.pcgBlockDiagnostics(); ASSERT_TRUE(diagnostics.has_value()); EXPECT_GT(num_operator_evals, 0); - EXPECT_EQ(diagnostics->num_hess_vecs, static_cast(num_operator_evals)); EXPECT_EQ(diagnostics->num_jacobian_operator_evals, static_cast(num_operator_evals)); + EXPECT_GE(diagnostics->num_hess_vecs, diagnostics->num_jacobian_operator_evals); EXPECT_EQ(diagnostics->num_diagonal_assembles, 0u); EXPECT_TRUE(eq_solver.nonlinearSolver().GetConverged()); EXPECT_LT(x_computed.DistanceTo(x_exact.GetData()), 1.0e-10); diff --git a/src/smith/numerics/tests/test_trust_region_solver_mfem.cpp b/src/smith/numerics/tests/test_trust_region_solver_mfem.cpp new file mode 100644 index 0000000000..6e52393681 --- /dev/null +++ b/src/smith/numerics/tests/test_trust_region_solver_mfem.cpp @@ -0,0 +1,500 @@ +// Copyright (c) Lawrence Livermore National 
Security, LLC and +// other Smith Project Developers. See the top-level LICENSE file for +// details. +// +// SPDX-License-Identifier: (BSD-3-Clause) + +#include +#include + +#include "gtest/gtest.h" +#include "mfem.hpp" + +#include "smith/infrastructure/application_manager.hpp" +#include "smith/numerics/trust_region_solver.hpp" + +namespace { + +constexpr int test_size = 5; +constexpr double test_delta = 1.0e-3; + +std::vector applyDiagonalOperator(const mfem::Vector& diag, const std::vector& states) +{ + std::vector out; + out.reserve(states.size()); + for (const auto* state : states) { + out.emplace_back(state->Size()); + for (int i = 0; i < state->Size(); ++i) { + out.back()[i] = diag[i] * (*state)[i]; + } + } + return out; +} + +void expectNearVector(const mfem::Vector& a, const mfem::Vector& b, double tol) +{ + ASSERT_EQ(a.Size(), b.Size()); + for (int i = 0; i < a.Size(); ++i) { + EXPECT_NEAR(a[i], b[i], tol); + } +} + +std::vector toPointers(const std::vector& vectors) +{ + std::vector ptrs; + ptrs.reserve(vectors.size()); + for (const auto& v : vectors) { + ptrs.push_back(&v); + } + return ptrs; +} + +struct DiagonalSubspaceFixture { + DiagonalSubspaceFixture(int size) + : u1(size), + u2(size), + u3(size), + diag(size), + b(size) + { + u1 = 1.0; + for (int i = 0; i < size; ++i) { + u2[i] = i + 2.0; + u3[i] = i * i - 15.0; + diag[i] = 2.0 * i + 0.01 * i * i + 1.25; + b[i] = -i + 0.02 * i * i + 0.1; + } + } + + mfem::Vector u1; + mfem::Vector u2; + mfem::Vector u3; + mfem::Vector diag; + mfem::Vector b; +}; + +} // namespace + +TEST(TrustRegionSubspaceMfem, RemoveDependentDirectionsDropsDuplicatesAndZero) +{ + mfem::Vector d1(4); + mfem::Vector d2(4); + mfem::Vector d3(4); + mfem::Vector hd1(4); + mfem::Vector hd2(4); + mfem::Vector hd3(4); + + d1 = 0.0; + d2 = 0.0; + d3 = 0.0; + hd1 = 0.0; + hd2 = 0.0; + hd3 = 0.0; + + d1[0] = 1.0; + d1[1] = 2.0; + d2 = d1; + d2 *= 3.0; + + hd1[0] = 2.0; + hd1[1] = 5.0; + hd2 = hd1; + hd2 *= 3.0; + + std::vector dirs = 
{&d1, &d2, &d3}; + std::vector hdirs = {&hd1, &hd2, &hd3}; + + auto [dirs_new, hdirs_new] = smith::removeDependentDirections(dirs, hdirs); + + ASSERT_EQ(dirs_new.size(), 1); + ASSERT_EQ(hdirs_new.size(), 1); + expectNearVector(*dirs_new[0], d1, 0.0); + expectNearVector(*hdirs_new[0], hd1, 0.0); +} + +TEST(TrustRegionSubspaceMfem, RemoveDependentDirectionTriplesKeepsHistoryAligned) +{ + mfem::Vector d1(3); + mfem::Vector d2(3); + mfem::Vector d3(3); + mfem::Vector hd1(3); + mfem::Vector hd2(3); + mfem::Vector hd3(3); + mfem::Vector old_hd1(3); + mfem::Vector old_hd2(3); + mfem::Vector old_hd3(3); + + d1 = 0.0; + d2 = 0.0; + d3 = 0.0; + hd1 = 0.0; + hd2 = 0.0; + hd3 = 0.0; + old_hd1 = 0.0; + old_hd2 = 0.0; + old_hd3 = 0.0; + + d1[0] = 1.0; + d2 = d1; + d2 *= 2.0; + d3[2] = 1.0; + hd1[0] = 3.0; + hd2[0] = 6.0; + hd3[2] = 4.0; + old_hd1[0] = 2.0; + old_hd2[0] = 4.0; + old_hd3[2] = 5.0; + + std::vector dirs = {&d1, &d2, &d3}; + std::vector hdirs = {&hd1, &hd2, &hd3}; + std::vector old_hdirs = {&old_hd1, &old_hd2, &old_hd3}; + + auto [dirs_new, hdirs_new, old_hdirs_new] = smith::removeDependentDirectionTriples(dirs, hdirs, old_hdirs); + + ASSERT_EQ(dirs_new.size(), 2); + expectNearVector(*dirs_new[0], d1, 0.0); + expectNearVector(*hdirs_new[0], hd1, 0.0); + expectNearVector(*old_hdirs_new[0], old_hd1, 0.0); + expectNearVector(*dirs_new[1], d3, 0.0); + expectNearVector(*hdirs_new[1], hd3, 0.0); + expectNearVector(*old_hdirs_new[1], old_hd3, 0.0); +} + +TEST(TrustRegionSubspaceMfem, SolveHitsTrustRegionBoundary) +{ + DiagonalSubspaceFixture fixture(test_size); + + const std::vector states = {&fixture.u1, &fixture.u2, &fixture.u3}; + const auto astates = applyDiagonalOperator(fixture.diag, states); + const auto astate_ptrs = toPointers(astates); + + auto [sol, leftvecs, leftvals, energy] = + smith::solveSubspaceProblemMfem(states, astate_ptrs, fixture.b, test_delta, 1); + + EXPECT_NEAR(sol.Norml2(), test_delta, 1.0e-12); + EXPECT_FALSE(leftvecs.empty()); + 
EXPECT_EQ(leftvals.size(), 1); + EXPECT_LT(energy, 0.0); +} + +TEST(TrustRegionSubspaceMfem, GenericSolveUsesMfemBackend) +{ + DiagonalSubspaceFixture fixture(test_size); + + const std::vector states = {&fixture.u1, &fixture.u2, &fixture.u3, &fixture.u2}; + const auto astates = applyDiagonalOperator(fixture.diag, states); + const auto astate_ptrs = toPointers(astates); + + auto [generic_sol, generic_leftvecs, generic_leftvals, generic_energy] = + smith::solveSubspaceProblem(states, astate_ptrs, fixture.b, test_delta, 2); + auto [mfem_sol, mfem_leftvecs, mfem_leftvals, mfem_energy] = + smith::solveSubspaceProblemMfem(states, astate_ptrs, fixture.b, test_delta, 2); + + expectNearVector(generic_sol, mfem_sol, 1.0e-12); + ASSERT_EQ(generic_leftvecs.size(), mfem_leftvecs.size()); + ASSERT_EQ(generic_leftvals.size(), mfem_leftvals.size()); + for (size_t i = 0; i < generic_leftvecs.size(); ++i) { + const double same = smith::innerProduct(*generic_leftvecs[i], *mfem_leftvecs[i], MPI_COMM_WORLD); + mfem::Vector neg(*mfem_leftvecs[i]); + neg *= -1.0; + const double flipped = smith::innerProduct(*generic_leftvecs[i], neg, MPI_COMM_WORLD); + if (std::abs(flipped) > std::abs(same)) { + expectNearVector(*generic_leftvecs[i], neg, 1.0e-10); + } else { + expectNearVector(*generic_leftvecs[i], *mfem_leftvecs[i], 1.0e-10); + } + EXPECT_NEAR(generic_leftvals[i], mfem_leftvals[i], 1.0e-12); + } + EXPECT_NEAR(generic_energy, mfem_energy, 1.0e-12); +} + +TEST(TrustRegionSubspaceMfem, SolveHandlesZeroDirection) +{ + mfem::Vector u1(4); + mfem::Vector u2(4); + mfem::Vector zero(4); + mfem::Vector diag(4); + mfem::Vector b(4); + + zero = 0.0; + for (int i = 0; i < 4; ++i) { + u1[i] = 1.0 + i; + u2[i] = 0.25 * i - 0.5; + diag[i] = 1.0 + i; + b[i] = 0.5 - 0.1 * i; + } + + const std::vector states = {&u1, &zero, &u2}; + const auto astates = applyDiagonalOperator(diag, states); + const auto astate_ptrs = toPointers(astates); + + auto [sol, leftvecs, leftvals, energy] = 
smith::solveSubspaceProblemMfem(states, astate_ptrs, b, 0.25, 1); + + EXPECT_LE(sol.Norml2(), 0.25 + 1.0e-12); + EXPECT_FALSE(leftvecs.empty()); + EXPECT_EQ(leftvals.size(), 1); + EXPECT_LT(energy, 0.0); +} + +TEST(TrustRegionCubicSubspaceMfem, ZeroCubicMatchesInteriorQuadraticSolve) +{ + mfem::DenseMatrix A(2); + A = 0.0; + A(0, 0) = 4.0; + A(1, 1) = 2.0; + + mfem::Vector b(2); + b[0] = 2.0; + b[1] = -1.0; + + std::vector cubic(2, mfem::DenseMatrix(2)); + for (auto& matrix : cubic) { + matrix = 0.0; + } + + auto [x, energy] = smith::solveDenseCubicTrustRegionProblemMfem(A, b, cubic, 10.0); + + EXPECT_NEAR(x[0], 0.5, 1.0e-10); + EXPECT_NEAR(x[1], -0.5, 1.0e-10); + EXPECT_NEAR(energy, -0.75, 1.0e-10); +} + +TEST(TrustRegionCubicSubspaceMfem, CubicTermChangesOneDimensionalMinimizer) +{ + mfem::DenseMatrix A(1); + A(0, 0) = 1.0; + + mfem::Vector b(1); + b[0] = 1.0; + + std::vector cubic(1, mfem::DenseMatrix(1)); + cubic[0](0, 0) = 6.0; + + auto [x, energy] = smith::solveDenseCubicTrustRegionProblemMfem(A, b, cubic, 1.0); + + const double expected = (-1.0 + std::sqrt(13.0)) / 6.0; + EXPECT_NEAR(x[0], expected, 2.0e-3); + EXPECT_NEAR(energy, 0.5 * expected * expected - expected + expected * expected * expected, 5.0e-6); +} + +TEST(TrustRegionCubicSubspaceMfem, RespectsTrustRegionBoundary) +{ + mfem::DenseMatrix A(1); + A(0, 0) = 1.0; + + mfem::Vector b(1); + b[0] = 10.0; + + std::vector cubic(1, mfem::DenseMatrix(1)); + cubic[0] = 0.0; + + auto [x, energy] = smith::solveDenseCubicTrustRegionProblemMfem(A, b, cubic, 0.25); + + EXPECT_NEAR(x.Norml2(), 0.25, 1.0e-12); + EXPECT_NEAR(x[0], 0.25, 1.0e-12); + EXPECT_NEAR(energy, 0.5 * 0.25 * 0.25 - 10.0 * 0.25, 1.0e-12); +} + +TEST(TrustRegionCubicSubspaceMfem, HistoryProjectedSubspaceSolveRuns) +{ + mfem::Vector e1(2); + mfem::Vector e2(2); + e1 = 0.0; + e2 = 0.0; + e1[0] = 1.0; + e2[1] = 1.0; + + mfem::Vector h1(2); + mfem::Vector h2(2); + mfem::Vector old_h1(2); + mfem::Vector old_h2(2); + h1 = 0.0; + h2 = 0.0; + old_h1 = 
0.0; + old_h2 = 0.0; + h1[0] = 2.0; + h2[1] = 3.0; + old_h1[0] = 1.0; + old_h2[1] = 3.0; + + mfem::Vector previous_step(2); + previous_step = 0.0; + previous_step[0] = 1.0; + + mfem::Vector b(2); + b[0] = 1.0; + b[1] = 0.25; + + std::vector directions = {&e1, &e2}; + std::vector h_directions = {&h1, &h2}; + std::vector old_h_directions = {&old_h1, &old_h2}; + + auto [x, leftvecs, leftvals, energy] = + smith::solveCubicSubspaceProblemMfem(directions, h_directions, old_h_directions, previous_step, b, 0.5, 1); + + EXPECT_LE(x.Norml2(), 0.5 + 1.0e-12); + EXPECT_FALSE(leftvecs.empty()); + EXPECT_EQ(leftvals.size(), 1); + EXPECT_LT(energy, 0.0); +} + +TEST(TrustRegionCubicSubspaceMfem, FallsBackToQuadraticWhenCubicPredictionDoesNotImprove) +{ + mfem::Vector e1(1); + mfem::Vector h1(1); + mfem::Vector old_h1(1); + mfem::Vector previous_step(1); + mfem::Vector b(1); + + e1[0] = 1.0; + h1[0] = 1.0; + old_h1[0] = 1.0; + previous_step[0] = 1.0; + b[0] = 1.0; + + std::vector directions = {&e1}; + std::vector h_directions = {&h1}; + std::vector old_h_directions = {&old_h1}; + + auto [cubic_x, cubic_leftvecs, cubic_leftvals, cubic_energy] = + smith::solveCubicSubspaceProblemMfem(directions, h_directions, old_h_directions, previous_step, b, 1.0, 1); + auto [quadratic_x, quadratic_leftvecs, quadratic_leftvals, quadratic_energy] = + smith::solveSubspaceProblemMfem(directions, h_directions, b, 1.0, 1); + + expectNearVector(cubic_x, quadratic_x, 1.0e-12); + EXPECT_EQ(cubic_leftvecs.size(), quadratic_leftvecs.size()); + EXPECT_EQ(cubic_leftvals.size(), quadratic_leftvals.size()); + EXPECT_NEAR(cubic_energy, quadratic_energy, 1.0e-12); +} + +TEST(TrustRegionCubicSubspaceMfem, PreviousStepSecantIsExactForCompatibleCubic) +{ + mfem::Vector e1(2); + mfem::Vector e2(2); + e1 = 0.0; + e2 = 0.0; + e1[0] = 1.0; + e2[1] = 1.0; + + mfem::Vector h1(2); + mfem::Vector h2(2); + mfem::Vector old_h1(2); + mfem::Vector old_h2(2); + h1 = 0.0; + h2 = 0.0; + old_h1 = 0.0; + old_h2 = 0.0; + h1[0] = 1.0; 
+ h2[1] = 1.0; + old_h1[0] = 7.0; + old_h2[1] = 1.0; + + mfem::Vector previous_step(2); + previous_step = 0.0; + previous_step[0] = 1.0; + + mfem::Vector b(2); + b = 0.0; + b[0] = 0.1; + + std::vector directions = {&e1, &e2}; + std::vector h_directions = {&h1, &h2}; + std::vector old_h_directions = {&old_h1, &old_h2}; + + bool used_cubic = false; + auto [x, leftvecs, leftvals, energy] = + smith::solveCubicSubspaceProblemMfem(directions, h_directions, old_h_directions, previous_step, b, 1.0, 1, + &used_cubic); + + mfem::DenseMatrix A(2); + A = 0.0; + A(0, 0) = 1.0; + A(1, 1) = 1.0; + std::vector cubic(2, mfem::DenseMatrix(2)); + cubic[0] = 0.0; + cubic[1] = 0.0; + cubic[0](0, 0) = -6.0; + auto [expected_x, expected_energy] = smith::solveDenseCubicTrustRegionProblemMfem(A, b, cubic, 1.0); + + EXPECT_TRUE(used_cubic); + expectNearVector(x, expected_x, 1.0e-12); + EXPECT_NEAR(energy, expected_energy, 1.0e-12); + EXPECT_FALSE(leftvecs.empty()); + EXPECT_EQ(leftvals.size(), 1); +} + +TEST(TrustRegionCubicSubspaceMfem, PreviousStepSecantIsExactForRotatedCompatibleCubic) +{ + mfem::Vector e1(2); + mfem::Vector e2(2); + e1 = 0.0; + e2 = 0.0; + e1[0] = 1.0; + e2[1] = 1.0; + + constexpr double lambda = -6.0; + mfem::Vector previous_step(2); + previous_step[0] = 1.0; + previous_step[1] = 1.0; + mfem::Vector u(previous_step); + u /= u.Norml2(); + + mfem::DenseMatrix delta_h(2); + delta_h = 0.0; + for (int i = 0; i < 2; ++i) { + for (int j = 0; j < 2; ++j) { + delta_h(i, j) = lambda * previous_step.Norml2() * u[i] * u[j]; + } + } + + mfem::Vector h1(e1); + mfem::Vector h2(e2); + mfem::Vector old_h1(e1); + mfem::Vector old_h2(e2); + for (int i = 0; i < 2; ++i) { + old_h1[i] -= delta_h(i, 0); + old_h2[i] -= delta_h(i, 1); + } + + mfem::Vector b(2); + b[0] = 0.1 * u[0]; + b[1] = 0.1 * u[1]; + + std::vector directions = {&e1, &e2}; + std::vector h_directions = {&h1, &h2}; + std::vector old_h_directions = {&old_h1, &old_h2}; + + bool used_cubic = false; + auto [x, leftvecs, leftvals, 
energy] = + smith::solveCubicSubspaceProblemMfem(directions, h_directions, old_h_directions, previous_step, b, 1.0, 1, + &used_cubic); + + mfem::DenseMatrix A(2); + A = 0.0; + A(0, 0) = 1.0; + A(1, 1) = 1.0; + std::vector cubic(2, mfem::DenseMatrix(2)); + cubic[0] = 0.0; + cubic[1] = 0.0; + for (int k = 0; k < 2; ++k) { + for (int i = 0; i < 2; ++i) { + for (int j = 0; j < 2; ++j) { + cubic[size_t(k)](i, j) = lambda * u[k] * u[i] * u[j]; + } + } + } + auto [expected_x, expected_energy] = smith::solveDenseCubicTrustRegionProblemMfem(A, b, cubic, 1.0); + + EXPECT_TRUE(used_cubic); + expectNearVector(x, expected_x, 1.0e-12); + EXPECT_NEAR(energy, expected_energy, 1.0e-12); + EXPECT_FALSE(leftvecs.empty()); + EXPECT_EQ(leftvals.size(), 1); +} + +int main(int argc, char* argv[]) +{ + ::testing::InitGoogleTest(&argc, argv); + smith::ApplicationManager applicationManager(argc, argv); + return RUN_ALL_TESTS(); +} diff --git a/src/smith/numerics/tests/test_trust_region_solver.cpp b/src/smith/numerics/tests/test_trust_region_solver_petsc.cpp similarity index 62% rename from src/smith/numerics/tests/test_trust_region_solver.cpp rename to src/smith/numerics/tests/test_trust_region_solver_petsc.cpp index af030fc4c3..1e3eae5433 100644 --- a/src/smith/numerics/tests/test_trust_region_solver.cpp +++ b/src/smith/numerics/tests/test_trust_region_solver_petsc.cpp @@ -5,6 +5,7 @@ // SPDX-License-Identifier: (BSD-3-Clause) #include +#include #include #include #include @@ -84,12 +85,9 @@ std::vector applyLinearOperator(const Mat& A, const std::vector{}, "u1", MESHTAG); auto u2 = smith::StateManager::newState(smith::H1{}, "u2", MESHTAG); auto u3 = smith::StateManager::newState(smith::H1{}, "u3", MESHTAG); - auto u4 = smith::StateManager::newState(smith::H1{}, "u4", MESHTAG); auto a = smith::StateManager::newState(smith::H1{}, "a", MESHTAG); auto b = smith::StateManager::newState(smith::H1{}, "b", MESHTAG); @@ -132,11 +137,10 @@ TEST_F(MeshFixture, QR) for (int i = 0; i < u2.Size(); ++i) 
{ u2[i] = i + 2; u3[i] = i * i - 15.0; - u4[i] = -i + 0.1 * i * i * i - 1.0; a[i] = 2 * i + 0.01 * i * i + 1.25; b[i] = -i + 0.02 * i * i + 0.1; } - std::vector states = {&u1, &u2, &u3}; //,u4}; + std::vector states = {&u1, &u2, &u3}; auto A_parallel = createDiagonalTestMatrix(a); std::vector Astates = applyLinearOperator(A_parallel, states); @@ -147,12 +151,64 @@ TEST_F(MeshFixture, QR) } double delta = 0.001; - auto [sol, leftvecs, leftvals, energy] = smith::solveSubspaceProblem(states, AstatePtrs, b, delta, 1); + auto [sol, leftvecs, leftvals, energy] = smith::solveSubspaceProblemPetsc(states, AstatePtrs, b, delta, 1); - smith::FiniteElementState smith_sol(b); - smith_sol = sol; + EXPECT_NEAR(sol.Norml2(), delta, 1e-12); + EXPECT_FALSE(leftvecs.empty()); + EXPECT_EQ(leftvals.size(), 1); + EXPECT_LT(energy, 0.0); - EXPECT_NEAR(std::sqrt(smith::innerProduct(smith_sol, smith_sol)), delta, 1e-12); + MatDestroy(&A_parallel); +} + +TEST_F(MeshFixture, MfemSubspaceSolveMatchesPetsc) +{ + SMITH_MARK_FUNCTION; + + auto u1 = smith::StateManager::newState(smith::H1{}, "u1", MESHTAG); + auto u2 = smith::StateManager::newState(smith::H1{}, "u2", MESHTAG); + auto u3 = smith::StateManager::newState(smith::H1{}, "u3", MESHTAG); + auto a = smith::StateManager::newState(smith::H1{}, "a", MESHTAG); + auto b = smith::StateManager::newState(smith::H1{}, "b", MESHTAG); + + u1 = 1.0; + for (int i = 0; i < u2.Size(); ++i) { + u2[i] = i + 2; + u3[i] = i * i - 15.0; + a[i] = 2 * i + 0.01 * i * i + 1.25; + b[i] = -i + 0.02 * i * i + 0.1; + } + + std::vector states = {&u1, &u2, &u3, &u2}; + auto A_parallel = createDiagonalTestMatrix(a); + std::vector Astates = applyLinearOperator(A_parallel, states); + + std::vector AstatePtrs; + for (size_t i = 0; i < Astates.size(); ++i) { + AstatePtrs.push_back(&Astates[i]); + } + + auto [petsc_sol, petsc_leftvecs, petsc_leftvals, petsc_energy] = + smith::solveSubspaceProblemPetsc(states, AstatePtrs, b, 0.001, 2); + auto [mfem_sol, mfem_leftvecs, 
mfem_leftvals, mfem_energy] = + smith::solveSubspaceProblemMfem(states, AstatePtrs, b, 0.001, 2); + + expectNearVector(mfem_sol, petsc_sol, 1e-10); + ASSERT_EQ(mfem_leftvecs.size(), petsc_leftvecs.size()); + ASSERT_EQ(mfem_leftvals.size(), petsc_leftvals.size()); + for (size_t i = 0; i < mfem_leftvecs.size(); ++i) { + const double same = smith::innerProduct(*mfem_leftvecs[i], *petsc_leftvecs[i], MPI_COMM_WORLD); + mfem::Vector neg(*petsc_leftvecs[i]); + neg *= -1.0; + const double flipped = smith::innerProduct(*mfem_leftvecs[i], neg, MPI_COMM_WORLD); + if (std::abs(flipped) > std::abs(same)) { + expectNearVector(*mfem_leftvecs[i], neg, 1e-9); + } else { + expectNearVector(*mfem_leftvecs[i], *petsc_leftvecs[i], 1e-9); + } + EXPECT_NEAR(mfem_leftvals[i], petsc_leftvals[i], 1e-10); + } + EXPECT_NEAR(mfem_energy, petsc_energy, 1e-12); MatDestroy(&A_parallel); } diff --git a/src/smith/numerics/trust_region_cubic_subspace.cpp b/src/smith/numerics/trust_region_cubic_subspace.cpp new file mode 100644 index 0000000000..2bbc86b16c --- /dev/null +++ b/src/smith/numerics/trust_region_cubic_subspace.cpp @@ -0,0 +1,461 @@ +// Copyright (c) Lawrence Livermore National Security, LLC and +// other Smith Project Developers. See the top-level LICENSE file for +// details. 
+// +// SPDX-License-Identifier: (BSD-3-Clause) + +#include "smith/numerics/trust_region_solver.hpp" + +#include + +#include "smith/infrastructure/profiling.hpp" + +namespace smith { + +#ifdef MFEM_USE_LAPACK + +namespace { + +double dot(const mfem::Vector& a, const mfem::Vector& b) +{ + return a * b; +} + +void symmetrize(mfem::DenseMatrix& A) +{ + MFEM_VERIFY(A.Height() == A.Width(), "symmetrize requires square matrix."); + for (int i = 0; i < A.Height(); ++i) { + for (int j = 0; j < i; ++j) { + const double value = 0.5 * (A(i, j) + A(j, i)); + A(i, j) = value; + A(j, i) = value; + } + } +} + +mfem::Vector matrixColumn(const mfem::DenseMatrix& A, int j) +{ + mfem::Vector col(A.Height()); + for (int i = 0; i < A.Height(); ++i) { + col[i] = A(i, j); + } + return col; +} + +mfem::DenseMatrix columnsToMatrix(const std::vector& cols) +{ + mfem::DenseMatrix A(cols.empty() ? 0 : cols[0].Size(), static_cast(cols.size())); + for (int j = 0; j < A.Width(); ++j) { + for (int i = 0; i < A.Height(); ++i) { + A(i, j) = cols[size_t(j)][i]; + } + } + return A; +} + +mfem::DenseMatrix denseDot(const std::vector& s, const std::vector& As) +{ + MFEM_VERIFY(s.size() == As.size(), "Dense dot requires matching direction counts."); + mfem::DenseMatrix result(static_cast(s.size())); + for (int i = 0; i < result.Height(); ++i) { + for (int j = 0; j < result.Width(); ++j) { + result(i, j) = innerProduct(*s[size_t(i)], *As[size_t(j)], MPI_COMM_WORLD); + } + } + return result; +} + +mfem::Vector denseDot(const std::vector& s, const mfem::Vector& b) +{ + mfem::Vector result(static_cast(s.size())); + for (int i = 0; i < result.Size(); ++i) { + result[i] = innerProduct(*s[size_t(i)], b, MPI_COMM_WORLD); + } + return result; +} + +mfem::DenseMatrix orthonormalBasisTransform(const mfem::DenseMatrix& gram) +{ + mfem::DenseMatrix gram_copy(gram); + mfem::Vector evals; + mfem::DenseMatrix evecs; + gram_copy.Eigensystem(evals, evecs); + + double trace_mag = 0.0; + for (int i = 0; i < evals.Size(); 
++i) { + trace_mag += std::abs(evals[i]); + } + + std::vector kept_columns; + for (int i = 0; i < evals.Size(); ++i) { + if (evals[i] > 1e-9 * trace_mag) { + mfem::Vector col = matrixColumn(evecs, i); + col /= std::sqrt(evals[i]); + kept_columns.emplace_back(std::move(col)); + } + } + + return columnsToMatrix(kept_columns); +} + +mfem::DenseMatrix tripleProduct(const mfem::DenseMatrix& L, const mfem::DenseMatrix& A, const mfem::DenseMatrix& R) +{ + mfem::DenseMatrix tmp(A.Height(), R.Width()); + mfem::Mult(A, R, tmp); + mfem::DenseMatrix out(L.Width(), R.Width()); + mfem::MultAtB(L, tmp, out); + return out; +} + +mfem::Vector projectWithTranspose(const mfem::DenseMatrix& A, const mfem::Vector& x) +{ + mfem::Vector out(A.Width()); + A.MultTranspose(x, out); + return out; +} + +mfem::DenseMatrix orthonormalBasisWithFirstVector(const mfem::Vector& first) +{ + const int n = first.Size(); + mfem::DenseMatrix Q(n); + Q = 0.0; + + mfem::Vector q0(first); + q0 /= q0.Norml2(); + for (int i = 0; i < n; ++i) { + Q(i, 0) = q0[i]; + } + + int col = 1; + for (int seed = 0; seed < n && col < n; ++seed) { + mfem::Vector candidate(n); + candidate = 0.0; + candidate[seed] = 1.0; + for (int j = 0; j < col; ++j) { + const mfem::Vector qj = matrixColumn(Q, j); + candidate.Add(-dot(candidate, qj), qj); + } + const double norm = candidate.Norml2(); + if (norm > 1.0e-12) { + candidate /= norm; + for (int i = 0; i < n; ++i) { + Q(i, col) = candidate[i]; + } + ++col; + } + } + + MFEM_VERIFY(col == n, "Failed to build orthonormal basis for cubic tensor completion."); + return Q; +} + +std::vector completeSymmetricCubicTensor(const mfem::DenseMatrix& deltaA, + const mfem::Vector& previous_step) +{ + const int n = previous_step.Size(); + const double step_norm = previous_step.Norml2(); + MFEM_VERIFY(step_norm > 0.0, "Cannot complete cubic tensor with zero previous step."); + + const mfem::DenseMatrix Q = orthonormalBasisWithFirstVector(previous_step); + mfem::DenseMatrix delta_hat = 
tripleProduct(Q, deltaA, Q); + symmetrize(delta_hat); + + std::vector tensor_hat(static_cast(n), mfem::DenseMatrix(n)); + for (auto& matrix : tensor_hat) { + matrix = 0.0; + } + + for (int i = 0; i < n; ++i) { + for (int j = 0; j < n; ++j) { + const double value = delta_hat(i, j) / step_norm; + tensor_hat[0](i, j) = value; + tensor_hat[size_t(i)](0, j) = value; + tensor_hat[size_t(i)](j, 0) = value; + } + } + + std::vector tensor(static_cast(n), mfem::DenseMatrix(n)); + for (auto& matrix : tensor) { + matrix = 0.0; + } + + for (int a = 0; a < n; ++a) { + for (int b = 0; b < n; ++b) { + for (int c = 0; c < n; ++c) { + double value = 0.0; + for (int alpha = 0; alpha < n; ++alpha) { + for (int beta = 0; beta < n; ++beta) { + for (int gamma = 0; gamma < n; ++gamma) { + value += Q(a, alpha) * Q(b, beta) * Q(c, gamma) * tensor_hat[size_t(alpha)](beta, gamma); + } + } + } + tensor[size_t(a)](b, c) = value; + } + } + } + + return tensor; +} + +mfem::Vector combineDirections(const std::vector& states, const mfem::Vector& coeffs) +{ + mfem::Vector out(*states[0]); + out = 0.0; + for (int i = 0; i < coeffs.Size(); ++i) { + out.Add(coeffs[i], *states[size_t(i)]); + } + return out; +} + +void verifyCubicInputs(const mfem::DenseMatrix& A, const mfem::Vector& b, const std::vector& cubic, + double delta) +{ + MFEM_VERIFY(A.Height() == A.Width(), "Dense cubic trust-region matrix must be square."); + MFEM_VERIFY(A.Height() == b.Size(), "Dense cubic trust-region linear term has incompatible size."); + MFEM_VERIFY(delta >= 0.0, "Dense cubic trust-region radius must be nonnegative."); + MFEM_VERIFY(static_cast(cubic.size()) == b.Size(), "Dense cubic tensor must have one matrix per dimension."); + for (const auto& matrix : cubic) { + MFEM_VERIFY(matrix.Height() == b.Size() && matrix.Width() == b.Size(), + "Dense cubic tensor matrix has incompatible size."); + } +} + +double cubicEnergy(const mfem::DenseMatrix& A, const mfem::Vector& b, const std::vector& cubic, + const mfem::Vector& x) 
+{ + mfem::Vector Ax(x.Size()); + A.Mult(x, Ax); + double energy = 0.5 * dot(x, Ax) - dot(x, b); + for (int k = 0; k < x.Size(); ++k) { + cubic[size_t(k)].Mult(x, Ax); + energy += (x[k] * dot(x, Ax)) / 6.0; + } + return energy; +} + +mfem::Vector cubicGradient(const mfem::DenseMatrix& A, const mfem::Vector& b, + const std::vector& cubic, const mfem::Vector& x) +{ + mfem::Vector grad(x.Size()); + A.Mult(x, grad); + grad -= b; + + mfem::Vector tmp(x.Size()); + for (int i = 0; i < x.Size(); ++i) { + double correction = 0.0; + cubic[size_t(i)].Mult(x, tmp); + correction += dot(x, tmp); + for (int k = 0; k < x.Size(); ++k) { + for (int j = 0; j < x.Size(); ++j) { + correction += x[k] * (cubic[size_t(k)](i, j) + cubic[size_t(k)](j, i)) * x[j]; + } + } + grad[i] += correction / 6.0; + } + + return grad; +} + +void projectToBall(mfem::Vector& x, double delta) +{ + const double norm = x.Norml2(); + if (norm > delta && norm > 0.0) { + x *= delta / norm; + } +} + +mfem::Vector solveQuadraticCandidate(mfem::DenseMatrix A, const mfem::Vector& b, double delta) +{ + const int n = b.Size(); + mfem::DenseMatrix shifted(A); + double trace = 0.0; + for (int i = 0; i < n; ++i) { + trace += std::abs(A(i, i)); + } + const double regularization = std::max(1.0e-14, 1.0e-12 * trace / std::max(n, 1)); + for (int i = 0; i < n; ++i) { + shifted(i, i) += regularization; + } + + mfem::DenseMatrixInverse inv(shifted); + mfem::Vector x(n); + inv.Mult(b, x); + projectToBall(x, delta); + return x; +} + +mfem::Vector projectedGradientSolve(const mfem::DenseMatrix& A, const mfem::Vector& b, + const std::vector& cubic, mfem::Vector x, double delta) +{ + double energy = cubicEnergy(A, b, cubic, x); + constexpr int max_iters = 200; + constexpr double grad_tol = 1.0e-11; + + for (int iter = 0; iter < max_iters; ++iter) { + mfem::Vector grad = cubicGradient(A, b, cubic, x); + if (grad.Norml2() <= grad_tol * std::max(1.0, b.Norml2())) { + break; + } + + double step = 0.25; + bool accepted = false; + for 
(int ls = 0; ls < 30; ++ls) { + mfem::Vector trial(x); + trial.Add(-step, grad); + projectToBall(trial, delta); + const double trial_energy = cubicEnergy(A, b, cubic, trial); + if (trial_energy < energy - 1.0e-14) { + x = trial; + energy = trial_energy; + accepted = true; + break; + } + step *= 0.5; + } + if (!accepted) { + break; + } + } + + return x; +} + +} // namespace + +DenseCubicTrustRegionResult solveDenseCubicTrustRegionProblemMfem(const mfem::DenseMatrix& A, const mfem::Vector& b, + const std::vector& cubic, + double delta) +{ + SMITH_MARK_FUNCTION; + verifyCubicInputs(A, b, cubic, delta); + + mfem::Vector best(b.Size()); + best = 0.0; + double best_energy = cubicEnergy(A, b, cubic, best); + if (delta == 0.0 || b.Size() == 0) { + return std::make_tuple(best, best_energy); + } + + std::vector starts; + starts.emplace_back(best); + starts.emplace_back(solveQuadraticCandidate(A, b, delta)); + + mfem::Vector direction(b); + if (direction.Norml2() > 0.0) { + direction *= delta / direction.Norml2(); + starts.emplace_back(direction); + direction *= -1.0; + starts.emplace_back(direction); + } + + for (int i = 0; i < b.Size(); ++i) { + mfem::Vector axis(b.Size()); + axis = 0.0; + axis[i] = delta; + starts.emplace_back(axis); + axis[i] = -delta; + starts.emplace_back(axis); + } + + for (const auto& start : starts) { + mfem::Vector candidate = projectedGradientSolve(A, b, cubic, start, delta); + const double energy = cubicEnergy(A, b, cubic, candidate); + if (energy < best_energy) { + best = candidate; + best_energy = energy; + } + } + + return std::make_tuple(best, best_energy); +} + +TrustRegionSubspaceResult solveCubicSubspaceProblemMfem( + const std::vector& directions, const std::vector& A_directions, + const std::vector& previous_A_directions, const mfem::Vector& previous_step, + const mfem::Vector& b, double delta, int num_leftmost, bool* used_cubic) +{ + SMITH_MARK_FUNCTION; + MFEM_VERIFY(directions.size() == A_directions.size(), "Cubic subspace directions 
and A_directions differ."); + MFEM_VERIFY(directions.size() == previous_A_directions.size(), + "Cubic subspace directions and previous_A_directions differ."); + MFEM_VERIFY(!directions.empty(), "Cubic subspace solve requires at least one direction."); + + mfem::DenseMatrix ss = denseDot(directions, directions); + symmetrize(ss); + mfem::DenseMatrix T = orthonormalBasisTransform(ss); + MFEM_VERIFY(T.Width() > 0, "No independent directions in cubic MFEM subspace solve."); + + mfem::DenseMatrix sAs = denseDot(directions, A_directions); + symmetrize(sAs); + mfem::DenseMatrix pAp = tripleProduct(T, sAs, T); + symmetrize(pAp); + + mfem::DenseMatrix sDeltaA = denseDot(directions, previous_A_directions); + sDeltaA *= -1.0; + sDeltaA += sAs; + symmetrize(sDeltaA); + mfem::DenseMatrix pDeltaAp = tripleProduct(T, sDeltaA, T); + symmetrize(pDeltaAp); + + mfem::Vector previous_coeffs = denseDot(directions, previous_step); + previous_coeffs = projectWithTranspose(T, previous_coeffs); + const double previous_norm_squared = dot(previous_coeffs, previous_coeffs); + + std::vector cubic(size_t(T.Width()), mfem::DenseMatrix(T.Width())); + for (auto& matrix : cubic) { + matrix = 0.0; + } + if (previous_norm_squared > 0.0) { + cubic = completeSymmetricCubicTensor(pDeltaAp, previous_coeffs); + } + + const mfem::Vector sb = denseDot(directions, b); + const mfem::Vector pb = projectWithTranspose(T, sb); + auto [reduced_x, energy] = solveDenseCubicTrustRegionProblemMfem(pAp, pb, cubic, delta); + + mfem::Vector coeffs(T.Height()); + T.Mult(reduced_x, coeffs); + mfem::Vector sol = combineDirections(directions, coeffs); + + auto [quadratic_sol, leftmosts, leftvals, quadratic_energy] = + solveSubspaceProblemMfem(directions, A_directions, b, delta, num_leftmost); + (void)quadratic_energy; + + const mfem::Vector quadratic_s_coeffs = denseDot(directions, quadratic_sol); + const mfem::Vector quadratic_reduced_x = projectWithTranspose(T, quadratic_s_coeffs); + const double quadratic_cubic_energy = 
cubicEnergy(pAp, pb, cubic, quadratic_reduced_x); + if (quadratic_cubic_energy <= energy) { + if (used_cubic != nullptr) { + *used_cubic = false; + } + return std::make_tuple(quadratic_sol, leftmosts, leftvals, quadratic_cubic_energy); + } + + if (used_cubic != nullptr) { + *used_cubic = true; + } + return std::make_tuple(sol, leftmosts, leftvals, energy); +} + +#else + +DenseCubicTrustRegionResult solveDenseCubicTrustRegionProblemMfem(const mfem::DenseMatrix&, const mfem::Vector& b, + const std::vector&, double) +{ + throw PetscException("MFEM dense cubic trust-region solve requires MFEM LAPACK support."); + return std::make_tuple(b, 0.0); +} + +TrustRegionSubspaceResult solveCubicSubspaceProblemMfem(const std::vector&, + const std::vector&, + const std::vector&, + const mfem::Vector&, const mfem::Vector& b, double, int, bool*) +{ + throw PetscException("MFEM dense cubic trust-region solve requires MFEM LAPACK support."); + return std::make_tuple(b, std::vector> {}, std::vector {}, 0.0); +} + +#endif // MFEM_USE_LAPACK + +} // namespace smith diff --git a/src/smith/numerics/trust_region_solver.hpp b/src/smith/numerics/trust_region_solver.hpp index ad4b390f18..f076520f0e 100644 --- a/src/smith/numerics/trust_region_solver.hpp +++ b/src/smith/numerics/trust_region_solver.hpp @@ -14,14 +14,13 @@ #include "smith/smith_config.hpp" -#ifdef SMITH_USE_SLEPC - #include -#include -#include +#include +#include +#include +#include -#include "smith/physics/state/finite_element_state.hpp" -#include "smith/physics/state/finite_element_dual.hpp" +#include "mfem.hpp" namespace smith { @@ -38,6 +37,37 @@ class PetscException : public std::exception { std::string msg; }; +enum class TrustRegionSubspaceBackend { + Petsc, + Mfem +}; + +using TrustRegionSubspaceResult = + std::tuple>, std::vector, double>; + +struct TrustRegionSubspaceTimings { + size_t num_solves = 0; + size_t total_input_dim = 0; + size_t total_reduced_dim = 0; + size_t max_input_dim = 0; + size_t max_reduced_dim = 
0; + double project_A_seconds = 0.0; + double project_gram_seconds = 0.0; + double project_b_seconds = 0.0; + double basis_seconds = 0.0; + double reduced_A_seconds = 0.0; + double dense_eigensystem_seconds = 0.0; + double dense_trust_solve_seconds = 0.0; + double reconstruct_solution_seconds = 0.0; + double reconstruct_leftmost_seconds = 0.0; +}; + +void resetTrustRegionSubspaceTimings(); + +TrustRegionSubspaceTimings trustRegionSubspaceTimings(); + +using DenseCubicTrustRegionResult = std::tuple; + /// @brief computes the global size of mfem::Vector int globalSize(const mfem::Vector& parallel_v, const MPI_Comm& comm); @@ -46,13 +76,36 @@ double innerProduct(const mfem::Vector& a, const mfem::Vector& b, const MPI_Comm /// @brief returns the solution, as well as a list of the N leftmost eigenvectors /// and their eigenvalues, and the predicted model energy change -std::tuple>, std::vector, double> solveSubspaceProblem( +TrustRegionSubspaceResult solveSubspaceProblem( const std::vector& directions, const std::vector& A_directions, const mfem::Vector& b, double delta, int num_leftmost); +#ifdef SMITH_USE_SLEPC +TrustRegionSubspaceResult solveSubspaceProblemPetsc( + const std::vector& directions, const std::vector& A_directions, + const mfem::Vector& b, double delta, int num_leftmost); +#endif + +TrustRegionSubspaceResult solveSubspaceProblemMfem( + const std::vector& directions, const std::vector& A_directions, + const mfem::Vector& b, double delta, int num_leftmost); + +/// @brief solves a small dense cubic trust-region model +/// 1/2 x^T A x - b^T x + 1/6 sum_k x_k x^T cubic[k] x, ||x|| <= delta. 
+DenseCubicTrustRegionResult solveDenseCubicTrustRegionProblemMfem( + const mfem::DenseMatrix& A, const mfem::Vector& b, const std::vector& cubic, double delta); + +TrustRegionSubspaceResult solveCubicSubspaceProblemMfem( + const std::vector& directions, const std::vector& A_directions, + const std::vector& previous_A_directions, const mfem::Vector& previous_step, + const mfem::Vector& b, double delta, int num_leftmost, bool* used_cubic = nullptr); + std::pair, std::vector> removeDependentDirections( std::vector directions, std::vector A_directions); -} // namespace smith +std::tuple, std::vector, std::vector> +removeDependentDirectionTriples(std::vector directions, + std::vector A_directions, + std::vector previous_A_directions); -#endif // SMITH_USE_SLEPC +} // namespace smith diff --git a/src/smith/physics/solid_mechanics.hpp b/src/smith/physics/solid_mechanics.hpp index c635f67f89..504538d4e6 100644 --- a/src/smith/physics/solid_mechanics.hpp +++ b/src/smith/physics/solid_mechanics.hpp @@ -12,12 +12,14 @@ #pragma once +#include #include #include #include #include #include #include +#include #include #include #include @@ -53,6 +55,18 @@ #include "smith/physics/state/finite_element_vector.hpp" namespace smith { + +struct SolidMechanicsJacobianTimings { + size_t legacy_jacobian_evals = 0; + size_t jacobian_operator_evals = 0; + size_t jacobian_operator_assemblies = 0; + double legacy_derivative_seconds = 0.0; + double legacy_sparse_assembly_seconds = 0.0; + double legacy_essential_elimination_seconds = 0.0; + double jacobian_operator_derivative_seconds = 0.0; + double jacobian_operator_sparse_assembly_seconds = 0.0; + double jacobian_operator_essential_elimination_seconds = 0.0; +}; namespace solid_mechanics { namespace detail { @@ -1051,12 +1065,23 @@ class SolidMechanics, std::integer_se // gradient of residual function [this](const mfem::Vector& u) -> mfem::Operator& { SMITH_MARK_FUNCTION; + using Clock = std::chrono::steady_clock; + auto seconds_since = 
[](Clock::time_point start) { + return std::chrono::duration_cast>(Clock::now() - start).count(); + }; + auto derivative_start = Clock::now(); auto [r, drdu] = (*residual_)(time_, shapeDisplacement(), differentiate_wrt(u), acceleration_, *parameters_[parameter_indices].state...); + jacobian_timings_.legacy_derivative_seconds += seconds_since(derivative_start); + ++jacobian_timings_.legacy_jacobian_evals; J_.reset(); + auto assembly_start = Clock::now(); J_ = assemble(drdu); + jacobian_timings_.legacy_sparse_assembly_seconds += seconds_since(assembly_start); J_e_.reset(); + auto elimination_start = Clock::now(); J_e_ = bcs_.eliminateAllEssentialDofsFromMatrix(*J_); + jacobian_timings_.legacy_essential_elimination_seconds += seconds_since(elimination_start); return *J_; }); } @@ -1080,6 +1105,86 @@ class SolidMechanics, std::integer_se } } + /// @brief Build a quasistatic JacobianOperator with essential boundary conditions applied. + std::unique_ptr quasistaticJacobianOperator(const mfem::Vector& u) const + { + SMITH_MARK_FUNCTION; + + using Clock = std::chrono::steady_clock; + auto seconds_since = [](Clock::time_point start) { + return std::chrono::duration_cast>(Clock::now() - start).count(); + }; + auto derivative_start = Clock::now(); + auto [r, drdu] = (*residual_)(time_, shapeDisplacement(), differentiate_wrt(u), acceleration_, + *parameters_[parameter_indices].state...); + jacobian_timings_.jacobian_operator_derivative_seconds += seconds_since(derivative_start); + ++jacobian_timings_.jacobian_operator_evals; + + using GradientT = std::remove_reference_t; + + class QuasistaticJacobianOperator : public JacobianOperator { + public: + QuasistaticJacobianOperator( + const GradientT& gradient, const mfem::Array& constrained_dofs, + std::function(mfem::HypreParMatrix&)> eliminate_essential_dofs, + SolidMechanicsJacobianTimings& timings) + : JacobianOperator(gradient.Height(), gradient.Width()), + gradient_(gradient), + constrained_dofs_(constrained_dofs), + 
eliminate_essential_dofs_(std::move(eliminate_essential_dofs)), + timings_(timings) + { + } + + void Mult(const mfem::Vector& du, mfem::Vector& dr) const override + { + mfem::Vector du_interior(du); + du_interior.SetSubVector(constrained_dofs_, 0.0); + + gradient_.Mult(du_interior, dr); + for (int i = 0; i < constrained_dofs_.Size(); ++i) { + const int dof = constrained_dofs_[i]; + dr[dof] = du[dof]; + } + } + + std::unique_ptr assemble() override + { + using AssemblyClock = std::chrono::steady_clock; + auto seconds_since = [](AssemblyClock::time_point start) { + return std::chrono::duration_cast>(AssemblyClock::now() - start).count(); + }; + auto assembly_start = AssemblyClock::now(); + std::unique_ptr matrix = gradient_.assemble(); + timings_.jacobian_operator_sparse_assembly_seconds += seconds_since(assembly_start); + auto elimination_start = AssemblyClock::now(); + eliminate_essential_dofs_(*matrix); + timings_.jacobian_operator_essential_elimination_seconds += seconds_since(elimination_start); + ++timings_.jacobian_operator_assemblies; + return matrix; + } + + void assembleDiagonal(mfem::Vector& diag) const override + { + gradient_.assembleDiagonal(diag); + for (int i = 0; i < constrained_dofs_.Size(); ++i) { + diag[constrained_dofs_[i]] = 1.0; + } + } + + private: + GradientT gradient_; + mfem::Array constrained_dofs_; + std::function(mfem::HypreParMatrix&)> eliminate_essential_dofs_; + SolidMechanicsJacobianTimings& timings_; + }; + + return std::make_unique( + drdu, bcs_.allEssentialTrueDofs(), + [this](mfem::HypreParMatrix& matrix) { return bcs_.eliminateAllEssentialDofsFromMatrix(matrix); }, + jacobian_timings_); + } + /** * @brief Return the assembled stiffness matrix * @@ -1161,6 +1266,7 @@ class SolidMechanics, std::integer_se if (is_quasistatic_) { nonlin_solver_->setMatrixFreeTangentAction([this](const mfem::Vector& u, const mfem::Vector& du, mfem::Vector& dr) { quasistaticTangentAction(u, du, dr); }); + 
nonlin_solver_->setJacobianOperator([this](const mfem::Vector& u) { return quasistaticJacobianOperator(u); }); } if (checkpoint_to_disk_) { @@ -1412,6 +1518,12 @@ class SolidMechanics, std::integer_se /// @overload const smith::EquationSolver& equationSolver() const { return *nonlin_solver_; } + /// @brief Return accumulated Jacobian construction timings for this physics object. + const SolidMechanicsJacobianTimings& jacobianTimings() const { return jacobian_timings_; } + + /// @brief Reset accumulated Jacobian construction timings for this physics object. + void resetJacobianTimings() const { jacobian_timings_ = {}; } + protected: /// The compile-time finite element trial space for displacement and velocity (H1 of order p) using trial = H1; @@ -1480,6 +1592,9 @@ class SolidMechanics, std::integer_se /// because are associated with essential boundary conditions std::unique_ptr J_e_; + /// Accumulated timing diagnostics for quasistatic Jacobian construction paths. + mutable SolidMechanicsJacobianTimings jacobian_timings_; + /// an intermediate variable used to store the predicted end-step displacement mfem::Vector predicted_displacement_; diff --git a/src/smith/physics/tests/shallow_arch_buckling.cpp b/src/smith/physics/tests/shallow_arch_buckling.cpp index b9514b74ed..a94a61bb63 100644 --- a/src/smith/physics/tests/shallow_arch_buckling.cpp +++ b/src/smith/physics/tests/shallow_arch_buckling.cpp @@ -26,14 +26,23 @@ namespace smith { namespace { constexpr double length = 10.0; -constexpr double thickness = 0.25; +constexpr double thickness = 0.025; constexpr double end_tol = 1.0e-8; constexpr double top_tol = 1.0e-8; std::string solver_name = "TrustRegion"; int print_level = 2; int pcg_block_len = 10; double pcg_powell_eta = 0.005; -int nonlinear_max_iterations = 30000; +int nonlinear_max_iterations = 300000; +bool pcg_diagonal_preconditioner = false; +int trust_subspace_option = static_cast(SubSpaceOptions::NEVER); +int trust_num_leftmost = 1; +int 
trust_num_past_steps = 0; +int trust_nonmonotone_window = 0; +bool trust_use_jacobian_operator = false; +bool trust_use_cubic_subspace = false; +bool trust_use_solve_start_direction = false; +bool trust_use_min_residual_direction = false; NonlinearSolver selectedNonlinearSolver() { @@ -66,6 +75,29 @@ void parseCommandLine(int& argc, char** argv) pcg_powell_eta = std::stod(arg.substr(std::string("--pcg-powell-eta=").size())); } else if (arg.rfind("--nonlinear-max-iterations=", 0) == 0) { nonlinear_max_iterations = std::stoi(arg.substr(std::string("--nonlinear-max-iterations=").size())); + } else if (arg.rfind("--pcg-diagonal-preconditioner=", 0) == 0) { + const std::string value = arg.substr(std::string("--pcg-diagonal-preconditioner=").size()); + pcg_diagonal_preconditioner = (value == "1" || value == "true" || value == "on"); + } else if (arg.rfind("--trust-subspace-option=", 0) == 0) { + trust_subspace_option = std::stoi(arg.substr(std::string("--trust-subspace-option=").size())); + } else if (arg.rfind("--trust-num-leftmost=", 0) == 0) { + trust_num_leftmost = std::stoi(arg.substr(std::string("--trust-num-leftmost=").size())); + } else if (arg.rfind("--trust-num-past-steps=", 0) == 0) { + trust_num_past_steps = std::stoi(arg.substr(std::string("--trust-num-past-steps=").size())); + } else if (arg.rfind("--trust-nonmonotone-window=", 0) == 0) { + trust_nonmonotone_window = std::stoi(arg.substr(std::string("--trust-nonmonotone-window=").size())); + } else if (arg.rfind("--trust-use-jacobian-operator=", 0) == 0) { + const std::string value = arg.substr(std::string("--trust-use-jacobian-operator=").size()); + trust_use_jacobian_operator = (value == "1" || value == "true" || value == "on"); + } else if (arg.rfind("--trust-use-cubic-subspace=", 0) == 0) { + const std::string value = arg.substr(std::string("--trust-use-cubic-subspace=").size()); + trust_use_cubic_subspace = (value == "1" || value == "true" || value == "on"); + } else if 
(arg.rfind("--trust-use-solve-start-direction=", 0) == 0) { + const std::string value = arg.substr(std::string("--trust-use-solve-start-direction=").size()); + trust_use_solve_start_direction = (value == "1" || value == "true" || value == "on"); + } else if (arg.rfind("--trust-use-min-residual-direction=", 0) == 0) { + const std::string value = arg.substr(std::string("--trust-use-min-residual-direction=").size()); + trust_use_min_residual_direction = (value == "1" || value == "true" || value == "on"); } else { argv[write_arg] = argv[read_arg]; ++write_arg; @@ -79,11 +111,13 @@ void parseCommandLine(int& argc, char** argv) TEST(ShallowArchBuckling, CompressedThinBeamSnapThrough) { MPI_Barrier(MPI_COMM_WORLD); + int rank = 0; + MPI_Comm_rank(MPI_COMM_WORLD, &rank); constexpr int p = 1; constexpr int dim = 2; - constexpr int nx = 96; - constexpr int ny = 4; + constexpr int nx = 150; + constexpr int ny = 6; axom::sidre::DataStore datastore; smith::StateManager::initialize(datastore, "shallow_arch_buckling"); @@ -98,9 +132,14 @@ TEST(ShallowArchBuckling, CompressedThinBeamSnapThrough) "right_end", [](std::vector vertices, int) { return average(vertices)[0] > length - end_tol; }); mesh->addDomainOfBoundaryElements( "top_face", [](std::vector vertices, int) { return average(vertices)[1] > thickness - top_tol; }); - EXPECT_GT(mesh->domain("left_end").total_elements(), 0); - EXPECT_GT(mesh->domain("right_end").total_elements(), 0); - EXPECT_GT(mesh->domain("top_face").total_elements(), 0); + auto globalElementCount = [](int local_count) { + int global_count = 0; + MPI_Allreduce(&local_count, &global_count, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD); + return global_count; + }; + EXPECT_GT(globalElementCount(mesh->domain("left_end").total_elements()), 0); + EXPECT_GT(globalElementCount(mesh->domain("right_end").total_elements()), 0); + EXPECT_GT(globalElementCount(mesh->domain("top_face").total_elements()), 0); smith::LinearSolverOptions linear_options{.linear_solver = 
LinearSolver::CG, .preconditioner = Preconditioner::HypreJacobi, @@ -109,14 +148,24 @@ TEST(ShallowArchBuckling, CompressedThinBeamSnapThrough) .max_iterations = 10000, .print_level = 0}; - smith::NonlinearSolverOptions nonlinear_options{.nonlin_solver = selectedNonlinearSolver(), - .relative_tol = 1.0e-8, - .absolute_tol = 1.0e-10, - .max_iterations = nonlinear_max_iterations, - .print_level = print_level, - .pcg_block_len = pcg_block_len, - .pcg_powell_eta = pcg_powell_eta, - .pcg_max_block_retries = 40}; + smith::NonlinearSolverOptions nonlinear_options{ + .nonlin_solver = selectedNonlinearSolver(), + .relative_tol = 1.0e-8, + .absolute_tol = 1.0e-10, + .max_iterations = nonlinear_max_iterations, + .print_level = print_level, + .trust_nonmonotone_window = trust_nonmonotone_window, + .trust_use_jacobian_operator = trust_use_jacobian_operator, + .trust_use_cubic_subspace = trust_use_cubic_subspace, + .subspace_option = static_cast(trust_subspace_option), + .num_leftmost = trust_num_leftmost, + .trust_num_past_steps = trust_num_past_steps, + .trust_use_solve_start_direction = trust_use_solve_start_direction, + .trust_use_min_residual_direction = trust_use_min_residual_direction, + .pcg_block_len = pcg_block_len, + .pcg_powell_eta = pcg_powell_eta, + .pcg_max_block_retries = 40, + .pcg_use_jacobian_diagonal_preconditioner = pcg_diagonal_preconditioner}; SolidMechanics solid(nonlinear_options, linear_options, solid_mechanics::default_quasistatic_options, "compressed_beam", mesh); @@ -144,47 +193,169 @@ TEST(ShallowArchBuckling, CompressedThinBeamSnapThrough) solid.completeSetup(); solid.outputStateToDisk("shallow_arch_buckling"); - mfem::out << "Compressed thin beam snap-through run: solver = " << solver_name << '\n'; + if (rank == 0) { + mfem::out << "Compressed thin beam snap-through run: solver = " << solver_name + << ", trust_subspace_option = " << trust_subspace_option + << ", trust_num_leftmost = " << trust_num_leftmost + << ", trust_num_past_steps = " << 
trust_num_past_steps + << ", trust_nonmonotone_window = " << trust_nonmonotone_window + << ", trust_use_jacobian_operator = " << trust_use_jacobian_operator + << ", trust_use_cubic_subspace = " << trust_use_cubic_subspace + << ", pcg_diagonal_preconditioner = " << pcg_diagonal_preconditioner << '\n'; + } - constexpr int num_steps = 20; + constexpr int num_steps = 5; int num_converged_steps = 0; for (int step = 0; step < num_steps; ++step) { + solid.resetJacobianTimings(); solid.advanceTimestep(1.0 / num_steps); const auto& nonlinear_solver = solid.equationSolver().nonlinearSolver(); if (nonlinear_solver.GetConverged()) { ++num_converged_steps; } - mfem::out << "Load step " << step + 1 << "/" << num_steps << ": converged = " << nonlinear_solver.GetConverged() - << ", nonlinear iterations = " << nonlinear_solver.GetNumIterations() - << ", final relative residual = " << nonlinear_solver.GetFinalRelNorm() << '\n'; + if (rank == 0) { + mfem::out << "Load step " << step + 1 << "/" << num_steps + << ": converged = " << nonlinear_solver.GetConverged() + << ", nonlinear iterations = " << nonlinear_solver.GetNumIterations() + << ", final relative residual = " << nonlinear_solver.GetFinalRelNorm() << '\n'; + } solid.outputStateToDisk("shallow_arch_buckling"); - if (const auto diagnostics = solid.equationSolver().pcgBlockDiagnostics()) { - mfem::out << " PCG diagnostics: residuals = " << diagnostics->num_residuals - << ", hess-vecs = " << diagnostics->num_hess_vecs - << ", preconditioner applications = " << diagnostics->num_preconds - << ", Jacobian assemblies = " << diagnostics->num_jacobian_assembles - << ", preconditioner updates = " << diagnostics->num_preconditioner_updates - << ", accepted blocks = " << diagnostics->num_blocks - << ", accepted steps = " << diagnostics->num_accepted_steps - << ", block rejects = " << diagnostics->num_block_rejects - << ", prefix accepts = " << diagnostics->num_prefix_accepts - << ", momentum resets = " << diagnostics->num_momentum_resets 
- << ", nonzero beta = " << diagnostics->num_nonzero_beta - << ", zero beta = " << diagnostics->num_zero_beta - << ", Powell restarts = " << diagnostics->num_powell_restarts - << ", descent restarts = " << diagnostics->num_descent_restarts - << ", negative curvature = " << diagnostics->num_negative_curvature - << ", trust capped steps = " << diagnostics->num_trust_capped_steps - << ", line-search backtracks = " << diagnostics->num_line_search_backtracks - << ", final h_scale = " << diagnostics->final_h_scale - << ", last trust ratio = " << diagnostics->last_trust_ratio << '\n'; + if (rank == 0 && print_level >= 1) { + if (const auto diagnostics = solid.equationSolver().pcgBlockDiagnostics()) { + mfem::out << " PCG diagnostics: residuals = " << diagnostics->num_residuals + << ", hess-vecs = " << diagnostics->num_hess_vecs + << ", preconditioner applications = " << diagnostics->num_preconds + << ", Jacobian assemblies = " << diagnostics->num_jacobian_assembles + << ", Jacobian operator evals = " << diagnostics->num_jacobian_operator_evals + << ", diagonal assemblies = " << diagnostics->num_diagonal_assembles + << ", preconditioner updates = " << diagnostics->num_preconditioner_updates + << ", accepted blocks = " << diagnostics->num_blocks + << ", accepted steps = " << diagnostics->num_accepted_steps + << ", block rejects = " << diagnostics->num_block_rejects + << ", prefix accepts = " << diagnostics->num_prefix_accepts + << ", momentum resets = " << diagnostics->num_momentum_resets + << ", nonzero beta = " << diagnostics->num_nonzero_beta + << ", zero beta = " << diagnostics->num_zero_beta + << ", Powell restarts = " << diagnostics->num_powell_restarts + << ", descent restarts = " << diagnostics->num_descent_restarts + << ", negative curvature = " << diagnostics->num_negative_curvature + << ", trust capped steps = " << diagnostics->num_trust_capped_steps + << ", line-search backtracks = " << diagnostics->num_line_search_backtracks + << ", final h_scale = " << 
diagnostics->final_h_scale + << ", last trust ratio = " << diagnostics->last_trust_ratio << '\n'; + mfem::out << " PCG timings: residual = " << diagnostics->residual_seconds + << ", hess-vec = " << diagnostics->hess_vec_seconds + << ", operator hess-vec = " << diagnostics->jacobian_operator_hess_vec_seconds + << ", assembled hess-vec = " << diagnostics->assembled_hess_vec_seconds + << ", matrix-free hess-vec = " << diagnostics->matrix_free_hess_vec_seconds + << ", preconditioner = " << diagnostics->preconditioner_seconds + << ", Jacobian operator eval = " << diagnostics->jacobian_operator_eval_seconds + << ", Jacobian assembly = " << diagnostics->jacobian_assembly_seconds + << ", diagonal assembly = " << diagnostics->diagonal_assembly_seconds + << ", diagonal invert = " << diagnostics->diagonal_invert_seconds + << ", preconditioner update = " << diagnostics->preconditioner_update_seconds + << ", preconditioner setup = " << diagnostics->preconditioner_setup_seconds << '\n'; + } + if (const auto diagnostics = solid.equationSolver().trustRegionDiagnostics()) { + const double operator_timed_seconds = + diagnostics->residual_seconds + diagnostics->hess_vec_seconds + diagnostics->preconditioner_seconds + + diagnostics->jacobian_operator_eval_seconds + diagnostics->diagonal_assembly_seconds + + diagnostics->diagonal_invert_seconds + diagnostics->jacobian_assembly_seconds + + diagnostics->preconditioner_update_seconds; + const double assembled_hess_vec_seconds = + diagnostics->hess_vec_seconds - diagnostics->jacobian_operator_hess_vec_seconds; + mfem::out << " TrustRegion diagnostics: residuals = " << diagnostics->num_residuals + << ", hess-vecs = " << diagnostics->num_hess_vecs + << ", model hess-vecs = " << diagnostics->num_model_hess_vecs + << ", cauchy hess-vecs = " << diagnostics->num_cauchy_hess_vecs + << ", line-search hess-vecs = " << diagnostics->num_line_search_hess_vecs + << ", preconditioner applications = " << diagnostics->num_preconds + << ", Jacobian 
assemblies = " << diagnostics->num_jacobian_assembles + << ", Jacobian operator evals = " << diagnostics->num_jacobian_operator_evals + << ", diagonal assemblies = " << diagnostics->num_diagonal_assembles + << ", CG iterations = " << diagnostics->num_cg_iterations + << ", subspace solves = " << diagnostics->num_subspace_solves + << ", subspace leftmost hess-vecs = " << diagnostics->num_subspace_leftmost_hess_vecs + << ", subspace hess-vec batches = " << diagnostics->num_subspace_hess_vec_batches + << ", subspace batched hess-vecs = " << diagnostics->num_subspace_batched_hess_vecs + << ", subspace past-step vectors = " << diagnostics->num_subspace_past_step_vectors + << ", subspace past-step hess-vecs = " << diagnostics->num_subspace_past_step_hess_vecs + << ", quadratic subspace solves = " << diagnostics->num_quadratic_subspace_solves + << ", cubic subspace attempts = " << diagnostics->num_cubic_subspace_attempts + << ", cubic subspace uses = " << diagnostics->num_cubic_subspace_uses + << ", cubic subspace quadratic fallbacks = " << diagnostics->num_cubic_subspace_quadratic_fallbacks + << ", nonmonotone work accepts = " << diagnostics->num_nonmonotone_work_accepts + << ", monotone work would reject = " << diagnostics->num_monotone_work_would_reject + << ", preconditioner updates = " << diagnostics->num_preconditioner_updates << '\n'; + mfem::out << " TrustRegion timings: total = " << diagnostics->total_seconds + << ", operator-timed = " << operator_timed_seconds << ", residual = " << diagnostics->residual_seconds + << ", hess-vec = " << diagnostics->hess_vec_seconds + << ", model hess-vec = " << diagnostics->model_hess_vec_seconds + << ", cauchy hess-vec = " << diagnostics->cauchy_hess_vec_seconds + << ", line-search hess-vec = " << diagnostics->line_search_hess_vec_seconds + << ", operator hess-vec = " << diagnostics->jacobian_operator_hess_vec_seconds + << ", assembled hess-vec = " << assembled_hess_vec_seconds + << ", preconditioner = " << 
diagnostics->preconditioner_seconds + << ", Jacobian operator eval = " << diagnostics->jacobian_operator_eval_seconds + << ", diagonal assembly = " << diagnostics->diagonal_assembly_seconds + << ", diagonal invert = " << diagnostics->diagonal_invert_seconds + << ", model solve = " << diagnostics->model_solve_seconds + << ", subspace = " << diagnostics->subspace_seconds + << ", subspace leftmost = " << diagnostics->subspace_leftmost_seconds + << ", subspace hess-vec batches = " << diagnostics->subspace_hess_vec_batch_seconds + << ", subspace filter = " << diagnostics->subspace_filter_seconds + << ", subspace backend = " << diagnostics->subspace_backend_seconds + << ", subspace project A = " << diagnostics->subspace_project_A_seconds + << ", subspace project gram = " << diagnostics->subspace_project_gram_seconds + << ", subspace project b = " << diagnostics->subspace_project_b_seconds + << ", subspace basis = " << diagnostics->subspace_basis_seconds + << ", subspace reduced A = " << diagnostics->subspace_reduced_A_seconds + << ", subspace dense eigensystem = " << diagnostics->subspace_dense_eigensystem_seconds + << ", subspace dense trust solve = " << diagnostics->subspace_dense_trust_solve_seconds + << ", subspace reconstruct solution = " << diagnostics->subspace_reconstruct_solution_seconds + << ", subspace reconstruct leftmost = " << diagnostics->subspace_reconstruct_leftmost_seconds + << ", subspace finalize = " << diagnostics->subspace_finalize_seconds + << ", cauchy point = " << diagnostics->cauchy_point_seconds + << ", dogleg = " << diagnostics->dogleg_seconds + << ", line search = " << diagnostics->line_search_seconds << ", dot = " << diagnostics->dot_seconds + << ", dot count = " << diagnostics->num_dot_products + << ", dot reductions = " << diagnostics->num_dot_reductions + << ", model dots = " << diagnostics->num_model_dot_products << " / " << diagnostics->model_dot_seconds + << ", cauchy dots = " << diagnostics->num_cauchy_dot_products << " / " + << 
diagnostics->cauchy_dot_seconds << ", dogleg dots = " << diagnostics->num_dogleg_dot_products + << " / " << diagnostics->dogleg_dot_seconds + << ", line-search dots = " << diagnostics->num_line_search_dot_products << " / " + << diagnostics->line_search_dot_seconds << ", setup dots = " << diagnostics->num_setup_dot_products + << " / " << diagnostics->setup_dot_seconds + << ", vector update = " << diagnostics->vector_update_seconds + << ", vector copy/scale = " << diagnostics->vector_copy_scale_seconds + << ", projection = " << diagnostics->projection_seconds + << ", Jacobian assembly = " << diagnostics->jacobian_assembly_seconds + << ", preconditioner update = " << diagnostics->preconditioner_update_seconds + << ", preconditioner setup = " << diagnostics->preconditioner_setup_seconds + << ", work objective = " << diagnostics->last_work_objective + << ", nonmonotone work reference = " << diagnostics->last_nonmonotone_work_reference << '\n'; + } + const auto& jacobian_timings = solid.jacobianTimings(); + mfem::out << " Solid Jacobian timings: legacy evals = " << jacobian_timings.legacy_jacobian_evals + << ", legacy derivative = " << jacobian_timings.legacy_derivative_seconds + << ", legacy sparse assembly = " << jacobian_timings.legacy_sparse_assembly_seconds + << ", legacy EBC elimination = " << jacobian_timings.legacy_essential_elimination_seconds + << ", operator evals = " << jacobian_timings.jacobian_operator_evals + << ", operator assemblies = " << jacobian_timings.jacobian_operator_assemblies + << ", operator derivative = " << jacobian_timings.jacobian_operator_derivative_seconds + << ", operator sparse assembly = " << jacobian_timings.jacobian_operator_sparse_assembly_seconds + << ", operator EBC elimination = " << jacobian_timings.jacobian_operator_essential_elimination_seconds + << '\n'; } if (!nonlinear_solver.GetConverged()) { throw std::runtime_error("Nonlinear solve failed to converge at load step " + std::to_string(step + 1)); } } - mfem::out << 
"Converged load steps: " << num_converged_steps << "/" << num_steps << '\n'; + if (rank == 0) { + mfem::out << "Converged load steps: " << num_converged_steps << "/" << num_steps << '\n'; + } } } // namespace smith From f8ff2c0750418f6f596727f25799da2271b14775 Mon Sep 17 00:00:00 2001 From: Michael Tupek Date: Fri, 8 May 2026 09:17:05 -0600 Subject: [PATCH 09/27] Temporary cmake adjust. --- cmake/thirdparty/FindMFEM.cmake | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/cmake/thirdparty/FindMFEM.cmake b/cmake/thirdparty/FindMFEM.cmake index cd7d45c8af..bb7d37037d 100644 --- a/cmake/thirdparty/FindMFEM.cmake +++ b/cmake/thirdparty/FindMFEM.cmake @@ -107,6 +107,17 @@ else() # filter out items containing "Xlinker" set(_mfem_tpl_list ${mfem_tpl_lnk_flags}) separate_arguments(_mfem_tpl_list) + foreach(_link_flag ${_mfem_tpl_list}) + if(_link_flag MATCHES "^-L(.+)") + set(_link_dir "${CMAKE_MATCH_1}") + if(EXISTS "${_link_dir}/liblapack.dylib" OR EXISTS "${_link_dir}/libblas.dylib") + list(APPEND _mfem_tpl_link_dirs "${_link_dir}") + endif() + endif() + endforeach() + if(_mfem_tpl_link_dirs) + list(REMOVE_DUPLICATES _mfem_tpl_link_dirs) + endif() list(FILTER _mfem_tpl_list EXCLUDE REGEX Xlinker) # On Apple, -Wl,-rpath,... entries duplicate CMake's own rpath management # (CMAKE_INSTALL_RPATH_USE_LINK_PATH) and cause ld "duplicate -rpath" warnings @@ -138,6 +149,12 @@ else() TREAT_INCLUDES_AS_SYSTEM ON EXPORTABLE ON) + if(APPLE AND _mfem_tpl_link_dirs) + foreach(_link_dir ${_mfem_tpl_link_dirs}) + target_link_options(mfem INTERFACE "LINKER:-rpath,${_link_dir}") + endforeach() + endif() + install(TARGETS mfem EXPORT smith-targets DESTINATION lib From 33dd85b7f79c53946e7db2fb9be1cfada627b24c Mon Sep 17 00:00:00 2001 From: Michael Tupek Date: Fri, 8 May 2026 17:06:15 -0600 Subject: [PATCH 10/27] Simplify the experimental attempts. 
--- src/smith/numerics/CMakeLists.txt | 1 - src/smith/numerics/equation_solver.cpp | 1033 +---------------- src/smith/numerics/equation_solver.hpp | 201 ---- src/smith/numerics/functional/functional.hpp | 65 -- .../tests/functional_comparisons.cpp | 81 -- .../numerics/mfem_trust_region_subspace.cpp | 46 +- .../numerics/petsc_trust_region_subspace.cpp | 2 +- src/smith/numerics/solver_config.hpp | 64 - .../numerics/tests/test_equationsolver.cpp | 200 ---- .../tests/test_trust_region_solver_mfem.cpp | 301 ----- .../tests/test_trust_region_solver_petsc.cpp | 4 + .../numerics/trust_region_cubic_subspace.cpp | 461 -------- src/smith/numerics/trust_region_solver.hpp | 19 +- src/smith/physics/dfem_weak_form.hpp | 12 - src/smith/physics/functional_weak_form.hpp | 20 - src/smith/physics/solid_mechanics.hpp | 141 --- .../physics/tests/shallow_arch_buckling.cpp | 187 +-- src/smith/physics/tests/solid.cpp | 60 - .../physics/tests/solid_statics_patch.cpp | 79 -- .../tests/test_functional_weak_form.cpp | 32 - src/smith/physics/weak_form.hpp | 17 - 21 files changed, 31 insertions(+), 2995 deletions(-) delete mode 100644 src/smith/numerics/trust_region_cubic_subspace.cpp diff --git a/src/smith/numerics/CMakeLists.txt b/src/smith/numerics/CMakeLists.txt index e8c767394d..8bc793fedd 100644 --- a/src/smith/numerics/CMakeLists.txt +++ b/src/smith/numerics/CMakeLists.txt @@ -21,7 +21,6 @@ set(numerics_sources equation_solver.cpp petsc_trust_region_subspace.cpp mfem_trust_region_subspace.cpp - trust_region_cubic_subspace.cpp odes.cpp petsc_solvers.cpp block_preconditioner.cpp diff --git a/src/smith/numerics/equation_solver.cpp b/src/smith/numerics/equation_solver.cpp index 29d9af1e3f..4db0fc096d 100644 --- a/src/smith/numerics/equation_solver.cpp +++ b/src/smith/numerics/equation_solver.cpp @@ -407,26 +407,12 @@ class TrustRegion : public mfem::NewtonSolver { mutable size_t num_subspace_solve_start_hess_vecs = 0; /// internal counter for quadratic subspace backend solves mutable size_t 
num_quadratic_subspace_solves = 0; - /// internal counter for cubic subspace backend attempts - mutable size_t num_cubic_subspace_attempts = 0; - /// internal counter for cubic subspace candidates used - mutable size_t num_cubic_subspace_uses = 0; - /// internal counter for cubic attempts that returned quadratic candidate - mutable size_t num_cubic_subspace_quadratic_fallbacks = 0; /// internal counter for matrix assembles mutable size_t num_jacobian_assembles = 0; - /// internal counter for JacobianOperator evaluations - mutable size_t num_jacobian_operator_evals = 0; - /// internal counter for direct diagonal assemblies - mutable size_t num_diagonal_assembles = 0; /// internal counter for model CG iterations mutable size_t num_cg_iterations = 0; /// internal counter for preconditioner operator updates mutable size_t num_preconditioner_updates = 0; - /// internal counter for nonmonotone accepted steps - mutable size_t num_nonmonotone_work_accepts = 0; - /// internal counter for accepted steps that monotone acceptance would reject - mutable size_t num_monotone_work_would_reject = 0; /// time spent evaluating residuals mutable double residual_seconds = 0.0; /// time spent applying Hessian-vector products @@ -437,14 +423,6 @@ class TrustRegion : public mfem::NewtonSolver { mutable double cauchy_hess_vec_seconds = 0.0; /// time spent applying line-search Hessian-vector products mutable double line_search_hess_vec_seconds = 0.0; - /// time spent applying JacobianOperator Hessian-vector products - mutable double jacobian_operator_hess_vec_seconds = 0.0; - /// time spent evaluating JacobianOperator factories - mutable double jacobian_operator_eval_seconds = 0.0; - /// time spent directly assembling diagonals - mutable double diagonal_assembly_seconds = 0.0; - /// time spent inverting direct diagonals - mutable double diagonal_invert_seconds = 0.0; /// time spent applying preconditioners mutable double preconditioner_seconds = 0.0; /// total time spent in the nonlinear 
solve @@ -507,21 +485,6 @@ class TrustRegion : public mfem::NewtonSolver { mutable double preconditioner_update_seconds = 0.0; /// time spent in preconditioner SetOperator calls mutable double preconditioner_setup_seconds = 0.0; - /// current accumulated actual work-surrogate level for nonmonotone acceptance - mutable double current_work_objective = 0.0; - /// last nonmonotone reference work surrogate - mutable double last_nonmonotone_work_reference = 0.0; - /// Optional JacobianOperator factory - JacobianOperatorFactory jacobian_operator_factory; - /// Cached JacobianOperator for current TrustRegion iteration - mutable std::unique_ptr current_jacobian_operator; - /// Inverted scalar diagonal preconditioner for JacobianOperator mode - mutable mfem::Vector inverse_diagonal_preconditioner; - /// Current assembled Hessian clone used to preserve a valid previous Hessian - mutable std::unique_ptr current_hessian; - /// Previous assembled Hessian used for cubic finite-difference subspace models - mutable std::unique_ptr previous_hessian; - #ifdef MFEM_USE_MPI /// constructor TrustRegion(MPI_Comm comm_, const NonlinearSolverOptions& nonlinear_opts, const LinearSolverOptions& linear_opts, @@ -691,26 +654,6 @@ class TrustRegion : public mfem::NewtonSolver { ++num_line_search_hess_vecs; } - double nonmonotoneWorkReference(const std::vector& work_objective_history) const - { - if (work_objective_history.empty()) { - return current_work_objective; - } - return *std::max_element(work_objective_history.begin(), work_objective_history.end()); - } - - void pushWorkObjectiveHistory(std::vector& work_objective_history, double objective) const - { - const int window = nonlinear_options.trust_nonmonotone_window; - if (window <= 0) { - return; - } - work_objective_history.push_back(objective); - while (work_objective_history.size() > static_cast(window)) { - work_objective_history.erase(work_objective_history.begin()); - } - } - void pushAcceptedStepHistory(const mfem::Vector& step) 
const { if (nonlinear_options.trust_num_past_steps <= 0) { @@ -745,10 +688,7 @@ class TrustRegion : public mfem::NewtonSolver { [[maybe_unused]] const std::vector Hds, [[maybe_unused]] const mfem::Vector& g, [[maybe_unused]] double delta, [[maybe_unused]] int num_leftmost, - [[maybe_unused]] std::vector>& candidate_left_mosts, - [[maybe_unused]] const mfem::Vector& previous_step, - [[maybe_unused]] const mfem::Vector* previous_H_previous_step, - [[maybe_unused]] bool allow_cubic_subspace) const + [[maybe_unused]] std::vector>& candidate_left_mosts) const { SMITH_MARK_FUNCTION; auto subspace_start = Clock::now(); @@ -780,31 +720,9 @@ class TrustRegion : public mfem::NewtonSolver { try { auto backend_start = Clock::now(); - if (nonlinear_options.trust_use_cubic_subspace && allow_cubic_subspace && previous_hessian) { - std::vector previous_H_vectors; - std::vector previous_H_directions; - previous_H_vectors.reserve(directions.size()); - previous_H_directions.reserve(directions.size()); - for (const auto* direction : directions) { - previous_H_vectors.emplace_back(direction->Size()); - previous_hessian->Mult(*direction, previous_H_vectors.back()); - previous_H_directions.emplace_back(&previous_H_vectors.back()); - } - ++num_cubic_subspace_attempts; - bool used_cubic = false; - std::tie(sol, leftvecs, leftvals, energy_change) = solveCubicSubspaceProblemMfem( - directions, H_directions, previous_H_directions, previous_step, b, delta, num_leftmost, &used_cubic); - if (used_cubic) { - ++num_cubic_subspace_uses; - } else { - ++num_cubic_subspace_quadratic_fallbacks; - ++num_quadratic_subspace_solves; - } - } else { - ++num_quadratic_subspace_solves; - std::tie(sol, leftvecs, leftvals, energy_change) = - solveSubspaceProblem(directions, H_directions, b, delta, num_leftmost); - } + ++num_quadratic_subspace_solves; + std::tie(sol, leftvecs, leftvals, energy_change) = + solveSubspaceProblem(directions, H_directions, b, delta, num_leftmost); subspace_backend_seconds += 
secondsSince(backend_start); } catch (const std::exception& e) { if (print_level >= 1) { @@ -1031,63 +949,14 @@ class TrustRegion : public mfem::NewtonSolver { SMITH_MARK_FUNCTION; auto start = Clock::now(); ++num_jacobian_assembles; - if (nonlinear_options.trust_use_cubic_subspace) { - previous_hessian = std::move(current_hessian); - } grad = &oper->GetGradient(x); if (nonlinear_options.force_monolithic) { auto* grad_blocked = dynamic_cast(grad); if (grad_blocked) grad = buildMonolithicMatrix(*grad_blocked).release(); } - if (nonlinear_options.trust_use_cubic_subspace) { - current_hessian = cloneAssembledOperator(*grad); - } jacobian_assembly_seconds += secondsSince(start); } - /// Set an optional JacobianOperator factory. - void setJacobianOperator(JacobianOperatorFactory jacobian_operator) - { - jacobian_operator_factory = std::move(jacobian_operator); - } - - /// Evaluate and cache the JacobianOperator at x. - void updateJacobianOperator(const mfem::Vector& x) const - { - SMITH_MARK_FUNCTION; - SLIC_ERROR_ROOT_IF(!jacobian_operator_factory, "No JacobianOperator factory is registered."); - auto start = Clock::now(); - ++num_jacobian_operator_evals; - current_jacobian_operator = jacobian_operator_factory(x); - SLIC_ERROR_ROOT_IF(!current_jacobian_operator, "JacobianOperator factory returned a null operator."); - jacobian_operator_eval_seconds += secondsSince(start); - } - - /// Assemble and invert the scalar diagonal preconditioner from the current JacobianOperator. 
- void updateDiagonalPreconditioner() const - { - SMITH_MARK_FUNCTION; - SLIC_ERROR_ROOT_IF(!current_jacobian_operator, "Cannot build diagonal preconditioner without a JacobianOperator."); - - auto diagonal_start = Clock::now(); - current_jacobian_operator->assembleDiagonal(inverse_diagonal_preconditioner); - diagonal_assembly_seconds += secondsSince(diagonal_start); - ++num_diagonal_assembles; - - auto invert_start = Clock::now(); - double max_abs_diag = 0.0; - for (int i = 0; i < inverse_diagonal_preconditioner.Size(); ++i) { - max_abs_diag = std::max(max_abs_diag, std::abs(inverse_diagonal_preconditioner[i])); - } - - const double floor = nonlinear_options.pcg_diagonal_floor * max_abs_diag; - SLIC_ERROR_ROOT_IF(!(floor > 0.0), "Cannot invert a zero Jacobian diagonal for TrustRegion preconditioning."); - for (int i = 0; i < inverse_diagonal_preconditioner.Size(); ++i) { - inverse_diagonal_preconditioner[i] = 1.0 / std::max(std::abs(inverse_diagonal_preconditioner[i]), floor); - } - diagonal_invert_seconds += secondsSince(invert_start); - } - /// evaluate the nonlinear residual mfem::real_t computeResidual(const mfem::Vector& x_, mfem::Vector& r_) const { @@ -1106,16 +975,8 @@ class TrustRegion : public mfem::NewtonSolver { SMITH_MARK_FUNCTION; auto start = Clock::now(); ++num_hess_vecs; - if (nonlinear_options.trust_use_jacobian_operator) { - SLIC_ERROR_ROOT_IF(!current_jacobian_operator, "TrustRegion JacobianOperator mode has no current operator."); - current_jacobian_operator->Mult(x_, v_); - const double seconds = secondsSince(start); - hess_vec_seconds += seconds; - jacobian_operator_hess_vec_seconds += seconds; - } else { - grad->Mult(x_, v_); - hess_vec_seconds += secondsSince(start); - } + grad->Mult(x_, v_); + hess_vec_seconds += secondsSince(start); } /// apply trust region specific preconditioner @@ -1124,16 +985,7 @@ class TrustRegion : public mfem::NewtonSolver { SMITH_MARK_FUNCTION; auto start = Clock::now(); ++num_preconds; - if 
(nonlinear_options.trust_use_jacobian_operator) { - SLIC_ERROR_ROOT_IF(inverse_diagonal_preconditioner.Size() != x_.Size(), - "TrustRegion JacobianOperator diagonal preconditioner is not initialized."); - v_.SetSize(x_.Size()); - for (int i = 0; i < x_.Size(); ++i) { - v_[i] = inverse_diagonal_preconditioner[i] * x_[i]; - } - } else { - tr_precond.Mult(x_, v_); - } + tr_precond.Mult(x_, v_); preconditioner_seconds += secondsSince(start); }; @@ -1147,8 +999,6 @@ class TrustRegion : public mfem::NewtonSolver { .num_line_search_hess_vecs = num_line_search_hess_vecs, .num_preconds = num_preconds, .num_jacobian_assembles = num_jacobian_assembles, - .num_jacobian_operator_evals = num_jacobian_operator_evals, - .num_diagonal_assembles = num_diagonal_assembles, .num_cg_iterations = num_cg_iterations, .num_subspace_solves = num_subspace_solves, .num_subspace_leftmost_hess_vecs = num_subspace_leftmost_hess_vecs, @@ -1159,21 +1009,12 @@ class TrustRegion : public mfem::NewtonSolver { .num_subspace_solve_start_vectors = num_subspace_solve_start_vectors, .num_subspace_solve_start_hess_vecs = num_subspace_solve_start_hess_vecs, .num_quadratic_subspace_solves = num_quadratic_subspace_solves, - .num_cubic_subspace_attempts = num_cubic_subspace_attempts, - .num_cubic_subspace_uses = num_cubic_subspace_uses, - .num_cubic_subspace_quadratic_fallbacks = num_cubic_subspace_quadratic_fallbacks, .num_preconditioner_updates = num_preconditioner_updates, - .num_nonmonotone_work_accepts = num_nonmonotone_work_accepts, - .num_monotone_work_would_reject = num_monotone_work_would_reject, .residual_seconds = residual_seconds, .hess_vec_seconds = hess_vec_seconds, .model_hess_vec_seconds = model_hess_vec_seconds, .cauchy_hess_vec_seconds = cauchy_hess_vec_seconds, .line_search_hess_vec_seconds = line_search_hess_vec_seconds, - .jacobian_operator_hess_vec_seconds = jacobian_operator_hess_vec_seconds, - .jacobian_operator_eval_seconds = jacobian_operator_eval_seconds, - .diagonal_assembly_seconds 
= diagonal_assembly_seconds, - .diagonal_invert_seconds = diagonal_invert_seconds, .preconditioner_seconds = preconditioner_seconds, .total_seconds = total_seconds, .model_solve_seconds = model_solve_seconds, @@ -1213,9 +1054,7 @@ class TrustRegion : public mfem::NewtonSolver { .projection_seconds = projection_seconds, .jacobian_assembly_seconds = jacobian_assembly_seconds, .preconditioner_update_seconds = preconditioner_update_seconds, - .preconditioner_setup_seconds = preconditioner_setup_seconds, - .last_work_objective = current_work_objective, - .last_nonmonotone_work_reference = last_nonmonotone_work_reference}; + .preconditioner_setup_seconds = preconditioner_setup_seconds}; } /// @overload @@ -1246,25 +1085,14 @@ class TrustRegion : public mfem::NewtonSolver { num_subspace_solve_start_vectors = 0; num_subspace_solve_start_hess_vecs = 0; num_quadratic_subspace_solves = 0; - num_cubic_subspace_attempts = 0; - num_cubic_subspace_uses = 0; - num_cubic_subspace_quadratic_fallbacks = 0; num_jacobian_assembles = 0; - num_jacobian_operator_evals = 0; - num_diagonal_assembles = 0; num_cg_iterations = 0; num_preconditioner_updates = 0; - num_nonmonotone_work_accepts = 0; - num_monotone_work_would_reject = 0; residual_seconds = 0.0; hess_vec_seconds = 0.0; model_hess_vec_seconds = 0.0; cauchy_hess_vec_seconds = 0.0; line_search_hess_vec_seconds = 0.0; - jacobian_operator_hess_vec_seconds = 0.0; - jacobian_operator_eval_seconds = 0.0; - diagonal_assembly_seconds = 0.0; - diagonal_invert_seconds = 0.0; preconditioner_seconds = 0.0; total_seconds = 0.0; model_solve_seconds = 0.0; @@ -1296,19 +1124,13 @@ class TrustRegion : public mfem::NewtonSolver { jacobian_assembly_seconds = 0.0; preconditioner_update_seconds = 0.0; preconditioner_setup_seconds = 0.0; - current_work_objective = 0.0; - last_nonmonotone_work_reference = 0.0; accepted_step_history.clear(); resetTrustRegionSubspaceTimings(); solve_start_x.SetSize(X.Size()); solve_start_x = X; 
min_residual_x.SetSize(X.Size()); min_residual_x = X; - current_jacobian_operator.reset(); - inverse_diagonal_preconditioner.SetSize(0); previous_H_left_mosts.clear(); - current_hessian.reset(); - previous_hessian.reset(); real_t norm, norm_goal = 0.0; norm = initial_norm = computeResidual(X, r); @@ -1321,11 +1143,6 @@ class TrustRegion : public mfem::NewtonSolver { mfem::out << "TrustRegion iteration " << std::setw(3) << 0 << " : ||r|| = " << std::setw(13) << norm << "\n"; } - SLIC_ERROR_ROOT_IF(nonlinear_options.trust_nonmonotone_window < 0, - "TrustRegion requires trust_nonmonotone_window >= 0"); - std::vector work_objective_history; - pushWorkObjectiveHistory(work_objective_history, current_work_objective); - prec->iterative_mode = false; tr_precond.iterative_mode = false; @@ -1381,26 +1198,17 @@ class TrustRegion : public mfem::NewtonSolver { break; } - if (nonlinear_options.trust_use_jacobian_operator) { - SLIC_ERROR_ROOT_IF(!jacobian_operator_factory, - "TrustRegion JacobianOperator mode requires a registered JacobianOperator factory."); - updateJacobianOperator(X); - updateDiagonalPreconditioner(); + assembleJacobian(X); + + if (it == 0 || (trResults.cg_iterations_count >= settings.max_cg_iterations || + cumulative_cg_iters_from_last_precond_update >= settings.max_cumulative_iteration)) { + auto preconditioner_update_start = Clock::now(); + auto preconditioner_setup_start = Clock::now(); + tr_precond.SetOperator(*grad); + preconditioner_setup_seconds += secondsSince(preconditioner_setup_start); + preconditioner_update_seconds += secondsSince(preconditioner_update_start); ++num_preconditioner_updates; cumulative_cg_iters_from_last_precond_update = 0; - } else { - assembleJacobian(X); - - if (it == 0 || (trResults.cg_iterations_count >= settings.max_cg_iterations || - cumulative_cg_iters_from_last_precond_update >= settings.max_cumulative_iteration)) { - auto preconditioner_update_start = Clock::now(); - auto preconditioner_setup_start = Clock::now(); - 
tr_precond.SetOperator(*grad); - preconditioner_setup_seconds += secondsSince(preconditioner_setup_start); - preconditioner_update_seconds += secondsSince(preconditioner_update_start); - ++num_preconditioner_updates; - cumulative_cg_iters_from_last_precond_update = 0; - } } auto hess_vec_func = [&](const mfem::Vector& x_, mfem::Vector& v_) { hessVec(x_, v_); }; @@ -1481,8 +1289,6 @@ class TrustRegion : public mfem::NewtonSolver { ((d_norm > (1.0 - 1.0e-6) * tr_size) && lineSearchIter > 1)); bool use_with_option2 = (subspace_option >= 2) && (d_norm > (1.0 - 1.0e-6) * tr_size); bool use_with_option3 = (subspace_option >= 3); - const bool allow_cubic_subspace = - trResults.interior_status == TrustRegionResults::Status::NegativeCurvature || use_with_option2; if (use_with_option1 || use_with_option2 || use_with_option3) { if (!have_computed_Hvs) { @@ -1581,9 +1387,7 @@ class TrustRegion : public mfem::NewtonSolver { H_ds.push_back(&H_min_residual_direction); } } - solveTheSubspaceProblem(trResults.d, hess_vec_func, ds, H_ds, r, tr_size, num_leftmost, candidate_left_mosts, - trResults.d_old, - trResults.has_d_old ? 
&trResults.H_d_old_at_accept : nullptr, allow_cubic_subspace); + solveTheSubspaceProblem(trResults.d, hess_vec_func, ds, H_ds, r, tr_size, num_leftmost, candidate_left_mosts); } static constexpr double roundOffTol = 0.0; // 1e-14; @@ -1614,9 +1418,6 @@ class TrustRegion : public mfem::NewtonSolver { normPred = std::numeric_limits::max(); } - const double trial_work_objective = current_work_objective + realObjective; - last_nonmonotone_work_reference = nonmonotoneWorkReference(work_objective_history); - if (normPred <= norm_goal) { trResults.d_old = trResults.d; trResults.H_d_old_at_accept = trResults.H_d; @@ -1630,8 +1431,6 @@ class TrustRegion : public mfem::NewtonSolver { r = r_pred; vector_copy_scale_seconds += secondsSince(copy_start); norm = normPred; - current_work_objective = trial_work_objective; - pushWorkObjectiveHistory(work_objective_history, current_work_objective); line_search_seconds += secondsSince(line_search_start); if (print_level >= 2) { printTrustRegionInfo(realObjective, modelObjective, trResults.cg_iterations_count, tr_size, true); @@ -1671,11 +1470,7 @@ class TrustRegion : public mfem::NewtonSolver { // modelRes = g + Jd // modelResNorm = np.linalg.norm(modelRes) // realResNorm = np.linalg.norm(gy) - const bool monotoneAccept = rho >= settings.eta1 && rho <= settings.eta4; - const bool nonmonotoneAccept = - nonlinear_options.trust_nonmonotone_window > 0 && modelObjective < 0.0 && rho <= settings.eta4 && - trial_work_objective <= last_nonmonotone_work_reference + settings.eta1 * modelObjective; - bool willAccept = monotoneAccept || nonmonotoneAccept; // or (rho >= -0 and realResNorm <= gNorm) + const bool willAccept = rho >= settings.eta1 && rho <= settings.eta4; if (print_level >= 2) { printTrustRegionInfo(realObjective, modelObjective, trResults.cg_iterations_count, tr_size, willAccept); @@ -1691,17 +1486,11 @@ class TrustRegion : public mfem::NewtonSolver { if (!candidate_left_mosts.empty()) { left_mosts = std::move(candidate_left_mosts); 
} - if (nonmonotoneAccept && !monotoneAccept) { - ++num_nonmonotone_work_accepts; - ++num_monotone_work_would_reject; - } copy_start = Clock::now(); X = x_pred; r = r_pred; vector_copy_scale_seconds += secondsSince(copy_start); norm = normPred; - current_work_objective = trial_work_objective; - pushWorkObjectiveHistory(work_objective_history, current_work_objective); line_search_seconds += secondsSince(line_search_start); break; } @@ -1731,754 +1520,6 @@ class TrustRegion : public mfem::NewtonSolver { } }; -/** - * @brief Skeleton for a nonlinear preconditioned conjugate-gradient block solver. - * - * The full algorithm is added in a follow-on chunk. This class establishes the Smith/MFEM integration points used by - * that implementation: residual evaluation, Jacobian assembly, Hessian-vector products, preconditioning, counters, and - * standard nonlinear convergence bookkeeping. - */ -class PcgBlockSolver : public mfem::NewtonSolver { - protected: - /// Trial solution vector - mutable mfem::Vector x_trial; - /// Trial residual vector - mutable mfem::Vector r_trial; - /// Scratch vector - mutable mfem::Vector scratch; - - /// Nonlinear solution options - NonlinearSolverOptions nonlinear_options; - - /// Preconditioner used by the PCG-block recurrence - Solver& pcg_precond; - - /// Reconstructed Smith print level - mutable size_t print_level = 0; - - public: - /// Internal counter for hess-vecs - mutable size_t num_hess_vecs = 0; - /// Internal counter for preconditions - mutable size_t num_preconds = 0; - /// Internal counter for residuals - mutable size_t num_residuals = 0; - /// Internal counter for matrix assembles - mutable size_t num_jacobian_assembles = 0; - /// Internal counter for JacobianOperator evaluations - mutable size_t num_jacobian_operator_evals = 0; - /// Internal counter for direct diagonal assemblies - mutable size_t num_diagonal_assembles = 0; - /// Internal counter for preconditioner operator updates - mutable size_t num_preconditioner_updates 
= 0; - /// Internal counter for accepted prefix blocks - mutable size_t num_prefix_accepts = 0; - /// Internal counter for momentum resets - mutable size_t num_momentum_resets = 0; - /// Internal counter for nonzero PCG beta values - mutable size_t num_nonzero_beta = 0; - /// Internal counter for zero PCG beta values - mutable size_t num_zero_beta = 0; - /// Internal counter for accepted blocks - mutable size_t num_blocks = 0; - /// Internal counter for rejected blocks - mutable size_t num_block_rejects = 0; - /// Internal counter for Powell restarts - mutable size_t num_powell_restarts = 0; - /// Internal counter for descent-guard restarts - mutable size_t num_descent_restarts = 0; - /// Internal counter for non-positive curvature directions - mutable size_t num_negative_curvature = 0; - /// Internal counter for line-search backtracks - mutable size_t num_line_search_backtracks = 0; - /// Internal counter for positive-curvature steps capped by the trust radius - mutable size_t num_trust_capped_steps = 0; - /// Internal counter for accepted inner PCG steps - mutable size_t num_accepted_steps = 0; - /// Internal counter for trial inner PCG steps - mutable size_t num_trial_steps = 0; - /// Last trust scale used by the solver - mutable double final_h_scale = 1.0; - /// Last accepted block trust ratio - mutable double last_trust_ratio = 0.0; - /// Time spent evaluating residuals - mutable double residual_seconds = 0.0; - /// Time spent applying all Hessian-vector products - mutable double hess_vec_seconds = 0.0; - /// Time spent applying JacobianOperator Hessian-vector products - mutable double jacobian_operator_hess_vec_seconds = 0.0; - /// Time spent applying assembled Hessian-vector products - mutable double assembled_hess_vec_seconds = 0.0; - /// Time spent applying legacy matrix-free tangent products - mutable double matrix_free_hess_vec_seconds = 0.0; - /// Time spent applying preconditioners - mutable double preconditioner_seconds = 0.0; - /// Time spent 
evaluating JacobianOperator factories - mutable double jacobian_operator_eval_seconds = 0.0; - /// Time spent assembling sparse Jacobians - mutable double jacobian_assembly_seconds = 0.0; - /// Time spent directly assembling diagonals - mutable double diagonal_assembly_seconds = 0.0; - /// Time spent inverting direct diagonals - mutable double diagonal_invert_seconds = 0.0; - /// Time spent refreshing preconditioner data - mutable double preconditioner_update_seconds = 0.0; - /// Time spent in preconditioner SetOperator calls - mutable double preconditioner_setup_seconds = 0.0; - - /// Optional matrix-free tangent action, y = J(x) dx - MatrixFreeTangentAction matrix_free_tangent_action; - /// Optional JacobianOperator factory - JacobianOperatorFactory jacobian_operator_factory; - /// Cached JacobianOperator for the current PCG block - mutable std::unique_ptr current_jacobian_operator; - /// Owned sparse Jacobian assembled through the JacobianOperator fallback path - mutable std::unique_ptr assembled_jacobian_from_operator; - /// Inverted scalar diagonal preconditioner for the current PCG block - mutable mfem::Vector inverse_diagonal_preconditioner; - /// Whether the current PCG block should use the scalar diagonal preconditioner - mutable bool use_inverse_diagonal_preconditioner = false; - -#ifdef MFEM_USE_MPI - /// Constructor - PcgBlockSolver(MPI_Comm comm_, const NonlinearSolverOptions& nonlinear_opts, Solver& preconditioner) - : mfem::NewtonSolver(comm_), nonlinear_options(nonlinear_opts), pcg_precond(preconditioner) - { - } -#endif - - /// Assemble the Jacobian at x. 
- void assembleJacobian(const mfem::Vector& x) const - { - SMITH_MARK_FUNCTION; - auto start = Clock::now(); - ++num_jacobian_assembles; - grad = &oper->GetGradient(x); - if (nonlinear_options.force_monolithic) { - auto* grad_blocked = dynamic_cast(grad); - if (grad_blocked) grad = buildMonolithicMatrix(*grad_blocked).release(); - } - jacobian_assembly_seconds += secondsSince(start); - } - - /// Evaluate the nonlinear residual. - mfem::real_t computeResidual(const mfem::Vector& x, mfem::Vector& residual) const - { - SMITH_MARK_FUNCTION; - auto start = Clock::now(); - ++num_residuals; - oper->Mult(x, residual); - const auto norm = Norm(residual); - residual_seconds += secondsSince(start); - return norm; - } - - /// Set an optional matrix-free tangent action. - void setMatrixFreeTangentAction(MatrixFreeTangentAction tangent_action) - { - matrix_free_tangent_action = std::move(tangent_action); - } - - /// Set an optional JacobianOperator factory. - void setJacobianOperator(JacobianOperatorFactory jacobian_operator) - { - jacobian_operator_factory = std::move(jacobian_operator); - } - - /// Evaluate and cache the JacobianOperator at x. - void updateJacobianOperator(const mfem::Vector& x) const - { - SMITH_MARK_FUNCTION; - SLIC_ERROR_ROOT_IF(!jacobian_operator_factory, "No JacobianOperator factory is registered."); - auto start = Clock::now(); - ++num_jacobian_operator_evals; - current_jacobian_operator = jacobian_operator_factory(x); - SLIC_ERROR_ROOT_IF(!current_jacobian_operator, "JacobianOperator factory returned a null operator."); - jacobian_operator_eval_seconds += secondsSince(start); - } - - /// Assemble and invert the scalar diagonal preconditioner from the current JacobianOperator. 
- void updateDiagonalPreconditioner() const - { - SMITH_MARK_FUNCTION; - SLIC_ERROR_ROOT_IF(!current_jacobian_operator, "Cannot build diagonal preconditioner without a JacobianOperator."); - - auto diagonal_start = Clock::now(); - current_jacobian_operator->assembleDiagonal(inverse_diagonal_preconditioner); - diagonal_assembly_seconds += secondsSince(diagonal_start); - ++num_diagonal_assembles; - - auto invert_start = Clock::now(); - double max_abs_diag = 0.0; - for (int i = 0; i < inverse_diagonal_preconditioner.Size(); ++i) { - max_abs_diag = std::max(max_abs_diag, std::abs(inverse_diagonal_preconditioner[i])); - } - - const double floor = nonlinear_options.pcg_diagonal_floor * max_abs_diag; - SLIC_ERROR_ROOT_IF(!(floor > 0.0), "Cannot invert a zero Jacobian diagonal for PCG-block preconditioning."); - for (int i = 0; i < inverse_diagonal_preconditioner.Size(); ++i) { - inverse_diagonal_preconditioner[i] = 1.0 / std::max(std::abs(inverse_diagonal_preconditioner[i]), floor); - } - diagonal_invert_seconds += secondsSince(invert_start); - - use_inverse_diagonal_preconditioner = true; - } - - /// Refresh the tangent and preconditioner used by the next PCG block attempt. 
- void refreshBlockOperators(const mfem::Vector& x) const - { - auto refresh_start = Clock::now(); - if (jacobian_operator_factory) { - updateJacobianOperator(x); - ++num_preconditioner_updates; - if (nonlinear_options.pcg_use_jacobian_diagonal_preconditioner) { - updateDiagonalPreconditioner(); - } else { - use_inverse_diagonal_preconditioner = false; - auto assembly_start = Clock::now(); - ++num_jacobian_assembles; - assembled_jacobian_from_operator = current_jacobian_operator->assemble(); - jacobian_assembly_seconds += secondsSince(assembly_start); - grad = assembled_jacobian_from_operator.get(); - auto setup_start = Clock::now(); - pcg_precond.SetOperator(*grad); - preconditioner_setup_seconds += secondsSince(setup_start); - } - } else { - SLIC_ERROR_ROOT_IF(nonlinear_options.pcg_use_jacobian_diagonal_preconditioner, - "PCG-block diagonal preconditioning requires a registered JacobianOperator."); - current_jacobian_operator.reset(); - use_inverse_diagonal_preconditioner = false; - assembleJacobian(x); - ++num_preconditioner_updates; - auto setup_start = Clock::now(); - pcg_precond.SetOperator(*grad); - preconditioner_setup_seconds += secondsSince(setup_start); - } - preconditioner_update_seconds += secondsSince(refresh_start); - } - - /// Apply the tangent at x to dx. 
- void hessVec(const mfem::Vector& x, const mfem::Vector& dx, mfem::Vector& y) const - { - SMITH_MARK_FUNCTION; - auto start = Clock::now(); - ++num_hess_vecs; - if (current_jacobian_operator) { - current_jacobian_operator->Mult(dx, y); - const double seconds = secondsSince(start); - hess_vec_seconds += seconds; - jacobian_operator_hess_vec_seconds += seconds; - } else if (jacobian_operator_factory) { - updateJacobianOperator(x); - current_jacobian_operator->Mult(dx, y); - const double seconds = secondsSince(start); - hess_vec_seconds += seconds; - jacobian_operator_hess_vec_seconds += seconds; - } else if (matrix_free_tangent_action) { - matrix_free_tangent_action(x, dx, y); - const double seconds = secondsSince(start); - hess_vec_seconds += seconds; - matrix_free_hess_vec_seconds += seconds; - } else { - grad->Mult(dx, y); - const double seconds = secondsSince(start); - hess_vec_seconds += seconds; - assembled_hess_vec_seconds += seconds; - } - } - - /// Apply the configured nonlinear PCG preconditioner. - void precond(const mfem::Vector& x, mfem::Vector& v) const - { - SMITH_MARK_FUNCTION; - auto start = Clock::now(); - ++num_preconds; - if (use_inverse_diagonal_preconditioner) { - SLIC_ERROR_ROOT_IF(inverse_diagonal_preconditioner.Size() != x.Size(), - "PCG-block diagonal preconditioner size does not match the residual vector."); - v.SetSize(x.Size()); - for (int i = 0; i < x.Size(); ++i) { - v[i] = inverse_diagonal_preconditioner[i] * x[i]; - } - } else { - pcg_precond.Mult(x, v); - } - preconditioner_seconds += secondsSince(start); - } - - /// Return solver diagnostic counters. 
- PcgBlockDiagnostics diagnostics() const - { - return {.num_residuals = num_residuals, - .num_hess_vecs = num_hess_vecs, - .num_preconds = num_preconds, - .num_jacobian_assembles = num_jacobian_assembles, - .num_jacobian_operator_evals = num_jacobian_operator_evals, - .num_diagonal_assembles = num_diagonal_assembles, - .num_preconditioner_updates = num_preconditioner_updates, - .num_prefix_accepts = num_prefix_accepts, - .num_momentum_resets = num_momentum_resets, - .num_nonzero_beta = num_nonzero_beta, - .num_zero_beta = num_zero_beta, - .num_blocks = num_blocks, - .num_block_rejects = num_block_rejects, - .num_powell_restarts = num_powell_restarts, - .num_descent_restarts = num_descent_restarts, - .num_negative_curvature = num_negative_curvature, - .num_line_search_backtracks = num_line_search_backtracks, - .num_trust_capped_steps = num_trust_capped_steps, - .num_accepted_steps = num_accepted_steps, - .num_trial_steps = num_trial_steps, - .residual_seconds = residual_seconds, - .hess_vec_seconds = hess_vec_seconds, - .jacobian_operator_hess_vec_seconds = jacobian_operator_hess_vec_seconds, - .assembled_hess_vec_seconds = assembled_hess_vec_seconds, - .matrix_free_hess_vec_seconds = matrix_free_hess_vec_seconds, - .preconditioner_seconds = preconditioner_seconds, - .jacobian_operator_eval_seconds = jacobian_operator_eval_seconds, - .jacobian_assembly_seconds = jacobian_assembly_seconds, - .diagonal_assembly_seconds = diagonal_assembly_seconds, - .diagonal_invert_seconds = diagonal_invert_seconds, - .preconditioner_update_seconds = preconditioner_update_seconds, - .preconditioner_setup_seconds = preconditioner_setup_seconds, - .final_h_scale = final_h_scale, - .last_trust_ratio = last_trust_ratio}; - } - - /// @overload - void Mult(const mfem::Vector&, mfem::Vector& X) const - { - MFEM_ASSERT(oper != NULL, "the Operator is not set (use SetOperator)."); - MFEM_ASSERT(prec != NULL, "the Solver is not set (use SetSolver)."); - - print_level = 
static_cast(std::max(nonlinear_options.print_level, 0)); - print_level = print_options.iterations ? std::max(1, print_level) : print_level; - print_level = print_options.summary ? std::max(2, print_level) : print_level; - - num_hess_vecs = 0; - num_preconds = 0; - num_residuals = 0; - num_jacobian_assembles = 0; - num_jacobian_operator_evals = 0; - num_diagonal_assembles = 0; - num_preconditioner_updates = 0; - num_prefix_accepts = 0; - num_momentum_resets = 0; - num_nonzero_beta = 0; - num_zero_beta = 0; - num_blocks = 0; - num_block_rejects = 0; - num_powell_restarts = 0; - num_descent_restarts = 0; - num_negative_curvature = 0; - num_line_search_backtracks = 0; - num_trust_capped_steps = 0; - num_accepted_steps = 0; - num_trial_steps = 0; - final_h_scale = nonlinear_options.pcg_h_scale_init; - last_trust_ratio = 0.0; - residual_seconds = 0.0; - hess_vec_seconds = 0.0; - jacobian_operator_hess_vec_seconds = 0.0; - assembled_hess_vec_seconds = 0.0; - matrix_free_hess_vec_seconds = 0.0; - preconditioner_seconds = 0.0; - jacobian_operator_eval_seconds = 0.0; - jacobian_assembly_seconds = 0.0; - diagonal_assembly_seconds = 0.0; - diagonal_invert_seconds = 0.0; - preconditioner_update_seconds = 0.0; - preconditioner_setup_seconds = 0.0; - current_jacobian_operator.reset(); - assembled_jacobian_from_operator.reset(); - inverse_diagonal_preconditioner.SetSize(0); - use_inverse_diagonal_preconditioner = false; - - SLIC_ERROR_ROOT_IF(nonlinear_options.pcg_block_len <= 0, "PcgBlock requires pcg_block_len > 0"); - SLIC_ERROR_ROOT_IF(nonlinear_options.pcg_window <= 0, "PcgBlock requires pcg_window > 0"); - SLIC_ERROR_ROOT_IF(nonlinear_options.pcg_ls_max_backtracks < 0, "PcgBlock requires pcg_ls_max_backtracks >= 0"); - SLIC_ERROR_ROOT_IF(nonlinear_options.pcg_delta_avg_window <= 0, "PcgBlock requires pcg_delta_avg_window > 0"); - - mfem::real_t norm = computeResidual(X, r); - initial_norm = norm; - if (norm == 0.0) { - converged = true; - final_iter = 0; - final_norm = norm; 
- return; - } - - const mfem::real_t norm_goal = std::max(rel_tol * initial_norm, abs_tol); - - if (print_level == 1) { - mfem::out << "PcgBlock iteration " << std::setw(3) << 0 << " : ||r|| = " << std::setw(13) << norm << "\n"; - } - - pcg_precond.iterative_mode = false; - - x_trial.SetSize(X.Size()); - x_trial = 0.0; - r_trial.SetSize(X.Size()); - r_trial = 0.0; - scratch.SetSize(X.Size()); - scratch = 0.0; - - mfem::Vector r_block(X.Size()); - mfem::Vector r_candidate(X.Size()); - mfem::Vector force(X.Size()); - mfem::Vector z(X.Size()); - mfem::Vector z_old(X.Size()); - mfem::Vector p(X.Size()); - mfem::Vector p_old(X.Size()); - mfem::Vector Hp(X.Size()); - mfem::Vector step(X.Size()); - mfem::Vector x_candidate(X.Size()); - - bool have_momentum = false; - double rho_old = 0.0; - double h_scale = nonlinear_options.pcg_h_scale_init; - int retries_remaining = nonlinear_options.pcg_max_block_retries; - int it = 0; - double cumulative_work = 0.0; - std::vector work_history{cumulative_work}; - std::vector accepted_step_norms; - - auto append_bounded = [](std::vector& history, double value, int max_size) { - history.push_back(value); - const auto bound = static_cast(max_size); - if (history.size() > bound) { - const auto num_to_remove = static_cast::difference_type>(history.size() - bound); - history.erase(history.begin(), history.begin() + num_to_remove); - } - }; - - auto reset_momentum = [&]() { - have_momentum = false; - rho_old = 0.0; - p_old = 0.0; - z_old = 0.0; - ++num_momentum_resets; - }; - - auto window_max = [&](const std::vector& history) { - const int window = nonlinear_options.pcg_window; - const auto begin = history.size() > static_cast(window) ? 
history.end() - window : history.begin(); - return *std::max_element(begin, history.end()); - }; - - auto current_delta_ref = [&]() { - if (accepted_step_norms.empty()) { - return 0.0; - } - const int window = nonlinear_options.pcg_delta_avg_window; - const auto begin = accepted_step_norms.size() > static_cast(window) ? accepted_step_norms.end() - window - : accepted_step_norms.begin(); - double sum = 0.0; - for (auto iter = begin; iter != accepted_step_norms.end(); ++iter) { - sum += *iter; - } - return sum / static_cast(accepted_step_norms.end() - begin); - }; - - for (; true;) { - MFEM_ASSERT(mfem::IsFinite(norm), "norm = " << norm); - if (print_level >= 2) { - mfem::out << "PcgBlock iteration " << std::setw(3) << it << " : ||r|| = " << std::setw(13) << norm; - if (it > 0) { - mfem::out << ", ||r||/||r_0|| = " << std::setw(13) << (initial_norm != 0.0 ? norm / initial_norm : norm); - } else { - mfem::out << ", norm goal = " << std::setw(13) << norm_goal; - } - mfem::out << '\n'; - } - - if (print_level >= 1 && (norm != norm)) { - mfem::out << "Initial residual for PCG-block iteration is undefined/nan." << std::endl; - mfem::out << "PcgBlock: No convergence!\n"; - converged = false; - break; - } - - if (norm <= norm_goal && it >= nonlinear_options.min_iterations) { - converged = true; - break; - } else if (it >= max_iter) { - converged = false; - break; - } else if (retries_remaining <= 0 || h_scale < nonlinear_options.pcg_min_h_scale) { - converged = false; - break; - } - - refreshBlockOperators(X); - - r_block = r; - const double norm_block = norm; - bool block_finished = false; - - while (!block_finished) { - x_trial = X; - r = r_block; - norm = norm_block; - - double block_predicted = 0.0; - double block_actual = 0.0; - double block_delta_ref = current_delta_ref(); - double block_trust_size = h_scale * (block_delta_ref > 0.0 ? 
block_delta_ref : 1.0); - double trial_cumulative_work = cumulative_work; - int trial_steps = 0; - bool trial_failed = false; - bool trial_ended_after_inner_failure = false; - std::vector trial_step_norms; - auto trial_work_history = work_history; - - for (int block_it = 0; block_it < nonlinear_options.pcg_block_len && it + trial_steps < max_iter; ++block_it) { - force = r; - force *= -1.0; - precond(force, z); - ++num_trial_steps; - - const double rho = Dot(force, z); - if (!mfem::IsFinite(rho) || rho <= 0.0) { - trial_ended_after_inner_failure = trial_steps > 0; - trial_failed = trial_steps == 0; - break; - } - - double beta = 0.0; - if (have_momentum) { - const double force_dot_z_old = Dot(force, z_old); - beta = std::max(0.0, (rho - force_dot_z_old) / rho_old); - if (std::abs(force_dot_z_old) > nonlinear_options.pcg_powell_eta * rho) { - beta = 0.0; - ++num_powell_restarts; - } - } - - p = z; - if (have_momentum && beta != 0.0) { - p.Add(beta, p_old); - } - - double force_dot_p = Dot(force, p); - if (force_dot_p <= nonlinear_options.pcg_eps_descent * rho) { - beta = 0.0; - p = z; - force_dot_p = rho; - ++num_descent_restarts; - } - if (beta == 0.0) { - ++num_zero_beta; - } else { - ++num_nonzero_beta; - } - - hessVec(X, p, Hp); - const double pHp = Dot(p, Hp); - - double alpha = 0.0; - double alpha_quad = std::numeric_limits::quiet_NaN(); - const bool positive_curvature = pHp > 0.0 && mfem::IsFinite(pHp); - if (positive_curvature) { - alpha_quad = force_dot_p / pHp; - alpha = alpha_quad; - } else { - ++num_negative_curvature; - } - - const double p_norm = Norm(p); - double delta_ref = current_delta_ref(); - if (delta_ref <= 0.0 && alpha > 0.0 && mfem::IsFinite(alpha) && p_norm > 0.0) { - delta_ref = alpha * p_norm; - } else if (delta_ref <= 0.0) { - delta_ref = 1.0; - } - block_delta_ref = delta_ref; - block_trust_size = h_scale * delta_ref; - - const bool apply_trust_cap = !positive_curvature || h_scale < nonlinear_options.pcg_h_scale_init; - bool trust_capped 
= false; - if (apply_trust_cap && p_norm > 0.0) { - const double alpha_cap = h_scale * delta_ref / p_norm; - if (alpha > 0.0 && mfem::IsFinite(alpha)) { - if (alpha_cap < alpha) { - ++num_trust_capped_steps; - trust_capped = true; - } - alpha = std::min(alpha, alpha_cap); - } else { - alpha = alpha_cap; - trust_capped = true; - } - } - - if (!(alpha > 0.0) || !mfem::IsFinite(alpha)) { - trial_ended_after_inner_failure = trial_steps > 0; - trial_failed = trial_steps == 0; - break; - } - - bool accepted_step = false; - double accepted_work = 0.0; - double accepted_predicted = 0.0; - double accepted_step_norm = 0.0; - int accepted_ls_count = 0; - - for (int ls = 0; ls <= nonlinear_options.pcg_ls_max_backtracks; ++ls) { - step = p; - step *= alpha; - add(x_trial, step, x_candidate); - - const double norm_candidate = computeResidual(x_candidate, r_candidate); - const double work = -0.5 * Dot(r, step) - 0.5 * Dot(r_candidate, step); - const double cumulative_candidate = trial_cumulative_work + work; - const double work_ref = window_max(trial_work_history); - const bool finite_candidate = mfem::IsFinite(norm_candidate) && mfem::IsFinite(work); - const bool sufficient_work = - cumulative_candidate >= work_ref - nonlinear_options.pcg_ls_armijo_c * alpha * force_dot_p; - - if (finite_candidate && (sufficient_work || norm_candidate <= norm_goal)) { - const double predicted = alpha * force_dot_p - 0.5 * alpha * alpha * pHp; - accepted_predicted = std::max(predicted, 0.0); - accepted_work = work; - accepted_step_norm = Norm(step); - accepted_ls_count = ls; - norm = norm_candidate; - accepted_step = true; - break; - } - - alpha *= nonlinear_options.pcg_ls_shrink; - } - - if (!accepted_step) { - trial_ended_after_inner_failure = trial_steps > 0; - trial_failed = trial_steps == 0; - break; - } - - x_trial = x_candidate; - r = r_candidate; - trial_cumulative_work += accepted_work; - append_bounded(trial_work_history, trial_cumulative_work, nonlinear_options.pcg_window); - 
append_bounded(trial_step_norms, accepted_step_norm, nonlinear_options.pcg_delta_avg_window); - block_predicted += accepted_predicted; - block_actual += accepted_work; - num_line_search_backtracks += static_cast(accepted_ls_count); - - if (print_level >= 2) { - mfem::out << " PcgBlock step " << std::setw(3) << (it + trial_steps + 1) << " : alpha = " << std::setw(13) - << alpha << ", approx work = " << std::setw(13) << accepted_predicted - << ", achieved work = " << std::setw(13) << accepted_work << ", trust size = " << std::setw(13) - << block_trust_size << ", capped = " << trust_capped << ", ls = " << accepted_ls_count << '\n'; - } - - p_old = p; - z_old = z; - rho_old = rho; - have_momentum = true; - ++trial_steps; - ++num_accepted_steps; - - if (norm <= norm_goal && it + trial_steps >= nonlinear_options.min_iterations) { - break; - } - } - - double trust_ratio = 1.0; - if (block_predicted > nonlinear_options.pcg_eps_descent) { - trust_ratio = block_actual / block_predicted; - } else if (block_actual < 0.0) { - trust_ratio = -std::numeric_limits::infinity(); - } - - const bool block_converged = norm <= norm_goal && it + trial_steps >= nonlinear_options.min_iterations; - const bool accept_block = - trial_steps > 0 && !trial_failed && - (block_converged || (block_actual >= 0.0 && trust_ratio >= nonlinear_options.pcg_trust_eta_bad)); - - const double old_h_scale = h_scale; - const bool prefix_accept = accept_block && trial_ended_after_inner_failure; - bool reset_next_momentum = false; - if (accept_block) { - if (prefix_accept) { - ++num_prefix_accepts; - } - X = x_trial; - cumulative_work = trial_cumulative_work; - work_history = std::move(trial_work_history); - accepted_step_norms.insert(accepted_step_norms.end(), trial_step_norms.begin(), trial_step_norms.end()); - if (accepted_step_norms.size() > static_cast(nonlinear_options.pcg_delta_avg_window)) { - accepted_step_norms.erase(accepted_step_norms.begin(), - accepted_step_norms.end() - 
nonlinear_options.pcg_delta_avg_window); - } - it += trial_steps; - ++num_blocks; - - if (trust_ratio < nonlinear_options.pcg_trust_eta_bad) { - h_scale = std::max(h_scale * nonlinear_options.pcg_shrink, nonlinear_options.pcg_min_h_scale); - reset_momentum(); - reset_next_momentum = true; - } else if (trial_ended_after_inner_failure) { - reset_momentum(); - reset_next_momentum = true; - } else if (trust_ratio >= nonlinear_options.pcg_trust_eta_good) { - h_scale = std::min(h_scale * nonlinear_options.pcg_growth, nonlinear_options.pcg_h_scale_init); - } - const double next_trust_size = h_scale * block_delta_ref; - - if (print_level >= 2) { - mfem::out << "PcgBlock block accepted: steps = " << std::setw(3) << trial_steps - << ", prefix = " << prefix_accept << ", approx work = " << std::setw(13) << block_predicted - << ", achieved work = " << std::setw(13) << block_actual << ", rho = " << std::setw(13) - << trust_ratio << ", h_scale = " << std::setw(13) << old_h_scale << " -> " << std::setw(13) - << h_scale << ", trust size = " << std::setw(13) << block_trust_size << " -> " << std::setw(13) - << next_trust_size << ", reset momentum = " << reset_next_momentum << '\n'; - } - last_trust_ratio = trust_ratio; - - block_finished = true; - } else { - r = r_block; - norm = norm_block; - h_scale *= nonlinear_options.pcg_shrink; - reset_momentum(); - --retries_remaining; - ++num_block_rejects; - const double next_trust_size = h_scale * block_delta_ref; - - if (print_level >= 2) { - mfem::out << "PcgBlock block rejected: steps = " << std::setw(3) << trial_steps - << ", approx work = " << std::setw(13) << block_predicted << ", achieved work = " << std::setw(13) - << block_actual << ", rho = " << std::setw(13) << trust_ratio << ", h_scale = " << std::setw(13) - << old_h_scale << " -> " << std::setw(13) << h_scale << ", trust size = " << std::setw(13) - << block_trust_size << " -> " << std::setw(13) << next_trust_size << ", reset momentum = 1" - << ", retries left = " << 
retries_remaining << '\n'; - } - - if (retries_remaining <= 0 || h_scale < nonlinear_options.pcg_min_h_scale) { - block_finished = true; - } else { - refreshBlockOperators(X); - } - } - } - } - - final_iter = it; - final_norm = norm; - final_h_scale = h_scale; - - if (print_level == 1) { - mfem::out << "PcgBlock iteration " << std::setw(3) << final_iter << " : ||r|| = " << std::setw(13) << norm - << '\n'; - } - if (!converged && print_level >= 1) { - mfem::out << "PcgBlock: No convergence!\n"; - } - } -}; EquationSolver::EquationSolver(NonlinearSolverOptions nonlinear_opts, LinearSolverOptions lin_opts, MPI_Comm comm) { @@ -2512,27 +1553,6 @@ void EquationSolver::setOperator(const mfem::Operator& op) } } -void EquationSolver::setMatrixFreeTangentAction(MatrixFreeTangentAction tangent_action) -{ - auto* pcg_block = dynamic_cast(nonlin_solver_.get()); - if (pcg_block) { - pcg_block->setMatrixFreeTangentAction(std::move(tangent_action)); - } -} - -void EquationSolver::setJacobianOperator(JacobianOperatorFactory jacobian_operator) -{ - auto* pcg_block = dynamic_cast(nonlin_solver_.get()); - if (pcg_block) { - pcg_block->setJacobianOperator(std::move(jacobian_operator)); - return; - } - auto* trust_region = dynamic_cast(nonlin_solver_.get()); - if (trust_region) { - trust_region->setJacobianOperator(std::move(jacobian_operator)); - } -} - void EquationSolver::solve(mfem::Vector& x) const { mfem::Vector zero(x); @@ -2542,15 +1562,6 @@ void EquationSolver::solve(mfem::Vector& x) const nonlin_solver_->Mult(zero, x); } -std::optional EquationSolver::pcgBlockDiagnostics() const -{ - auto* pcg_block = dynamic_cast(nonlin_solver_.get()); - if (!pcg_block) { - return std::nullopt; - } - return pcg_block->diagnostics(); -} - std::optional EquationSolver::trustRegionDiagnostics() const { auto* trust_region = dynamic_cast(nonlin_solver_.get()); @@ -2671,8 +1682,6 @@ std::unique_ptr buildNonlinearSolver(NonlinearSolverOptions nonlinear_solver = std::make_unique(comm, 
nonlinear_opts); } else if (nonlinear_opts.nonlin_solver == NonlinearSolver::TrustRegion) { nonlinear_solver = std::make_unique(comm, nonlinear_opts, linear_opts, prec); - } else if (nonlinear_opts.nonlin_solver == NonlinearSolver::PcgBlock) { - nonlinear_solver = std::make_unique(comm, nonlinear_opts, prec); #ifdef SMITH_USE_PETSC } else if (nonlinear_opts.nonlin_solver == NonlinearSolver::PetscNewton) { nonlinear_solver = std::make_unique(comm, nonlinear_opts); @@ -2931,7 +1940,7 @@ void EquationSolver::defineInputFileSchema(axom::inlet::Container& container) nonlinear_container.addInt("max_iter", "Maximum iterations for the Newton solve.").defaultValue(500); nonlinear_container.addInt("print_level", "Nonlinear print level.").defaultValue(0); nonlinear_container - .addString("solver_type", "Solver type (Newton|NewtonLineSearch|TrustRegion|PcgBlock|KINFullStep|KINLineSearch)") + .addString("solver_type", "Solver type (Newton|NewtonLineSearch|TrustRegion|KINFullStep|KINLineSearch)") .defaultValue("Newton"); } @@ -3011,8 +2020,6 @@ smith::NonlinearSolverOptions FromInlet::operator options.nonlin_solver = smith::NonlinearSolver::NewtonLineSearch; } else if (solver_type == "TrustRegion") { options.nonlin_solver = smith::NonlinearSolver::TrustRegion; - } else if (solver_type == "PcgBlock") { - options.nonlin_solver = smith::NonlinearSolver::PcgBlock; } else if (solver_type == "KINFullStep") { options.nonlin_solver = smith::NonlinearSolver::KINFullStep; } else if (solver_type == "KINLineSearch") { diff --git a/src/smith/numerics/equation_solver.hpp b/src/smith/numerics/equation_solver.hpp index 6100fad73f..3ddf35edef 100644 --- a/src/smith/numerics/equation_solver.hpp +++ b/src/smith/numerics/equation_solver.hpp @@ -13,10 +13,8 @@ #pragma once #include -#include #include #include -#include #include #include @@ -30,152 +28,6 @@ namespace smith { -/** - * @brief Solver-facing interface for Jacobian operations. 
- * - * A JacobianOperator represents the operations available on J(x) after differentiating a residual but before - * necessarily assembling a sparse matrix. Concrete implementations may support matrix-free products, sparse assembly, - * diagonal extraction, or all of them. Unsupported operations should throw. - */ -class JacobianOperator : public mfem::Operator { - public: - using mfem::Operator::Operator; - - /// Assemble the sparse Jacobian representation. - virtual std::unique_ptr assemble() - { - SLIC_ERROR("This JacobianOperator does not support sparse assembly."); - return nullptr; - } - - /// Assemble the scalar true-dof diagonal of the Jacobian. - virtual void assembleDiagonal(mfem::Vector&) const - { - SLIC_ERROR("This JacobianOperator does not support diagonal assembly."); - } -}; - -/** - * @brief Adapter from a smith::functional Gradient object to the solver-facing JacobianOperator interface. - */ -template -class FunctionalJacobianOperator : public JacobianOperator { - using GradientT = std::remove_reference_t; - - public: - explicit FunctionalJacobianOperator(GradientT& gradient) - : JacobianOperator(gradient.Height(), gradient.Width()), gradient_(&gradient) - { - } - - explicit FunctionalJacobianOperator(GradientT&& gradient) - : JacobianOperator(gradient.Height(), gradient.Width()), - owned_gradient_(std::make_unique(std::move(gradient))), - gradient_(owned_gradient_.get()) - { - } - - void Mult(const mfem::Vector& dx, mfem::Vector& y) const override { gradient_->Mult(dx, y); } - - void AddMult(const mfem::Vector& dx, mfem::Vector& y, const double a = 1.0) const override - { - gradient_->AddMult(dx, y, a); - } - - std::unique_ptr assemble() override { return gradient_->assemble(); } - - void assembleDiagonal(mfem::Vector& diag) const override { gradient_->assembleDiagonal(diag); } - - private: - std::unique_ptr owned_gradient_; - GradientT* gradient_; -}; - -/** - * @brief Matrix-free tangent action callback. 
- * - * The callback evaluates y = J(x) dx for the current nonlinear state x - * without requiring EquationSolver to assemble J. - */ -using MatrixFreeTangentAction = std::function; - -/** - * @brief Callback that evaluates and returns a JacobianOperator at the supplied nonlinear state. - */ -using JacobianOperatorFactory = std::function(const mfem::Vector& x)>; - -/// Diagnostic counters for the nonlinear PCG-block solver -struct PcgBlockDiagnostics { - /// Number of nonlinear residual evaluations - size_t num_residuals = 0; - /// Number of assembled Jacobian-vector products - size_t num_hess_vecs = 0; - /// Number of preconditioner applications - size_t num_preconds = 0; - /// Number of assembled Jacobians - size_t num_jacobian_assembles = 0; - /// Number of solver-facing JacobianOperator evaluations - size_t num_jacobian_operator_evals = 0; - /// Number of direct diagonal assemblies - size_t num_diagonal_assembles = 0; - /// Number of preconditioner operator updates - size_t num_preconditioner_updates = 0; - /// Number of accepted prefix blocks - size_t num_prefix_accepts = 0; - /// Number of momentum resets - size_t num_momentum_resets = 0; - /// Number of steps with nonzero PCG beta - size_t num_nonzero_beta = 0; - /// Number of steps with zero PCG beta - size_t num_zero_beta = 0; - /// Number of accepted blocks - size_t num_blocks = 0; - /// Number of rejected blocks - size_t num_block_rejects = 0; - /// Number of Powell restarts - size_t num_powell_restarts = 0; - /// Number of descent-guard restarts - size_t num_descent_restarts = 0; - /// Number of non-positive curvature directions - size_t num_negative_curvature = 0; - /// Number of line-search backtracks - size_t num_line_search_backtracks = 0; - /// Number of positive-curvature steps capped by the trust radius - size_t num_trust_capped_steps = 0; - /// Number of accepted inner PCG steps - size_t num_accepted_steps = 0; - /// Number of trial inner PCG steps - size_t num_trial_steps = 0; - /// Time spent 
evaluating nonlinear residuals - double residual_seconds = 0.0; - /// Time spent applying Jacobian-vector products - double hess_vec_seconds = 0.0; - /// Time spent applying JacobianOperator products - double jacobian_operator_hess_vec_seconds = 0.0; - /// Time spent applying assembled Jacobian products - double assembled_hess_vec_seconds = 0.0; - /// Time spent applying legacy matrix-free tangent products - double matrix_free_hess_vec_seconds = 0.0; - /// Time spent applying preconditioners - double preconditioner_seconds = 0.0; - /// Time spent evaluating JacobianOperator factories - double jacobian_operator_eval_seconds = 0.0; - /// Time spent assembling sparse Jacobians - double jacobian_assembly_seconds = 0.0; - /// Time spent directly assembling diagonals - double diagonal_assembly_seconds = 0.0; - /// Time spent inverting direct diagonals - double diagonal_invert_seconds = 0.0; - /// Time spent refreshing preconditioner data - double preconditioner_update_seconds = 0.0; - /// Time spent in preconditioner SetOperator calls - double preconditioner_setup_seconds = 0.0; - /// Last trust scale used by the solver - double final_h_scale = 1.0; - /// Last accepted block trust ratio - double last_trust_ratio = 0.0; -}; - /// Diagnostic counters for the TrustRegion nonlinear solver struct TrustRegionDiagnostics { /// Number of nonlinear residual evaluations @@ -192,10 +44,6 @@ struct TrustRegionDiagnostics { size_t num_preconds = 0; /// Number of assembled Jacobians size_t num_jacobian_assembles = 0; - /// Number of solver-facing JacobianOperator evaluations - size_t num_jacobian_operator_evals = 0; - /// Number of direct diagonal assemblies - size_t num_diagonal_assembles = 0; /// Number of trust-region model CG iterations size_t num_cg_iterations = 0; /// Number of subspace solves @@ -216,18 +64,8 @@ struct TrustRegionDiagnostics { size_t num_subspace_solve_start_hess_vecs = 0; /// Number of quadratic subspace backend solves size_t num_quadratic_subspace_solves = 0; 
- /// Number of cubic subspace backend attempts - size_t num_cubic_subspace_attempts = 0; - /// Number of cubic subspace attempts that used the cubic candidate - size_t num_cubic_subspace_uses = 0; - /// Number of cubic subspace attempts that fell back to the quadratic candidate - size_t num_cubic_subspace_quadratic_fallbacks = 0; /// Number of preconditioner operator updates size_t num_preconditioner_updates = 0; - /// Number of nonmonotone accepted TrustRegion steps based on work surrogate - size_t num_nonmonotone_work_accepts = 0; - /// Number of accepted TrustRegion work-surrogate steps that monotone acceptance would have rejected - size_t num_monotone_work_would_reject = 0; /// Time spent evaluating nonlinear residuals double residual_seconds = 0.0; /// Time spent applying Jacobian-vector products @@ -238,14 +76,6 @@ struct TrustRegionDiagnostics { double cauchy_hess_vec_seconds = 0.0; /// Time spent applying Hessian-vector products in line-search model checks double line_search_hess_vec_seconds = 0.0; - /// Time spent applying JacobianOperator products - double jacobian_operator_hess_vec_seconds = 0.0; - /// Time spent evaluating JacobianOperator factories - double jacobian_operator_eval_seconds = 0.0; - /// Time spent directly assembling diagonals - double diagonal_assembly_seconds = 0.0; - /// Time spent inverting direct diagonals - double diagonal_invert_seconds = 0.0; /// Time spent applying preconditioners double preconditioner_seconds = 0.0; /// Total time spent in the nonlinear solve @@ -326,10 +156,6 @@ struct TrustRegionDiagnostics { double preconditioner_update_seconds = 0.0; /// Time spent in preconditioner SetOperator calls double preconditioner_setup_seconds = 0.0; - /// Last TrustRegion accumulated work-surrogate level used by nonmonotone acceptance - double last_work_objective = 0.0; - /// Last nonmonotone reference work-surrogate level - double last_nonmonotone_work_reference = 0.0; }; /** @@ -382,27 +208,6 @@ class EquationSolver { */ void 
setOperator(const mfem::Operator& op); - /** - * @brief Sets an optional matrix-free tangent action for nonlinear solvers that can use J(x) dx directly. - * - * Solvers that do not support matrix-free tangent actions ignore this callback. Supported solvers retain their - * assembled-gradient fallback when no callback is set. - * - * @param[in] tangent_action Callback evaluating y = J(x) dx. - */ - void setMatrixFreeTangentAction(MatrixFreeTangentAction tangent_action); - - /** - * @brief Sets an optional JacobianOperator factory for nonlinear solvers that can use matrix-free Jacobian products. - * - * This is the preferred replacement for the narrower matrix-free tangent-action callback. During migration, - * PCG-block uses this callback first when it is registered and otherwise falls back to MatrixFreeTangentAction or - * assembled gradients. - * - * @param[in] jacobian_operator Callback evaluating and returning J(x). - */ - void setJacobianOperator(JacobianOperatorFactory jacobian_operator); - /** * Solves the system F(x) = 0 * @param[in,out] x Solution to the system of nonlinear equations @@ -421,12 +226,6 @@ class EquationSolver { */ const mfem::NewtonSolver& nonlinearSolver() const { return *nonlin_solver_; } - /** - * Returns diagnostic counters when the nonlinear solver is PcgBlock. - * @return Optional PCG-block diagnostics; empty for other nonlinear solvers - */ - std::optional pcgBlockDiagnostics() const; - /** * Returns diagnostic counters when the nonlinear solver is TrustRegion. 
* @return Optional TrustRegion diagnostics; empty for other nonlinear solvers diff --git a/src/smith/numerics/functional/functional.hpp b/src/smith/numerics/functional/functional.hpp index 7e611182b8..dddeadc4d0 100644 --- a/src/smith/numerics/functional/functional.hpp +++ b/src/smith/numerics/functional/functional.hpp @@ -849,71 +849,6 @@ class Functional { return max_entries; } - void AssembleDiagonal(mfem::Vector& diag) const override - { - SLIC_ERROR_ROOT_IF(form_.test_function_space_.family != Family::H1 || - form_.trial_function_spaces_[which_argument].family != Family::H1, - "Functional gradient diagonal assembly currently supports H1 test/trial spaces only."); - SLIC_ERROR_ROOT_IF(test_space_ != trial_space_, - "Functional gradient diagonal assembly currently requires the same test/trial FE space."); - SLIC_ERROR_ROOT_IF(form_.output_L_.Size() != form_.input_L_[which_argument].Size(), - "Functional gradient diagonal assembly requires square local operators."); - - mfem::Vector local_diag(form_.output_L_.Size(), form_.mem_type); - local_diag = 0.0; - - std::vector K_elem_buffer(max_buffer_size()); - - for (auto& integral : form_.integrals_) { - // if this integral's derivative isn't identically zero - if (integral.functional_to_integral_index_.count(which_argument) > 0) { - Domain& dom = integral.domain_; - - uint32_t id = integral.functional_to_integral_index_.at(which_argument); - const auto& G_test = dom.get_restriction(form_.test_function_space_); - const auto& G_trial = dom.get_restriction(form_.trial_function_spaces_[which_argument]); - for (const auto& [geom, calculate_element_matrices_func] : integral.element_gradient_[id]) { - const auto& test_restriction = G_test.restrictions.at(geom); - const auto& trial_restriction = G_trial.restrictions.at(geom); - - CPUArrayView K_e(K_elem_buffer.data(), test_restriction.num_elements, - trial_restriction.nodes_per_elem * trial_restriction.components, - test_restriction.nodes_per_elem * 
test_restriction.components); - detail::zero_out(K_e); - - calculate_element_matrices_func(K_e); - - uint32_t rows_per_elem = uint32_t(test_restriction.nodes_per_elem * test_restriction.components); - uint32_t cols_per_elem = uint32_t(trial_restriction.nodes_per_elem * trial_restriction.components); - - std::vector test_vdofs(rows_per_elem); - std::vector trial_vdofs(cols_per_elem); - - for (uint32_t e = 0; e < test_restriction.num_elements; e++) { - test_restriction.GetElementVDofs(int(e), test_vdofs); - trial_restriction.GetElementVDofs(int(e), trial_vdofs); - - for (uint32_t i = 0; i < cols_per_elem; i++) { - int col = int(trial_vdofs[i].index()); - - for (uint32_t j = 0; j < rows_per_elem; j++) { - int row = int(test_vdofs[j].index()); - if (row == col) { - local_diag(row) += K_e(e, i, j); - } - } - } - } - } - } - } - - diag.SetSize(Height(), form_.mem_type); - form_.P_test_->MultTranspose(local_diag, diag); - } - - void assembleDiagonal(mfem::Vector& diag) const { AssembleDiagonal(diag); } - std::unique_ptr assemble() { if (row_ptr.empty()) { diff --git a/src/smith/numerics/functional/tests/functional_comparisons.cpp b/src/smith/numerics/functional/tests/functional_comparisons.cpp index 031bb56ee5..eab6ca2bb4 100644 --- a/src/smith/numerics/functional/tests/functional_comparisons.cpp +++ b/src/smith/numerics/functional/tests/functional_comparisons.cpp @@ -187,15 +187,6 @@ void functional_test(mfem::ParMesh& mesh, H1

test, H1

trial, Dimension J_func = assemble(drdU); - mfem::Vector diag_direct(U.Size()); - drdU.assembleDiagonal(diag_direct); - - mfem::Vector diag_assembled(U.Size()); - J_func->GetDiag(diag_assembled); - - mfem::Vector diag_diff(U.Size()); - subtract(diag_direct, diag_assembled, diag_diff); - // Compute the gradient action using standard MFEM and functional // mfem::Vector g1 = (*J_mfem) * U; mfem::Vector g1(U.Size()); @@ -221,7 +212,6 @@ void functional_test(mfem::ParMesh& mesh, H1

test, H1

trial, Dimension test, H1 trial, Dim std::unique_ptr J_func = assemble(drdU); - mfem::Vector diag_direct(U.Size()); - drdU.assembleDiagonal(diag_direct); - - mfem::Vector diag_assembled(U.Size()); - J_func->GetDiag(diag_assembled); - - mfem::Vector diag_diff(U.Size()); - subtract(diag_direct, diag_assembled, diag_diff); - // mfem::Vector g1 = (*J_mfem) * U; mfem::Vector g1(U.Size()); J_mfem->Mult(U, g1); @@ -347,7 +328,6 @@ void functional_test(mfem::ParMesh& mesh, H1 test, H1 trial, Dim std::cout << "||g1-g3||/||g1||: " << diff2.Norml2() / g1.Norml2() << std::endl; } - EXPECT_NEAR(0., diag_diff.Norml2() / diag_assembled.Norml2(), 1.e-14); EXPECT_NEAR(0., diff1.Norml2() / g1.Norml2(), 1.e-14); EXPECT_NEAR(0., diff2.Norml2() / g1.Norml2(), 1.e-14); } @@ -507,67 +487,6 @@ double time_on_slowest_rank(Function&& function) } // namespace -TEST(Elasticity, DiagonalAssemblyBenchmark) -{ - if (!run_diagonal_benchmark) { - GTEST_SKIP() << "Set --run-diagonal-benchmark to time direct diagonal assembly."; - } - - static constexpr int dim = 3; - using test_space = H1<2, dim>; - using trial_space = H1<2, dim>; - - auto [fespace, fec] = smith::generateParFiniteElementSpace(mesh3D.get()); - (void)fec; - - mfem::ParGridFunction u_global(fespace.get()); - int seed = 9; - u_global.Randomize(seed); - - mfem::Vector U(fespace->TrueVSize()); - u_global.GetTrueDofs(U); - - Functional residual(fespace.get(), {fespace.get()}); - Domain domain = EntireDomain(*mesh3D); - residual.AddDomainIntegral(Dimension{}, DependsOn<0>{}, StressFunctor{}, domain); - - auto [r, drdU] = residual(0.0, differentiate_wrt(U)); - - mfem::Vector diag_direct(U.Size()); - mfem::Vector diag_assembled(U.Size()); - drdU.assembleDiagonal(diag_direct); - std::unique_ptr J_warmup = assemble(drdU); - J_warmup->GetDiag(diag_assembled); - - const int samples = std::max(diagonal_benchmark_samples, 1); - double direct_time = time_on_slowest_rank([&]() { - for (int sample = 0; sample < samples; sample++) { - 
drdU.assembleDiagonal(diag_direct); - } - }); - - double sparse_time = time_on_slowest_rank([&]() { - for (int sample = 0; sample < samples; sample++) { - std::unique_ptr J = assemble(drdU); - J->GetDiag(diag_assembled); - } - }); - - mfem::Vector diag_diff(U.Size()); - subtract(diag_direct, diag_assembled, diag_diff); - EXPECT_NEAR(0.0, diag_diff.Norml2() / diag_assembled.Norml2(), 1.e-14); - - auto [num_ranks, rank] = smith::getMPIInfo(); - (void)num_ranks; - if (rank == 0) { - std::cout << "DiagonalAssemblyBenchmark direct_seconds=" << direct_time / samples - << " sparse_getdiag_seconds=" << sparse_time / samples << " speedup=" << sparse_time / direct_time - << std::endl; - } - - EXPECT_GT(sparse_time / direct_time, 5.0); -} - // TODO: reenable these once hcurl implements of simplex elements is finished // TEST(Hcurl, 2DLinear) { functional_test(*mesh2D, Hcurl<1>{}, Hcurl<1>{}, Dimension<2>{}); } // TEST(Hcurl, 2DQuadratic) { functional_test(*mesh2D, Hcurl<2>{}, Hcurl<2>{}, Dimension<2>{}); } diff --git a/src/smith/numerics/mfem_trust_region_subspace.cpp b/src/smith/numerics/mfem_trust_region_subspace.cpp index 454cb81d2d..268f832703 100644 --- a/src/smith/numerics/mfem_trust_region_subspace.cpp +++ b/src/smith/numerics/mfem_trust_region_subspace.cpp @@ -93,50 +93,6 @@ std::pair, std::vector> re return std::make_pair(directions_new, A_directions_new); } -std::tuple, std::vector, std::vector> -removeDependentDirectionTriples(std::vector directions, - std::vector A_directions, - std::vector previous_A_directions) -{ - SMITH_MARK_FUNCTION; - MFEM_VERIFY(directions.size() == A_directions.size() && directions.size() == previous_A_directions.size(), - "Direction triple lists must have matching sizes."); - - std::vector norms; - size_t num_dirs = directions.size(); - - for (size_t i = 0; i < num_dirs; ++i) { - norms.push_back(std::sqrt(mfem::InnerProduct(MPI_COMM_WORLD, *directions[i], *directions[i]))); - } - - std::vector> kepts; - for (size_t i = 0; i < num_dirs; 
++i) { - bool keepi = norms[i] != 0.0; - for (auto&& kept_and_j : kepts) { - size_t j = kept_and_j.second; - double dot_ij = mfem::InnerProduct(MPI_COMM_WORLD, *directions[i], *kept_and_j.first); - if (dot_ij > 0.999 * norms[i] * norms[j]) { - keepi = false; - } - } - if (keepi) { - kepts.emplace_back(std::make_pair(directions[i], i)); - } - } - - std::vector directions_new; - std::vector A_directions_new; - std::vector previous_A_directions_new; - - for (auto kept_and_j : kepts) { - directions_new.push_back(directions[kept_and_j.second]); - A_directions_new.push_back(A_directions[kept_and_j.second]); - previous_A_directions_new.push_back(previous_A_directions[kept_and_j.second]); - } - - return std::make_tuple(directions_new, A_directions_new, previous_A_directions_new); -} - #ifdef MFEM_USE_LAPACK TrustRegionSubspaceResult solveSubspaceProblem(const std::vector& directions, @@ -568,7 +524,7 @@ TrustRegionSubspaceResult solveSubspaceProblem(const std::vector& A_directions, const mfem::Vector& b, double delta, int num_leftmost) { -#ifdef SMITH_USE_SLEPC +#if defined(SMITH_USE_SLEPC) && defined(SMITH_TRUST_REGION_USE_PETSC_SUBSPACE) return solveSubspaceProblemPetsc(directions, A_directions, b, delta, num_leftmost); #else throw PetscException("MFEM trust-region subspace solve requires MFEM LAPACK support."); diff --git a/src/smith/numerics/petsc_trust_region_subspace.cpp b/src/smith/numerics/petsc_trust_region_subspace.cpp index aac63c7cd1..2368e06899 100644 --- a/src/smith/numerics/petsc_trust_region_subspace.cpp +++ b/src/smith/numerics/petsc_trust_region_subspace.cpp @@ -6,7 +6,7 @@ #include "smith/numerics/trust_region_solver.hpp" -#ifdef SMITH_USE_SLEPC +#if defined(SMITH_USE_SLEPC) && defined(SMITH_TRUST_REGION_USE_PETSC_SUBSPACE) #include diff --git a/src/smith/numerics/solver_config.hpp b/src/smith/numerics/solver_config.hpp index 27635aeda3..6cfdc53014 100644 --- a/src/smith/numerics/solver_config.hpp +++ b/src/smith/numerics/solver_config.hpp @@ -152,7 
+152,6 @@ enum class NonlinearSolver LBFGS, /**< MFEM-native Limited memory BFGS */ NewtonLineSearch, /**< Custom solver using preconditioned earch direction with backtracking line search */ TrustRegion, /**< Custom solver using a trust region solver */ - PcgBlock, /**< Custom nonlinear preconditioned conjugate-gradient block solver */ KINFullStep, /**< KINSOL Full Newton (Sundials must be enabled) */ KINBacktrackingLineSearch, /**< KINSOL Newton with Backtracking Line Search (Sundials must be enabled) */ KINPicard, /**< KINSOL Picard (Sundials must be enabled) */ @@ -175,8 +174,6 @@ inline std::string nonlinearName(const NonlinearSolver& s) return "NewtonLineSearch"; case NonlinearSolver::TrustRegion: return "TrustRegion"; - case NonlinearSolver::PcgBlock: - return "PcgBlock"; case NonlinearSolver::KINFullStep: return "KINFullStep"; case NonlinearSolver::KINBacktrackingLineSearch: @@ -205,7 +202,6 @@ inline std::map nonlinearSolverMap = { {"LBFGS", NonlinearSolver::LBFGS}, {"NewtonLineSearch", NonlinearSolver::NewtonLineSearch}, {"TrustRegion", NonlinearSolver::TrustRegion}, - {"PcgBlock", NonlinearSolver::PcgBlock}, {"KINFullStep", NonlinearSolver::KINFullStep}, {"KINBacktrackingLineSearch", NonlinearSolver::KINBacktrackingLineSearch}, {"KINPicard", NonlinearSolver::KINPicard}, @@ -465,15 +461,6 @@ struct NonlinearSolverOptions { /// Scaling for the initial trust region size double trust_region_scaling = 0.1; - /// Nonmonotone TrustRegion acceptance window. Zero preserves monotone acceptance. - int trust_nonmonotone_window = 0; - - /// Use JacobianOperator products and diagonal preconditioning in TrustRegion instead of assembled sparse products. - bool trust_use_jacobian_operator = false; - - /// Use a dense cubic subspace model built from retained Hessian-vector changes. 
- bool trust_use_cubic_subspace = false; - /// Option for how when the subspace solver should be utilized within trust-region solver SubSpaceOptions subspace_option = SubSpaceOptions::NEVER; @@ -491,57 +478,6 @@ struct NonlinearSolverOptions { /// Should the gradient be converted to a monolithic matrix bool force_monolithic = false; - - /// Number of speculative nonlinear PCG steps per accepted/rejected block - int pcg_block_len = 10; - - /// Powell restart threshold for nonlinear PCG residual orthogonality - double pcg_powell_eta = 0.005; - - /// Trust-ratio threshold below which the PCG-block trust scale shrinks - double pcg_trust_eta_bad = 0.1; - - /// Trust-ratio threshold above which the PCG-block trust scale grows - double pcg_trust_eta_good = 0.75; - - /// PCG-block trust-scale shrink factor - double pcg_shrink = 0.5; - - /// PCG-block trust-scale growth factor - double pcg_growth = 1.25; - - /// Initial PCG-block trust scale - double pcg_h_scale_init = 1.0; - - /// Minimum PCG-block trust scale before declaring failure - double pcg_min_h_scale = 1e-8; - - /// Maximum number of rejected PCG blocks before declaring failure - int pcg_max_block_retries = 20; - - /// Nonmonotone cumulative gradient-work acceptance window - int pcg_window = 5; - - /// Armijo coefficient for PCG-block inner step backtracking - double pcg_ls_armijo_c = 1e-4; - - /// Maximum number of PCG-block inner step backtracks - int pcg_ls_max_backtracks = 8; - - /// PCG-block inner step backtracking shrink factor - double pcg_ls_shrink = 0.5; - - /// Descent and model denominator tolerance for PCG-block guards - double pcg_eps_descent = 1e-12; - - /// Running-mean window for successful PCG-block trust-radius reference steps - int pcg_delta_avg_window = 5; - - /// Use a direct scalar diagonal extracted from the JacobianOperator as the PCG-block preconditioner - bool pcg_use_jacobian_diagonal_preconditioner = false; - - /// Relative floor used when inverting the absolute Jacobian diagonal for 
PCG-block diagonal preconditioning - double pcg_diagonal_floor = 1e-14; }; // _nonlinear_options_end diff --git a/src/smith/numerics/tests/test_equationsolver.cpp b/src/smith/numerics/tests/test_equationsolver.cpp index a534acd8f1..edab4fd012 100644 --- a/src/smith/numerics/tests/test_equationsolver.cpp +++ b/src/smith/numerics/tests/test_equationsolver.cpp @@ -124,206 +124,6 @@ TEST_P(EquationSolverSuite, All) } } -TEST(EquationSolver, PcgBlockUsesMatrixFreeTangentAction) -{ - auto mesh = mfem::Mesh::MakeCartesian2D(1, 1, mfem::Element::QUADRILATERAL); - auto pmesh = mfem::ParMesh(MPI_COMM_WORLD, mesh); - - pmesh.EnsureNodes(); - pmesh.ExchangeFaceNbrData(); - - constexpr int p = 1; - constexpr int dim = 2; - using test_space = H1

; - using trial_space = H1

; - - auto [fes, fec] = smith::generateParFiniteElementSpace(&pmesh); - (void)fec; - - mfem::HypreParVector x_exact(fes.get()); - mfem::HypreParVector x_computed(fes.get()); - x_exact.Randomize(0); - x_computed = 0.0; - - std::unique_ptr J; - - Functional residual(fes.get(), {fes.get()}); - Domain domain = EntireDomain(pmesh); - residual.AddDomainIntegral( - Dimension{}, DependsOn<0>{}, - [](double /*t*/, auto, auto scalar) { - auto [u, du_dx] = scalar; - return smith::tuple{u, du_dx}; - }, - domain); - - StdFunctionOperator residual_opr( - fes->TrueVSize(), - [&x_exact, &residual](const mfem::Vector& x, mfem::Vector& r) { - constexpr double time = 0.0; - r = residual(time, x); - r -= residual(time, x_exact); - }, - [&residual, &J](const mfem::Vector& x) -> mfem::Operator& { - constexpr double time = 0.0; - auto [val, grad] = residual(time, differentiate_wrt(x)); - J = assemble(grad); - return *J; - }); - - const LinearSolverOptions lin_opts = {.linear_solver = LinearSolver::CG, - .preconditioner = Preconditioner::HypreJacobi, - .relative_tol = 1.0e-12, - .absolute_tol = 1.0e-14, - .max_iterations = 500, - .print_level = 0}; - - const NonlinearSolverOptions nonlin_opts = {.nonlin_solver = NonlinearSolver::PcgBlock, - .relative_tol = 1.0e-12, - .absolute_tol = 1.0e-14, - .max_iterations = 500, - .print_level = 0}; - - EquationSolver eq_solver(nonlin_opts, lin_opts); - eq_solver.setOperator(residual_opr); - - int num_tangent_actions = 0; - eq_solver.setMatrixFreeTangentAction( - [&residual, &num_tangent_actions](const mfem::Vector& x, const mfem::Vector& dx, mfem::Vector& y) { - constexpr double time = 0.0; - auto [val, grad] = residual(time, differentiate_wrt(x)); - grad.Mult(dx, y); - ++num_tangent_actions; - }); - - eq_solver.solve(x_computed); - - const auto diagnostics = eq_solver.pcgBlockDiagnostics(); - ASSERT_TRUE(diagnostics.has_value()); - EXPECT_GT(num_tangent_actions, 0); - EXPECT_EQ(diagnostics->num_hess_vecs, static_cast(num_tangent_actions)); - 
EXPECT_TRUE(eq_solver.nonlinearSolver().GetConverged()); - EXPECT_LT(x_computed.DistanceTo(x_exact.GetData()), 1.0e-10); -} - -TEST(EquationSolver, PcgBlockUsesJacobianOperator) -{ - class MatrixJacobianOperator : public JacobianOperator { - public: - explicit MatrixJacobianOperator(std::unique_ptr matrix) - : JacobianOperator(matrix->Height(), matrix->Width()), matrix_(std::move(matrix)) - { - } - - void Mult(const mfem::Vector& dx, mfem::Vector& y) const override { matrix_->Mult(dx, y); } - - std::unique_ptr assemble() override - { - return std::make_unique(*matrix_); - } - - void assembleDiagonal(mfem::Vector& diag) const override { matrix_->GetDiag(diag); } - - private: - std::unique_ptr matrix_; - }; - - auto mesh = mfem::Mesh::MakeCartesian2D(1, 1, mfem::Element::QUADRILATERAL); - auto pmesh = mfem::ParMesh(MPI_COMM_WORLD, mesh); - - pmesh.EnsureNodes(); - pmesh.ExchangeFaceNbrData(); - - constexpr int p = 1; - constexpr int dim = 2; - using test_space = H1

; - using trial_space = H1

; - - auto [fes, fec] = smith::generateParFiniteElementSpace(&pmesh); - (void)fec; - - mfem::HypreParVector x_exact(fes.get()); - mfem::HypreParVector x_computed(fes.get()); - x_exact.Randomize(0); - x_computed = 0.0; - - std::unique_ptr J; - - Functional residual(fes.get(), {fes.get()}); - Domain domain = EntireDomain(pmesh); - residual.AddDomainIntegral( - Dimension{}, DependsOn<0>{}, - [](double /*t*/, auto, auto scalar) { - auto [u, du_dx] = scalar; - return smith::tuple{u, du_dx}; - }, - domain); - - { - constexpr double time = 0.0; - auto [val, grad] = residual(time, differentiate_wrt(x_exact)); - FunctionalJacobianOperator jacobian_operator(grad); - - mfem::Vector dx(x_exact.Size()); - mfem::Vector y_grad(x_exact.Size()); - mfem::Vector y_operator(x_exact.Size()); - dx.Randomize(1); - grad.Mult(dx, y_grad); - jacobian_operator.Mult(dx, y_operator); - - EXPECT_LT(y_operator.DistanceTo(y_grad.GetData()), 1.0e-14); - } - - StdFunctionOperator residual_opr( - fes->TrueVSize(), - [&x_exact, &residual](const mfem::Vector& x, mfem::Vector& r) { - constexpr double time = 0.0; - r = residual(time, x); - r -= residual(time, x_exact); - }, - [&residual, &J](const mfem::Vector& x) -> mfem::Operator& { - constexpr double time = 0.0; - auto [val, grad] = residual(time, differentiate_wrt(x)); - J = assemble(grad); - return *J; - }); - - const LinearSolverOptions lin_opts = {.linear_solver = LinearSolver::CG, - .preconditioner = Preconditioner::HypreJacobi, - .relative_tol = 1.0e-12, - .absolute_tol = 1.0e-14, - .max_iterations = 500, - .print_level = 0}; - - const NonlinearSolverOptions nonlin_opts = {.nonlin_solver = NonlinearSolver::PcgBlock, - .relative_tol = 1.0e-12, - .absolute_tol = 1.0e-14, - .max_iterations = 500, - .print_level = 0}; - - EquationSolver eq_solver(nonlin_opts, lin_opts); - eq_solver.setOperator(residual_opr); - - int num_operator_evals = 0; - eq_solver.setJacobianOperator([&residual, &num_operator_evals](const mfem::Vector& x) { - constexpr double 
time = 0.0; - auto [val, grad] = residual(time, differentiate_wrt(x)); - ++num_operator_evals; - return std::make_unique(assemble(grad)); - }); - - eq_solver.solve(x_computed); - - const auto diagnostics = eq_solver.pcgBlockDiagnostics(); - ASSERT_TRUE(diagnostics.has_value()); - EXPECT_GT(num_operator_evals, 0); - EXPECT_EQ(diagnostics->num_jacobian_operator_evals, static_cast(num_operator_evals)); - EXPECT_GE(diagnostics->num_hess_vecs, diagnostics->num_jacobian_operator_evals); - EXPECT_EQ(diagnostics->num_diagonal_assembles, 0u); - EXPECT_TRUE(eq_solver.nonlinearSolver().GetConverged()); - EXPECT_LT(x_computed.DistanceTo(x_exact.GetData()), 1.0e-10); -} - /** * @brief Nonlinear solvers to test. Always includes NonlinearSolver::Newton and NonlinearSolver::LBFGS * If SMITH_USE_SUNDIALS is set, adds: NonlinearSolver::KINFullStep, NonlinearSolver::KINBacktrackingLineSearch, and diff --git a/src/smith/numerics/tests/test_trust_region_solver_mfem.cpp b/src/smith/numerics/tests/test_trust_region_solver_mfem.cpp index 6e52393681..62c7730205 100644 --- a/src/smith/numerics/tests/test_trust_region_solver_mfem.cpp +++ b/src/smith/numerics/tests/test_trust_region_solver_mfem.cpp @@ -112,53 +112,7 @@ TEST(TrustRegionSubspaceMfem, RemoveDependentDirectionsDropsDuplicatesAndZero) expectNearVector(*hdirs_new[0], hd1, 0.0); } -TEST(TrustRegionSubspaceMfem, RemoveDependentDirectionTriplesKeepsHistoryAligned) -{ - mfem::Vector d1(3); - mfem::Vector d2(3); - mfem::Vector d3(3); - mfem::Vector hd1(3); - mfem::Vector hd2(3); - mfem::Vector hd3(3); - mfem::Vector old_hd1(3); - mfem::Vector old_hd2(3); - mfem::Vector old_hd3(3); - - d1 = 0.0; - d2 = 0.0; - d3 = 0.0; - hd1 = 0.0; - hd2 = 0.0; - hd3 = 0.0; - old_hd1 = 0.0; - old_hd2 = 0.0; - old_hd3 = 0.0; - d1[0] = 1.0; - d2 = d1; - d2 *= 2.0; - d3[2] = 1.0; - hd1[0] = 3.0; - hd2[0] = 6.0; - hd3[2] = 4.0; - old_hd1[0] = 2.0; - old_hd2[0] = 4.0; - old_hd3[2] = 5.0; - - std::vector dirs = {&d1, &d2, &d3}; - std::vector hdirs = {&hd1, 
&hd2, &hd3}; - std::vector old_hdirs = {&old_hd1, &old_hd2, &old_hd3}; - - auto [dirs_new, hdirs_new, old_hdirs_new] = smith::removeDependentDirectionTriples(dirs, hdirs, old_hdirs); - - ASSERT_EQ(dirs_new.size(), 2); - expectNearVector(*dirs_new[0], d1, 0.0); - expectNearVector(*hdirs_new[0], hd1, 0.0); - expectNearVector(*old_hdirs_new[0], old_hd1, 0.0); - expectNearVector(*dirs_new[1], d3, 0.0); - expectNearVector(*hdirs_new[1], hd3, 0.0); - expectNearVector(*old_hdirs_new[1], old_hd3, 0.0); -} TEST(TrustRegionSubspaceMfem, SolveHitsTrustRegionBoundary) { @@ -236,261 +190,6 @@ TEST(TrustRegionSubspaceMfem, SolveHandlesZeroDirection) EXPECT_LT(energy, 0.0); } -TEST(TrustRegionCubicSubspaceMfem, ZeroCubicMatchesInteriorQuadraticSolve) -{ - mfem::DenseMatrix A(2); - A = 0.0; - A(0, 0) = 4.0; - A(1, 1) = 2.0; - - mfem::Vector b(2); - b[0] = 2.0; - b[1] = -1.0; - - std::vector cubic(2, mfem::DenseMatrix(2)); - for (auto& matrix : cubic) { - matrix = 0.0; - } - - auto [x, energy] = smith::solveDenseCubicTrustRegionProblemMfem(A, b, cubic, 10.0); - - EXPECT_NEAR(x[0], 0.5, 1.0e-10); - EXPECT_NEAR(x[1], -0.5, 1.0e-10); - EXPECT_NEAR(energy, -0.75, 1.0e-10); -} - -TEST(TrustRegionCubicSubspaceMfem, CubicTermChangesOneDimensionalMinimizer) -{ - mfem::DenseMatrix A(1); - A(0, 0) = 1.0; - - mfem::Vector b(1); - b[0] = 1.0; - - std::vector cubic(1, mfem::DenseMatrix(1)); - cubic[0](0, 0) = 6.0; - - auto [x, energy] = smith::solveDenseCubicTrustRegionProblemMfem(A, b, cubic, 1.0); - - const double expected = (-1.0 + std::sqrt(13.0)) / 6.0; - EXPECT_NEAR(x[0], expected, 2.0e-3); - EXPECT_NEAR(energy, 0.5 * expected * expected - expected + expected * expected * expected, 5.0e-6); -} - -TEST(TrustRegionCubicSubspaceMfem, RespectsTrustRegionBoundary) -{ - mfem::DenseMatrix A(1); - A(0, 0) = 1.0; - - mfem::Vector b(1); - b[0] = 10.0; - - std::vector cubic(1, mfem::DenseMatrix(1)); - cubic[0] = 0.0; - - auto [x, energy] = smith::solveDenseCubicTrustRegionProblemMfem(A, b, cubic, 
0.25); - - EXPECT_NEAR(x.Norml2(), 0.25, 1.0e-12); - EXPECT_NEAR(x[0], 0.25, 1.0e-12); - EXPECT_NEAR(energy, 0.5 * 0.25 * 0.25 - 10.0 * 0.25, 1.0e-12); -} - -TEST(TrustRegionCubicSubspaceMfem, HistoryProjectedSubspaceSolveRuns) -{ - mfem::Vector e1(2); - mfem::Vector e2(2); - e1 = 0.0; - e2 = 0.0; - e1[0] = 1.0; - e2[1] = 1.0; - - mfem::Vector h1(2); - mfem::Vector h2(2); - mfem::Vector old_h1(2); - mfem::Vector old_h2(2); - h1 = 0.0; - h2 = 0.0; - old_h1 = 0.0; - old_h2 = 0.0; - h1[0] = 2.0; - h2[1] = 3.0; - old_h1[0] = 1.0; - old_h2[1] = 3.0; - - mfem::Vector previous_step(2); - previous_step = 0.0; - previous_step[0] = 1.0; - - mfem::Vector b(2); - b[0] = 1.0; - b[1] = 0.25; - - std::vector directions = {&e1, &e2}; - std::vector h_directions = {&h1, &h2}; - std::vector old_h_directions = {&old_h1, &old_h2}; - - auto [x, leftvecs, leftvals, energy] = - smith::solveCubicSubspaceProblemMfem(directions, h_directions, old_h_directions, previous_step, b, 0.5, 1); - - EXPECT_LE(x.Norml2(), 0.5 + 1.0e-12); - EXPECT_FALSE(leftvecs.empty()); - EXPECT_EQ(leftvals.size(), 1); - EXPECT_LT(energy, 0.0); -} - -TEST(TrustRegionCubicSubspaceMfem, FallsBackToQuadraticWhenCubicPredictionDoesNotImprove) -{ - mfem::Vector e1(1); - mfem::Vector h1(1); - mfem::Vector old_h1(1); - mfem::Vector previous_step(1); - mfem::Vector b(1); - - e1[0] = 1.0; - h1[0] = 1.0; - old_h1[0] = 1.0; - previous_step[0] = 1.0; - b[0] = 1.0; - - std::vector directions = {&e1}; - std::vector h_directions = {&h1}; - std::vector old_h_directions = {&old_h1}; - - auto [cubic_x, cubic_leftvecs, cubic_leftvals, cubic_energy] = - smith::solveCubicSubspaceProblemMfem(directions, h_directions, old_h_directions, previous_step, b, 1.0, 1); - auto [quadratic_x, quadratic_leftvecs, quadratic_leftvals, quadratic_energy] = - smith::solveSubspaceProblemMfem(directions, h_directions, b, 1.0, 1); - - expectNearVector(cubic_x, quadratic_x, 1.0e-12); - EXPECT_EQ(cubic_leftvecs.size(), quadratic_leftvecs.size()); - 
EXPECT_EQ(cubic_leftvals.size(), quadratic_leftvals.size()); - EXPECT_NEAR(cubic_energy, quadratic_energy, 1.0e-12); -} - -TEST(TrustRegionCubicSubspaceMfem, PreviousStepSecantIsExactForCompatibleCubic) -{ - mfem::Vector e1(2); - mfem::Vector e2(2); - e1 = 0.0; - e2 = 0.0; - e1[0] = 1.0; - e2[1] = 1.0; - - mfem::Vector h1(2); - mfem::Vector h2(2); - mfem::Vector old_h1(2); - mfem::Vector old_h2(2); - h1 = 0.0; - h2 = 0.0; - old_h1 = 0.0; - old_h2 = 0.0; - h1[0] = 1.0; - h2[1] = 1.0; - old_h1[0] = 7.0; - old_h2[1] = 1.0; - - mfem::Vector previous_step(2); - previous_step = 0.0; - previous_step[0] = 1.0; - - mfem::Vector b(2); - b = 0.0; - b[0] = 0.1; - - std::vector directions = {&e1, &e2}; - std::vector h_directions = {&h1, &h2}; - std::vector old_h_directions = {&old_h1, &old_h2}; - - bool used_cubic = false; - auto [x, leftvecs, leftvals, energy] = - smith::solveCubicSubspaceProblemMfem(directions, h_directions, old_h_directions, previous_step, b, 1.0, 1, - &used_cubic); - - mfem::DenseMatrix A(2); - A = 0.0; - A(0, 0) = 1.0; - A(1, 1) = 1.0; - std::vector cubic(2, mfem::DenseMatrix(2)); - cubic[0] = 0.0; - cubic[1] = 0.0; - cubic[0](0, 0) = -6.0; - auto [expected_x, expected_energy] = smith::solveDenseCubicTrustRegionProblemMfem(A, b, cubic, 1.0); - - EXPECT_TRUE(used_cubic); - expectNearVector(x, expected_x, 1.0e-12); - EXPECT_NEAR(energy, expected_energy, 1.0e-12); - EXPECT_FALSE(leftvecs.empty()); - EXPECT_EQ(leftvals.size(), 1); -} - -TEST(TrustRegionCubicSubspaceMfem, PreviousStepSecantIsExactForRotatedCompatibleCubic) -{ - mfem::Vector e1(2); - mfem::Vector e2(2); - e1 = 0.0; - e2 = 0.0; - e1[0] = 1.0; - e2[1] = 1.0; - - constexpr double lambda = -6.0; - mfem::Vector previous_step(2); - previous_step[0] = 1.0; - previous_step[1] = 1.0; - mfem::Vector u(previous_step); - u /= u.Norml2(); - - mfem::DenseMatrix delta_h(2); - delta_h = 0.0; - for (int i = 0; i < 2; ++i) { - for (int j = 0; j < 2; ++j) { - delta_h(i, j) = lambda * previous_step.Norml2() * u[i] 
* u[j]; - } - } - - mfem::Vector h1(e1); - mfem::Vector h2(e2); - mfem::Vector old_h1(e1); - mfem::Vector old_h2(e2); - for (int i = 0; i < 2; ++i) { - old_h1[i] -= delta_h(i, 0); - old_h2[i] -= delta_h(i, 1); - } - - mfem::Vector b(2); - b[0] = 0.1 * u[0]; - b[1] = 0.1 * u[1]; - - std::vector directions = {&e1, &e2}; - std::vector h_directions = {&h1, &h2}; - std::vector old_h_directions = {&old_h1, &old_h2}; - - bool used_cubic = false; - auto [x, leftvecs, leftvals, energy] = - smith::solveCubicSubspaceProblemMfem(directions, h_directions, old_h_directions, previous_step, b, 1.0, 1, - &used_cubic); - - mfem::DenseMatrix A(2); - A = 0.0; - A(0, 0) = 1.0; - A(1, 1) = 1.0; - std::vector cubic(2, mfem::DenseMatrix(2)); - cubic[0] = 0.0; - cubic[1] = 0.0; - for (int k = 0; k < 2; ++k) { - for (int i = 0; i < 2; ++i) { - for (int j = 0; j < 2; ++j) { - cubic[size_t(k)](i, j) = lambda * u[k] * u[i] * u[j]; - } - } - } - auto [expected_x, expected_energy] = smith::solveDenseCubicTrustRegionProblemMfem(A, b, cubic, 1.0); - - EXPECT_TRUE(used_cubic); - expectNearVector(x, expected_x, 1.0e-12); - EXPECT_NEAR(energy, expected_energy, 1.0e-12); - EXPECT_FALSE(leftvecs.empty()); - EXPECT_EQ(leftvals.size(), 1); -} int main(int argc, char* argv[]) { diff --git a/src/smith/numerics/tests/test_trust_region_solver_petsc.cpp b/src/smith/numerics/tests/test_trust_region_solver_petsc.cpp index 1e3eae5433..55c7a16f77 100644 --- a/src/smith/numerics/tests/test_trust_region_solver_petsc.cpp +++ b/src/smith/numerics/tests/test_trust_region_solver_petsc.cpp @@ -23,6 +23,8 @@ #include "smith/physics/state/finite_element_vector.hpp" #include "smith/numerics/petsc_solvers.hpp" +#ifdef SMITH_TRUST_REGION_USE_PETSC_SUBSPACE + const std::string MESHTAG = "mesh"; static constexpr int scalar_field_order = 1; @@ -213,6 +215,8 @@ TEST_F(MeshFixture, MfemSubspaceSolveMatchesPetsc) MatDestroy(&A_parallel); } +#endif // SMITH_TRUST_REGION_USE_PETSC_SUBSPACE + int main(int argc, char* argv[]) { 
::testing::InitGoogleTest(&argc, argv); diff --git a/src/smith/numerics/trust_region_cubic_subspace.cpp b/src/smith/numerics/trust_region_cubic_subspace.cpp deleted file mode 100644 index 2bbc86b16c..0000000000 --- a/src/smith/numerics/trust_region_cubic_subspace.cpp +++ /dev/null @@ -1,461 +0,0 @@ -// Copyright (c) Lawrence Livermore National Security, LLC and -// other Smith Project Developers. See the top-level LICENSE file for -// details. -// -// SPDX-License-Identifier: (BSD-3-Clause) - -#include "smith/numerics/trust_region_solver.hpp" - -#include - -#include "smith/infrastructure/profiling.hpp" - -namespace smith { - -#ifdef MFEM_USE_LAPACK - -namespace { - -double dot(const mfem::Vector& a, const mfem::Vector& b) -{ - return a * b; -} - -void symmetrize(mfem::DenseMatrix& A) -{ - MFEM_VERIFY(A.Height() == A.Width(), "symmetrize requires square matrix."); - for (int i = 0; i < A.Height(); ++i) { - for (int j = 0; j < i; ++j) { - const double value = 0.5 * (A(i, j) + A(j, i)); - A(i, j) = value; - A(j, i) = value; - } - } -} - -mfem::Vector matrixColumn(const mfem::DenseMatrix& A, int j) -{ - mfem::Vector col(A.Height()); - for (int i = 0; i < A.Height(); ++i) { - col[i] = A(i, j); - } - return col; -} - -mfem::DenseMatrix columnsToMatrix(const std::vector& cols) -{ - mfem::DenseMatrix A(cols.empty() ? 
0 : cols[0].Size(), static_cast(cols.size())); - for (int j = 0; j < A.Width(); ++j) { - for (int i = 0; i < A.Height(); ++i) { - A(i, j) = cols[size_t(j)][i]; - } - } - return A; -} - -mfem::DenseMatrix denseDot(const std::vector& s, const std::vector& As) -{ - MFEM_VERIFY(s.size() == As.size(), "Dense dot requires matching direction counts."); - mfem::DenseMatrix result(static_cast(s.size())); - for (int i = 0; i < result.Height(); ++i) { - for (int j = 0; j < result.Width(); ++j) { - result(i, j) = innerProduct(*s[size_t(i)], *As[size_t(j)], MPI_COMM_WORLD); - } - } - return result; -} - -mfem::Vector denseDot(const std::vector& s, const mfem::Vector& b) -{ - mfem::Vector result(static_cast(s.size())); - for (int i = 0; i < result.Size(); ++i) { - result[i] = innerProduct(*s[size_t(i)], b, MPI_COMM_WORLD); - } - return result; -} - -mfem::DenseMatrix orthonormalBasisTransform(const mfem::DenseMatrix& gram) -{ - mfem::DenseMatrix gram_copy(gram); - mfem::Vector evals; - mfem::DenseMatrix evecs; - gram_copy.Eigensystem(evals, evecs); - - double trace_mag = 0.0; - for (int i = 0; i < evals.Size(); ++i) { - trace_mag += std::abs(evals[i]); - } - - std::vector kept_columns; - for (int i = 0; i < evals.Size(); ++i) { - if (evals[i] > 1e-9 * trace_mag) { - mfem::Vector col = matrixColumn(evecs, i); - col /= std::sqrt(evals[i]); - kept_columns.emplace_back(std::move(col)); - } - } - - return columnsToMatrix(kept_columns); -} - -mfem::DenseMatrix tripleProduct(const mfem::DenseMatrix& L, const mfem::DenseMatrix& A, const mfem::DenseMatrix& R) -{ - mfem::DenseMatrix tmp(A.Height(), R.Width()); - mfem::Mult(A, R, tmp); - mfem::DenseMatrix out(L.Width(), R.Width()); - mfem::MultAtB(L, tmp, out); - return out; -} - -mfem::Vector projectWithTranspose(const mfem::DenseMatrix& A, const mfem::Vector& x) -{ - mfem::Vector out(A.Width()); - A.MultTranspose(x, out); - return out; -} - -mfem::DenseMatrix orthonormalBasisWithFirstVector(const mfem::Vector& first) -{ - const int n = 
first.Size(); - mfem::DenseMatrix Q(n); - Q = 0.0; - - mfem::Vector q0(first); - q0 /= q0.Norml2(); - for (int i = 0; i < n; ++i) { - Q(i, 0) = q0[i]; - } - - int col = 1; - for (int seed = 0; seed < n && col < n; ++seed) { - mfem::Vector candidate(n); - candidate = 0.0; - candidate[seed] = 1.0; - for (int j = 0; j < col; ++j) { - const mfem::Vector qj = matrixColumn(Q, j); - candidate.Add(-dot(candidate, qj), qj); - } - const double norm = candidate.Norml2(); - if (norm > 1.0e-12) { - candidate /= norm; - for (int i = 0; i < n; ++i) { - Q(i, col) = candidate[i]; - } - ++col; - } - } - - MFEM_VERIFY(col == n, "Failed to build orthonormal basis for cubic tensor completion."); - return Q; -} - -std::vector completeSymmetricCubicTensor(const mfem::DenseMatrix& deltaA, - const mfem::Vector& previous_step) -{ - const int n = previous_step.Size(); - const double step_norm = previous_step.Norml2(); - MFEM_VERIFY(step_norm > 0.0, "Cannot complete cubic tensor with zero previous step."); - - const mfem::DenseMatrix Q = orthonormalBasisWithFirstVector(previous_step); - mfem::DenseMatrix delta_hat = tripleProduct(Q, deltaA, Q); - symmetrize(delta_hat); - - std::vector tensor_hat(static_cast(n), mfem::DenseMatrix(n)); - for (auto& matrix : tensor_hat) { - matrix = 0.0; - } - - for (int i = 0; i < n; ++i) { - for (int j = 0; j < n; ++j) { - const double value = delta_hat(i, j) / step_norm; - tensor_hat[0](i, j) = value; - tensor_hat[size_t(i)](0, j) = value; - tensor_hat[size_t(i)](j, 0) = value; - } - } - - std::vector tensor(static_cast(n), mfem::DenseMatrix(n)); - for (auto& matrix : tensor) { - matrix = 0.0; - } - - for (int a = 0; a < n; ++a) { - for (int b = 0; b < n; ++b) { - for (int c = 0; c < n; ++c) { - double value = 0.0; - for (int alpha = 0; alpha < n; ++alpha) { - for (int beta = 0; beta < n; ++beta) { - for (int gamma = 0; gamma < n; ++gamma) { - value += Q(a, alpha) * Q(b, beta) * Q(c, gamma) * tensor_hat[size_t(alpha)](beta, gamma); - } - } - } - 
tensor[size_t(a)](b, c) = value; - } - } - } - - return tensor; -} - -mfem::Vector combineDirections(const std::vector& states, const mfem::Vector& coeffs) -{ - mfem::Vector out(*states[0]); - out = 0.0; - for (int i = 0; i < coeffs.Size(); ++i) { - out.Add(coeffs[i], *states[size_t(i)]); - } - return out; -} - -void verifyCubicInputs(const mfem::DenseMatrix& A, const mfem::Vector& b, const std::vector& cubic, - double delta) -{ - MFEM_VERIFY(A.Height() == A.Width(), "Dense cubic trust-region matrix must be square."); - MFEM_VERIFY(A.Height() == b.Size(), "Dense cubic trust-region linear term has incompatible size."); - MFEM_VERIFY(delta >= 0.0, "Dense cubic trust-region radius must be nonnegative."); - MFEM_VERIFY(static_cast(cubic.size()) == b.Size(), "Dense cubic tensor must have one matrix per dimension."); - for (const auto& matrix : cubic) { - MFEM_VERIFY(matrix.Height() == b.Size() && matrix.Width() == b.Size(), - "Dense cubic tensor matrix has incompatible size."); - } -} - -double cubicEnergy(const mfem::DenseMatrix& A, const mfem::Vector& b, const std::vector& cubic, - const mfem::Vector& x) -{ - mfem::Vector Ax(x.Size()); - A.Mult(x, Ax); - double energy = 0.5 * dot(x, Ax) - dot(x, b); - for (int k = 0; k < x.Size(); ++k) { - cubic[size_t(k)].Mult(x, Ax); - energy += (x[k] * dot(x, Ax)) / 6.0; - } - return energy; -} - -mfem::Vector cubicGradient(const mfem::DenseMatrix& A, const mfem::Vector& b, - const std::vector& cubic, const mfem::Vector& x) -{ - mfem::Vector grad(x.Size()); - A.Mult(x, grad); - grad -= b; - - mfem::Vector tmp(x.Size()); - for (int i = 0; i < x.Size(); ++i) { - double correction = 0.0; - cubic[size_t(i)].Mult(x, tmp); - correction += dot(x, tmp); - for (int k = 0; k < x.Size(); ++k) { - for (int j = 0; j < x.Size(); ++j) { - correction += x[k] * (cubic[size_t(k)](i, j) + cubic[size_t(k)](j, i)) * x[j]; - } - } - grad[i] += correction / 6.0; - } - - return grad; -} - -void projectToBall(mfem::Vector& x, double delta) -{ - const 
double norm = x.Norml2(); - if (norm > delta && norm > 0.0) { - x *= delta / norm; - } -} - -mfem::Vector solveQuadraticCandidate(mfem::DenseMatrix A, const mfem::Vector& b, double delta) -{ - const int n = b.Size(); - mfem::DenseMatrix shifted(A); - double trace = 0.0; - for (int i = 0; i < n; ++i) { - trace += std::abs(A(i, i)); - } - const double regularization = std::max(1.0e-14, 1.0e-12 * trace / std::max(n, 1)); - for (int i = 0; i < n; ++i) { - shifted(i, i) += regularization; - } - - mfem::DenseMatrixInverse inv(shifted); - mfem::Vector x(n); - inv.Mult(b, x); - projectToBall(x, delta); - return x; -} - -mfem::Vector projectedGradientSolve(const mfem::DenseMatrix& A, const mfem::Vector& b, - const std::vector& cubic, mfem::Vector x, double delta) -{ - double energy = cubicEnergy(A, b, cubic, x); - constexpr int max_iters = 200; - constexpr double grad_tol = 1.0e-11; - - for (int iter = 0; iter < max_iters; ++iter) { - mfem::Vector grad = cubicGradient(A, b, cubic, x); - if (grad.Norml2() <= grad_tol * std::max(1.0, b.Norml2())) { - break; - } - - double step = 0.25; - bool accepted = false; - for (int ls = 0; ls < 30; ++ls) { - mfem::Vector trial(x); - trial.Add(-step, grad); - projectToBall(trial, delta); - const double trial_energy = cubicEnergy(A, b, cubic, trial); - if (trial_energy < energy - 1.0e-14) { - x = trial; - energy = trial_energy; - accepted = true; - break; - } - step *= 0.5; - } - if (!accepted) { - break; - } - } - - return x; -} - -} // namespace - -DenseCubicTrustRegionResult solveDenseCubicTrustRegionProblemMfem(const mfem::DenseMatrix& A, const mfem::Vector& b, - const std::vector& cubic, - double delta) -{ - SMITH_MARK_FUNCTION; - verifyCubicInputs(A, b, cubic, delta); - - mfem::Vector best(b.Size()); - best = 0.0; - double best_energy = cubicEnergy(A, b, cubic, best); - if (delta == 0.0 || b.Size() == 0) { - return std::make_tuple(best, best_energy); - } - - std::vector starts; - starts.emplace_back(best); - 
starts.emplace_back(solveQuadraticCandidate(A, b, delta)); - - mfem::Vector direction(b); - if (direction.Norml2() > 0.0) { - direction *= delta / direction.Norml2(); - starts.emplace_back(direction); - direction *= -1.0; - starts.emplace_back(direction); - } - - for (int i = 0; i < b.Size(); ++i) { - mfem::Vector axis(b.Size()); - axis = 0.0; - axis[i] = delta; - starts.emplace_back(axis); - axis[i] = -delta; - starts.emplace_back(axis); - } - - for (const auto& start : starts) { - mfem::Vector candidate = projectedGradientSolve(A, b, cubic, start, delta); - const double energy = cubicEnergy(A, b, cubic, candidate); - if (energy < best_energy) { - best = candidate; - best_energy = energy; - } - } - - return std::make_tuple(best, best_energy); -} - -TrustRegionSubspaceResult solveCubicSubspaceProblemMfem( - const std::vector& directions, const std::vector& A_directions, - const std::vector& previous_A_directions, const mfem::Vector& previous_step, - const mfem::Vector& b, double delta, int num_leftmost, bool* used_cubic) -{ - SMITH_MARK_FUNCTION; - MFEM_VERIFY(directions.size() == A_directions.size(), "Cubic subspace directions and A_directions differ."); - MFEM_VERIFY(directions.size() == previous_A_directions.size(), - "Cubic subspace directions and previous_A_directions differ."); - MFEM_VERIFY(!directions.empty(), "Cubic subspace solve requires at least one direction."); - - mfem::DenseMatrix ss = denseDot(directions, directions); - symmetrize(ss); - mfem::DenseMatrix T = orthonormalBasisTransform(ss); - MFEM_VERIFY(T.Width() > 0, "No independent directions in cubic MFEM subspace solve."); - - mfem::DenseMatrix sAs = denseDot(directions, A_directions); - symmetrize(sAs); - mfem::DenseMatrix pAp = tripleProduct(T, sAs, T); - symmetrize(pAp); - - mfem::DenseMatrix sDeltaA = denseDot(directions, previous_A_directions); - sDeltaA *= -1.0; - sDeltaA += sAs; - symmetrize(sDeltaA); - mfem::DenseMatrix pDeltaAp = tripleProduct(T, sDeltaA, T); - symmetrize(pDeltaAp); - 
- mfem::Vector previous_coeffs = denseDot(directions, previous_step); - previous_coeffs = projectWithTranspose(T, previous_coeffs); - const double previous_norm_squared = dot(previous_coeffs, previous_coeffs); - - std::vector cubic(size_t(T.Width()), mfem::DenseMatrix(T.Width())); - for (auto& matrix : cubic) { - matrix = 0.0; - } - if (previous_norm_squared > 0.0) { - cubic = completeSymmetricCubicTensor(pDeltaAp, previous_coeffs); - } - - const mfem::Vector sb = denseDot(directions, b); - const mfem::Vector pb = projectWithTranspose(T, sb); - auto [reduced_x, energy] = solveDenseCubicTrustRegionProblemMfem(pAp, pb, cubic, delta); - - mfem::Vector coeffs(T.Height()); - T.Mult(reduced_x, coeffs); - mfem::Vector sol = combineDirections(directions, coeffs); - - auto [quadratic_sol, leftmosts, leftvals, quadratic_energy] = - solveSubspaceProblemMfem(directions, A_directions, b, delta, num_leftmost); - (void)quadratic_energy; - - const mfem::Vector quadratic_s_coeffs = denseDot(directions, quadratic_sol); - const mfem::Vector quadratic_reduced_x = projectWithTranspose(T, quadratic_s_coeffs); - const double quadratic_cubic_energy = cubicEnergy(pAp, pb, cubic, quadratic_reduced_x); - if (quadratic_cubic_energy <= energy) { - if (used_cubic != nullptr) { - *used_cubic = false; - } - return std::make_tuple(quadratic_sol, leftmosts, leftvals, quadratic_cubic_energy); - } - - if (used_cubic != nullptr) { - *used_cubic = true; - } - return std::make_tuple(sol, leftmosts, leftvals, energy); -} - -#else - -DenseCubicTrustRegionResult solveDenseCubicTrustRegionProblemMfem(const mfem::DenseMatrix&, const mfem::Vector& b, - const std::vector&, double) -{ - throw PetscException("MFEM dense cubic trust-region solve requires MFEM LAPACK support."); - return std::make_tuple(b, 0.0); -} - -TrustRegionSubspaceResult solveCubicSubspaceProblemMfem(const std::vector&, - const std::vector&, - const std::vector&, - const mfem::Vector&, const mfem::Vector& b, double, int, bool*) -{ - throw 
PetscException("MFEM dense cubic trust-region solve requires MFEM LAPACK support."); - return std::make_tuple(b, std::vector> {}, std::vector {}, 0.0); -} - -#endif // MFEM_USE_LAPACK - -} // namespace smith diff --git a/src/smith/numerics/trust_region_solver.hpp b/src/smith/numerics/trust_region_solver.hpp index f076520f0e..960024b33d 100644 --- a/src/smith/numerics/trust_region_solver.hpp +++ b/src/smith/numerics/trust_region_solver.hpp @@ -66,8 +66,6 @@ void resetTrustRegionSubspaceTimings(); TrustRegionSubspaceTimings trustRegionSubspaceTimings(); -using DenseCubicTrustRegionResult = std::tuple; - /// @brief computes the global size of mfem::Vector int globalSize(const mfem::Vector& parallel_v, const MPI_Comm& comm); @@ -80,7 +78,7 @@ TrustRegionSubspaceResult solveSubspaceProblem( const std::vector& directions, const std::vector& A_directions, const mfem::Vector& b, double delta, int num_leftmost); -#ifdef SMITH_USE_SLEPC +#if defined(SMITH_USE_SLEPC) && defined(SMITH_TRUST_REGION_USE_PETSC_SUBSPACE) TrustRegionSubspaceResult solveSubspaceProblemPetsc( const std::vector& directions, const std::vector& A_directions, const mfem::Vector& b, double delta, int num_leftmost); @@ -90,22 +88,7 @@ TrustRegionSubspaceResult solveSubspaceProblemMfem( const std::vector& directions, const std::vector& A_directions, const mfem::Vector& b, double delta, int num_leftmost); -/// @brief solves a small dense cubic trust-region model -/// 1/2 x^T A x - b^T x + 1/6 sum_k x_k x^T cubic[k] x, ||x|| <= delta. 
-DenseCubicTrustRegionResult solveDenseCubicTrustRegionProblemMfem( - const mfem::DenseMatrix& A, const mfem::Vector& b, const std::vector& cubic, double delta); - -TrustRegionSubspaceResult solveCubicSubspaceProblemMfem( - const std::vector& directions, const std::vector& A_directions, - const std::vector& previous_A_directions, const mfem::Vector& previous_step, - const mfem::Vector& b, double delta, int num_leftmost, bool* used_cubic = nullptr); - std::pair, std::vector> removeDependentDirections( std::vector directions, std::vector A_directions); -std::tuple, std::vector, std::vector> -removeDependentDirectionTriples(std::vector directions, - std::vector A_directions, - std::vector previous_A_directions); - } // namespace smith diff --git a/src/smith/physics/dfem_weak_form.hpp b/src/smith/physics/dfem_weak_form.hpp index f55598039c..83a55d6ddd 100644 --- a/src/smith/physics/dfem_weak_form.hpp +++ b/src/smith/physics/dfem_weak_form.hpp @@ -213,18 +213,6 @@ class DfemWeakForm : public WeakForm { return std::make_unique(); } - /// @overload - std::unique_ptr jacobianOperator( - TimeInfo time_info, ConstFieldPtr /*shape_disp*/, const std::vector& /*fields*/, - size_t /*input_col*/, const std::vector& /*quad_fields*/ = {}) const override - { - SLIC_ERROR_ROOT("DfemWeakForm does not support JacobianOperator construction"); - dt_ = time_info.dt(); - cycle_ = time_info.cycle(); - - return nullptr; - } - /// @overload void jvp(TimeInfo time_info, ConstFieldPtr /*shape_disp*/, const std::vector& /*fields*/, const std::vector& /*quad_fields*/, ConstFieldPtr /*v_shape_disp*/, diff --git a/src/smith/physics/functional_weak_form.hpp b/src/smith/physics/functional_weak_form.hpp index 8e99e71afc..5852a0388a 100644 --- a/src/smith/physics/functional_weak_form.hpp +++ b/src/smith/physics/functional_weak_form.hpp @@ -332,26 +332,6 @@ class FunctionalWeakForm, return J; } - /// @overload - std::unique_ptr jacobianOperator( - TimeInfo time_info, ConstFieldPtr shape_disp, const 
std::vector& fields, size_t input_col, - [[maybe_unused]] const std::vector& quad_fields = {}) const override - { - SLIC_ERROR_IF(input_col >= fields.size(), "Invalid JacobianOperator input column."); - - dt_ = time_info.dt(); - cycle_ = time_info.cycle(); - - auto jacs = jacobianFunctions(std::make_integer_sequence{}, time_info.time(), - shape_disp, fields); - auto K = smith::get(jacs[input_col](time_info.time(), shape_disp, fields)); - - SLIC_ERROR_IF(K.Height() != K.Width(), - "WeakForm::jacobianOperator currently supports square one-field derivatives only."); - - return std::make_unique>(std::move(K)); - } - /// @overload void jvp(TimeInfo time_info, ConstFieldPtr shape_disp, const std::vector& fields, [[maybe_unused]] const std::vector& quad_fields, diff --git a/src/smith/physics/solid_mechanics.hpp b/src/smith/physics/solid_mechanics.hpp index 504538d4e6..a2d66cf8d5 100644 --- a/src/smith/physics/solid_mechanics.hpp +++ b/src/smith/physics/solid_mechanics.hpp @@ -56,17 +56,6 @@ namespace smith { -struct SolidMechanicsJacobianTimings { - size_t legacy_jacobian_evals = 0; - size_t jacobian_operator_evals = 0; - size_t jacobian_operator_assemblies = 0; - double legacy_derivative_seconds = 0.0; - double legacy_sparse_assembly_seconds = 0.0; - double legacy_essential_elimination_seconds = 0.0; - double jacobian_operator_derivative_seconds = 0.0; - double jacobian_operator_sparse_assembly_seconds = 0.0; - double jacobian_operator_essential_elimination_seconds = 0.0; -}; namespace solid_mechanics { namespace detail { @@ -1065,126 +1054,16 @@ class SolidMechanics, std::integer_se // gradient of residual function [this](const mfem::Vector& u) -> mfem::Operator& { SMITH_MARK_FUNCTION; - using Clock = std::chrono::steady_clock; - auto seconds_since = [](Clock::time_point start) { - return std::chrono::duration_cast>(Clock::now() - start).count(); - }; - auto derivative_start = Clock::now(); auto [r, drdu] = (*residual_)(time_, shapeDisplacement(), differentiate_wrt(u), 
acceleration_, *parameters_[parameter_indices].state...); - jacobian_timings_.legacy_derivative_seconds += seconds_since(derivative_start); - ++jacobian_timings_.legacy_jacobian_evals; J_.reset(); - auto assembly_start = Clock::now(); J_ = assemble(drdu); - jacobian_timings_.legacy_sparse_assembly_seconds += seconds_since(assembly_start); J_e_.reset(); - auto elimination_start = Clock::now(); J_e_ = bcs_.eliminateAllEssentialDofsFromMatrix(*J_); - jacobian_timings_.legacy_essential_elimination_seconds += seconds_since(elimination_start); return *J_; }); } - /// @brief Matrix-free action of the quasistatic tangent with essential boundary conditions applied. - void quasistaticTangentAction(const mfem::Vector& u, const mfem::Vector& du, mfem::Vector& dr) const - { - SMITH_MARK_FUNCTION; - - mfem::Vector du_interior(du); - du_interior.SetSubVector(bcs_.allEssentialTrueDofs(), 0.0); - - auto [r, drdu] = (*residual_)(time_, shapeDisplacement(), differentiate_wrt(u), acceleration_, - *parameters_[parameter_indices].state...); - drdu.Mult(du_interior, dr); - - const auto& constrained_dofs = bcs_.allEssentialTrueDofs(); - for (int i = 0; i < constrained_dofs.Size(); ++i) { - const int dof = constrained_dofs[i]; - dr[dof] = du[dof]; - } - } - - /// @brief Build a quasistatic JacobianOperator with essential boundary conditions applied. 
- std::unique_ptr quasistaticJacobianOperator(const mfem::Vector& u) const - { - SMITH_MARK_FUNCTION; - - using Clock = std::chrono::steady_clock; - auto seconds_since = [](Clock::time_point start) { - return std::chrono::duration_cast>(Clock::now() - start).count(); - }; - auto derivative_start = Clock::now(); - auto [r, drdu] = (*residual_)(time_, shapeDisplacement(), differentiate_wrt(u), acceleration_, - *parameters_[parameter_indices].state...); - jacobian_timings_.jacobian_operator_derivative_seconds += seconds_since(derivative_start); - ++jacobian_timings_.jacobian_operator_evals; - - using GradientT = std::remove_reference_t; - - class QuasistaticJacobianOperator : public JacobianOperator { - public: - QuasistaticJacobianOperator( - const GradientT& gradient, const mfem::Array& constrained_dofs, - std::function(mfem::HypreParMatrix&)> eliminate_essential_dofs, - SolidMechanicsJacobianTimings& timings) - : JacobianOperator(gradient.Height(), gradient.Width()), - gradient_(gradient), - constrained_dofs_(constrained_dofs), - eliminate_essential_dofs_(std::move(eliminate_essential_dofs)), - timings_(timings) - { - } - - void Mult(const mfem::Vector& du, mfem::Vector& dr) const override - { - mfem::Vector du_interior(du); - du_interior.SetSubVector(constrained_dofs_, 0.0); - - gradient_.Mult(du_interior, dr); - for (int i = 0; i < constrained_dofs_.Size(); ++i) { - const int dof = constrained_dofs_[i]; - dr[dof] = du[dof]; - } - } - - std::unique_ptr assemble() override - { - using AssemblyClock = std::chrono::steady_clock; - auto seconds_since = [](AssemblyClock::time_point start) { - return std::chrono::duration_cast>(AssemblyClock::now() - start).count(); - }; - auto assembly_start = AssemblyClock::now(); - std::unique_ptr matrix = gradient_.assemble(); - timings_.jacobian_operator_sparse_assembly_seconds += seconds_since(assembly_start); - auto elimination_start = AssemblyClock::now(); - eliminate_essential_dofs_(*matrix); - 
timings_.jacobian_operator_essential_elimination_seconds += seconds_since(elimination_start); - ++timings_.jacobian_operator_assemblies; - return matrix; - } - - void assembleDiagonal(mfem::Vector& diag) const override - { - gradient_.assembleDiagonal(diag); - for (int i = 0; i < constrained_dofs_.Size(); ++i) { - diag[constrained_dofs_[i]] = 1.0; - } - } - - private: - GradientT gradient_; - mfem::Array constrained_dofs_; - std::function(mfem::HypreParMatrix&)> eliminate_essential_dofs_; - SolidMechanicsJacobianTimings& timings_; - }; - - return std::make_unique( - drdu, bcs_.allEssentialTrueDofs(), - [this](mfem::HypreParMatrix& matrix) { return bcs_.eliminateAllEssentialDofsFromMatrix(matrix); }, - jacobian_timings_); - } - /** * @brief Return the assembled stiffness matrix * @@ -1263,11 +1142,6 @@ class SolidMechanics, std::integer_se #endif nonlin_solver_->setOperator(*residual_with_bcs_); - if (is_quasistatic_) { - nonlin_solver_->setMatrixFreeTangentAction([this](const mfem::Vector& u, const mfem::Vector& du, - mfem::Vector& dr) { quasistaticTangentAction(u, du, dr); }); - nonlin_solver_->setJacobianOperator([this](const mfem::Vector& u) { return quasistaticJacobianOperator(u); }); - } if (checkpoint_to_disk_) { outputStateToDisk(); @@ -1512,18 +1386,6 @@ class SolidMechanics, std::integer_se /// @brief getter for nodal forces (before zeroing-out essential dofs) const smith::FiniteElementDual& reactions() const { return reactions_; }; - /// @brief Get the equation solver used by this physics module - smith::EquationSolver& equationSolver() { return *nonlin_solver_; } - - /// @overload - const smith::EquationSolver& equationSolver() const { return *nonlin_solver_; } - - /// @brief Return accumulated Jacobian construction timings for this physics object. - const SolidMechanicsJacobianTimings& jacobianTimings() const { return jacobian_timings_; } - - /// @brief Reset accumulated Jacobian construction timings for this physics object. 
- void resetJacobianTimings() const { jacobian_timings_ = {}; } - protected: /// The compile-time finite element trial space for displacement and velocity (H1 of order p) using trial = H1; @@ -1592,9 +1454,6 @@ class SolidMechanics, std::integer_se /// because are associated with essential boundary conditions std::unique_ptr J_e_; - /// Accumulated timing diagnostics for quasistatic Jacobian construction paths. - mutable SolidMechanicsJacobianTimings jacobian_timings_; - /// an intermediate variable used to store the predicted end-step displacement mfem::Vector predicted_displacement_; diff --git a/src/smith/physics/tests/shallow_arch_buckling.cpp b/src/smith/physics/tests/shallow_arch_buckling.cpp index a94a61bb63..4299c14874 100644 --- a/src/smith/physics/tests/shallow_arch_buckling.cpp +++ b/src/smith/physics/tests/shallow_arch_buckling.cpp @@ -31,16 +31,10 @@ constexpr double end_tol = 1.0e-8; constexpr double top_tol = 1.0e-8; std::string solver_name = "TrustRegion"; int print_level = 2; -int pcg_block_len = 10; -double pcg_powell_eta = 0.005; int nonlinear_max_iterations = 300000; -bool pcg_diagonal_preconditioner = false; int trust_subspace_option = static_cast(SubSpaceOptions::NEVER); int trust_num_leftmost = 1; int trust_num_past_steps = 0; -int trust_nonmonotone_window = 0; -bool trust_use_jacobian_operator = false; -bool trust_use_cubic_subspace = false; bool trust_use_solve_start_direction = false; bool trust_use_min_residual_direction = false; @@ -52,12 +46,9 @@ NonlinearSolver selectedNonlinearSolver() if (solver_name == "TrustRegion") { return NonlinearSolver::TrustRegion; } - if (solver_name == "PcgBlock") { - return NonlinearSolver::PcgBlock; - } throw std::runtime_error("Unknown --solver value '" + solver_name + - "'. Use NewtonLineSearch, TrustRegion, or PcgBlock."); + "'. 
Use NewtonLineSearch or TrustRegion."); } void parseCommandLine(int& argc, char** argv) @@ -69,29 +60,14 @@ void parseCommandLine(int& argc, char** argv) solver_name = arg.substr(std::string("--solver=").size()); } else if (arg.rfind("--print-level=", 0) == 0) { print_level = std::stoi(arg.substr(std::string("--print-level=").size())); - } else if (arg.rfind("--pcg-block-len=", 0) == 0) { - pcg_block_len = std::stoi(arg.substr(std::string("--pcg-block-len=").size())); - } else if (arg.rfind("--pcg-powell-eta=", 0) == 0) { - pcg_powell_eta = std::stod(arg.substr(std::string("--pcg-powell-eta=").size())); } else if (arg.rfind("--nonlinear-max-iterations=", 0) == 0) { nonlinear_max_iterations = std::stoi(arg.substr(std::string("--nonlinear-max-iterations=").size())); - } else if (arg.rfind("--pcg-diagonal-preconditioner=", 0) == 0) { - const std::string value = arg.substr(std::string("--pcg-diagonal-preconditioner=").size()); - pcg_diagonal_preconditioner = (value == "1" || value == "true" || value == "on"); } else if (arg.rfind("--trust-subspace-option=", 0) == 0) { trust_subspace_option = std::stoi(arg.substr(std::string("--trust-subspace-option=").size())); } else if (arg.rfind("--trust-num-leftmost=", 0) == 0) { trust_num_leftmost = std::stoi(arg.substr(std::string("--trust-num-leftmost=").size())); } else if (arg.rfind("--trust-num-past-steps=", 0) == 0) { trust_num_past_steps = std::stoi(arg.substr(std::string("--trust-num-past-steps=").size())); - } else if (arg.rfind("--trust-nonmonotone-window=", 0) == 0) { - trust_nonmonotone_window = std::stoi(arg.substr(std::string("--trust-nonmonotone-window=").size())); - } else if (arg.rfind("--trust-use-jacobian-operator=", 0) == 0) { - const std::string value = arg.substr(std::string("--trust-use-jacobian-operator=").size()); - trust_use_jacobian_operator = (value == "1" || value == "true" || value == "on"); - } else if (arg.rfind("--trust-use-cubic-subspace=", 0) == 0) { - const std::string value = 
arg.substr(std::string("--trust-use-cubic-subspace=").size()); - trust_use_cubic_subspace = (value == "1" || value == "true" || value == "on"); } else if (arg.rfind("--trust-use-solve-start-direction=", 0) == 0) { const std::string value = arg.substr(std::string("--trust-use-solve-start-direction=").size()); trust_use_solve_start_direction = (value == "1" || value == "true" || value == "on"); @@ -154,18 +130,11 @@ TEST(ShallowArchBuckling, CompressedThinBeamSnapThrough) .absolute_tol = 1.0e-10, .max_iterations = nonlinear_max_iterations, .print_level = print_level, - .trust_nonmonotone_window = trust_nonmonotone_window, - .trust_use_jacobian_operator = trust_use_jacobian_operator, - .trust_use_cubic_subspace = trust_use_cubic_subspace, .subspace_option = static_cast(trust_subspace_option), .num_leftmost = trust_num_leftmost, .trust_num_past_steps = trust_num_past_steps, .trust_use_solve_start_direction = trust_use_solve_start_direction, - .trust_use_min_residual_direction = trust_use_min_residual_direction, - .pcg_block_len = pcg_block_len, - .pcg_powell_eta = pcg_powell_eta, - .pcg_max_block_retries = 40, - .pcg_use_jacobian_diagonal_preconditioner = pcg_diagonal_preconditioner}; + .trust_use_min_residual_direction = trust_use_min_residual_direction}; SolidMechanics solid(nonlinear_options, linear_options, solid_mechanics::default_quasistatic_options, "compressed_beam", mesh); @@ -197,164 +166,16 @@ TEST(ShallowArchBuckling, CompressedThinBeamSnapThrough) mfem::out << "Compressed thin beam snap-through run: solver = " << solver_name << ", trust_subspace_option = " << trust_subspace_option << ", trust_num_leftmost = " << trust_num_leftmost - << ", trust_num_past_steps = " << trust_num_past_steps - << ", trust_nonmonotone_window = " << trust_nonmonotone_window - << ", trust_use_jacobian_operator = " << trust_use_jacobian_operator - << ", trust_use_cubic_subspace = " << trust_use_cubic_subspace - << ", pcg_diagonal_preconditioner = " << pcg_diagonal_preconditioner << 
'\n'; + << ", trust_num_past_steps = " << trust_num_past_steps << '\n'; } constexpr int num_steps = 5; - int num_converged_steps = 0; for (int step = 0; step < num_steps; ++step) { - solid.resetJacobianTimings(); solid.advanceTimestep(1.0 / num_steps); - const auto& nonlinear_solver = solid.equationSolver().nonlinearSolver(); - if (nonlinear_solver.GetConverged()) { - ++num_converged_steps; - } if (rank == 0) { - mfem::out << "Load step " << step + 1 << "/" << num_steps - << ": converged = " << nonlinear_solver.GetConverged() - << ", nonlinear iterations = " << nonlinear_solver.GetNumIterations() - << ", final relative residual = " << nonlinear_solver.GetFinalRelNorm() << '\n'; + mfem::out << "Load step " << step + 1 << "/" << num_steps << '\n'; } solid.outputStateToDisk("shallow_arch_buckling"); - if (rank == 0 && print_level >= 1) { - if (const auto diagnostics = solid.equationSolver().pcgBlockDiagnostics()) { - mfem::out << " PCG diagnostics: residuals = " << diagnostics->num_residuals - << ", hess-vecs = " << diagnostics->num_hess_vecs - << ", preconditioner applications = " << diagnostics->num_preconds - << ", Jacobian assemblies = " << diagnostics->num_jacobian_assembles - << ", Jacobian operator evals = " << diagnostics->num_jacobian_operator_evals - << ", diagonal assemblies = " << diagnostics->num_diagonal_assembles - << ", preconditioner updates = " << diagnostics->num_preconditioner_updates - << ", accepted blocks = " << diagnostics->num_blocks - << ", accepted steps = " << diagnostics->num_accepted_steps - << ", block rejects = " << diagnostics->num_block_rejects - << ", prefix accepts = " << diagnostics->num_prefix_accepts - << ", momentum resets = " << diagnostics->num_momentum_resets - << ", nonzero beta = " << diagnostics->num_nonzero_beta - << ", zero beta = " << diagnostics->num_zero_beta - << ", Powell restarts = " << diagnostics->num_powell_restarts - << ", descent restarts = " << diagnostics->num_descent_restarts - << ", negative curvature = " 
<< diagnostics->num_negative_curvature - << ", trust capped steps = " << diagnostics->num_trust_capped_steps - << ", line-search backtracks = " << diagnostics->num_line_search_backtracks - << ", final h_scale = " << diagnostics->final_h_scale - << ", last trust ratio = " << diagnostics->last_trust_ratio << '\n'; - mfem::out << " PCG timings: residual = " << diagnostics->residual_seconds - << ", hess-vec = " << diagnostics->hess_vec_seconds - << ", operator hess-vec = " << diagnostics->jacobian_operator_hess_vec_seconds - << ", assembled hess-vec = " << diagnostics->assembled_hess_vec_seconds - << ", matrix-free hess-vec = " << diagnostics->matrix_free_hess_vec_seconds - << ", preconditioner = " << diagnostics->preconditioner_seconds - << ", Jacobian operator eval = " << diagnostics->jacobian_operator_eval_seconds - << ", Jacobian assembly = " << diagnostics->jacobian_assembly_seconds - << ", diagonal assembly = " << diagnostics->diagonal_assembly_seconds - << ", diagonal invert = " << diagnostics->diagonal_invert_seconds - << ", preconditioner update = " << diagnostics->preconditioner_update_seconds - << ", preconditioner setup = " << diagnostics->preconditioner_setup_seconds << '\n'; - } - if (const auto diagnostics = solid.equationSolver().trustRegionDiagnostics()) { - const double operator_timed_seconds = - diagnostics->residual_seconds + diagnostics->hess_vec_seconds + diagnostics->preconditioner_seconds + - diagnostics->jacobian_operator_eval_seconds + diagnostics->diagonal_assembly_seconds + - diagnostics->diagonal_invert_seconds + diagnostics->jacobian_assembly_seconds + - diagnostics->preconditioner_update_seconds; - const double assembled_hess_vec_seconds = - diagnostics->hess_vec_seconds - diagnostics->jacobian_operator_hess_vec_seconds; - mfem::out << " TrustRegion diagnostics: residuals = " << diagnostics->num_residuals - << ", hess-vecs = " << diagnostics->num_hess_vecs - << ", model hess-vecs = " << diagnostics->num_model_hess_vecs - << ", cauchy 
hess-vecs = " << diagnostics->num_cauchy_hess_vecs - << ", line-search hess-vecs = " << diagnostics->num_line_search_hess_vecs - << ", preconditioner applications = " << diagnostics->num_preconds - << ", Jacobian assemblies = " << diagnostics->num_jacobian_assembles - << ", Jacobian operator evals = " << diagnostics->num_jacobian_operator_evals - << ", diagonal assemblies = " << diagnostics->num_diagonal_assembles - << ", CG iterations = " << diagnostics->num_cg_iterations - << ", subspace solves = " << diagnostics->num_subspace_solves - << ", subspace leftmost hess-vecs = " << diagnostics->num_subspace_leftmost_hess_vecs - << ", subspace hess-vec batches = " << diagnostics->num_subspace_hess_vec_batches - << ", subspace batched hess-vecs = " << diagnostics->num_subspace_batched_hess_vecs - << ", subspace past-step vectors = " << diagnostics->num_subspace_past_step_vectors - << ", subspace past-step hess-vecs = " << diagnostics->num_subspace_past_step_hess_vecs - << ", quadratic subspace solves = " << diagnostics->num_quadratic_subspace_solves - << ", cubic subspace attempts = " << diagnostics->num_cubic_subspace_attempts - << ", cubic subspace uses = " << diagnostics->num_cubic_subspace_uses - << ", cubic subspace quadratic fallbacks = " << diagnostics->num_cubic_subspace_quadratic_fallbacks - << ", nonmonotone work accepts = " << diagnostics->num_nonmonotone_work_accepts - << ", monotone work would reject = " << diagnostics->num_monotone_work_would_reject - << ", preconditioner updates = " << diagnostics->num_preconditioner_updates << '\n'; - mfem::out << " TrustRegion timings: total = " << diagnostics->total_seconds - << ", operator-timed = " << operator_timed_seconds << ", residual = " << diagnostics->residual_seconds - << ", hess-vec = " << diagnostics->hess_vec_seconds - << ", model hess-vec = " << diagnostics->model_hess_vec_seconds - << ", cauchy hess-vec = " << diagnostics->cauchy_hess_vec_seconds - << ", line-search hess-vec = " << 
diagnostics->line_search_hess_vec_seconds - << ", operator hess-vec = " << diagnostics->jacobian_operator_hess_vec_seconds - << ", assembled hess-vec = " << assembled_hess_vec_seconds - << ", preconditioner = " << diagnostics->preconditioner_seconds - << ", Jacobian operator eval = " << diagnostics->jacobian_operator_eval_seconds - << ", diagonal assembly = " << diagnostics->diagonal_assembly_seconds - << ", diagonal invert = " << diagnostics->diagonal_invert_seconds - << ", model solve = " << diagnostics->model_solve_seconds - << ", subspace = " << diagnostics->subspace_seconds - << ", subspace leftmost = " << diagnostics->subspace_leftmost_seconds - << ", subspace hess-vec batches = " << diagnostics->subspace_hess_vec_batch_seconds - << ", subspace filter = " << diagnostics->subspace_filter_seconds - << ", subspace backend = " << diagnostics->subspace_backend_seconds - << ", subspace project A = " << diagnostics->subspace_project_A_seconds - << ", subspace project gram = " << diagnostics->subspace_project_gram_seconds - << ", subspace project b = " << diagnostics->subspace_project_b_seconds - << ", subspace basis = " << diagnostics->subspace_basis_seconds - << ", subspace reduced A = " << diagnostics->subspace_reduced_A_seconds - << ", subspace dense eigensystem = " << diagnostics->subspace_dense_eigensystem_seconds - << ", subspace dense trust solve = " << diagnostics->subspace_dense_trust_solve_seconds - << ", subspace reconstruct solution = " << diagnostics->subspace_reconstruct_solution_seconds - << ", subspace reconstruct leftmost = " << diagnostics->subspace_reconstruct_leftmost_seconds - << ", subspace finalize = " << diagnostics->subspace_finalize_seconds - << ", cauchy point = " << diagnostics->cauchy_point_seconds - << ", dogleg = " << diagnostics->dogleg_seconds - << ", line search = " << diagnostics->line_search_seconds << ", dot = " << diagnostics->dot_seconds - << ", dot count = " << diagnostics->num_dot_products - << ", dot reductions = " << 
diagnostics->num_dot_reductions - << ", model dots = " << diagnostics->num_model_dot_products << " / " << diagnostics->model_dot_seconds - << ", cauchy dots = " << diagnostics->num_cauchy_dot_products << " / " - << diagnostics->cauchy_dot_seconds << ", dogleg dots = " << diagnostics->num_dogleg_dot_products - << " / " << diagnostics->dogleg_dot_seconds - << ", line-search dots = " << diagnostics->num_line_search_dot_products << " / " - << diagnostics->line_search_dot_seconds << ", setup dots = " << diagnostics->num_setup_dot_products - << " / " << diagnostics->setup_dot_seconds - << ", vector update = " << diagnostics->vector_update_seconds - << ", vector copy/scale = " << diagnostics->vector_copy_scale_seconds - << ", projection = " << diagnostics->projection_seconds - << ", Jacobian assembly = " << diagnostics->jacobian_assembly_seconds - << ", preconditioner update = " << diagnostics->preconditioner_update_seconds - << ", preconditioner setup = " << diagnostics->preconditioner_setup_seconds - << ", work objective = " << diagnostics->last_work_objective - << ", nonmonotone work reference = " << diagnostics->last_nonmonotone_work_reference << '\n'; - } - const auto& jacobian_timings = solid.jacobianTimings(); - mfem::out << " Solid Jacobian timings: legacy evals = " << jacobian_timings.legacy_jacobian_evals - << ", legacy derivative = " << jacobian_timings.legacy_derivative_seconds - << ", legacy sparse assembly = " << jacobian_timings.legacy_sparse_assembly_seconds - << ", legacy EBC elimination = " << jacobian_timings.legacy_essential_elimination_seconds - << ", operator evals = " << jacobian_timings.jacobian_operator_evals - << ", operator assemblies = " << jacobian_timings.jacobian_operator_assemblies - << ", operator derivative = " << jacobian_timings.jacobian_operator_derivative_seconds - << ", operator sparse assembly = " << jacobian_timings.jacobian_operator_sparse_assembly_seconds - << ", operator EBC elimination = " << 
jacobian_timings.jacobian_operator_essential_elimination_seconds - << '\n'; - } - if (!nonlinear_solver.GetConverged()) { - throw std::runtime_error("Nonlinear solve failed to converge at load step " + std::to_string(step + 1)); - } - } - - if (rank == 0) { - mfem::out << "Converged load steps: " << num_converged_steps << "/" << num_steps << '\n'; } } diff --git a/src/smith/physics/tests/solid.cpp b/src/smith/physics/tests/solid.cpp index 44a68c4240..e48bed601f 100644 --- a/src/smith/physics/tests/solid.cpp +++ b/src/smith/physics/tests/solid.cpp @@ -236,66 +236,6 @@ TEST(SolidMechanics, 2DQuadParameterizedStatic) { functional_parameterized_solid TEST(SolidMechanics, 3DQuadStaticJ2) { functional_solid_test_static_J2(); } -TEST(SolidMechanics, PcgBlockLinearElasticity) -{ - MPI_Barrier(MPI_COMM_WORLD); - - constexpr int p = 1; - constexpr int dim = 2; - constexpr int serial_refinement = 1; - constexpr int parallel_refinement = 0; - - axom::sidre::DataStore datastore; - smith::StateManager::initialize(datastore, "pcg_block_linear_elasticity"); - - std::string filename = SMITH_REPO_DIR "/data/meshes/square.mesh"; - auto mesh = - std::make_shared(buildMeshFromFile(filename), "mesh", serial_refinement, parallel_refinement); - mesh->addDomainOfBoundaryElements("fixed", by_attr(1)); - - smith::LinearSolverOptions linear_options{.linear_solver = LinearSolver::CG, - .preconditioner = Preconditioner::HypreL1Jacobi, - .relative_tol = 1.0e-14, - .absolute_tol = 1.0e-16, - .max_iterations = 500, - .print_level = 0}; - - smith::NonlinearSolverOptions nonlinear_options{.nonlin_solver = NonlinearSolver::PcgBlock, - .relative_tol = 1.0e-12, - .absolute_tol = 1.0e-14, - .max_iterations = 200, - .print_level = 0, - .pcg_block_len = 10}; - - SolidMechanics solid(nonlinear_options, linear_options, solid_mechanics::default_quasistatic_options, - "pcg_block_solid", mesh); - - solid_mechanics::LinearIsotropic mat{.density = 1.0, .K = 0.5, .G = 1.0}; - solid.setMaterial(mat, 
mesh->entireBody()); - solid.setFixedBCs(mesh->domain("fixed")); - - tensor constant_force{}; - constant_force[0] = 0.1; - constant_force[1] = -0.05; - solid_mechanics::ConstantBodyForce force{constant_force}; - solid.addBodyForce(force, mesh->entireBody()); - - solid.completeSetup(); - solid.advanceTimestep(1.0); - - const auto& nonlinear_solver = solid.equationSolver().nonlinearSolver(); - const auto diagnostics = solid.equationSolver().pcgBlockDiagnostics(); - - ASSERT_TRUE(diagnostics.has_value()); - EXPECT_TRUE(nonlinear_solver.GetConverged()); - EXPECT_LE(nonlinear_solver.GetNumIterations(), solid.displacement().space().GlobalTrueVSize()); - EXPECT_LT(nonlinear_solver.GetFinalRelNorm(), 1.0e-10); - EXPECT_EQ(diagnostics->num_block_rejects, 0u); - EXPECT_EQ(diagnostics->num_powell_restarts, 0u); - EXPECT_EQ(diagnostics->num_negative_curvature, 0u); - EXPECT_EQ(diagnostics->num_line_search_backtracks, 0u); -} - TEST(SolidMechanics, TDofBoundaryCondition) { /* diff --git a/src/smith/physics/tests/solid_statics_patch.cpp b/src/smith/physics/tests/solid_statics_patch.cpp index 2d09ab2cff..9ed9daa247 100644 --- a/src/smith/physics/tests/solid_statics_patch.cpp +++ b/src/smith/physics/tests/solid_statics_patch.cpp @@ -241,78 +241,6 @@ double solution_error(PatchBoundaryCondition bc) return computeL2Error(solid.displacement(), exact_solution_coef); } -template -double pcg_block_solution_error(PatchBoundaryCondition bc) -{ - MPI_Barrier(MPI_COMM_WORLD); - - axom::sidre::DataStore datastore; - smith::StateManager::initialize(datastore, "solid_static_pcg_block_solve"); - - constexpr int p = element_type::order; - constexpr int dim = dimension_of(element_type::geometry); - - static_assert(dim == 2 || dim == 3, "Dimension must be 2 or 3 for solid test"); - - AffineSolution exact_displacement; - - std::string meshdir = std::string(SMITH_REPO_DIR) + "/data/meshes/"; - std::string filename; - switch (element_type::geometry) { - case mfem::Geometry::TRIANGLE: - filename = 
meshdir + "patch2D_tris.mesh"; - break; - case mfem::Geometry::SQUARE: - filename = meshdir + "patch2D_quads.mesh"; - break; - case mfem::Geometry::TETRAHEDRON: - filename = meshdir + "patch3D_tets.mesh"; - break; - case mfem::Geometry::CUBE: - filename = meshdir + "patch3D_hexes.mesh"; - break; - default: - SLIC_ERROR_ROOT("unsupported element type for patch test"); - break; - } - - auto mesh = std::make_shared(buildMeshFromFile(filename), "mesh_tag"); - - smith::NonlinearSolverOptions nonlin_solver_options{.nonlin_solver = NonlinearSolver::PcgBlock, - .relative_tol = 0.0, - .absolute_tol = 5.0e-14, - .max_iterations = 200, - .print_level = 0, - .pcg_block_len = 10, - .pcg_ls_max_backtracks = 8}; - - auto equation_solver = std::make_unique( - nonlin_solver_options, smith::solid_mechanics::default_linear_options, mesh->getComm()); - - SolidMechanics solid(std::move(equation_solver), solid_mechanics::default_quasistatic_options, "solid", mesh); - - solid_mechanics::NeoHookean mat{.density = 1.0, .K = 1.0, .G = 1.0}; - solid.setMaterial(mat, mesh->entireBody()); - - mesh->addDomainOfBoundaryElements("essential_boundary", by_attr(essentialBoundaryAttributes(bc))); - exact_displacement.applyLoads(mat, solid, mesh->domain("essential_boundary")); - - solid.completeSetup(); - solid.advanceTimestep(1.0); - - const auto& nonlinear_solver = solid.equationSolver().nonlinearSolver(); - const auto diagnostics = solid.equationSolver().pcgBlockDiagnostics(); - EXPECT_TRUE(nonlinear_solver.GetConverged()); - EXPECT_LT(nonlinear_solver.GetFinalRelNorm(), 1.0e-10); - EXPECT_TRUE(diagnostics.has_value()); - if (diagnostics.has_value()) { - EXPECT_GT(diagnostics->num_blocks, 0u); - } - - mfem::VectorFunctionCoefficient exact_solution_coef(dim, exact_displacement); - return computeL2Error(solid.displacement(), exact_solution_coef); -} - /** * @brief Solve pressure-driven problem with 10% uniaxial strain and compare numerical solution to exact answer * @@ -536,13 +464,6 @@ 
TEST(SolidMechanics, PatchTest2dQ1EssentialAndNaturalBcs) EXPECT_LT(quad_error, tol); } -TEST(SolidMechanics, PcgBlockPatchTest2dQ1EssentialAndNaturalBcs) -{ - using quadrilateral = finite_element >; - double quad_error = pcg_block_solution_error(PatchBoundaryCondition::EssentialAndNatural); - EXPECT_LT(quad_error, 1.0e-6); -} - TEST(SolidMechanics, PatchTest3dQ1EssentialAndNaturalBcs) { using tetrahedron = finite_element >; diff --git a/src/smith/physics/tests/test_functional_weak_form.cpp b/src/smith/physics/tests/test_functional_weak_form.cpp index 0dc318ab82..61ea04e68d 100644 --- a/src/smith/physics/tests/test_functional_weak_form.cpp +++ b/src/smith/physics/tests/test_functional_weak_form.cpp @@ -247,38 +247,6 @@ TEST_F(WeakFormFixture, JvpConsistency) } } -TEST_F(WeakFormFixture, JacobianOperatorConsistency) -{ - auto input_fields = getConstFieldPointers(states, params); - auto field_tangents = getConstFieldPointers(state_tangents, param_tangents); - - std::vector jacobian_weights(input_fields.size()); - jacobian_weights[DISP] = 1.0; - - auto J = weak_form->jacobian(time_info, shape_disp.get(), input_fields, jacobian_weights); - auto J_op = weak_form->jacobianOperator(time_info, shape_disp.get(), input_fields, DISP); - - smith::FiniteElementDual jvp_slow(states[DISP].space(), "jvp_slow"); - smith::FiniteElementDual jvp_op(states[DISP].space(), "jvp_op"); - J->Mult(*field_tangents[DISP], jvp_slow); - J_op->Mult(*field_tangents[DISP], jvp_op); - EXPECT_NEAR(jvp_slow.Norml2(), jvp_op.Norml2(), 1e-12); - - std::unique_ptr J_op_assembled = J_op->assemble(); - smith::FiniteElementDual jvp_op_assembled(states[DISP].space(), "jvp_op_assembled"); - J_op_assembled->Mult(*field_tangents[DISP], jvp_op_assembled); - EXPECT_NEAR(jvp_slow.Norml2(), jvp_op_assembled.Norml2(), 1e-12); - - mfem::Vector diag_direct(J_op->Height()); - mfem::Vector diag_assembled(J->Height()); - J_op->assembleDiagonal(diag_direct); - J->GetDiag(diag_assembled); - - mfem::Vector 
diag_diff(diag_direct.Size()); - subtract(diag_direct, diag_assembled, diag_diff); - EXPECT_NEAR(0.0, diag_diff.Norml2() / diag_assembled.Norml2(), 1.e-14); -} - int main(int argc, char* argv[]) { ::testing::InitGoogleTest(&argc, argv); diff --git a/src/smith/physics/weak_form.hpp b/src/smith/physics/weak_form.hpp index 1675545ba2..8bd7e48e8d 100644 --- a/src/smith/physics/weak_form.hpp +++ b/src/smith/physics/weak_form.hpp @@ -26,7 +26,6 @@ class HypreParMatrix; namespace smith { -class JacobianOperator; class FiniteElementState; class FiniteElementDual; @@ -71,22 +70,6 @@ class WeakForm { const std::vector& field_argument_tangents, const std::vector& quad_fields = {}) const = 0; - /** @brief Derivative of the residual with respect to one field argument as a solver-facing JacobianOperator. - * - * The returned operator represents one derivative column, d{r}/d{fields}_field_argument_index. The first supported - * use case is the square solved-field derivative used by PCG-block tangent products and diagonal extraction. - * - * @param time_info time and timestep information - * @param shape_disp smith::FiniteElementState*, change in model coordinates relative to the initially read in mesh - * @param fields vector of smith::FiniteElementState* - * @param field_argument_index field argument to differentiate with respect to - * @param quad_fields vector of ConstQuadratureFieldPtr - * @return std::unique_ptr returns d{r}/d{fields}_field_argument_index - */ - virtual std::unique_ptr jacobianOperator( - TimeInfo time_info, ConstFieldPtr shape_disp, const std::vector& fields, - size_t field_argument_index, const std::vector& quad_fields = {}) const = 0; - /** * @brief Jacobian-vector product, will overwrite any existing values in jvp_reactions * @param time_info time and timestep information From db08e49d5d0471c61cebc6b4713cacd81c937e38 Mon Sep 17 00:00:00 2001 From: Michael Tupek Date: Fri, 8 May 2026 18:29:05 -0600 Subject: [PATCH 11/27] Cleanup debug timings. 
--- src/smith/numerics/equation_solver.cpp | 508 ++---------------- src/smith/numerics/equation_solver.hpp | 136 ----- .../numerics/mfem_trust_region_subspace.cpp | 55 -- src/smith/numerics/trust_region_solver.hpp | 21 - 4 files changed, 46 insertions(+), 674 deletions(-) diff --git a/src/smith/numerics/equation_solver.cpp b/src/smith/numerics/equation_solver.cpp index 4db0fc096d..d5712c9b30 100644 --- a/src/smith/numerics/equation_solver.cpp +++ b/src/smith/numerics/equation_solver.cpp @@ -6,7 +6,6 @@ #include "smith/numerics/equation_solver.hpp" -#include #include #include #include @@ -26,17 +25,6 @@ namespace smith { -namespace { - -using Clock = std::chrono::steady_clock; - -double secondsSince(Clock::time_point start) -{ - return std::chrono::duration_cast>(Clock::now() - start).count(); -} - -} // namespace - /// Newton solver with a 2-way line-search. Reverts to regular Newton if max_line_search_iterations is set to 0. class NewtonSolver : public mfem::NewtonSolver { protected: @@ -377,114 +365,6 @@ class TrustRegion : public mfem::NewtonSolver { mutable size_t print_level = 0; public: - /// internal counter for hess-vecs - mutable size_t num_hess_vecs = 0; - /// internal counter for model CG hess-vecs - mutable size_t num_model_hess_vecs = 0; - /// internal counter for Cauchy-point hess-vecs - mutable size_t num_cauchy_hess_vecs = 0; - /// internal counter for line-search hess-vecs - mutable size_t num_line_search_hess_vecs = 0; - /// internal counter for preconditions - mutable size_t num_preconds = 0; - /// internal counter for residuals - mutable size_t num_residuals = 0; - /// internal counter for subspace solves - mutable size_t num_subspace_solves = 0; - /// internal counter for retained-leftmost Hessian-vector products used by subspace solves - mutable size_t num_subspace_leftmost_hess_vecs = 0; - /// internal counter for batched Hessian-vector groups used by subspace solves - mutable size_t num_subspace_hess_vec_batches = 0; - /// internal counter 
for Hessian-vector products inside subspace batches - mutable size_t num_subspace_batched_hess_vecs = 0; - /// internal counter for accepted-step history vectors added to subspace solves - mutable size_t num_subspace_past_step_vectors = 0; - /// internal counter for accepted-step history Hessian-vector products - mutable size_t num_subspace_past_step_hess_vecs = 0; - /// internal counter for nonlinear-solve-start directions added to subspace solves - mutable size_t num_subspace_solve_start_vectors = 0; - /// internal counter for nonlinear-solve-start Hessian-vector products - mutable size_t num_subspace_solve_start_hess_vecs = 0; - /// internal counter for quadratic subspace backend solves - mutable size_t num_quadratic_subspace_solves = 0; - /// internal counter for matrix assembles - mutable size_t num_jacobian_assembles = 0; - /// internal counter for model CG iterations - mutable size_t num_cg_iterations = 0; - /// internal counter for preconditioner operator updates - mutable size_t num_preconditioner_updates = 0; - /// time spent evaluating residuals - mutable double residual_seconds = 0.0; - /// time spent applying Hessian-vector products - mutable double hess_vec_seconds = 0.0; - /// time spent applying model CG Hessian-vector products - mutable double model_hess_vec_seconds = 0.0; - /// time spent applying Cauchy-point Hessian-vector products - mutable double cauchy_hess_vec_seconds = 0.0; - /// time spent applying line-search Hessian-vector products - mutable double line_search_hess_vec_seconds = 0.0; - /// time spent applying preconditioners - mutable double preconditioner_seconds = 0.0; - /// total time spent in the nonlinear solve - mutable double total_seconds = 0.0; - /// time spent solving trust-region model problems - mutable double model_solve_seconds = 0.0; - /// total time spent in trust-region subspace solves - mutable double subspace_seconds = 0.0; - /// time spent building retained leftmost subspace directions - mutable double 
subspace_leftmost_seconds = 0.0; - /// time spent in subspace Hessian-vector batches - mutable double subspace_hess_vec_batch_seconds = 0.0; - /// time spent removing dependent directions for subspace solves - mutable double subspace_filter_seconds = 0.0; - /// time spent in dense subspace backend assembly/solve work - mutable double subspace_backend_seconds = 0.0; - /// time spent in subspace postprocessing and model-energy comparison - mutable double subspace_finalize_seconds = 0.0; - /// time spent building the Cauchy point - mutable double cauchy_point_seconds = 0.0; - /// time spent constructing dogleg steps - mutable double dogleg_seconds = 0.0; - /// time spent in line-search and trust-radius acceptance logic - mutable double line_search_seconds = 0.0; - /// time spent in dot products - mutable double dot_seconds = 0.0; - /// number of dot products - mutable size_t num_dot_products = 0; - /// number of dot product batches/reductions - mutable size_t num_dot_reductions = 0; - /// number of dot products in trust-region model solves - mutable size_t num_model_dot_products = 0; - /// number of dot products in Cauchy-point construction - mutable size_t num_cauchy_dot_products = 0; - /// number of dot products in dogleg construction - mutable size_t num_dogleg_dot_products = 0; - /// number of dot products in line-search and acceptance logic - mutable size_t num_line_search_dot_products = 0; - /// number of setup dot products outside the main per-step kernels - mutable size_t num_setup_dot_products = 0; - /// time spent in trust-region model-solve dot products - mutable double model_dot_seconds = 0.0; - /// time spent in Cauchy-point dot products - mutable double cauchy_dot_seconds = 0.0; - /// time spent in dogleg dot products - mutable double dogleg_dot_seconds = 0.0; - /// time spent in line-search dot products - mutable double line_search_dot_seconds = 0.0; - /// time spent in setup dot products - mutable double setup_dot_seconds = 0.0; - /// time spent in 
vector add/update operations - mutable double vector_update_seconds = 0.0; - /// time spent in vector copies and scaling operations - mutable double vector_copy_scale_seconds = 0.0; - /// time spent in boundary projection operations - mutable double projection_seconds = 0.0; - /// time spent assembling Jacobians - mutable double jacobian_assembly_seconds = 0.0; - /// time spent refreshing preconditioners - mutable double preconditioner_update_seconds = 0.0; - /// time spent in preconditioner SetOperator calls - mutable double preconditioner_setup_seconds = 0.0; #ifdef MFEM_USE_MPI /// constructor TrustRegion(MPI_Comm comm_, const NonlinearSolverOptions& nonlinear_opts, const LinearSolverOptions& linear_opts, @@ -494,32 +374,17 @@ class TrustRegion : public mfem::NewtonSolver { } #endif - /// Timed dot product with global and grouped accounting. - double timedDot(const mfem::Vector& a, const mfem::Vector& b, size_t& group_count, double& group_seconds) const - { - auto start = Clock::now(); - const double value = Dot(a, b); - const double seconds = secondsSince(start); - ++num_dot_products; - ++num_dot_reductions; - ++group_count; - dot_seconds += seconds; - group_seconds += seconds; - return value; - } - - /// Timed pair of dot products with one local vector pass and one MPI reduction when possible. - std::pair timedDot2(const mfem::Vector& a0, const mfem::Vector& b0, const mfem::Vector& a1, - const mfem::Vector& b1, size_t& group_count, double& group_seconds) const + /// Pair of dot products with one local vector pass and one MPI reduction when possible. 
+ std::pair dot2(const mfem::Vector& a0, const mfem::Vector& b0, const mfem::Vector& a1, + const mfem::Vector& b1) const { if (dot_oper) { - return {timedDot(a0, b0, group_count, group_seconds), timedDot(a1, b1, group_count, group_seconds)}; + return {Dot(a0, b0), Dot(a1, b1)}; } MFEM_ASSERT(a0.Size() == b0.Size(), "Incompatible vector sizes."); MFEM_ASSERT(a1.Size() == b1.Size(), "Incompatible vector sizes."); - auto start = Clock::now(); mfem::real_t products[2] = {0.0, 0.0}; if (a0.Size() == a1.Size()) { for (int i = 0; i < a0.Size(); ++i) { @@ -545,12 +410,6 @@ class TrustRegion : public mfem::NewtonSolver { } #endif - const double seconds = secondsSince(start); - num_dot_products += 2; - ++num_dot_reductions; - group_count += 2; - dot_seconds += seconds; - group_seconds += seconds; return {products[0], products[1]}; } @@ -561,16 +420,16 @@ class TrustRegion : public mfem::NewtonSolver { double v3 = 0.0; }; - /// Timed four-dot batch with one local vector pass and one MPI reduction when possible. - Dot4Result timedDot4(const mfem::Vector& a0, const mfem::Vector& b0, const mfem::Vector& a1, const mfem::Vector& b1, - const mfem::Vector& a2, const mfem::Vector& b2, const mfem::Vector& a3, - const mfem::Vector& b3, size_t& group_count, double& group_seconds) const + /// Four-dot batch with one local vector pass and one MPI reduction when possible. 
+ Dot4Result dot4(const mfem::Vector& a0, const mfem::Vector& b0, const mfem::Vector& a1, const mfem::Vector& b1, + const mfem::Vector& a2, const mfem::Vector& b2, const mfem::Vector& a3, + const mfem::Vector& b3) const { if (dot_oper) { - return {.v0 = timedDot(a0, b0, group_count, group_seconds), - .v1 = timedDot(a1, b1, group_count, group_seconds), - .v2 = timedDot(a2, b2, group_count, group_seconds), - .v3 = timedDot(a3, b3, group_count, group_seconds)}; + return {.v0 = Dot(a0, b0), + .v1 = Dot(a1, b1), + .v2 = Dot(a2, b2), + .v3 = Dot(a3, b3)}; } MFEM_ASSERT(a0.Size() == b0.Size(), "Incompatible vector sizes."); @@ -580,7 +439,6 @@ class TrustRegion : public mfem::NewtonSolver { MFEM_ASSERT(a0.Size() == a1.Size() && a0.Size() == a2.Size() && a0.Size() == a3.Size(), "timedDot4 currently requires equal vector sizes."); - auto start = Clock::now(); mfem::real_t products[4] = {0.0, 0.0, 0.0, 0.0}; for (int i = 0; i < a0.Size(); ++i) { products[0] += a0[i] * b0[i]; @@ -600,12 +458,6 @@ class TrustRegion : public mfem::NewtonSolver { } #endif - const double seconds = secondsSince(start); - num_dot_products += 4; - ++num_dot_reductions; - group_count += 4; - dot_seconds += seconds; - group_seconds += seconds; return {.v0 = products[0], .v1 = products[1], .v2 = products[2], .v3 = products[3]}; } @@ -618,40 +470,9 @@ class TrustRegion : public mfem::NewtonSolver { return; } - auto start = Clock::now(); - ++num_subspace_hess_vec_batches; - num_subspace_batched_hess_vecs += inputs.size(); for (size_t i = 0; i < inputs.size(); ++i) { hess_vec_func(*inputs[i], *outputs[i]); } - subspace_hess_vec_batch_seconds += secondsSince(start); - } - - template - void timedModelHessVec(HessVecFunc hess_vec_func, const mfem::Vector& input, mfem::Vector& output) const - { - auto start = Clock::now(); - hess_vec_func(input, output); - model_hess_vec_seconds += secondsSince(start); - ++num_model_hess_vecs; - } - - template - void timedCauchyHessVec(HessVecFunc hess_vec_func, const 
mfem::Vector& input, mfem::Vector& output) const - { - auto start = Clock::now(); - hess_vec_func(input, output); - cauchy_hess_vec_seconds += secondsSince(start); - ++num_cauchy_hess_vecs; - } - - template - void timedLineSearchHessVec(HessVecFunc hess_vec_func, const mfem::Vector& input, mfem::Vector& output) const - { - auto start = Clock::now(); - hess_vec_func(input, output); - line_search_hess_vec_seconds += secondsSince(start); - ++num_line_search_hess_vecs; } void pushAcceptedStepHistory(const mfem::Vector& step) const @@ -672,13 +493,11 @@ class TrustRegion : public mfem::NewtonSolver { void projectToBoundaryWithCoefs(mfem::Vector& z, const mfem::Vector& d, double delta, double zz, double zd, double dd) const { - auto start = Clock::now(); // find z + tau d double deltadelta_m_zz = delta * delta - zz; if (deltadelta_m_zz == 0) return; // already on boundary double tau = (std::sqrt(deltadelta_m_zz * dd + zd * zd) - zd) / dd; z.Add(tau, d); - projection_seconds += secondsSince(start); } /// solve the exact trust-region subspace problem with directions ds, and the leftmosts @@ -691,9 +510,6 @@ class TrustRegion : public mfem::NewtonSolver { [[maybe_unused]] std::vector>& candidate_left_mosts) const { SMITH_MARK_FUNCTION; - auto subspace_start = Clock::now(); - ++num_subspace_solves; - std::vector directions; for (auto& d : ds) { directions.emplace_back(d); @@ -719,20 +535,15 @@ class TrustRegion : public mfem::NewtonSolver { double energy_change; try { - auto backend_start = Clock::now(); - ++num_quadratic_subspace_solves; std::tie(sol, leftvecs, leftvals, energy_change) = solveSubspaceProblem(directions, H_directions, b, delta, num_leftmost); - subspace_backend_seconds += secondsSince(backend_start); } catch (const std::exception& e) { if (print_level >= 1) { mfem::out << "subspace solve failed with " << e.what() << std::endl; } - subspace_seconds += secondsSince(subspace_start); return; } - auto finalize_start = Clock::now(); candidate_left_mosts.clear(); 
for (auto& lv : leftvecs) { candidate_left_mosts.emplace_back(std::move(lv)); @@ -750,55 +561,41 @@ class TrustRegion : public mfem::NewtonSolver { if (subspace_energy < base_energy) { z = sol; } - subspace_finalize_seconds += secondsSince(finalize_start); - subspace_seconds += secondsSince(subspace_start); } /// finds tau s.t. (z + tau*(y-z))^2 = trSize^2 void projectToBoundaryBetweenWithCoefs(mfem::Vector& z, const mfem::Vector& y, double trSize, double zz, double zy, double yy) const { - auto start = Clock::now(); double dd = yy - 2 * zy + zz; double zd = zy - zz; double tau = (std::sqrt((trSize * trSize - zz) * dd + zd * zd) - zd) / dd; z.Add(-tau, z); z.Add(tau, y); - projection_seconds += secondsSince(start); } /// take a dogleg step in direction s, solution norm must be within trSize void doglegStep(const mfem::Vector& cp, const mfem::Vector& newtonP, double trSize, mfem::Vector& s) const { SMITH_MARK_FUNCTION; - auto [cc, nn] = timedDot2(cp, cp, newtonP, newtonP, num_dogleg_dot_products, dogleg_dot_seconds); + auto [cc, nn] = dot2(cp, cp, newtonP, newtonP); double tt = trSize * trSize; - auto update_start = Clock::now(); s = 0.0; - vector_copy_scale_seconds += secondsSince(update_start); if (cc >= tt) { - update_start = Clock::now(); add(s, std::sqrt(tt / cc), cp, s); - vector_update_seconds += secondsSince(update_start); - } else if (cc > nn) { + } else if (cc > nn) { if (print_level >= 2) { mfem::out << "cp outside newton, preconditioner likely inaccurate\n"; } - update_start = Clock::now(); add(s, 1.0, cp, s); - vector_update_seconds += secondsSince(update_start); - } else if (nn > tt) { // on the dogleg (we have nn >= cc, and tt >= cc) - update_start = Clock::now(); + } else if (nn > tt) { // on the dogleg (we have nn >= cc, and tt >= cc) add(s, 1.0, cp, s); - vector_update_seconds += secondsSince(update_start); - double cn = timedDot(cp, newtonP, num_dogleg_dot_products, dogleg_dot_seconds); + double cn = Dot(cp, newtonP); 
projectToBoundaryBetweenWithCoefs(s, newtonP, trSize, cc, cn, nn); } else { - update_start = Clock::now(); s = newtonP; - vector_copy_scale_seconds += secondsSince(update_start); - } + } } /// compute the energy of the linearized system for a given solution vector z @@ -806,11 +603,11 @@ class TrustRegion : public mfem::NewtonSolver { double computeEnergy(const mfem::Vector& r_local, const HessVecFunc& H, const mfem::Vector& z) const { SMITH_MARK_FUNCTION; - double rz = timedDot(r_local, z, num_line_search_dot_products, line_search_dot_seconds); + double rz = Dot(r_local, z); mfem::Vector tmp(r_local); tmp = 0.0; H(z, tmp); - return rz + 0.5 * timedDot(z, tmp, num_line_search_dot_products, line_search_dot_seconds); + return rz + 0.5 * Dot(z, tmp); } /// Minimize quadratic sub-problem given residual vector, the action of the stiffness and a preconditioner @@ -840,36 +637,30 @@ class TrustRegion : public mfem::NewtonSolver { return; } - auto copy_start = Clock::now(); rCurrent = r0; - vector_copy_scale_seconds += secondsSince(copy_start); precond(rCurrent, Pr); // d = -Pr - copy_start = Clock::now(); d = Pr; d *= -1.0; z = 0.0; - vector_copy_scale_seconds += secondsSince(copy_start); double zz = 0.; - double rPr = timedDot(rCurrent, Pr, num_model_dot_products, model_dot_seconds); + double rPr = Dot(rCurrent, Pr); // std::cout << "initial energy = " << computeEnergy(r0, hess_vec_func, z) << std::endl; for (cgIter = 1; cgIter <= settings.max_cg_iterations; ++cgIter) { hess_vec_func(d, Hd); - const auto dots = timedDot4(d, rCurrent, d, Hd, z, d, d, d, num_model_dot_products, model_dot_seconds); + const auto dots = dot4(d, rCurrent, d, Hd, z, d, d, d); double descent_check = dots.v0; double curvature = dots.v1; double zd = dots.v2; double dd = dots.v3; if (descent_check > 0) { - copy_start = Clock::now(); d *= -1; Hd *= -1; - vector_copy_scale_seconds += secondsSince(copy_start); - results.interior_status = TrustRegionResults::Status::NonDescentDirection; + 
results.interior_status = TrustRegionResults::Status::NonDescentDirection; descent_check *= -1.0; curvature *= -1.0; zd *= -1.0; @@ -892,14 +683,10 @@ class TrustRegion : public mfem::NewtonSolver { auto& zPred = Pr; // re-use Pr memory. // This predicted step will no longer be used by the time Pr is, so we can avoid an extra // vector floating around - auto update_start = Clock::now(); - add(z, alphaCg, d, zPred); - vector_update_seconds += secondsSince(update_start); - - copy_start = Clock::now(); + add(z, alphaCg, d, zPred); + z = zPred; - vector_copy_scale_seconds += secondsSince(copy_start); - + if (results.interior_status == TrustRegionResults::Status::NonDescentDirection) { if (print_level >= 2) { mfem::out << "Found a non descent direction\n"; @@ -907,23 +694,19 @@ class TrustRegion : public mfem::NewtonSolver { return; } - update_start = Clock::now(); add(rCurrent, alphaCg, Hd, rCurrent); - vector_update_seconds += secondsSince(update_start); - + precond(rCurrent, Pr); auto [rPrNp1, r_current_norm_squared] = - timedDot2(rCurrent, Pr, rCurrent, rCurrent, num_model_dot_products, model_dot_seconds); + dot2(rCurrent, Pr, rCurrent, rCurrent); if (r_current_norm_squared <= cg_tol_squared && cgIter >= settings.min_cg_iterations) { return; } double beta = rPrNp1 / rPr; rPr = rPrNp1; - update_start = Clock::now(); add(-1.0, Pr, beta, d, d); - vector_update_seconds += secondsSince(update_start); - + zz = zzNp1; } cgIter--; // if all cg iterations are taken, correct for output @@ -947,185 +730,46 @@ class TrustRegion : public mfem::NewtonSolver { void assembleJacobian(const mfem::Vector& x) const { SMITH_MARK_FUNCTION; - auto start = Clock::now(); - ++num_jacobian_assembles; grad = &oper->GetGradient(x); if (nonlinear_options.force_monolithic) { auto* grad_blocked = dynamic_cast(grad); if (grad_blocked) grad = buildMonolithicMatrix(*grad_blocked).release(); } - jacobian_assembly_seconds += secondsSince(start); } /// evaluate the nonlinear residual mfem::real_t 
computeResidual(const mfem::Vector& x_, mfem::Vector& r_) const { SMITH_MARK_FUNCTION; - auto start = Clock::now(); - ++num_residuals; oper->Mult(x_, r_); - const auto norm = Norm(r_); - residual_seconds += secondsSince(start); - return norm; + return Norm(r_); } /// apply the action of the current Jacobian representation to a vector void hessVec(const mfem::Vector& x_, mfem::Vector& v_) const { SMITH_MARK_FUNCTION; - auto start = Clock::now(); - ++num_hess_vecs; grad->Mult(x_, v_); - hess_vec_seconds += secondsSince(start); } /// apply trust region specific preconditioner void precond(const mfem::Vector& x_, mfem::Vector& v_) const { SMITH_MARK_FUNCTION; - auto start = Clock::now(); - ++num_preconds; tr_precond.Mult(x_, v_); - preconditioner_seconds += secondsSince(start); }; - /// Return solver diagnostic counters. - TrustRegionDiagnostics diagnostics() const - { - return {.num_residuals = num_residuals, - .num_hess_vecs = num_hess_vecs, - .num_model_hess_vecs = num_model_hess_vecs, - .num_cauchy_hess_vecs = num_cauchy_hess_vecs, - .num_line_search_hess_vecs = num_line_search_hess_vecs, - .num_preconds = num_preconds, - .num_jacobian_assembles = num_jacobian_assembles, - .num_cg_iterations = num_cg_iterations, - .num_subspace_solves = num_subspace_solves, - .num_subspace_leftmost_hess_vecs = num_subspace_leftmost_hess_vecs, - .num_subspace_hess_vec_batches = num_subspace_hess_vec_batches, - .num_subspace_batched_hess_vecs = num_subspace_batched_hess_vecs, - .num_subspace_past_step_vectors = num_subspace_past_step_vectors, - .num_subspace_past_step_hess_vecs = num_subspace_past_step_hess_vecs, - .num_subspace_solve_start_vectors = num_subspace_solve_start_vectors, - .num_subspace_solve_start_hess_vecs = num_subspace_solve_start_hess_vecs, - .num_quadratic_subspace_solves = num_quadratic_subspace_solves, - .num_preconditioner_updates = num_preconditioner_updates, - .residual_seconds = residual_seconds, - .hess_vec_seconds = hess_vec_seconds, - 
.model_hess_vec_seconds = model_hess_vec_seconds, - .cauchy_hess_vec_seconds = cauchy_hess_vec_seconds, - .line_search_hess_vec_seconds = line_search_hess_vec_seconds, - .preconditioner_seconds = preconditioner_seconds, - .total_seconds = total_seconds, - .model_solve_seconds = model_solve_seconds, - .subspace_seconds = subspace_seconds, - .subspace_leftmost_seconds = subspace_leftmost_seconds, - .subspace_hess_vec_batch_seconds = subspace_hess_vec_batch_seconds, - .subspace_filter_seconds = subspace_filter_seconds, - .subspace_backend_seconds = subspace_backend_seconds, - .subspace_project_A_seconds = trustRegionSubspaceTimings().project_A_seconds, - .subspace_project_gram_seconds = trustRegionSubspaceTimings().project_gram_seconds, - .subspace_project_b_seconds = trustRegionSubspaceTimings().project_b_seconds, - .subspace_basis_seconds = trustRegionSubspaceTimings().basis_seconds, - .subspace_reduced_A_seconds = trustRegionSubspaceTimings().reduced_A_seconds, - .subspace_dense_eigensystem_seconds = trustRegionSubspaceTimings().dense_eigensystem_seconds, - .subspace_dense_trust_solve_seconds = trustRegionSubspaceTimings().dense_trust_solve_seconds, - .subspace_reconstruct_solution_seconds = trustRegionSubspaceTimings().reconstruct_solution_seconds, - .subspace_reconstruct_leftmost_seconds = trustRegionSubspaceTimings().reconstruct_leftmost_seconds, - .subspace_finalize_seconds = subspace_finalize_seconds, - .cauchy_point_seconds = cauchy_point_seconds, - .dogleg_seconds = dogleg_seconds, - .line_search_seconds = line_search_seconds, - .dot_seconds = dot_seconds, - .num_dot_products = num_dot_products, - .num_dot_reductions = num_dot_reductions, - .num_model_dot_products = num_model_dot_products, - .num_cauchy_dot_products = num_cauchy_dot_products, - .num_dogleg_dot_products = num_dogleg_dot_products, - .num_line_search_dot_products = num_line_search_dot_products, - .num_setup_dot_products = num_setup_dot_products, - .model_dot_seconds = model_dot_seconds, - 
.cauchy_dot_seconds = cauchy_dot_seconds, - .dogleg_dot_seconds = dogleg_dot_seconds, - .line_search_dot_seconds = line_search_dot_seconds, - .setup_dot_seconds = setup_dot_seconds, - .vector_update_seconds = vector_update_seconds, - .vector_copy_scale_seconds = vector_copy_scale_seconds, - .projection_seconds = projection_seconds, - .jacobian_assembly_seconds = jacobian_assembly_seconds, - .preconditioner_update_seconds = preconditioner_update_seconds, - .preconditioner_setup_seconds = preconditioner_setup_seconds}; - } - /// @overload void Mult(const mfem::Vector&, mfem::Vector& X) const { MFEM_ASSERT(oper != NULL, "the Operator is not set (use SetOperator)."); MFEM_ASSERT(prec != NULL, "the Solver is not set (use SetSolver)."); - auto total_start = Clock::now(); - print_level = static_cast(std::max(nonlinear_options.print_level, 0)); print_level = print_options.iterations ? std::max(1, print_level) : print_level; print_level = print_options.summary ? std::max(2, print_level) : print_level; using real_t = mfem::real_t; - num_hess_vecs = 0; - num_model_hess_vecs = 0; - num_cauchy_hess_vecs = 0; - num_line_search_hess_vecs = 0; - num_preconds = 0; - num_residuals = 0; - num_subspace_solves = 0; - num_subspace_leftmost_hess_vecs = 0; - num_subspace_hess_vec_batches = 0; - num_subspace_batched_hess_vecs = 0; - num_subspace_past_step_vectors = 0; - num_subspace_past_step_hess_vecs = 0; - num_subspace_solve_start_vectors = 0; - num_subspace_solve_start_hess_vecs = 0; - num_quadratic_subspace_solves = 0; - num_jacobian_assembles = 0; - num_cg_iterations = 0; - num_preconditioner_updates = 0; - residual_seconds = 0.0; - hess_vec_seconds = 0.0; - model_hess_vec_seconds = 0.0; - cauchy_hess_vec_seconds = 0.0; - line_search_hess_vec_seconds = 0.0; - preconditioner_seconds = 0.0; - total_seconds = 0.0; - model_solve_seconds = 0.0; - subspace_seconds = 0.0; - subspace_leftmost_seconds = 0.0; - subspace_hess_vec_batch_seconds = 0.0; - subspace_filter_seconds = 0.0; - 
subspace_backend_seconds = 0.0; - subspace_finalize_seconds = 0.0; - cauchy_point_seconds = 0.0; - dogleg_seconds = 0.0; - line_search_seconds = 0.0; - dot_seconds = 0.0; - num_dot_products = 0; - num_dot_reductions = 0; - num_model_dot_products = 0; - num_cauchy_dot_products = 0; - num_dogleg_dot_products = 0; - num_line_search_dot_products = 0; - num_setup_dot_products = 0; - model_dot_seconds = 0.0; - cauchy_dot_seconds = 0.0; - dogleg_dot_seconds = 0.0; - line_search_dot_seconds = 0.0; - setup_dot_seconds = 0.0; - vector_update_seconds = 0.0; - vector_copy_scale_seconds = 0.0; - projection_seconds = 0.0; - jacobian_assembly_seconds = 0.0; - preconditioner_update_seconds = 0.0; - preconditioner_setup_seconds = 0.0; - accepted_step_history.clear(); - resetTrustRegionSubspaceTimings(); solve_start_x.SetSize(X.Size()); solve_start_x = X; min_residual_x.SetSize(X.Size()); @@ -1163,11 +807,9 @@ class TrustRegion : public mfem::NewtonSolver { int subspace_option = nonlinear_options.subspace_option; int num_leftmost = nonlinear_options.num_leftmost; - auto copy_start = Clock::now(); scratch = 1.0; - vector_copy_scale_seconds += secondsSince(copy_start); double tr_size = nonlinear_options.trust_region_scaling * - std::sqrt(timedDot(scratch, scratch, num_setup_dot_products, setup_dot_seconds)); + std::sqrt(Dot(scratch, scratch)); size_t cumulative_cg_iters_from_last_precond_update = 0; int it = 0; @@ -1202,12 +844,7 @@ class TrustRegion : public mfem::NewtonSolver { if (it == 0 || (trResults.cg_iterations_count >= settings.max_cg_iterations || cumulative_cg_iters_from_last_precond_update >= settings.max_cumulative_iteration)) { - auto preconditioner_update_start = Clock::now(); - auto preconditioner_setup_start = Clock::now(); tr_precond.SetOperator(*grad); - preconditioner_setup_seconds += secondsSince(preconditioner_setup_start); - preconditioner_update_seconds += secondsSince(preconditioner_update_start); - ++num_preconditioner_updates; 
cumulative_cg_iters_from_last_precond_update = 0; } @@ -1218,28 +855,22 @@ class TrustRegion : public mfem::NewtonSolver { trResults.reset(); { - auto cauchy_start = Clock::now(); - timedCauchyHessVec(hess_vec_func, r, trResults.H_d); - const double gKg = timedDot(r, trResults.H_d, num_cauchy_dot_products, cauchy_dot_seconds); + hess_vec_func(r, trResults.H_d); + const double gKg = Dot(r, trResults.H_d); const double residual_norm_squared = norm * norm; if (gKg > 0) { const double alphaCp = -residual_norm_squared / gKg; - auto update_start = Clock::now(); - add(trResults.cauchy_point, alphaCp, r, trResults.cauchy_point); - vector_update_seconds += secondsSince(update_start); - cauchyPointNormSquared = - timedDot(trResults.cauchy_point, trResults.cauchy_point, num_cauchy_dot_products, cauchy_dot_seconds); + add(trResults.cauchy_point, alphaCp, r, trResults.cauchy_point); + cauchyPointNormSquared = + Dot(trResults.cauchy_point, trResults.cauchy_point); } else { const double alphaTr = -tr_size / norm; - auto update_start = Clock::now(); - add(trResults.cauchy_point, alphaTr, r, trResults.cauchy_point); - vector_update_seconds += secondsSince(update_start); - if (print_level >= 2) { + add(trResults.cauchy_point, alphaTr, r, trResults.cauchy_point); + if (print_level >= 2) { mfem::out << "Negative curvature un-preconditioned cauchy point direction found." 
<< "\n"; } } - cauchy_point_seconds += secondsSince(cauchy_start); } if (cauchyPointNormSquared >= tr_size * tr_size) { @@ -1254,16 +885,10 @@ class TrustRegion : public mfem::NewtonSolver { trResults.interior_status = TrustRegionResults::Status::OnBoundary; } else { settings.cg_tol = std::max(0.5 * norm_goal, 5e-5 * norm); - auto model_start = Clock::now(); - auto model_hess_vec_func = [&](const mfem::Vector& x_, mfem::Vector& v_) { - timedModelHessVec(hess_vec_func, x_, v_); - }; - solveTrustRegionModelProblem(r, scratch, model_hess_vec_func, precond_func, settings, tr_size, trResults, + solveTrustRegionModelProblem(r, scratch, hess_vec_func, precond_func, settings, tr_size, trResults, norm * norm); - model_solve_seconds += secondsSince(model_start); } cumulative_cg_iters_from_last_precond_update += trResults.cg_iterations_count; - num_cg_iterations += trResults.cg_iterations_count; bool have_computed_Hvs = false; bool have_computed_H_left_mosts = false; @@ -1271,17 +896,13 @@ class TrustRegion : public mfem::NewtonSolver { int lineSearchIter = 0; while (lineSearchIter <= nonlinear_options.max_line_search_iterations) { - auto line_search_start = Clock::now(); ++lineSearchIter; - auto dogleg_start = Clock::now(); doglegStep(trResults.cauchy_point, trResults.z, tr_size, trResults.d); - dogleg_seconds += secondsSince(dogleg_start); - const bool check_subspace_boundary = subspace_option >= 1; const double d_norm = check_subspace_boundary - ? std::sqrt(timedDot(trResults.d, trResults.d, num_line_search_dot_products, line_search_dot_seconds)) + ? 
std::sqrt(Dot(trResults.d, trResults.d)) : 0.0; bool use_with_option1 = (subspace_option >= 1) && (trResults.interior_status == TrustRegionResults::Status::NonDescentDirection || @@ -1306,7 +927,6 @@ class TrustRegion : public mfem::NewtonSolver { if (!have_computed_H_left_mosts) { have_computed_H_left_mosts = true; - auto leftmost_start = Clock::now(); previous_H_left_mosts = H_left_mosts; H_left_mosts.clear(); std::vector leftmost_inputs; @@ -1315,9 +935,7 @@ class TrustRegion : public mfem::NewtonSolver { H_left_mosts.emplace_back(std::make_shared(*left)); leftmost_inputs.push_back(left.get()); leftmost_outputs.push_back(H_left_mosts.back().get()); - ++num_subspace_leftmost_hess_vecs; } - subspace_leftmost_seconds += secondsSince(leftmost_start); batchedSubspaceHessVec(hess_vec_func, leftmost_inputs, leftmost_outputs); } @@ -1344,8 +962,6 @@ class TrustRegion : public mfem::NewtonSolver { past_step_outputs.push_back(&H_past_steps.back()); } if (!past_step_inputs.empty()) { - num_subspace_past_step_vectors += past_step_inputs.size(); - num_subspace_past_step_hess_vecs += past_step_inputs.size(); batchedSubspaceHessVec(hess_vec_func, past_step_inputs, past_step_outputs); for (size_t i = 0; i < past_step_inputs.size(); ++i) { ds.push_back(past_step_inputs[i]); @@ -1362,8 +978,6 @@ class TrustRegion : public mfem::NewtonSolver { H_solve_start_direction.SetSize(X.Size()); std::vector solve_start_inputs{&solve_start_direction}; std::vector solve_start_outputs{&H_solve_start_direction}; - ++num_subspace_solve_start_vectors; - ++num_subspace_solve_start_hess_vecs; batchedSubspaceHessVec(hess_vec_func, solve_start_inputs, solve_start_outputs); ds.push_back(&solve_start_direction); H_ds.push_back(&H_solve_start_direction); @@ -1380,8 +994,6 @@ class TrustRegion : public mfem::NewtonSolver { std::vector min_res_inputs{&min_residual_direction}; std::vector min_res_outputs{&H_min_residual_direction}; // Reusing solve_start counters for now - 
++num_subspace_solve_start_vectors; - ++num_subspace_solve_start_hess_vecs; batchedSubspaceHessVec(hess_vec_func, min_res_inputs, min_res_outputs); ds.push_back(&min_residual_direction); H_ds.push_back(&H_min_residual_direction); @@ -1392,15 +1004,12 @@ class TrustRegion : public mfem::NewtonSolver { static constexpr double roundOffTol = 0.0; // 1e-14; - timedLineSearchHessVec(hess_vec_func, trResults.d, trResults.H_d); - const auto [dHd, rd] = timedDot2(trResults.d, trResults.H_d, r, trResults.d, num_line_search_dot_products, - line_search_dot_seconds); + hess_vec_func(trResults.d, trResults.H_d); + const auto [dHd, rd] = dot2(trResults.d, trResults.H_d, r, trResults.d); double modelObjective = rd + 0.5 * dHd - roundOffTol; - auto update_start = Clock::now(); - add(X, trResults.d, x_pred); - vector_update_seconds += secondsSince(update_start); - + add(X, trResults.d, x_pred); + double realObjective = std::numeric_limits::max(); double normPred = std::numeric_limits::max(); try { @@ -1410,7 +1019,7 @@ class TrustRegion : public mfem::NewtonSolver { min_residual_x = x_pred; } double obj1 = - 0.5 * (rd + timedDot(r_pred, trResults.d, num_line_search_dot_products, line_search_dot_seconds)) - + 0.5 * (rd + Dot(r_pred, trResults.d)) - roundOffTol; realObjective = obj1; } catch (const std::exception&) { @@ -1426,12 +1035,9 @@ class TrustRegion : public mfem::NewtonSolver { if (!candidate_left_mosts.empty()) { left_mosts = std::move(candidate_left_mosts); } - copy_start = Clock::now(); X = x_pred; r = r_pred; - vector_copy_scale_seconds += secondsSince(copy_start); - norm = normPred; - line_search_seconds += secondsSince(line_search_start); + norm = normPred; if (print_level >= 2) { printTrustRegionInfo(realObjective, modelObjective, trResults.cg_iterations_count, tr_size, true); trResults.cg_iterations_count = @@ -1486,15 +1092,11 @@ class TrustRegion : public mfem::NewtonSolver { if (!candidate_left_mosts.empty()) { left_mosts = std::move(candidate_left_mosts); } - 
copy_start = Clock::now(); X = x_pred; r = r_pred; - vector_copy_scale_seconds += secondsSince(copy_start); - norm = normPred; - line_search_seconds += secondsSince(line_search_start); + norm = normPred; break; } - line_search_seconds += secondsSince(line_search_start); } } @@ -1508,15 +1110,6 @@ class TrustRegion : public mfem::NewtonSolver { if (!converged && print_level >= 1) { // (print_options.summary || print_options.warnings)) { mfem::out << "TrustRegion: No convergence!\n"; } - - if (false && print_level >= 2) { - mfem::out << "num hess vecs = " << num_hess_vecs << "\n"; - mfem::out << "num preconds = " << num_preconds << "\n"; - mfem::out << "num residuals = " << num_residuals << "\n"; - mfem::out << "num subspace solves = " << num_subspace_solves << "\n"; - mfem::out << "num jacobian_assembles = " << num_jacobian_assembles << "\n"; - } - total_seconds = secondsSince(total_start); } }; @@ -1562,15 +1155,6 @@ void EquationSolver::solve(mfem::Vector& x) const nonlin_solver_->Mult(zero, x); } -std::optional EquationSolver::trustRegionDiagnostics() const -{ - auto* trust_region = dynamic_cast(nonlin_solver_.get()); - if (!trust_region) { - return std::nullopt; - } - return trust_region->diagnostics(); -} - void SuperLUSolver::Mult(const mfem::Vector& input, mfem::Vector& output) const { SLIC_ERROR_ROOT_IF(!superlu_mat_, "Operator must be set prior to solving with SuperLU"); diff --git a/src/smith/numerics/equation_solver.hpp b/src/smith/numerics/equation_solver.hpp index 3ddf35edef..a1fb36a788 100644 --- a/src/smith/numerics/equation_solver.hpp +++ b/src/smith/numerics/equation_solver.hpp @@ -28,136 +28,6 @@ namespace smith { -/// Diagnostic counters for the TrustRegion nonlinear solver -struct TrustRegionDiagnostics { - /// Number of nonlinear residual evaluations - size_t num_residuals = 0; - /// Number of Jacobian-vector products - size_t num_hess_vecs = 0; - /// Number of Hessian-vector products in model CG solves - size_t num_model_hess_vecs = 0; - /// 
Number of Hessian-vector products in Cauchy-point construction - size_t num_cauchy_hess_vecs = 0; - /// Number of Hessian-vector products in line-search model checks - size_t num_line_search_hess_vecs = 0; - /// Number of preconditioner applications - size_t num_preconds = 0; - /// Number of assembled Jacobians - size_t num_jacobian_assembles = 0; - /// Number of trust-region model CG iterations - size_t num_cg_iterations = 0; - /// Number of subspace solves - size_t num_subspace_solves = 0; - /// Number of retained-leftmost Hessian-vector products for subspace solves - size_t num_subspace_leftmost_hess_vecs = 0; - /// Number of batched Hessian-vector groups used for subspace solves - size_t num_subspace_hess_vec_batches = 0; - /// Number of Hessian-vector products inside subspace batches - size_t num_subspace_batched_hess_vecs = 0; - /// Number of accepted-step history vectors added to subspace solves - size_t num_subspace_past_step_vectors = 0; - /// Number of Hessian-vector products for accepted-step history vectors - size_t num_subspace_past_step_hess_vecs = 0; - /// Number of nonlinear-solve-start directions added to subspace solves - size_t num_subspace_solve_start_vectors = 0; - /// Number of Hessian-vector products for nonlinear-solve-start directions - size_t num_subspace_solve_start_hess_vecs = 0; - /// Number of quadratic subspace backend solves - size_t num_quadratic_subspace_solves = 0; - /// Number of preconditioner operator updates - size_t num_preconditioner_updates = 0; - /// Time spent evaluating nonlinear residuals - double residual_seconds = 0.0; - /// Time spent applying Jacobian-vector products - double hess_vec_seconds = 0.0; - /// Time spent applying Hessian-vector products in model CG solves - double model_hess_vec_seconds = 0.0; - /// Time spent applying Hessian-vector products in Cauchy-point construction - double cauchy_hess_vec_seconds = 0.0; - /// Time spent applying Hessian-vector products in line-search model checks - double 
line_search_hess_vec_seconds = 0.0; - /// Time spent applying preconditioners - double preconditioner_seconds = 0.0; - /// Total time spent in the nonlinear solve - double total_seconds = 0.0; - /// Time spent solving trust-region model problems - double model_solve_seconds = 0.0; - /// Total time spent in trust-region subspace solves - double subspace_seconds = 0.0; - /// Time spent building/applying retained leftmost directions for subspace solves - double subspace_leftmost_seconds = 0.0; - /// Time spent in subspace Hessian-vector batches - double subspace_hess_vec_batch_seconds = 0.0; - /// Time spent removing dependent directions before subspace solves - double subspace_filter_seconds = 0.0; - /// Time spent in dense subspace backend assembly/solve work - double subspace_backend_seconds = 0.0; - /// Time spent projecting dense subspace Hessian - double subspace_project_A_seconds = 0.0; - /// Time spent projecting dense subspace Gram matrix - double subspace_project_gram_seconds = 0.0; - /// Time spent projecting dense subspace gradient - double subspace_project_b_seconds = 0.0; - /// Time spent building dense subspace orthonormal basis - double subspace_basis_seconds = 0.0; - /// Time spent forming reduced dense Hessian - double subspace_reduced_A_seconds = 0.0; - /// Time spent in dense subspace eigensystems - double subspace_dense_eigensystem_seconds = 0.0; - /// Time spent in dense trust-region solve outside eigensystems - double subspace_dense_trust_solve_seconds = 0.0; - /// Time spent reconstructing full-space subspace solution - double subspace_reconstruct_solution_seconds = 0.0; - /// Time spent reconstructing retained leftmost vectors - double subspace_reconstruct_leftmost_seconds = 0.0; - /// Time spent in subspace postprocessing and model-energy comparison - double subspace_finalize_seconds = 0.0; - /// Time spent building the Cauchy point - double cauchy_point_seconds = 0.0; - /// Time spent in dogleg step construction - double dogleg_seconds = 
0.0; - /// Time spent in line-search and trust-radius acceptance logic - double line_search_seconds = 0.0; - /// Time spent in TrustRegion dot products - double dot_seconds = 0.0; - /// Number of TrustRegion dot products - size_t num_dot_products = 0; - /// Number of TrustRegion dot batches/reductions - size_t num_dot_reductions = 0; - /// Number of dot products in trust-region model solves - size_t num_model_dot_products = 0; - /// Number of dot products in Cauchy-point construction - size_t num_cauchy_dot_products = 0; - /// Number of dot products in dogleg construction - size_t num_dogleg_dot_products = 0; - /// Number of dot products in line-search and acceptance logic - size_t num_line_search_dot_products = 0; - /// Number of setup dot products outside the main per-step kernels - size_t num_setup_dot_products = 0; - /// Time spent in trust-region model-solve dot products - double model_dot_seconds = 0.0; - /// Time spent in Cauchy-point dot products - double cauchy_dot_seconds = 0.0; - /// Time spent in dogleg dot products - double dogleg_dot_seconds = 0.0; - /// Time spent in line-search dot products - double line_search_dot_seconds = 0.0; - /// Time spent in setup dot products - double setup_dot_seconds = 0.0; - /// Time spent in TrustRegion vector add/update operations - double vector_update_seconds = 0.0; - /// Time spent in TrustRegion vector copies and scaling operations - double vector_copy_scale_seconds = 0.0; - /// Time spent in TrustRegion boundary projection operations - double projection_seconds = 0.0; - /// Time spent assembling sparse Jacobians - double jacobian_assembly_seconds = 0.0; - /// Time spent refreshing preconditioner data - double preconditioner_update_seconds = 0.0; - /// Time spent in preconditioner SetOperator calls - double preconditioner_setup_seconds = 0.0; -}; - /** * @brief This class manages the objects typically required to solve a nonlinear set of equations arising from * discretization of a PDE of the form F(x) = 0. 
Specifically, it has @@ -226,12 +96,6 @@ class EquationSolver { */ const mfem::NewtonSolver& nonlinearSolver() const { return *nonlin_solver_; } - /** - * Returns diagnostic counters when the nonlinear solver is TrustRegion. - * @return Optional TrustRegion diagnostics; empty for other nonlinear solvers - */ - std::optional trustRegionDiagnostics() const; - /** * Returns the underlying linear solver object * @return A non-owning reference to the underlying linear solver diff --git a/src/smith/numerics/mfem_trust_region_subspace.cpp b/src/smith/numerics/mfem_trust_region_subspace.cpp index 268f832703..19d1e9c147 100644 --- a/src/smith/numerics/mfem_trust_region_subspace.cpp +++ b/src/smith/numerics/mfem_trust_region_subspace.cpp @@ -7,7 +7,6 @@ #include "smith/numerics/trust_region_solver.hpp" #include -#include #include #include @@ -17,31 +16,8 @@ namespace smith { namespace { -using Clock = std::chrono::steady_clock; - -double secondsSince(Clock::time_point start) -{ - return std::chrono::duration_cast>(Clock::now() - start).count(); -} - -TrustRegionSubspaceTimings& mutableTrustRegionSubspaceTimings() -{ - static TrustRegionSubspaceTimings timings; - return timings; -} - } // namespace -void resetTrustRegionSubspaceTimings() -{ - mutableTrustRegionSubspaceTimings() = TrustRegionSubspaceTimings {}; -} - -TrustRegionSubspaceTimings trustRegionSubspaceTimings() -{ - return mutableTrustRegionSubspaceTimings(); -} - int globalSize(const mfem::Vector& parallel_v, const MPI_Comm& comm) { int local_size = parallel_v.Size(); @@ -284,7 +260,6 @@ mfem::DenseMatrix columnsToMatrix(const std::vector& cols) std::tuple, std::vector, bool> exactTrustRegionSolve( mfem::DenseMatrix A, const mfem::Vector& b, double delta, int num_leftmost) { - auto dense_solve_start = Clock::now(); if (A.Height() != A.Width()) { throw PetscException("Exact trust region solver requires square matrices"); } @@ -294,10 +269,7 @@ std::tuple, std::vector, bool> e mfem::Vector sigs; mfem::DenseMatrix V; 
- auto eig_start = Clock::now(); A.Eigensystem(sigs, V); - mutableTrustRegionSubspaceTimings().dense_eigensystem_seconds += secondsSince(eig_start); - std::vector leftmosts; std::vector minsigs; const int num_leftmost_possible = std::min(num_leftmost, sigs.Size()); @@ -320,7 +292,6 @@ std::tuple, std::vector, bool> e const double eps = 1e-12 * sigScale; if ((minSig >= eps) && (norm(bvOverSigs) <= delta)) { - mutableTrustRegionSubspaceTimings().dense_trust_solve_seconds += secondsSince(dense_solve_start); return std::make_tuple(solveDense(A, b), leftmosts, minsigs, true); } @@ -351,7 +322,6 @@ std::tuple, std::vector, bool> e const double e1 = quadraticEnergy(A, b, x1); const double e2 = quadraticEnergy(A, b, x2); - mutableTrustRegionSubspaceTimings().dense_trust_solve_seconds += secondsSince(dense_solve_start); return std::make_tuple(e1 < e2 ? x1 : x2, leftmosts, minsigs, true); } @@ -391,7 +361,6 @@ std::tuple, std::vector, bool> e x *= (e2 < e1 ? -delta : delta) / norm(x); - mutableTrustRegionSubspaceTimings().dense_trust_solve_seconds += secondsSince(dense_solve_start); return std::make_tuple(x, leftmosts, minsigs, success); } @@ -452,15 +421,8 @@ TrustRegionSubspaceResult solveSubspaceProblemMfem(const std::vector(T.Width()); - timings.max_reduced_dim = std::max(timings.max_reduced_dim, static_cast(T.Width())); - - auto reduced_A_start = Clock::now(); mfem::DenseMatrix pAp = tripleProduct(T, sAs, T); - timings.reduced_A_seconds += secondsSince(reduced_A_start); symmetrize(pAp); - auto project_b_start = Clock::now(); const mfem::Vector& sb = projections.sb; - timings.project_b_seconds += secondsSince(project_b_start); const mfem::Vector pb = projectWithTranspose(T, sb); auto [reduced_x, leftvecs, leftvals, success] = exactTrustRegionSolve(pAp, pb, delta, num_leftmost); (void)success; const double energy = quadraticEnergy(pAp, pb, reduced_x); - auto reconstruct_solution_start = Clock::now(); mfem::Vector coeffs(T.Height()); T.Mult(reduced_x, coeffs); mfem::Vector 
sol = combineDirections(states, coeffs); - timings.reconstruct_solution_seconds += secondsSince(reconstruct_solution_start); - - auto reconstruct_leftmost_start = Clock::now(); std::vector> leftmosts; for (const auto& leftvec : leftvecs) { mfem::Vector left_coeffs(T.Height()); T.Mult(leftvec, left_coeffs); leftmosts.emplace_back(std::make_shared(combineDirections(states, left_coeffs))); } - timings.reconstruct_leftmost_seconds += secondsSince(reconstruct_leftmost_start); - return std::make_tuple(sol, leftmosts, leftvals, energy); } diff --git a/src/smith/numerics/trust_region_solver.hpp b/src/smith/numerics/trust_region_solver.hpp index 960024b33d..2f5290b93f 100644 --- a/src/smith/numerics/trust_region_solver.hpp +++ b/src/smith/numerics/trust_region_solver.hpp @@ -45,27 +45,6 @@ enum class TrustRegionSubspaceBackend { using TrustRegionSubspaceResult = std::tuple>, std::vector, double>; -struct TrustRegionSubspaceTimings { - size_t num_solves = 0; - size_t total_input_dim = 0; - size_t total_reduced_dim = 0; - size_t max_input_dim = 0; - size_t max_reduced_dim = 0; - double project_A_seconds = 0.0; - double project_gram_seconds = 0.0; - double project_b_seconds = 0.0; - double basis_seconds = 0.0; - double reduced_A_seconds = 0.0; - double dense_eigensystem_seconds = 0.0; - double dense_trust_solve_seconds = 0.0; - double reconstruct_solution_seconds = 0.0; - double reconstruct_leftmost_seconds = 0.0; -}; - -void resetTrustRegionSubspaceTimings(); - -TrustRegionSubspaceTimings trustRegionSubspaceTimings(); - /// @brief computes the global size of mfem::Vector int globalSize(const mfem::Vector& parallel_v, const MPI_Comm& comm); From f5b87e72d467c273b22c69ac685ee443fa599797 Mon Sep 17 00:00:00 2001 From: Michael Tupek Date: Sat, 9 May 2026 11:19:40 -0600 Subject: [PATCH 12/27] Some cleanup of the subspace improvements. 
--- .../numerics/mfem_trust_region_subspace.cpp | 55 +++++++++++-------- .../physics/tests/shallow_arch_buckling.cpp | 1 + 2 files changed, 32 insertions(+), 24 deletions(-) diff --git a/src/smith/numerics/mfem_trust_region_subspace.cpp b/src/smith/numerics/mfem_trust_region_subspace.cpp index 19d1e9c147..2f4bf90713 100644 --- a/src/smith/numerics/mfem_trust_region_subspace.cpp +++ b/src/smith/numerics/mfem_trust_region_subspace.cpp @@ -14,10 +14,6 @@ namespace smith { -namespace { - -} // namespace - int globalSize(const mfem::Vector& parallel_v, const MPI_Comm& comm) { int local_size = parallel_v.Size(); @@ -153,8 +149,8 @@ struct SubspaceProjections { mfem::Vector sb; }; -SubspaceProjections denseSubspaceProjections(const std::vector& states, - const std::vector& Astates, const mfem::Vector& b) +void checkProjectionInputs(const std::vector& states, + const std::vector& Astates, const mfem::Vector& b) { MFEM_VERIFY(states.size() == Astates.size(), "Search directions and their linear operator result must have same number of columns"); @@ -167,7 +163,13 @@ SubspaceProjections denseSubspaceProjections(const std::vectorSize() == vector_size, "Subspace Hessian-vector sizes differ."); } MFEM_VERIFY(b.Size() == vector_size, "Subspace right-hand-side size differs."); +} +SubspaceProjections globalSubspaceProjectionFromLocalInnerProducts(const std::vector& states, + const std::vector& Astates, + const mfem::Vector& b) +{ + const int n = static_cast(states.size()); const int triangular_size = n * (n + 1) / 2; const auto triangular_index = [n](int i, int j) { return i * n - (i * (i - 1)) / 2 + (j - i); @@ -176,32 +178,30 @@ SubspaceProjections denseSubspaceProjections(const std::vector local(size_t(buffer_size), 0.0); - std::vector global(size_t(buffer_size), 0.0); - - for (int k = 0; k < vector_size; ++k) { - const double b_k = b[k]; - for (int i = 0; i < n; ++i) { - const double s_i = (*states[size_t(i)])[k]; - local[size_t(sb_offset + i)] += s_i * b_k; - for (int j = i; 
j < n; ++j) { - const size_t ij = size_t(triangular_index(i, j)); - local[size_t(sAs_offset) + ij] += s_i * (*Astates[size_t(j)])[k]; - local[size_t(ss_offset) + ij] += s_i * (*states[size_t(j)])[k]; - } + std::vector local_projection_entries(size_t(buffer_size), 0.0); + std::vector global_projection_entries(size_t(buffer_size), 0.0); + + for (int i = 0; i < n; ++i) { + local_projection_entries[size_t(sb_offset + i)] = mfem::InnerProduct(*states[size_t(i)], b); + for (int j = i; j < n; ++j) { + const size_t ij = size_t(triangular_index(i, j)); + local_projection_entries[size_t(sAs_offset) + ij] = + mfem::InnerProduct(*states[size_t(i)], *Astates[size_t(j)]); + local_projection_entries[size_t(ss_offset) + ij] = mfem::InnerProduct(*states[size_t(i)], *states[size_t(j)]); } } - MPI_Allreduce(local.data(), global.data(), buffer_size, MFEM_MPI_REAL_T, MPI_SUM, MPI_COMM_WORLD); + MPI_Allreduce(local_projection_entries.data(), global_projection_entries.data(), buffer_size, MFEM_MPI_REAL_T, + MPI_SUM, MPI_COMM_WORLD); SubspaceProjections projections{mfem::DenseMatrix(n), mfem::DenseMatrix(n), mfem::Vector(n)}; for (int i = 0; i < n; ++i) { - projections.sb[i] = global[size_t(sb_offset + i)]; + projections.sb[i] = global_projection_entries[size_t(sb_offset + i)]; for (int j = i; j < n; ++j) { const size_t ij = size_t(triangular_index(i, j)); - projections.sAs(i, j) = global[size_t(sAs_offset) + ij]; + projections.sAs(i, j) = global_projection_entries[size_t(sAs_offset) + ij]; projections.sAs(j, i) = projections.sAs(i, j); - projections.ss(i, j) = global[size_t(ss_offset) + ij]; + projections.ss(i, j) = global_projection_entries[size_t(ss_offset) + ij]; projections.ss(j, i) = projections.ss(i, j); } } @@ -209,6 +209,13 @@ SubspaceProjections denseSubspaceProjections(const std::vector& states, + const std::vector& Astates, const mfem::Vector& b) +{ + checkProjectionInputs(states, Astates, b); + return globalSubspaceProjectionFromLocalInnerProducts(states, Astates, b); +} + 
mfem::Vector solveDense(const mfem::DenseMatrix& A, const mfem::Vector& b) { mfem::DenseMatrix A_copy(A); @@ -421,7 +428,7 @@ TrustRegionSubspaceResult solveSubspaceProblemMfem(const std::vector Date: Sat, 9 May 2026 11:48:00 -0600 Subject: [PATCH 13/27] Control when printing occurs better. --- src/smith/numerics/equation_solver.cpp | 44 ++++++++++++++++++++++++-- 1 file changed, 41 insertions(+), 3 deletions(-) diff --git a/src/smith/numerics/equation_solver.cpp b/src/smith/numerics/equation_solver.cpp index d5712c9b30..0a194f50ba 100644 --- a/src/smith/numerics/equation_solver.cpp +++ b/src/smith/numerics/equation_solver.cpp @@ -25,6 +25,26 @@ namespace smith { +namespace { + +#ifdef MFEM_USE_MPI +size_t rootOnlyPrintLevel(size_t level, MPI_Comm comm) +{ + if (level > 0 && comm != MPI_COMM_NULL) { + int rank = 0; + MPI_Comm_rank(comm, &rank); + if (rank != 0) { + return 0; + } + } + return level; +} +#else +size_t rootOnlyPrintLevel(size_t level) { return level; } +#endif + +} // namespace + /// Newton solver with a 2-way line-search. Reverts to regular Newton if max_line_search_iterations is set to 0. class NewtonSolver : public mfem::NewtonSolver { protected: @@ -97,6 +117,12 @@ class NewtonSolver : public mfem::NewtonSolver { print_level = static_cast(std::max(nonlinear_options.print_level, 0)); print_level = print_options.iterations ? std::max(1, print_level) : print_level; print_level = print_options.summary ? std::max(2, print_level) : print_level; + print_level = rootOnlyPrintLevel(print_level +#ifdef MFEM_USE_MPI + , + GetComm() +#endif + ); using real_t = mfem::real_t; @@ -553,9 +579,15 @@ class TrustRegion : public mfem::NewtonSolver { double subspace_energy = computeEnergy(g, hess_vec_func, sol); if (print_level >= 2) { - double leftval = leftvals.size() ? leftvals[0] : 1.0; - mfem::out << "Energy using subspace solver from: " << base_energy << ", to: " << subspace_energy << " / " - << energy_change << ". 
Min eig: " << leftval << std::endl; + int rank = 0; +#ifdef MFEM_USE_MPI + MPI_Comm_rank(GetComm(), &rank); +#endif + if (rank == 0) { + double leftval = leftvals.size() ? leftvals[0] : 1.0; + mfem::out << "Energy using subspace solver from: " << base_energy << ", to: " << subspace_energy << " / " + << energy_change << ". Min eig: " << leftval << std::endl; + } } if (subspace_energy < base_energy) { @@ -767,6 +799,12 @@ class TrustRegion : public mfem::NewtonSolver { print_level = static_cast(std::max(nonlinear_options.print_level, 0)); print_level = print_options.iterations ? std::max(1, print_level) : print_level; print_level = print_options.summary ? std::max(2, print_level) : print_level; + print_level = rootOnlyPrintLevel(print_level +#ifdef MFEM_USE_MPI + , + GetComm() +#endif + ); using real_t = mfem::real_t; From f79f0b2ef8cb19e35b457274542f4bb557f7038d Mon Sep 17 00:00:00 2001 From: Michael Tupek Date: Sat, 9 May 2026 11:49:25 -0600 Subject: [PATCH 14/27] another fix. --- src/smith/numerics/equation_solver.cpp | 79 ++++++++++---------------- 1 file changed, 30 insertions(+), 49 deletions(-) diff --git a/src/smith/numerics/equation_solver.cpp b/src/smith/numerics/equation_solver.cpp index 0a194f50ba..57bb75dcb1 100644 --- a/src/smith/numerics/equation_solver.cpp +++ b/src/smith/numerics/equation_solver.cpp @@ -343,9 +343,9 @@ struct TrustRegionResults { /// trust region printing utility function void printTrustRegionInfo(double realWork, double modelObjective, size_t cgIters, double trSize, bool willAccept) { - mfem::out << "real work = " << std::setw(13) << realWork << ", model energy = " << std::setw(13) - << modelObjective << ", cg iter = " << std::setw(7) << cgIters << ", next tr size = " << std::setw(8) - << trSize << ", accepting = " << willAccept << std::endl; + mfem::out << "real work = " << std::setw(13) << realWork << ", model energy = " << std::setw(13) << modelObjective + << ", cg iter = " << std::setw(7) << cgIters << ", next tr size = " << 
std::setw(8) << trSize + << ", accepting = " << willAccept << std::endl; } /** @@ -448,14 +448,10 @@ class TrustRegion : public mfem::NewtonSolver { /// Four-dot batch with one local vector pass and one MPI reduction when possible. Dot4Result dot4(const mfem::Vector& a0, const mfem::Vector& b0, const mfem::Vector& a1, const mfem::Vector& b1, - const mfem::Vector& a2, const mfem::Vector& b2, const mfem::Vector& a3, - const mfem::Vector& b3) const + const mfem::Vector& a2, const mfem::Vector& b2, const mfem::Vector& a3, const mfem::Vector& b3) const { if (dot_oper) { - return {.v0 = Dot(a0, b0), - .v1 = Dot(a1, b1), - .v2 = Dot(a2, b2), - .v3 = Dot(a3, b3)}; + return {.v0 = Dot(a0, b0), .v1 = Dot(a1, b1), .v2 = Dot(a2, b2), .v3 = Dot(a3, b3)}; } MFEM_ASSERT(a0.Size() == b0.Size(), "Incompatible vector sizes."); @@ -579,15 +575,9 @@ class TrustRegion : public mfem::NewtonSolver { double subspace_energy = computeEnergy(g, hess_vec_func, sol); if (print_level >= 2) { - int rank = 0; -#ifdef MFEM_USE_MPI - MPI_Comm_rank(GetComm(), &rank); -#endif - if (rank == 0) { - double leftval = leftvals.size() ? leftvals[0] : 1.0; - mfem::out << "Energy using subspace solver from: " << base_energy << ", to: " << subspace_energy << " / " - << energy_change << ". Min eig: " << leftval << std::endl; - } + double leftval = leftvals.size() ? leftvals[0] : 1.0; + mfem::out << "Energy using subspace solver from: " << base_energy << ", to: " << subspace_energy << " / " + << energy_change << ". 
Min eig: " << leftval << std::endl; } if (subspace_energy < base_energy) { @@ -616,18 +606,18 @@ class TrustRegion : public mfem::NewtonSolver { s = 0.0; if (cc >= tt) { add(s, std::sqrt(tt / cc), cp, s); - } else if (cc > nn) { + } else if (cc > nn) { if (print_level >= 2) { mfem::out << "cp outside newton, preconditioner likely inaccurate\n"; } add(s, 1.0, cp, s); - } else if (nn > tt) { // on the dogleg (we have nn >= cc, and tt >= cc) + } else if (nn > tt) { // on the dogleg (we have nn >= cc, and tt >= cc) add(s, 1.0, cp, s); - double cn = Dot(cp, newtonP); + double cn = Dot(cp, newtonP); projectToBoundaryBetweenWithCoefs(s, newtonP, trSize, cc, cn, nn); } else { s = newtonP; - } + } } /// compute the energy of the linearized system for a given solution vector z @@ -692,7 +682,7 @@ class TrustRegion : public mfem::NewtonSolver { if (descent_check > 0) { d *= -1; Hd *= -1; - results.interior_status = TrustRegionResults::Status::NonDescentDirection; + results.interior_status = TrustRegionResults::Status::NonDescentDirection; descent_check *= -1.0; curvature *= -1.0; zd *= -1.0; @@ -715,10 +705,10 @@ class TrustRegion : public mfem::NewtonSolver { auto& zPred = Pr; // re-use Pr memory. 
// This predicted step will no longer be used by the time Pr is, so we can avoid an extra // vector floating around - add(z, alphaCg, d, zPred); - + add(z, alphaCg, d, zPred); + z = zPred; - + if (results.interior_status == TrustRegionResults::Status::NonDescentDirection) { if (print_level >= 2) { mfem::out << "Found a non descent direction\n"; @@ -727,10 +717,9 @@ class TrustRegion : public mfem::NewtonSolver { } add(rCurrent, alphaCg, Hd, rCurrent); - + precond(rCurrent, Pr); - auto [rPrNp1, r_current_norm_squared] = - dot2(rCurrent, Pr, rCurrent, rCurrent); + auto [rPrNp1, r_current_norm_squared] = dot2(rCurrent, Pr, rCurrent, rCurrent); if (r_current_norm_squared <= cg_tol_squared && cgIter >= settings.min_cg_iterations) { return; } @@ -738,7 +727,7 @@ class TrustRegion : public mfem::NewtonSolver { double beta = rPrNp1 / rPr; rPr = rPrNp1; add(-1.0, Pr, beta, d, d); - + zz = zzNp1; } cgIter--; // if all cg iterations are taken, correct for output @@ -846,8 +835,7 @@ class TrustRegion : public mfem::NewtonSolver { int num_leftmost = nonlinear_options.num_leftmost; scratch = 1.0; - double tr_size = nonlinear_options.trust_region_scaling * - std::sqrt(Dot(scratch, scratch)); + double tr_size = nonlinear_options.trust_region_scaling * std::sqrt(Dot(scratch, scratch)); size_t cumulative_cg_iters_from_last_precond_update = 0; int it = 0; @@ -898,13 +886,12 @@ class TrustRegion : public mfem::NewtonSolver { const double residual_norm_squared = norm * norm; if (gKg > 0) { const double alphaCp = -residual_norm_squared / gKg; - add(trResults.cauchy_point, alphaCp, r, trResults.cauchy_point); - cauchyPointNormSquared = - Dot(trResults.cauchy_point, trResults.cauchy_point); + add(trResults.cauchy_point, alphaCp, r, trResults.cauchy_point); + cauchyPointNormSquared = Dot(trResults.cauchy_point, trResults.cauchy_point); } else { const double alphaTr = -tr_size / norm; - add(trResults.cauchy_point, alphaTr, r, trResults.cauchy_point); - if (print_level >= 2) { + 
add(trResults.cauchy_point, alphaTr, r, trResults.cauchy_point); + if (print_level >= 2) { mfem::out << "Negative curvature un-preconditioned cauchy point direction found." << "\n"; } @@ -938,10 +925,7 @@ class TrustRegion : public mfem::NewtonSolver { doglegStep(trResults.cauchy_point, trResults.z, tr_size, trResults.d); const bool check_subspace_boundary = subspace_option >= 1; - const double d_norm = - check_subspace_boundary - ? std::sqrt(Dot(trResults.d, trResults.d)) - : 0.0; + const double d_norm = check_subspace_boundary ? std::sqrt(Dot(trResults.d, trResults.d)) : 0.0; bool use_with_option1 = (subspace_option >= 1) && (trResults.interior_status == TrustRegionResults::Status::NonDescentDirection || trResults.interior_status == TrustRegionResults::Status::NegativeCurvature || @@ -1046,8 +1030,8 @@ class TrustRegion : public mfem::NewtonSolver { const auto [dHd, rd] = dot2(trResults.d, trResults.H_d, r, trResults.d); double modelObjective = rd + 0.5 * dHd - roundOffTol; - add(X, trResults.d, x_pred); - + add(X, trResults.d, x_pred); + double realObjective = std::numeric_limits::max(); double normPred = std::numeric_limits::max(); try { @@ -1056,9 +1040,7 @@ class TrustRegion : public mfem::NewtonSolver { min_residual_norm = normPred; min_residual_x = x_pred; } - double obj1 = - 0.5 * (rd + Dot(r_pred, trResults.d)) - - roundOffTol; + double obj1 = 0.5 * (rd + Dot(r_pred, trResults.d)) - roundOffTol; realObjective = obj1; } catch (const std::exception&) { realObjective = std::numeric_limits::max(); @@ -1075,7 +1057,7 @@ class TrustRegion : public mfem::NewtonSolver { } X = x_pred; r = r_pred; - norm = normPred; + norm = normPred; if (print_level >= 2) { printTrustRegionInfo(realObjective, modelObjective, trResults.cg_iterations_count, tr_size, true); trResults.cg_iterations_count = @@ -1132,7 +1114,7 @@ class TrustRegion : public mfem::NewtonSolver { } X = x_pred; r = r_pred; - norm = normPred; + norm = normPred; break; } } @@ -1151,7 +1133,6 @@ class 
TrustRegion : public mfem::NewtonSolver { } }; - EquationSolver::EquationSolver(NonlinearSolverOptions nonlinear_opts, LinearSolverOptions lin_opts, MPI_Comm comm) { auto [lin_solver, preconditioner] = buildLinearSolverAndPreconditioner(lin_opts, comm); From a589e33f0adbd58f1c078db7867d450c964186da Mon Sep 17 00:00:00 2001 From: Michael Tupek Date: Sat, 9 May 2026 17:16:12 -0600 Subject: [PATCH 15/27] Pull trust-region into its own file. --- src/smith/numerics/CMakeLists.txt | 2 + src/smith/numerics/equation_solver.cpp | 353 +++++------------- .../numerics/mfem_trust_region_subspace.cpp | 166 ++++---- .../numerics/petsc_trust_region_subspace.cpp | 4 +- src/smith/numerics/steihaug_toint_cg.cpp | 134 +++++++ src/smith/numerics/steihaug_toint_cg.hpp | 140 +++++++ src/smith/numerics/tests/CMakeLists.txt | 1 + .../numerics/tests/test_steihaug_toint_cg.cpp | 133 +++++++ .../tests/test_trust_region_solver_mfem.cpp | 48 +-- .../tests/test_trust_region_solver_petsc.cpp | 8 +- src/smith/numerics/trust_region_solver.hpp | 9 +- .../physics/tests/shallow_arch_buckling.cpp | 10 +- 12 files changed, 586 insertions(+), 422 deletions(-) create mode 100644 src/smith/numerics/steihaug_toint_cg.cpp create mode 100644 src/smith/numerics/steihaug_toint_cg.hpp create mode 100644 src/smith/numerics/tests/test_steihaug_toint_cg.cpp diff --git a/src/smith/numerics/CMakeLists.txt b/src/smith/numerics/CMakeLists.txt index 8bc793fedd..f3031866ef 100644 --- a/src/smith/numerics/CMakeLists.txt +++ b/src/smith/numerics/CMakeLists.txt @@ -8,6 +8,7 @@ add_subdirectory(functional) set(numerics_headers equation_solver.hpp + steihaug_toint_cg.hpp odes.hpp solver_config.hpp stdfunction_operator.hpp @@ -19,6 +20,7 @@ set(numerics_headers set(numerics_sources equation_solver.cpp + steihaug_toint_cg.cpp petsc_trust_region_subspace.cpp mfem_trust_region_subspace.cpp odes.cpp diff --git a/src/smith/numerics/equation_solver.cpp b/src/smith/numerics/equation_solver.cpp index 57bb75dcb1..cb82935b5d 100644 --- 
a/src/smith/numerics/equation_solver.cpp +++ b/src/smith/numerics/equation_solver.cpp @@ -5,8 +5,11 @@ // SPDX-License-Identifier: (BSD-3-Clause) #include "smith/numerics/equation_solver.hpp" +#include "smith/numerics/steihaug_toint_cg.hpp" +#include #include +#include #include #include #include @@ -243,102 +246,7 @@ class NewtonSolver : public mfem::NewtonSolver { } }; -/// Internal structure for storing trust region settings -struct TrustRegionSettings { - /// cg tol - double cg_tol = 1e-8; - /// min cg iters - size_t min_cg_iterations = 0; // - /// max cg iters should be around # of system dofs - size_t max_cg_iterations = 10000; // - /// max cumulative iterations - size_t max_cumulative_iteration = 1; - /// minimum trust region size - double min_tr_size = 1e-13; - /// trust region decrease factor - double t1 = 0.25; - /// trust region increase factor - double t2 = 1.75; - /// worse case energy drop ratio. trust region accepted if energy drop is better than this. - double eta1 = 1e-9; - /// non-ideal energy drop ratio. trust region decreases if energy drop is worse than this. - double eta2 = 0.1; - /// ideal energy drop ratio. trust region increases if energy drop is better than this. 
- double eta3 = 0.6; - /// parameter limiting how fast the energy can drop relative to the prediction (in case the energy surrogate is poor) - double eta4 = 4.2; -}; - -/// Internal structure for storing trust region stateful data -struct TrustRegionResults { - /// Constructor takes the size of the solution vector - TrustRegionResults(int size) - { - z.SetSize(size); - H_z.SetSize(size); - d_old.SetSize(size); - H_d_old.SetSize(size); - H_d_old_at_accept.SetSize(size); - d.SetSize(size); - H_d.SetSize(size); - Pr.SetSize(size); - cauchy_point.SetSize(size); - H_cauchy_point.SetSize(size); - z = 0.0; - H_z = 0.0; - d_old = 0.0; - H_d_old = 0.0; - H_d_old_at_accept = 0.0; - d = 0.0; - H_d = 0.0; - Pr = 0.0; - cauchy_point = 0.0; - H_cauchy_point = 0.0; - } - - /// resets trust region results for a new outer iteration - void reset() - { - z = 0.0; - cauchy_point = 0.0; - } - - /// enumerates the possible final status of the trust region steps - enum class Status - { - Interior, - NegativeCurvature, - OnBoundary, - NonDescentDirection - }; - /// step direction - mfem::Vector z; - /// action of hessian on current step z - mfem::Vector H_z; - /// old step direction - mfem::Vector d_old; - /// action of hessian on previous step z_old - mfem::Vector H_d_old; - /// action of previous accepted hessian on previous step z_old - mfem::Vector H_d_old_at_accept; - /// true after at least one accepted line-search step has populated d_old - bool has_d_old = false; - /// incrementalCG direction - mfem::Vector d; - /// action of hessian on direction d - mfem::Vector H_d; - /// preconditioned residual - mfem::Vector Pr; - /// cauchy point - mfem::Vector cauchy_point; - /// action of hessian on direction of cauchy point - mfem::Vector H_cauchy_point; - /// specifies if step is interior, exterior, negative curvature, etc. 
- Status interior_status = Status::Interior; - /// iteration counter - size_t cg_iterations_count = 0; -}; /// trust region printing utility function void printTrustRegionInfo(double realWork, double modelObjective, size_t cgIters, double trSize, bool willAccept) @@ -357,7 +265,7 @@ void printTrustRegionInfo(double realWork, double modelObjective, size_t cgIters * rely on an incremental work approximation: 0.5 (f^n + f^{n+1}) dot (u^{n+1} - u^n). While less theoretically sound, * it appears to be very effective in practice. */ -class TrustRegion : public mfem::NewtonSolver { +class TrustRegion : public mfem::NewtonSolver, public SteihaugTointDelegate { protected: /// predicted solution mutable mfem::Vector x_pred; @@ -372,11 +280,14 @@ class TrustRegion : public mfem::NewtonSolver { /// previous accepted-iteration Hessian actions on the retained left most eigenvectors mutable std::vector> previous_H_left_mosts; /// accepted TrustRegion steps, newest first - mutable std::vector> accepted_step_history; + mutable std::deque> accepted_step_history; /// initial state for this nonlinear solve, used as an optional history direction mutable mfem::Vector solve_start_x; mutable mfem::Vector min_residual_x; mutable double min_residual_norm = -1.0; + + /// Workspace vector for exact subspace solver to avoid small allocations + mutable mfem::Vector exact_solver_workspace; /// nonlinear solution options NonlinearSolverOptions nonlinear_options; @@ -400,87 +311,72 @@ class TrustRegion : public mfem::NewtonSolver { } #endif - /// Pair of dot products with one local vector pass and one MPI reduction when possible. - std::pair dot2(const mfem::Vector& a0, const mfem::Vector& b0, const mfem::Vector& a1, - const mfem::Vector& b1) const + template + std::array dot_many(const Args&... 
args) const { + static_assert(sizeof...(Args) % 2 == 0, "dot_many requires an even number of arguments"); + constexpr size_t num_pairs = sizeof...(Args) / 2; + std::array products; + products.fill(0.0); + if (dot_oper) { - return {Dot(a0, b0), Dot(a1, b1)}; + auto tuple_args = std::tie(args...); + auto do_dots = [&](std::index_sequence) { + ((products[I] = Dot(std::get<2 * I>(tuple_args), std::get<2 * I + 1>(tuple_args))), ...); + }; + do_dots(std::make_index_sequence{}); + return products; } - MFEM_ASSERT(a0.Size() == b0.Size(), "Incompatible vector sizes."); - MFEM_ASSERT(a1.Size() == b1.Size(), "Incompatible vector sizes."); + auto tuple_args = std::tie(args...); + std::array sizes; + std::array ptr_a; + std::array ptr_b; + + auto populate_arrays = [&](std::index_sequence) { + (( + sizes[I] = std::get<2 * I>(tuple_args).Size(), + [&](){ MFEM_ASSERT(sizes[I] == std::get<2 * I + 1>(tuple_args).Size(), "Incompatible vector sizes."); }(), + ptr_a[I] = std::get<2 * I>(tuple_args).GetData(), + ptr_b[I] = std::get<2 * I + 1>(tuple_args).GetData() + ), ...); + }; + populate_arrays(std::make_index_sequence{}); + + bool all_same_size = true; + for (size_t i = 1; i < num_pairs; ++i) { + if (sizes[i] != sizes[0]) { + all_same_size = false; + break; + } + } - mfem::real_t products[2] = {0.0, 0.0}; - if (a0.Size() == a1.Size()) { - for (int i = 0; i < a0.Size(); ++i) { - products[0] += a0[i] * b0[i]; - products[1] += a1[i] * b1[i]; + if (all_same_size && num_pairs > 0) { + for (int j = 0; j < sizes[0]; ++j) { + for (size_t i = 0; i < num_pairs; ++i) { + products[i] += ptr_a[i][j] * ptr_b[i][j]; + } } } else { - for (int i = 0; i < a0.Size(); ++i) { - products[0] += a0[i] * b0[i]; - } - for (int i = 0; i < a1.Size(); ++i) { - products[1] += a1[i] * b1[i]; + for (size_t i = 0; i < num_pairs; ++i) { + for (int j = 0; j < sizes[i]; ++j) { + products[i] += ptr_a[i][j] * ptr_b[i][j]; + } } } #ifdef MFEM_USE_MPI const MPI_Comm dot_comm = GetComm(); if (dot_comm != MPI_COMM_NULL) { - 
mfem::real_t global_products[2] = {0.0, 0.0}; - MPI_Allreduce(products, global_products, 2, MFEM_MPI_REAL_T, MPI_SUM, dot_comm); - products[0] = global_products[0]; - products[1] = global_products[1]; - } -#endif - - return {products[0], products[1]}; - } - - struct Dot4Result { - double v0 = 0.0; - double v1 = 0.0; - double v2 = 0.0; - double v3 = 0.0; - }; - - /// Four-dot batch with one local vector pass and one MPI reduction when possible. - Dot4Result dot4(const mfem::Vector& a0, const mfem::Vector& b0, const mfem::Vector& a1, const mfem::Vector& b1, - const mfem::Vector& a2, const mfem::Vector& b2, const mfem::Vector& a3, const mfem::Vector& b3) const - { - if (dot_oper) { - return {.v0 = Dot(a0, b0), .v1 = Dot(a1, b1), .v2 = Dot(a2, b2), .v3 = Dot(a3, b3)}; - } - - MFEM_ASSERT(a0.Size() == b0.Size(), "Incompatible vector sizes."); - MFEM_ASSERT(a1.Size() == b1.Size(), "Incompatible vector sizes."); - MFEM_ASSERT(a2.Size() == b2.Size(), "Incompatible vector sizes."); - MFEM_ASSERT(a3.Size() == b3.Size(), "Incompatible vector sizes."); - MFEM_ASSERT(a0.Size() == a1.Size() && a0.Size() == a2.Size() && a0.Size() == a3.Size(), - "timedDot4 currently requires equal vector sizes."); - - mfem::real_t products[4] = {0.0, 0.0, 0.0, 0.0}; - for (int i = 0; i < a0.Size(); ++i) { - products[0] += a0[i] * b0[i]; - products[1] += a1[i] * b1[i]; - products[2] += a2[i] * b2[i]; - products[3] += a3[i] * b3[i]; - } - -#ifdef MFEM_USE_MPI - const MPI_Comm dot_comm = GetComm(); - if (dot_comm != MPI_COMM_NULL) { - mfem::real_t global_products[4] = {0.0, 0.0, 0.0, 0.0}; - MPI_Allreduce(products, global_products, 4, MFEM_MPI_REAL_T, MPI_SUM, dot_comm); - for (int i = 0; i < 4; ++i) { + std::array global_products; + MPI_Allreduce(products.data(), global_products.data(), num_pairs, MFEM_MPI_REAL_T, MPI_SUM, dot_comm); + for (size_t i = 0; i < num_pairs; ++i) { products[i] = global_products[i]; } } #endif - return {.v0 = products[0], .v1 = products[1], .v2 = products[2], .v3 = 
products[3]}; + return products; } template @@ -504,20 +400,32 @@ class TrustRegion : public mfem::NewtonSolver { return; } - accepted_step_history.insert(accepted_step_history.begin(), std::make_shared(step)); + accepted_step_history.push_front(std::make_shared(step)); const size_t max_size = static_cast(nonlinear_options.trust_num_past_steps) + 1; while (accepted_step_history.size() > max_size) { accepted_step_history.pop_back(); } } - /// finds tau s.t. (z + tau*d)^2 = trSize^2 + std::array dot_many_4(const mfem::Vector& a0, const mfem::Vector& b0, + const mfem::Vector& a1, const mfem::Vector& b1, + const mfem::Vector& a2, const mfem::Vector& b2, + const mfem::Vector& a3, const mfem::Vector& b3) const override + { + return dot_many(a0, b0, a1, b1, a2, b2, a3, b3); + } + + std::array dot_many_2(const mfem::Vector& a0, const mfem::Vector& b0, + const mfem::Vector& a1, const mfem::Vector& b1) const override + { + return dot_many(a0, b0, a1, b1); + } + void projectToBoundaryWithCoefs(mfem::Vector& z, const mfem::Vector& d, double delta, double zz, double zd, - double dd) const + double dd) const override { - // find z + tau d double deltadelta_m_zz = delta * delta - zz; - if (deltadelta_m_zz == 0) return; // already on boundary + if (deltadelta_m_zz == 0) return; double tau = (std::sqrt(deltadelta_m_zz * dd + zd * zd) - zd) / dd; z.Add(tau, d); } @@ -557,8 +465,11 @@ class TrustRegion : public mfem::NewtonSolver { double energy_change; try { + if (exact_solver_workspace.Size() < 2000) { + exact_solver_workspace.SetSize(2000); + } std::tie(sol, leftvecs, leftvals, energy_change) = - solveSubspaceProblem(directions, H_directions, b, delta, num_leftmost); + solveSubspaceProblem(directions, H_directions, b, delta, num_leftmost, exact_solver_workspace); } catch (const std::exception& e) { if (print_level >= 1) { mfem::out << "subspace solve failed with " << e.what() << std::endl; @@ -600,7 +511,7 @@ class TrustRegion : public mfem::NewtonSolver { void doglegStep(const 
mfem::Vector& cp, const mfem::Vector& newtonP, double trSize, mfem::Vector& s) const { SMITH_MARK_FUNCTION; - auto [cc, nn] = dot2(cp, cp, newtonP, newtonP); + auto [cc, nn] = dot_many(cp, cp, newtonP, newtonP); double tt = trSize * trSize; s = 0.0; @@ -633,104 +544,11 @@ class TrustRegion : public mfem::NewtonSolver { } /// Minimize quadratic sub-problem given residual vector, the action of the stiffness and a preconditioner - template - void solveTrustRegionModelProblem(const mfem::Vector& r0, mfem::Vector& rCurrent, HessVecFunc hess_vec_func, - PrecondFunc precond, const TrustRegionSettings& settings, double& trSize, - TrustRegionResults& results, double r0_norm_squared) const + void solveModelProblem(const mfem::Vector& r0, mfem::Vector& rCurrent, const mfem::Operator& H, + const mfem::Solver* P, const TrustRegionSettings& settings, double& trSize, + TrustRegionResults& results, double r0_norm_squared) const { - SMITH_MARK_FUNCTION; - // minimize r0@z + 0.5*z@J@z - results.interior_status = TrustRegionResults::Status::Interior; - results.cg_iterations_count = 0; - - auto& z = results.z; - auto& cgIter = results.cg_iterations_count; - auto& d = results.d; - auto& Pr = results.Pr; - auto& Hd = results.H_d; - - const double cg_tol_squared = settings.cg_tol * settings.cg_tol; - - if (r0_norm_squared <= cg_tol_squared && settings.min_cg_iterations == 0) { - if (print_level >= 2) { - mfem::out << "Trust region solution state within tolerance on first iteration." 
- << "\n"; - } - return; - } - - rCurrent = r0; - precond(rCurrent, Pr); - - // d = -Pr - d = Pr; - d *= -1.0; - - z = 0.0; - double zz = 0.; - double rPr = Dot(rCurrent, Pr); - - // std::cout << "initial energy = " << computeEnergy(r0, hess_vec_func, z) << std::endl; - - for (cgIter = 1; cgIter <= settings.max_cg_iterations; ++cgIter) { - hess_vec_func(d, Hd); - const auto dots = dot4(d, rCurrent, d, Hd, z, d, d, d); - double descent_check = dots.v0; - double curvature = dots.v1; - double zd = dots.v2; - double dd = dots.v3; - if (descent_check > 0) { - d *= -1; - Hd *= -1; - results.interior_status = TrustRegionResults::Status::NonDescentDirection; - descent_check *= -1.0; - curvature *= -1.0; - zd *= -1.0; - } - - const double alphaCg = curvature != 0.0 ? rPr / curvature : 0.0; - const double zzNp1 = zz + 2.0 * alphaCg * zd + alphaCg * alphaCg * dd; - - const bool go_to_boundary = curvature <= 0 || zzNp1 >= trSize * trSize; - if (go_to_boundary) { - projectToBoundaryWithCoefs(z, d, trSize, zz, zd, dd); - if (curvature <= 0) { - results.interior_status = TrustRegionResults::Status::NegativeCurvature; - } else { - results.interior_status = TrustRegionResults::Status::OnBoundary; - } - return; - } - - auto& zPred = Pr; // re-use Pr memory. 
- // This predicted step will no longer be used by the time Pr is, so we can avoid an extra - // vector floating around - add(z, alphaCg, d, zPred); - - z = zPred; - - if (results.interior_status == TrustRegionResults::Status::NonDescentDirection) { - if (print_level >= 2) { - mfem::out << "Found a non descent direction\n"; - } - return; - } - - add(rCurrent, alphaCg, Hd, rCurrent); - - precond(rCurrent, Pr); - auto [rPrNp1, r_current_norm_squared] = dot2(rCurrent, Pr, rCurrent, rCurrent); - if (r_current_norm_squared <= cg_tol_squared && cgIter >= settings.min_cg_iterations) { - return; - } - - double beta = rPrNp1 / rPr; - rPr = rPrNp1; - add(-1.0, Pr, beta, d, d); - - zz = zzNp1; - } - cgIter--; // if all cg iterations are taken, correct for output + steihaugTointCG(r0, rCurrent, H, P, settings, trSize, results, r0_norm_squared, *this); } std::unique_ptr cloneAssembledOperator(const mfem::Operator& op) const @@ -781,7 +599,7 @@ class TrustRegion : public mfem::NewtonSolver { }; /// @overload - void Mult(const mfem::Vector&, mfem::Vector& X) const + void Mult(const mfem::Vector&, mfem::Vector& X) const override { MFEM_ASSERT(oper != NULL, "the Operator is not set (use SetOperator)."); MFEM_ASSERT(prec != NULL, "the Solver is not set (use SetSolver)."); @@ -875,7 +693,6 @@ class TrustRegion : public mfem::NewtonSolver { } auto hess_vec_func = [&](const mfem::Vector& x_, mfem::Vector& v_) { hessVec(x_, v_); }; - auto precond_func = [&](const mfem::Vector& x_, mfem::Vector& v_) { precond(x_, v_); }; double cauchyPointNormSquared = tr_size * tr_size; trResults.reset(); @@ -910,7 +727,7 @@ class TrustRegion : public mfem::NewtonSolver { trResults.interior_status = TrustRegionResults::Status::OnBoundary; } else { settings.cg_tol = std::max(0.5 * norm_goal, 5e-5 * norm); - solveTrustRegionModelProblem(r, scratch, hess_vec_func, precond_func, settings, tr_size, trResults, + solveModelProblem(r, scratch, *this->oper, &this->tr_precond, settings, tr_size, trResults, norm * 
norm); } cumulative_cg_iters_from_last_precond_update += trResults.cg_iterations_count; @@ -1027,7 +844,7 @@ class TrustRegion : public mfem::NewtonSolver { static constexpr double roundOffTol = 0.0; // 1e-14; hess_vec_func(trResults.d, trResults.H_d); - const auto [dHd, rd] = dot2(trResults.d, trResults.H_d, r, trResults.d); + const auto [dHd, rd] = dot_many(trResults.d, trResults.H_d, r, trResults.d); double modelObjective = rd + 0.5 * dHd - roundOffTol; add(X, trResults.d, x_pred); diff --git a/src/smith/numerics/mfem_trust_region_subspace.cpp b/src/smith/numerics/mfem_trust_region_subspace.cpp index 2f4bf90713..65652e2d73 100644 --- a/src/smith/numerics/mfem_trust_region_subspace.cpp +++ b/src/smith/numerics/mfem_trust_region_subspace.cpp @@ -27,51 +27,13 @@ double innerProduct(const mfem::Vector& a, const mfem::Vector& b, const MPI_Comm return mfem::InnerProduct(comm, a, b); } -std::pair, std::vector> removeDependentDirections( - std::vector directions, std::vector A_directions) -{ - SMITH_MARK_FUNCTION; - std::vector norms; - size_t num_dirs = directions.size(); - - for (size_t i = 0; i < num_dirs; ++i) { - norms.push_back(std::sqrt(mfem::InnerProduct(MPI_COMM_WORLD, *directions[i], *directions[i]))); - } - - std::vector> kepts; - for (size_t i = 0; i < num_dirs; ++i) { - bool keepi = true; - if (norms[i] == 0) keepi = false; - for (auto&& kept_and_j : kepts) { - size_t j = kept_and_j.second; - double dot_ij = mfem::InnerProduct(MPI_COMM_WORLD, *directions[i], *kept_and_j.first); - if (dot_ij > 0.999 * norms[i] * norms[j]) { - keepi = false; - } - } - if (keepi) { - kepts.emplace_back(std::make_pair(directions[i], i)); - } - } - - std::vector directions_new; - std::vector A_directions_new; - - for (auto kept_and_j : kepts) { - directions_new.push_back(directions[kept_and_j.second]); - A_directions_new.push_back(A_directions[kept_and_j.second]); - } - - return std::make_pair(directions_new, A_directions_new); -} - #ifdef MFEM_USE_LAPACK 
TrustRegionSubspaceResult solveSubspaceProblem(const std::vector& directions, const std::vector& A_directions, - const mfem::Vector& b, double delta, int num_leftmost) + const mfem::Vector& b, double delta, int num_leftmost, mfem::Vector& workspace) { - return solveSubspaceProblemMfem(directions, A_directions, b, delta, num_leftmost); + return solveSubspaceProblemMfem(directions, A_directions, b, delta, num_leftmost, workspace); } namespace { @@ -86,32 +48,7 @@ double norm(const mfem::Vector& x) return x.Norml2(); } -mfem::Vector operator+(const mfem::Vector& x, double value) -{ - mfem::Vector out(x); - for (int i = 0; i < out.Size(); ++i) { - out[i] += value; - } - return out; -} -mfem::Vector pointwiseMultiply(const mfem::Vector& a, const mfem::Vector& b) -{ - mfem::Vector out(a.Size()); - for (int i = 0; i < a.Size(); ++i) { - out[i] = a[i] * b[i]; - } - return out; -} - -mfem::Vector pointwiseDivide(const mfem::Vector& a, const mfem::Vector& b) -{ - mfem::Vector out(a.Size()); - for (int i = 0; i < a.Size(); ++i) { - out[i] = a[i] / b[i]; - } - return out; -} double sumAbs(const mfem::Vector& x) { @@ -122,15 +59,6 @@ double sumAbs(const mfem::Vector& x) return total; } -double sum(const mfem::Vector& x) -{ - double total = 0.0; - for (int i = 0; i < x.Size(); ++i) { - total += x[i]; - } - return total; -} - void symmetrize(mfem::DenseMatrix& A) { MFEM_VERIFY(A.Height() == A.Width(), "symmetrize requires square matrix"); @@ -234,14 +162,20 @@ double quadraticEnergy(const mfem::DenseMatrix& A, const mfem::Vector& b, const double pnormSquared(const mfem::Vector& bvv, const mfem::Vector& sig) { - return sum(pointwiseDivide(bvv, pointwiseMultiply(sig, sig))); + double total = 0.0; + for (int i = 0; i < bvv.Size(); ++i) { + total += bvv[i] / (sig[i] * sig[i]); + } + return total; } double qnormSquared(const mfem::Vector& bvv, const mfem::Vector& sig) { - mfem::Vector sig_sq = pointwiseMultiply(sig, sig); - mfem::Vector sig_cu = pointwiseMultiply(sig_sq, sig); - 
return sum(pointwiseDivide(bvv, sig_cu)); + double total = 0.0; + for (int i = 0; i < bvv.Size(); ++i) { + total += bvv[i] / (sig[i] * sig[i] * sig[i]); + } + return total; } mfem::Vector matrixColumn(const mfem::DenseMatrix& A, int j) @@ -264,8 +198,30 @@ mfem::DenseMatrix columnsToMatrix(const std::vector& cols) return A; } +/** + * @brief Solves the exact trust region subproblem: + * min 1/2 x^T A x - b^T x, subject to ||x|| <= delta. + * + * Implements a variant of the Moore-Sorensen algorithm: + * 1. Computes the eigensystem of A. + * 2. Checks if the unconstrained minimum lies strictly inside the trust region. + * 3. Checks for the "hard case" where the minimum eigenvalue is near zero or negative, + * and the Newton step points outside the trust region, requiring a shift along the leftmost eigenvector. + * 4. Otherwise, performs a Newton iteration on the secular equation (1/||p(\lambda)|| - 1/delta = 0) + * to find the optimal Lagrange multiplier \lambda. + * + * @param A The reduced Hessian matrix (square). + * @param b The reduced gradient vector. + * @param delta The trust region radius. + * @param num_leftmost The number of leftmost eigenvectors/values to return. + * @return A tuple containing: + * - The optimal solution vector. + * - A list of the leftmost eigenvectors. + * - A list of the corresponding leftmost eigenvalues. + * - A boolean indicating success. 
+ */ std::tuple, std::vector, bool> exactTrustRegionSolve( - mfem::DenseMatrix A, const mfem::Vector& b, double delta, int num_leftmost) + mfem::DenseMatrix A, const mfem::Vector& b, double delta, int num_leftmost, mfem::Vector& workspace) { if (A.Height() != A.Width()) { throw PetscException("Exact trust region solver requires square matrices"); @@ -274,8 +230,17 @@ std::tuple, std::vector, bool> e throw PetscException("The right hand size for exact trust region solve must be consistent with the input matrix size"); } - mfem::Vector sigs; - mfem::DenseMatrix V; + int offset = 0; + auto alloc_vector = [&](int size) { + mfem::Vector v(workspace.GetData() + offset, size); + offset += size; + return v; + }; + + mfem::Vector sigs = alloc_vector(b.Size()); + mfem::DenseMatrix V(workspace.GetData() + offset, b.Size(), b.Size()); + offset += b.Size() * b.Size(); + A.Eigensystem(sigs, V); std::vector leftmosts; std::vector minsigs; @@ -288,13 +253,14 @@ std::tuple, std::vector, bool> e const mfem::Vector leftMost = matrixColumn(V, 0); const double minSig = sigs[0]; - mfem::Vector bv(sigs.Size()); + mfem::Vector bv = alloc_vector(sigs.Size()); for (int i = 0; i < sigs.Size(); ++i) { const mfem::Vector vi = matrixColumn(V, i); bv[i] = dot(vi, b); } - mfem::Vector bvOverSigs = pointwiseDivide(bv, sigs); + mfem::Vector bvOverSigs = alloc_vector(sigs.Size()); + for (int i = 0; i < sigs.Size(); ++i) bvOverSigs[i] = bv[i] / sigs[i]; const double sigScale = sumAbs(sigs) / sigs.Size(); const double eps = 1e-12 * sigScale; @@ -303,11 +269,12 @@ std::tuple, std::vector, bool> e } double lam = minSig < eps ? 
-minSig + eps : 0.0; - mfem::Vector sigsPlusLam = sigs + lam; - bvOverSigs = pointwiseDivide(bv, sigsPlusLam); + mfem::Vector sigsPlusLam = alloc_vector(sigs.Size()); + for (int i = 0; i < sigs.Size(); ++i) sigsPlusLam[i] = sigs[i] + lam; + for (int i = 0; i < sigs.Size(); ++i) bvOverSigs[i] = bv[i] / sigsPlusLam[i]; if ((minSig < eps) && (norm(bvOverSigs) < delta)) { - mfem::Vector p(b.Size()); + mfem::Vector p = alloc_vector(b.Size()); p = 0.0; for (int i = 0; i < b.Size(); ++i) { const mfem::Vector vi = matrixColumn(V, i); @@ -321,8 +288,8 @@ std::tuple, std::vector, bool> e const double tau1 = -pz + std::sqrt(pz * pz + ddmpp); const double tau2 = -pz - std::sqrt(pz * pz + ddmpp); - mfem::Vector x1(p); - mfem::Vector x2(p); + mfem::Vector x1 = alloc_vector(p.Size()); x1 = p; + mfem::Vector x2 = alloc_vector(p.Size()); x2 = p; x1.Add(tau1, leftMost); x2.Add(tau2, leftMost); @@ -332,8 +299,9 @@ std::tuple, std::vector, bool> e return std::make_tuple(e1 < e2 ? x1 : x2, leftmosts, minsigs, true); } - const mfem::Vector bvbv = pointwiseMultiply(bv, bv); - sigsPlusLam = sigs + lam; + mfem::Vector bvbv = alloc_vector(bv.Size()); + for(int i=0; i, std::vector, bool> e while ((std::abs(bError) > 1e-9) && (iters++ < maxIters)) { const double qNormSq = qnormSquared(bvbv, sigsPlusLam); lam += (pNormSq / qNormSq) * bError; - sigsPlusLam = sigs + lam; + for (int i = 0; i < sigs.Size(); ++i) sigsPlusLam[i] = sigs[i] + lam; pNormSq = pnormSquared(bvbv, sigsPlusLam); pNorm = std::sqrt(pNormSq); bError = (pNorm - delta) / delta; @@ -352,9 +320,9 @@ std::tuple, std::vector, bool> e const bool success = iters < maxIters; - bvOverSigs = pointwiseDivide(bv, sigsPlusLam); + for (int i = 0; i < sigs.Size(); ++i) bvOverSigs[i] = bv[i] / sigsPlusLam[i]; - mfem::Vector x(b.Size()); + mfem::Vector x = alloc_vector(b.Size()); x = 0.0; for (int i = 0; i < b.Size(); ++i) { const mfem::Vector vi = matrixColumn(V, i); @@ -362,7 +330,8 @@ std::tuple, std::vector, bool> e } const double e1 = 
quadraticEnergy(A, b, x); - mfem::Vector neg_x(x); + mfem::Vector neg_x = alloc_vector(x.Size()); + neg_x = x; neg_x *= -1.0; const double e2 = quadraticEnergy(A, b, neg_x); @@ -425,7 +394,7 @@ mfem::Vector combineDirections(const std::vector& states, c TrustRegionSubspaceResult solveSubspaceProblemMfem(const std::vector& states, const std::vector& Astates, - const mfem::Vector& b, double delta, int num_leftmost) + const mfem::Vector& b, double delta, int num_leftmost, mfem::Vector& workspace) { SMITH_MARK_FUNCTION; SubspaceProjections projections = projectSubspaceGlobally(states, Astates, b); @@ -445,6 +414,11 @@ TrustRegionSubspaceResult solveSubspaceProblemMfem(const std::vector>{}, std::vector{}, 0.0); + } if (T.Width() == 0) { throw PetscException("No independent directions in MFEM subspace solve."); } @@ -454,7 +428,7 @@ TrustRegionSubspaceResult solveSubspaceProblemMfem(const std::vector remove_at(const std::vector& states, const std::vector& Astates, - const mfem::Vector& b, double delta, int num_leftmost) + const mfem::Vector& b, double delta, int num_leftmost, mfem::Vector& workspace) { SMITH_MARK_FUNCTION; DenseMat sAs1 = dot(states, Astates); @@ -318,7 +318,7 @@ TrustRegionSubspaceResult solveSubspaceProblemPetsc(const std::vectorMult(rCurrent, Pr); + } else { + Pr = rCurrent; + } + + // d = -Pr + d = Pr; + d *= -1.0; + + z = 0.0; + double zz = 0.; + + // rPr = dot(rCurrent, Pr) + auto rPr_arr = delegate.dot_many_2(rCurrent, Pr, rCurrent, Pr); // We only need the first + double rPr = rPr_arr[0]; + + for (cgIter = 1; cgIter <= settings.max_cg_iterations; ++cgIter) { + H.Mult(d, Hd); + + auto dots = delegate.dot_many_4(d, rCurrent, d, Hd, z, d, d, d); + double descent_check = dots[0]; + double curvature = dots[1]; + double zd = dots[2]; + double dd = dots[3]; + + if (descent_check > 0) { + d *= -1; + Hd *= -1; + results.interior_status = TrustRegionResults::Status::NonDescentDirection; + descent_check *= -1.0; + curvature *= -1.0; + zd *= -1.0; + } + + 
const double alphaCg = curvature != 0.0 ? rPr / curvature : 0.0; + const double zzNp1 = zz + 2.0 * alphaCg * zd + alphaCg * alphaCg * dd; + + const bool go_to_boundary = curvature <= 0 || zzNp1 >= trSize * trSize; + if (go_to_boundary) { + delegate.projectToBoundaryWithCoefs(z, d, trSize, zz, zd, dd); + if (curvature <= 0) { + results.interior_status = TrustRegionResults::Status::NegativeCurvature; + } else { + results.interior_status = TrustRegionResults::Status::OnBoundary; + } + return; + } + + auto& zPred = Pr; + smith_add(z, alphaCg, d, zPred); + z = zPred; + + if (results.interior_status == TrustRegionResults::Status::NonDescentDirection) { + return; + } + + smith_add(rCurrent, alphaCg, Hd, rCurrent); + + if (P) { + P->Mult(rCurrent, Pr); + } else { + Pr = rCurrent; + } + + auto dots2 = delegate.dot_many_2(rCurrent, Pr, rCurrent, rCurrent); + double rPrNp1 = dots2[0]; + double r_current_norm_squared = dots2[1]; + + if (r_current_norm_squared <= cg_tol_squared && cgIter >= settings.min_cg_iterations) { + return; + } + + double beta = rPrNp1 / rPr; + rPr = rPrNp1; + d *= beta; + d.Add(-1.0, Pr); + + zz = zzNp1; + } + cgIter--; +} + +} // namespace smith \ No newline at end of file diff --git a/src/smith/numerics/steihaug_toint_cg.hpp b/src/smith/numerics/steihaug_toint_cg.hpp new file mode 100644 index 0000000000..ad7a27e66d --- /dev/null +++ b/src/smith/numerics/steihaug_toint_cg.hpp @@ -0,0 +1,140 @@ +// Copyright (c) Lawrence Livermore National Security, LLC and +// other Smith Project Developers. See the top-level LICENSE file for +// details. 
+// +// SPDX-License-Identifier: (BSD-3-Clause) + +#pragma once + +#include "mfem.hpp" +#include + +namespace smith { + +/// Internal structure for storing trust region settings +struct TrustRegionSettings { + /// cg tol + double cg_tol = 1e-8; + /// min cg iters + size_t min_cg_iterations = 0; // + /// max cg iters should be around # of system dofs + size_t max_cg_iterations = 10000; // + /// max cumulative iterations + size_t max_cumulative_iteration = 1; + /// minimum trust region size + double min_tr_size = 1e-13; + /// trust region decrease factor + double t1 = 0.25; + /// trust region increase factor + double t2 = 1.75; + /// worse case energy drop ratio. trust region accepted if energy drop is better than this. + double eta1 = 1e-9; + /// non-ideal energy drop ratio. trust region decreases if energy drop is worse than this. + double eta2 = 0.1; + /// ideal energy drop ratio. trust region increases if energy drop is better than this. + double eta3 = 0.6; + /// parameter limiting how fast the energy can drop relative to the prediction (in case the energy surrogate is poor) + double eta4 = 4.2; +}; + +/// Internal structure for storing trust region stateful data +struct TrustRegionResults { + /// Constructor takes the size of the solution vector + TrustRegionResults(int size) + { + z.SetSize(size); + H_z.SetSize(size); + d_old.SetSize(size); + H_d_old.SetSize(size); + H_d_old_at_accept.SetSize(size); + d.SetSize(size); + H_d.SetSize(size); + Pr.SetSize(size); + cauchy_point.SetSize(size); + H_cauchy_point.SetSize(size); + z = 0.0; + H_z = 0.0; + d_old = 0.0; + H_d_old = 0.0; + H_d_old_at_accept = 0.0; + d = 0.0; + H_d = 0.0; + Pr = 0.0; + cauchy_point = 0.0; + H_cauchy_point = 0.0; + } + + /// resets trust region results for a new outer iteration + void reset() + { + z = 0.0; + cauchy_point = 0.0; + } + + /// enumerates the possible final status of the trust region steps + enum class Status + { + Interior, + NegativeCurvature, + OnBoundary, + 
NonDescentDirection + }; + + /// step direction + mfem::Vector z; + /// action of hessian on current step z + mfem::Vector H_z; + /// old step direction + mfem::Vector d_old; + /// action of hessian on previous step z_old + mfem::Vector H_d_old; + /// action of previous accepted hessian on previous step z_old + mfem::Vector H_d_old_at_accept; + /// true after at least one accepted line-search step has populated d_old + bool has_d_old = false; + /// incrementalCG direction + mfem::Vector d; + /// action of hessian on direction d + mfem::Vector H_d; + /// preconditioned residual + mfem::Vector Pr; + /// cauchy point + mfem::Vector cauchy_point; + /// action of hessian on direction of cauchy point + mfem::Vector H_cauchy_point; + /// specifies if step is interior, exterior, negative curvature, etc. + Status interior_status = Status::Interior; + /// iteration counter + size_t cg_iterations_count = 0; +}; + +class SteihaugTointDelegate { +public: + virtual ~SteihaugTointDelegate() = default; + + virtual std::array dot_many_4(const mfem::Vector& a0, const mfem::Vector& b0, + const mfem::Vector& a1, const mfem::Vector& b1, + const mfem::Vector& a2, const mfem::Vector& b2, + const mfem::Vector& a3, const mfem::Vector& b3) const = 0; + + virtual std::array dot_many_2(const mfem::Vector& a0, const mfem::Vector& b0, + const mfem::Vector& a1, const mfem::Vector& b1) const = 0; + + virtual void projectToBoundaryWithCoefs(mfem::Vector& z, const mfem::Vector& d, double delta, double zz, double zd, + double dd) const = 0; +}; + +/** + * @brief Minimize quadratic sub-problem given residual vector, the action of the stiffness and a preconditioner + * + * This is a standard implementation of 'The Conjugate Gradient Method and Trust Regions in Large Scale Optimization' + * by T. Steihaug. It is also called the Steihaug-Toint CG trust region algorithm (see also Trust Region Methods + * by Conn, Gould, and Toint). 
+ */ +void steihaugTointCG(const mfem::Vector& r0, mfem::Vector& rCurrent, + const mfem::Operator& H, const mfem::Solver* P, + const TrustRegionSettings& settings, double& trSize, + TrustRegionResults& results, double r0_norm_squared, + const SteihaugTointDelegate& delegate); + +} // namespace smith \ No newline at end of file diff --git a/src/smith/numerics/tests/CMakeLists.txt b/src/smith/numerics/tests/CMakeLists.txt index a2577051e2..617a0a4f11 100644 --- a/src/smith/numerics/tests/CMakeLists.txt +++ b/src/smith/numerics/tests/CMakeLists.txt @@ -10,6 +10,7 @@ set(numerics_serial_test_sources test_equationsolver.cpp test_operator.cpp test_odes.cpp + test_steihaug_toint_cg.cpp test_block_preconditioner.cpp test_block_preconditioner_backend.cpp test_block_preconditioner_custom_operators.cpp diff --git a/src/smith/numerics/tests/test_steihaug_toint_cg.cpp b/src/smith/numerics/tests/test_steihaug_toint_cg.cpp new file mode 100644 index 0000000000..755419667a --- /dev/null +++ b/src/smith/numerics/tests/test_steihaug_toint_cg.cpp @@ -0,0 +1,133 @@ +// Copyright (c) Lawrence Livermore National Security, LLC and +// other Smith Project Developers. See the top-level LICENSE file for +// details. 
+// +// SPDX-License-Identifier: (BSD-3-Clause) + +#include +#include "smith/numerics/steihaug_toint_cg.hpp" + +namespace { + +class TestDelegate : public smith::SteihaugTointDelegate { +public: + std::array dot_many_4(const mfem::Vector& a0, const mfem::Vector& b0, + const mfem::Vector& a1, const mfem::Vector& b1, + const mfem::Vector& a2, const mfem::Vector& b2, + const mfem::Vector& a3, const mfem::Vector& b3) const override + { + return {a0 * b0, a1 * b1, a2 * b2, a3 * b3}; + } + + std::array dot_many_2(const mfem::Vector& a0, const mfem::Vector& b0, + const mfem::Vector& a1, const mfem::Vector& b1) const override + { + return {a0 * b0, a1 * b1}; + } + + void projectToBoundaryWithCoefs(mfem::Vector& z, const mfem::Vector& d, double delta, double zz, double zd, + double dd) const override + { + double deltadelta_m_zz = delta * delta - zz; + if (deltadelta_m_zz <= 0) return; + double tau = (std::sqrt(deltadelta_m_zz * dd + zd * zd) - zd) / dd; + z.Add(tau, d); + } +}; + +class DiagonalOperator : public mfem::Operator { +public: + DiagonalOperator(const mfem::Vector& diag) : mfem::Operator(diag.Size()), diag_(diag) {} + void Mult(const mfem::Vector& x, mfem::Vector& y) const override + { + for (int i = 0; i < height; ++i) { + y[i] = diag_[i] * x[i]; + } + } +private: + const mfem::Vector& diag_; +}; + +} // namespace + +TEST(SteihaugTointCG, SolvesSPDInsideBoundary) +{ + int size = 2; + mfem::Vector diag(size); + diag[0] = 2.0; + diag[1] = 4.0; + DiagonalOperator H(diag); + + mfem::Vector r0(size); + r0[0] = 1.0; + r0[1] = 1.0; + + smith::TrustRegionSettings settings; + settings.cg_tol = 1e-10; + settings.max_cg_iterations = 10; + + double trSize = 100.0; // Huge trust region + smith::TrustRegionResults results(size); + + mfem::Vector rCurrent(size); + TestDelegate delegate; + + smith::steihaugTointCG(r0, rCurrent, H, nullptr, settings, trSize, results, r0.Norml2() * r0.Norml2(), delegate); + + // Solution should be H^{-1} (-r0) + // x = -0.5, y = -0.25 + 
EXPECT_NEAR(results.z[0], -0.5, 1e-9); + EXPECT_NEAR(results.z[1], -0.25, 1e-9); + EXPECT_EQ(results.interior_status, smith::TrustRegionResults::Status::Interior); +} + +TEST(SteihaugTointCG, HitsBoundary) +{ + int size = 1; + mfem::Vector diag(size); + diag[0] = 1.0; + DiagonalOperator H(diag); + + mfem::Vector r0(size); + r0[0] = 1.0; + + smith::TrustRegionSettings settings; + settings.max_cg_iterations = 10; + + double trSize = 0.5; // Small trust region, solution would be -1.0 + smith::TrustRegionResults results(size); + + mfem::Vector rCurrent(size); + TestDelegate delegate; + + smith::steihaugTointCG(r0, rCurrent, H, nullptr, settings, trSize, results, r0.Norml2() * r0.Norml2(), delegate); + + EXPECT_NEAR(results.z.Norml2(), 0.5, 1e-9); + EXPECT_EQ(results.interior_status, smith::TrustRegionResults::Status::OnBoundary); +} + +TEST(SteihaugTointCG, DetectsNegativeCurvature) +{ + int size = 1; + mfem::Vector diag(size); + diag[0] = -1.0; // Negative curvature + DiagonalOperator H(diag); + + mfem::Vector r0(size); + r0[0] = 1.0; + + smith::TrustRegionSettings settings; + settings.max_cg_iterations = 10; + + double trSize = 2.0; + smith::TrustRegionResults results(size); + + mfem::Vector rCurrent(size); + TestDelegate delegate; + + smith::steihaugTointCG(r0, rCurrent, H, nullptr, settings, trSize, results, r0.Norml2() * r0.Norml2(), delegate); + + // For negative curvature, it should go to boundary + EXPECT_NEAR(results.z.Norml2(), 2.0, 1e-9); + EXPECT_EQ(results.interior_status, smith::TrustRegionResults::Status::NegativeCurvature); +} diff --git a/src/smith/numerics/tests/test_trust_region_solver_mfem.cpp b/src/smith/numerics/tests/test_trust_region_solver_mfem.cpp index 62c7730205..a476c02ee5 100644 --- a/src/smith/numerics/tests/test_trust_region_solver_mfem.cpp +++ b/src/smith/numerics/tests/test_trust_region_solver_mfem.cpp @@ -75,43 +75,6 @@ struct DiagonalSubspaceFixture { } // namespace -TEST(TrustRegionSubspaceMfem, 
RemoveDependentDirectionsDropsDuplicatesAndZero) -{ - mfem::Vector d1(4); - mfem::Vector d2(4); - mfem::Vector d3(4); - mfem::Vector hd1(4); - mfem::Vector hd2(4); - mfem::Vector hd3(4); - - d1 = 0.0; - d2 = 0.0; - d3 = 0.0; - hd1 = 0.0; - hd2 = 0.0; - hd3 = 0.0; - - d1[0] = 1.0; - d1[1] = 2.0; - d2 = d1; - d2 *= 3.0; - - hd1[0] = 2.0; - hd1[1] = 5.0; - hd2 = hd1; - hd2 *= 3.0; - - std::vector dirs = {&d1, &d2, &d3}; - std::vector hdirs = {&hd1, &hd2, &hd3}; - - auto [dirs_new, hdirs_new] = smith::removeDependentDirections(dirs, hdirs); - - ASSERT_EQ(dirs_new.size(), 1); - ASSERT_EQ(hdirs_new.size(), 1); - expectNearVector(*dirs_new[0], d1, 0.0); - expectNearVector(*hdirs_new[0], hd1, 0.0); -} - TEST(TrustRegionSubspaceMfem, SolveHitsTrustRegionBoundary) @@ -122,8 +85,9 @@ TEST(TrustRegionSubspaceMfem, SolveHitsTrustRegionBoundary) const auto astates = applyDiagonalOperator(fixture.diag, states); const auto astate_ptrs = toPointers(astates); + mfem::Vector workspace(2000); auto [sol, leftvecs, leftvals, energy] = - smith::solveSubspaceProblemMfem(states, astate_ptrs, fixture.b, test_delta, 1); + smith::solveSubspaceProblemMfem(states, astate_ptrs, fixture.b, test_delta, 1, workspace); EXPECT_NEAR(sol.Norml2(), test_delta, 1.0e-12); EXPECT_FALSE(leftvecs.empty()); @@ -139,10 +103,11 @@ TEST(TrustRegionSubspaceMfem, GenericSolveUsesMfemBackend) const auto astates = applyDiagonalOperator(fixture.diag, states); const auto astate_ptrs = toPointers(astates); + mfem::Vector workspace(2000); auto [generic_sol, generic_leftvecs, generic_leftvals, generic_energy] = - smith::solveSubspaceProblem(states, astate_ptrs, fixture.b, test_delta, 2); + smith::solveSubspaceProblem(states, astate_ptrs, fixture.b, test_delta, 2, workspace); auto [mfem_sol, mfem_leftvecs, mfem_leftvals, mfem_energy] = - smith::solveSubspaceProblemMfem(states, astate_ptrs, fixture.b, test_delta, 2); + smith::solveSubspaceProblemMfem(states, astate_ptrs, fixture.b, test_delta, 2, workspace); 
expectNearVector(generic_sol, mfem_sol, 1.0e-12); ASSERT_EQ(generic_leftvecs.size(), mfem_leftvecs.size()); @@ -182,7 +147,8 @@ TEST(TrustRegionSubspaceMfem, SolveHandlesZeroDirection) const auto astates = applyDiagonalOperator(diag, states); const auto astate_ptrs = toPointers(astates); - auto [sol, leftvecs, leftvals, energy] = smith::solveSubspaceProblemMfem(states, astate_ptrs, b, 0.25, 1); + mfem::Vector workspace(2000); + auto [sol, leftvecs, leftvals, energy] = smith::solveSubspaceProblemMfem(states, astate_ptrs, b, 0.25, 1, workspace); EXPECT_LE(sol.Norml2(), 0.25 + 1.0e-12); EXPECT_FALSE(leftvecs.empty()); diff --git a/src/smith/numerics/tests/test_trust_region_solver_petsc.cpp b/src/smith/numerics/tests/test_trust_region_solver_petsc.cpp index 55c7a16f77..d0746e83b9 100644 --- a/src/smith/numerics/tests/test_trust_region_solver_petsc.cpp +++ b/src/smith/numerics/tests/test_trust_region_solver_petsc.cpp @@ -153,7 +153,8 @@ TEST_F(MeshFixture, PetscSubspaceSolveHitsTrustRegionBoundary) } double delta = 0.001; - auto [sol, leftvecs, leftvals, energy] = smith::solveSubspaceProblemPetsc(states, AstatePtrs, b, delta, 1); + mfem::Vector workspace(2000); + auto [sol, leftvecs, leftvals, energy] = smith::solveSubspaceProblemPetsc(states, AstatePtrs, b, delta, 1, workspace); EXPECT_NEAR(sol.Norml2(), delta, 1e-12); EXPECT_FALSE(leftvecs.empty()); @@ -190,10 +191,11 @@ TEST_F(MeshFixture, MfemSubspaceSolveMatchesPetsc) AstatePtrs.push_back(&Astates[i]); } + mfem::Vector workspace(2000); auto [petsc_sol, petsc_leftvecs, petsc_leftvals, petsc_energy] = - smith::solveSubspaceProblemPetsc(states, AstatePtrs, b, 0.001, 2); + smith::solveSubspaceProblemPetsc(states, AstatePtrs, b, 0.001, 2, workspace); auto [mfem_sol, mfem_leftvecs, mfem_leftvals, mfem_energy] = - smith::solveSubspaceProblemMfem(states, AstatePtrs, b, 0.001, 2); + smith::solveSubspaceProblemMfem(states, AstatePtrs, b, 0.001, 2, workspace); expectNearVector(mfem_sol, petsc_sol, 1e-10); 
ASSERT_EQ(mfem_leftvecs.size(), petsc_leftvecs.size()); diff --git a/src/smith/numerics/trust_region_solver.hpp b/src/smith/numerics/trust_region_solver.hpp index 2f5290b93f..9d26fc3c36 100644 --- a/src/smith/numerics/trust_region_solver.hpp +++ b/src/smith/numerics/trust_region_solver.hpp @@ -55,19 +55,16 @@ double innerProduct(const mfem::Vector& a, const mfem::Vector& b, const MPI_Comm /// and their eigenvalues, and the predicted model energy change TrustRegionSubspaceResult solveSubspaceProblem( const std::vector& directions, const std::vector& A_directions, - const mfem::Vector& b, double delta, int num_leftmost); + const mfem::Vector& b, double delta, int num_leftmost, mfem::Vector& workspace); #if defined(SMITH_USE_SLEPC) && defined(SMITH_TRUST_REGION_USE_PETSC_SUBSPACE) TrustRegionSubspaceResult solveSubspaceProblemPetsc( const std::vector& directions, const std::vector& A_directions, - const mfem::Vector& b, double delta, int num_leftmost); + const mfem::Vector& b, double delta, int num_leftmost, mfem::Vector& workspace); #endif TrustRegionSubspaceResult solveSubspaceProblemMfem( const std::vector& directions, const std::vector& A_directions, - const mfem::Vector& b, double delta, int num_leftmost); - -std::pair, std::vector> removeDependentDirections( - std::vector directions, std::vector A_directions); + const mfem::Vector& b, double delta, int num_leftmost, mfem::Vector& workspace); } // namespace smith diff --git a/src/smith/physics/tests/shallow_arch_buckling.cpp b/src/smith/physics/tests/shallow_arch_buckling.cpp index 6b1d324712..00ca99d7fa 100644 --- a/src/smith/physics/tests/shallow_arch_buckling.cpp +++ b/src/smith/physics/tests/shallow_arch_buckling.cpp @@ -47,8 +47,7 @@ NonlinearSolver selectedNonlinearSolver() return NonlinearSolver::TrustRegion; } - throw std::runtime_error("Unknown --solver value '" + solver_name + - "'. Use NewtonLineSearch or TrustRegion."); + throw std::runtime_error("Unknown --solver value '" + solver_name + "'. 
Use NewtonLineSearch or TrustRegion."); } void parseCommandLine(int& argc, char** argv) @@ -121,7 +120,7 @@ TEST(ShallowArchBuckling, CompressedThinBeamSnapThrough) .preconditioner = Preconditioner::HypreJacobi, .relative_tol = 1.0e-8, .absolute_tol = 1.0e-14, - .max_iterations = 10000, + .max_iterations = 100000, .print_level = 0}; smith::NonlinearSolverOptions nonlinear_options{ @@ -165,8 +164,8 @@ TEST(ShallowArchBuckling, CompressedThinBeamSnapThrough) if (rank == 0) { mfem::out << "Compressed thin beam snap-through run: solver = " << solver_name << ", trust_subspace_option = " << trust_subspace_option - << ", trust_num_leftmost = " << trust_num_leftmost - << ", trust_num_past_steps = " << trust_num_past_steps << '\n'; + << ", trust_num_leftmost = " << trust_num_leftmost << ", trust_num_past_steps = " << trust_num_past_steps + << '\n'; } constexpr int num_steps = 5; @@ -177,7 +176,6 @@ TEST(ShallowArchBuckling, CompressedThinBeamSnapThrough) } solid.outputStateToDisk("shallow_arch_buckling"); } - } } // namespace smith From d426fc5b8e15f873b93771c0fd883e73108a1204 Mon Sep 17 00:00:00 2001 From: Michael Tupek Date: Sat, 9 May 2026 17:17:06 -0600 Subject: [PATCH 16/27] style. 
--- src/smith/numerics/equation_solver.cpp | 33 ++++++------- .../numerics/mfem_trust_region_subspace.cpp | 40 +++++++--------- .../numerics/petsc_trust_region_subspace.cpp | 3 +- src/smith/numerics/solver_config.hpp | 3 +- src/smith/numerics/steihaug_toint_cg.cpp | 26 +++++----- src/smith/numerics/steihaug_toint_cg.hpp | 25 +++++----- .../numerics/tests/test_steihaug_toint_cg.cpp | 48 +++++++++---------- .../tests/test_trust_region_solver_mfem.cpp | 13 ++--- src/smith/numerics/trust_region_solver.hpp | 24 ++++++---- 9 files changed, 99 insertions(+), 116 deletions(-) diff --git a/src/smith/numerics/equation_solver.cpp b/src/smith/numerics/equation_solver.cpp index cb82935b5d..0780abc23b 100644 --- a/src/smith/numerics/equation_solver.cpp +++ b/src/smith/numerics/equation_solver.cpp @@ -246,8 +246,6 @@ class NewtonSolver : public mfem::NewtonSolver { } }; - - /// trust region printing utility function void printTrustRegionInfo(double realWork, double modelObjective, size_t cgIters, double trSize, bool willAccept) { @@ -285,7 +283,7 @@ class TrustRegion : public mfem::NewtonSolver, public SteihaugTointDelegate { mutable mfem::Vector solve_start_x; mutable mfem::Vector min_residual_x; mutable double min_residual_norm = -1.0; - + /// Workspace vector for exact subspace solver to avoid small allocations mutable mfem::Vector exact_solver_workspace; @@ -332,14 +330,13 @@ class TrustRegion : public mfem::NewtonSolver, public SteihaugTointDelegate { std::array sizes; std::array ptr_a; std::array ptr_b; - + auto populate_arrays = [&](std::index_sequence) { (( - sizes[I] = std::get<2 * I>(tuple_args).Size(), - [&](){ MFEM_ASSERT(sizes[I] == std::get<2 * I + 1>(tuple_args).Size(), "Incompatible vector sizes."); }(), - ptr_a[I] = std::get<2 * I>(tuple_args).GetData(), - ptr_b[I] = std::get<2 * I + 1>(tuple_args).GetData() - ), ...); + sizes[I] = std::get<2 * I>(tuple_args).Size(), + [&]() { MFEM_ASSERT(sizes[I] == std::get<2 * I + 1>(tuple_args).Size(), "Incompatible vector 
sizes."); }(), + ptr_a[I] = std::get<2 * I>(tuple_args).GetData(), ptr_b[I] = std::get<2 * I + 1>(tuple_args).GetData()), + ...); }; populate_arrays(std::make_index_sequence{}); @@ -407,16 +404,15 @@ class TrustRegion : public mfem::NewtonSolver, public SteihaugTointDelegate { } } - std::array dot_many_4(const mfem::Vector& a0, const mfem::Vector& b0, - const mfem::Vector& a1, const mfem::Vector& b1, - const mfem::Vector& a2, const mfem::Vector& b2, + std::array dot_many_4(const mfem::Vector& a0, const mfem::Vector& b0, const mfem::Vector& a1, + const mfem::Vector& b1, const mfem::Vector& a2, const mfem::Vector& b2, const mfem::Vector& a3, const mfem::Vector& b3) const override { return dot_many(a0, b0, a1, b1, a2, b2, a3, b3); } - std::array dot_many_2(const mfem::Vector& a0, const mfem::Vector& b0, - const mfem::Vector& a1, const mfem::Vector& b1) const override + std::array dot_many_2(const mfem::Vector& a0, const mfem::Vector& b0, const mfem::Vector& a1, + const mfem::Vector& b1) const override { return dot_many(a0, b0, a1, b1); } @@ -544,9 +540,9 @@ class TrustRegion : public mfem::NewtonSolver, public SteihaugTointDelegate { } /// Minimize quadratic sub-problem given residual vector, the action of the stiffness and a preconditioner - void solveModelProblem(const mfem::Vector& r0, mfem::Vector& rCurrent, const mfem::Operator& H, - const mfem::Solver* P, const TrustRegionSettings& settings, double& trSize, - TrustRegionResults& results, double r0_norm_squared) const + void solveModelProblem(const mfem::Vector& r0, mfem::Vector& rCurrent, const mfem::Operator& H, const mfem::Solver* P, + const TrustRegionSettings& settings, double& trSize, TrustRegionResults& results, + double r0_norm_squared) const { steihaugTointCG(r0, rCurrent, H, P, settings, trSize, results, r0_norm_squared, *this); } @@ -727,8 +723,7 @@ class TrustRegion : public mfem::NewtonSolver, public SteihaugTointDelegate { trResults.interior_status = TrustRegionResults::Status::OnBoundary; } else { 
settings.cg_tol = std::max(0.5 * norm_goal, 5e-5 * norm); - solveModelProblem(r, scratch, *this->oper, &this->tr_precond, settings, tr_size, trResults, - norm * norm); + solveModelProblem(r, scratch, *this->oper, &this->tr_precond, settings, tr_size, trResults, norm * norm); } cumulative_cg_iters_from_last_precond_update += trResults.cg_iterations_count; diff --git a/src/smith/numerics/mfem_trust_region_subspace.cpp b/src/smith/numerics/mfem_trust_region_subspace.cpp index 65652e2d73..716a2700ab 100644 --- a/src/smith/numerics/mfem_trust_region_subspace.cpp +++ b/src/smith/numerics/mfem_trust_region_subspace.cpp @@ -31,24 +31,17 @@ double innerProduct(const mfem::Vector& a, const mfem::Vector& b, const MPI_Comm TrustRegionSubspaceResult solveSubspaceProblem(const std::vector& directions, const std::vector& A_directions, - const mfem::Vector& b, double delta, int num_leftmost, mfem::Vector& workspace) + const mfem::Vector& b, double delta, int num_leftmost, + mfem::Vector& workspace) { return solveSubspaceProblemMfem(directions, A_directions, b, delta, num_leftmost, workspace); } namespace { -double dot(const mfem::Vector& a, const mfem::Vector& b) -{ - return a * b; -} - -double norm(const mfem::Vector& x) -{ - return x.Norml2(); -} - +double dot(const mfem::Vector& a, const mfem::Vector& b) { return a * b; } +double norm(const mfem::Vector& x) { return x.Norml2(); } double sumAbs(const mfem::Vector& x) { @@ -99,9 +92,7 @@ SubspaceProjections globalSubspaceProjectionFromLocalInnerProducts(const std::ve { const int n = static_cast(states.size()); const int triangular_size = n * (n + 1) / 2; - const auto triangular_index = [n](int i, int j) { - return i * n - (i * (i - 1)) / 2 + (j - i); - }; + const auto triangular_index = [n](int i, int j) { return i * n - (i * (i - 1)) / 2 + (j - i); }; const int sAs_offset = 0; const int ss_offset = triangular_size; const int sb_offset = 2 * triangular_size; @@ -113,8 +104,7 @@ SubspaceProjections 
globalSubspaceProjectionFromLocalInnerProducts(const std::ve local_projection_entries[size_t(sb_offset + i)] = mfem::InnerProduct(*states[size_t(i)], b); for (int j = i; j < n; ++j) { const size_t ij = size_t(triangular_index(i, j)); - local_projection_entries[size_t(sAs_offset) + ij] = - mfem::InnerProduct(*states[size_t(i)], *Astates[size_t(j)]); + local_projection_entries[size_t(sAs_offset) + ij] = mfem::InnerProduct(*states[size_t(i)], *Astates[size_t(j)]); local_projection_entries[size_t(ss_offset) + ij] = mfem::InnerProduct(*states[size_t(i)], *states[size_t(j)]); } } @@ -227,7 +217,8 @@ std::tuple, std::vector, bool> e throw PetscException("Exact trust region solver requires square matrices"); } if (A.Height() != b.Size()) { - throw PetscException("The right hand size for exact trust region solve must be consistent with the input matrix size"); + throw PetscException( + "The right hand size for exact trust region solve must be consistent with the input matrix size"); } int offset = 0; @@ -288,8 +279,10 @@ std::tuple, std::vector, bool> e const double tau1 = -pz + std::sqrt(pz * pz + ddmpp); const double tau2 = -pz - std::sqrt(pz * pz + ddmpp); - mfem::Vector x1 = alloc_vector(p.Size()); x1 = p; - mfem::Vector x2 = alloc_vector(p.Size()); x2 = p; + mfem::Vector x1 = alloc_vector(p.Size()); + x1 = p; + mfem::Vector x2 = alloc_vector(p.Size()); + x2 = p; x1.Add(tau1, leftMost); x2.Add(tau2, leftMost); @@ -300,7 +293,7 @@ std::tuple, std::vector, bool> e } mfem::Vector bvbv = alloc_vector(bv.Size()); - for(int i=0; i& states, c TrustRegionSubspaceResult solveSubspaceProblemMfem(const std::vector& states, const std::vector& Astates, - const mfem::Vector& b, double delta, int num_leftmost, mfem::Vector& workspace) + const mfem::Vector& b, double delta, int num_leftmost, + mfem::Vector& workspace) { SMITH_MARK_FUNCTION; SubspaceProjections projections = projectSubspaceGlobally(states, Astates, b); @@ -454,7 +448,7 @@ TrustRegionSubspaceResult 
solveSubspaceProblem(const std::vector> {}, std::vector {}, 0.0); + return std::make_tuple(b, std::vector>{}, std::vector{}, 0.0); #endif } @@ -463,7 +457,7 @@ TrustRegionSubspaceResult solveSubspaceProblemMfem(const std::vector> {}, std::vector {}, 0.0); + return std::make_tuple(b, std::vector>{}, std::vector{}, 0.0); } #endif // MFEM_USE_LAPACK diff --git a/src/smith/numerics/petsc_trust_region_subspace.cpp b/src/smith/numerics/petsc_trust_region_subspace.cpp index 9669359388..bb82215528 100644 --- a/src/smith/numerics/petsc_trust_region_subspace.cpp +++ b/src/smith/numerics/petsc_trust_region_subspace.cpp @@ -290,7 +290,8 @@ std::vector remove_at(const std::vector& states, const std::vector& Astates, - const mfem::Vector& b, double delta, int num_leftmost, mfem::Vector& workspace) + const mfem::Vector& b, double delta, int num_leftmost, + mfem::Vector& workspace) { SMITH_MARK_FUNCTION; DenseMat sAs1 = dot(states, Astates); diff --git a/src/smith/numerics/solver_config.hpp b/src/smith/numerics/solver_config.hpp index 6cfdc53014..dc031c4d85 100644 --- a/src/smith/numerics/solver_config.hpp +++ b/src/smith/numerics/solver_config.hpp @@ -473,7 +473,8 @@ struct NonlinearSolverOptions { /// Include the displacement from current nonlinear-solve state back to the nonlinear-solve initial state. bool trust_use_solve_start_direction = false; - /// Include the displacement from current nonlinear-solve state to the state with the minimum residual seen so far in this nonlinear solve. + /// Include the displacement from current nonlinear-solve state to the state with the minimum residual seen so far in + /// this nonlinear solve. 
bool trust_use_min_residual_direction = false; /// Should the gradient be converted to a monolithic matrix diff --git a/src/smith/numerics/steihaug_toint_cg.cpp b/src/smith/numerics/steihaug_toint_cg.cpp index 9b4db432da..6c20abea17 100644 --- a/src/smith/numerics/steihaug_toint_cg.cpp +++ b/src/smith/numerics/steihaug_toint_cg.cpp @@ -23,13 +23,11 @@ void smith_add(const mfem::Vector& a, double b, const mfem::Vector& c, mfem::Vec } } -} // namespace +} // namespace -void steihaugTointCG(const mfem::Vector& r0, mfem::Vector& rCurrent, - const mfem::Operator& H, const mfem::Solver* P, - const TrustRegionSettings& settings, double& trSize, - TrustRegionResults& results, double r0_norm_squared, - const SteihaugTointDelegate& delegate) +void steihaugTointCG(const mfem::Vector& r0, mfem::Vector& rCurrent, const mfem::Operator& H, const mfem::Solver* P, + const TrustRegionSettings& settings, double& trSize, TrustRegionResults& results, + double r0_norm_squared, const SteihaugTointDelegate& delegate) { // minimize r0@z + 0.5*z@J@z results.interior_status = TrustRegionResults::Status::Interior; @@ -60,20 +58,20 @@ void steihaugTointCG(const mfem::Vector& r0, mfem::Vector& rCurrent, z = 0.0; double zz = 0.; - + // rPr = dot(rCurrent, Pr) - auto rPr_arr = delegate.dot_many_2(rCurrent, Pr, rCurrent, Pr); // We only need the first + auto rPr_arr = delegate.dot_many_2(rCurrent, Pr, rCurrent, Pr); // We only need the first double rPr = rPr_arr[0]; for (cgIter = 1; cgIter <= settings.max_cg_iterations; ++cgIter) { H.Mult(d, Hd); - + auto dots = delegate.dot_many_4(d, rCurrent, d, Hd, z, d, d, d); double descent_check = dots[0]; double curvature = dots[1]; double zd = dots[2]; double dd = dots[3]; - + if (descent_check > 0) { d *= -1; Hd *= -1; @@ -97,7 +95,7 @@ void steihaugTointCG(const mfem::Vector& r0, mfem::Vector& rCurrent, return; } - auto& zPred = Pr; + auto& zPred = Pr; smith_add(z, alphaCg, d, zPred); z = zPred; @@ -112,11 +110,11 @@ void steihaugTointCG(const 
mfem::Vector& r0, mfem::Vector& rCurrent, } else { Pr = rCurrent; } - + auto dots2 = delegate.dot_many_2(rCurrent, Pr, rCurrent, rCurrent); double rPrNp1 = dots2[0]; double r_current_norm_squared = dots2[1]; - + if (r_current_norm_squared <= cg_tol_squared && cgIter >= settings.min_cg_iterations) { return; } @@ -128,7 +126,7 @@ void steihaugTointCG(const mfem::Vector& r0, mfem::Vector& rCurrent, zz = zzNp1; } - cgIter--; + cgIter--; } } // namespace smith \ No newline at end of file diff --git a/src/smith/numerics/steihaug_toint_cg.hpp b/src/smith/numerics/steihaug_toint_cg.hpp index ad7a27e66d..a2a6087073 100644 --- a/src/smith/numerics/steihaug_toint_cg.hpp +++ b/src/smith/numerics/steihaug_toint_cg.hpp @@ -109,16 +109,15 @@ struct TrustRegionResults { }; class SteihaugTointDelegate { -public: + public: virtual ~SteihaugTointDelegate() = default; - virtual std::array dot_many_4(const mfem::Vector& a0, const mfem::Vector& b0, - const mfem::Vector& a1, const mfem::Vector& b1, - const mfem::Vector& a2, const mfem::Vector& b2, + virtual std::array dot_many_4(const mfem::Vector& a0, const mfem::Vector& b0, const mfem::Vector& a1, + const mfem::Vector& b1, const mfem::Vector& a2, const mfem::Vector& b2, const mfem::Vector& a3, const mfem::Vector& b3) const = 0; - virtual std::array dot_many_2(const mfem::Vector& a0, const mfem::Vector& b0, - const mfem::Vector& a1, const mfem::Vector& b1) const = 0; + virtual std::array dot_many_2(const mfem::Vector& a0, const mfem::Vector& b0, const mfem::Vector& a1, + const mfem::Vector& b1) const = 0; virtual void projectToBoundaryWithCoefs(mfem::Vector& z, const mfem::Vector& d, double delta, double zz, double zd, double dd) const = 0; @@ -126,15 +125,13 @@ class SteihaugTointDelegate { /** * @brief Minimize quadratic sub-problem given residual vector, the action of the stiffness and a preconditioner - * - * This is a standard implementation of 'The Conjugate Gradient Method and Trust Regions in Large Scale Optimization' - * by T. 
Steihaug. It is also called the Steihaug-Toint CG trust region algorithm (see also Trust Region Methods + * + * This is a standard implementation of 'The Conjugate Gradient Method and Trust Regions in Large Scale Optimization' + * by T. Steihaug. It is also called the Steihaug-Toint CG trust region algorithm (see also Trust Region Methods * by Conn, Gould, and Toint). */ -void steihaugTointCG(const mfem::Vector& r0, mfem::Vector& rCurrent, - const mfem::Operator& H, const mfem::Solver* P, - const TrustRegionSettings& settings, double& trSize, - TrustRegionResults& results, double r0_norm_squared, - const SteihaugTointDelegate& delegate); +void steihaugTointCG(const mfem::Vector& r0, mfem::Vector& rCurrent, const mfem::Operator& H, const mfem::Solver* P, + const TrustRegionSettings& settings, double& trSize, TrustRegionResults& results, + double r0_norm_squared, const SteihaugTointDelegate& delegate); } // namespace smith \ No newline at end of file diff --git a/src/smith/numerics/tests/test_steihaug_toint_cg.cpp b/src/smith/numerics/tests/test_steihaug_toint_cg.cpp index 755419667a..bd48fcbba4 100644 --- a/src/smith/numerics/tests/test_steihaug_toint_cg.cpp +++ b/src/smith/numerics/tests/test_steihaug_toint_cg.cpp @@ -10,17 +10,16 @@ namespace { class TestDelegate : public smith::SteihaugTointDelegate { -public: - std::array dot_many_4(const mfem::Vector& a0, const mfem::Vector& b0, - const mfem::Vector& a1, const mfem::Vector& b1, - const mfem::Vector& a2, const mfem::Vector& b2, + public: + std::array dot_many_4(const mfem::Vector& a0, const mfem::Vector& b0, const mfem::Vector& a1, + const mfem::Vector& b1, const mfem::Vector& a2, const mfem::Vector& b2, const mfem::Vector& a3, const mfem::Vector& b3) const override { return {a0 * b0, a1 * b1, a2 * b2, a3 * b3}; } - std::array dot_many_2(const mfem::Vector& a0, const mfem::Vector& b0, - const mfem::Vector& a1, const mfem::Vector& b1) const override + std::array dot_many_2(const mfem::Vector& a0, const 
mfem::Vector& b0, const mfem::Vector& a1, + const mfem::Vector& b1) const override { return {a0 * b0, a1 * b1}; } @@ -36,7 +35,7 @@ class TestDelegate : public smith::SteihaugTointDelegate { }; class DiagonalOperator : public mfem::Operator { -public: + public: DiagonalOperator(const mfem::Vector& diag) : mfem::Operator(diag.Size()), diag_(diag) {} void Mult(const mfem::Vector& x, mfem::Vector& y) const override { @@ -44,11 +43,12 @@ class DiagonalOperator : public mfem::Operator { y[i] = diag_[i] * x[i]; } } -private: + + private: const mfem::Vector& diag_; }; -} // namespace +} // namespace TEST(SteihaugTointCG, SolvesSPDInsideBoundary) { @@ -65,15 +65,15 @@ TEST(SteihaugTointCG, SolvesSPDInsideBoundary) smith::TrustRegionSettings settings; settings.cg_tol = 1e-10; settings.max_cg_iterations = 10; - - double trSize = 100.0; // Huge trust region + + double trSize = 100.0; // Huge trust region smith::TrustRegionResults results(size); - + mfem::Vector rCurrent(size); TestDelegate delegate; - + smith::steihaugTointCG(r0, rCurrent, H, nullptr, settings, trSize, results, r0.Norml2() * r0.Norml2(), delegate); - + // Solution should be H^{-1} (-r0) // x = -0.5, y = -0.25 EXPECT_NEAR(results.z[0], -0.5, 1e-9); @@ -93,15 +93,15 @@ TEST(SteihaugTointCG, HitsBoundary) smith::TrustRegionSettings settings; settings.max_cg_iterations = 10; - - double trSize = 0.5; // Small trust region, solution would be -1.0 + + double trSize = 0.5; // Small trust region, solution would be -1.0 smith::TrustRegionResults results(size); - + mfem::Vector rCurrent(size); TestDelegate delegate; - + smith::steihaugTointCG(r0, rCurrent, H, nullptr, settings, trSize, results, r0.Norml2() * r0.Norml2(), delegate); - + EXPECT_NEAR(results.z.Norml2(), 0.5, 1e-9); EXPECT_EQ(results.interior_status, smith::TrustRegionResults::Status::OnBoundary); } @@ -110,7 +110,7 @@ TEST(SteihaugTointCG, DetectsNegativeCurvature) { int size = 1; mfem::Vector diag(size); - diag[0] = -1.0; // Negative curvature + diag[0] = 
-1.0; // Negative curvature DiagonalOperator H(diag); mfem::Vector r0(size); @@ -118,15 +118,15 @@ TEST(SteihaugTointCG, DetectsNegativeCurvature) smith::TrustRegionSettings settings; settings.max_cg_iterations = 10; - + double trSize = 2.0; smith::TrustRegionResults results(size); - + mfem::Vector rCurrent(size); TestDelegate delegate; - + smith::steihaugTointCG(r0, rCurrent, H, nullptr, settings, trSize, results, r0.Norml2() * r0.Norml2(), delegate); - + // For negative curvature, it should go to boundary EXPECT_NEAR(results.z.Norml2(), 2.0, 1e-9); EXPECT_EQ(results.interior_status, smith::TrustRegionResults::Status::NegativeCurvature); diff --git a/src/smith/numerics/tests/test_trust_region_solver_mfem.cpp b/src/smith/numerics/tests/test_trust_region_solver_mfem.cpp index a476c02ee5..9a1657fd81 100644 --- a/src/smith/numerics/tests/test_trust_region_solver_mfem.cpp +++ b/src/smith/numerics/tests/test_trust_region_solver_mfem.cpp @@ -18,7 +18,8 @@ namespace { constexpr int test_size = 5; constexpr double test_delta = 1.0e-3; -std::vector applyDiagonalOperator(const mfem::Vector& diag, const std::vector& states) +std::vector applyDiagonalOperator(const mfem::Vector& diag, + const std::vector& states) { std::vector out; out.reserve(states.size()); @@ -50,12 +51,7 @@ std::vector toPointers(const std::vector& vec } struct DiagonalSubspaceFixture { - DiagonalSubspaceFixture(int size) - : u1(size), - u2(size), - u3(size), - diag(size), - b(size) + DiagonalSubspaceFixture(int size) : u1(size), u2(size), u3(size), diag(size), b(size) { u1 = 1.0; for (int i = 0; i < size; ++i) { @@ -75,8 +71,6 @@ struct DiagonalSubspaceFixture { } // namespace - - TEST(TrustRegionSubspaceMfem, SolveHitsTrustRegionBoundary) { DiagonalSubspaceFixture fixture(test_size); @@ -156,7 +150,6 @@ TEST(TrustRegionSubspaceMfem, SolveHandlesZeroDirection) EXPECT_LT(energy, 0.0); } - int main(int argc, char* argv[]) { ::testing::InitGoogleTest(&argc, argv); diff --git 
a/src/smith/numerics/trust_region_solver.hpp b/src/smith/numerics/trust_region_solver.hpp index 9d26fc3c36..a02824e44a 100644 --- a/src/smith/numerics/trust_region_solver.hpp +++ b/src/smith/numerics/trust_region_solver.hpp @@ -37,7 +37,8 @@ class PetscException : public std::exception { std::string msg; }; -enum class TrustRegionSubspaceBackend { +enum class TrustRegionSubspaceBackend +{ Petsc, Mfem }; @@ -53,18 +54,21 @@ double innerProduct(const mfem::Vector& a, const mfem::Vector& b, const MPI_Comm /// @brief returns the solution, as well as a list of the N leftmost eigenvectors /// and their eigenvalues, and the predicted model energy change -TrustRegionSubspaceResult solveSubspaceProblem( - const std::vector& directions, const std::vector& A_directions, - const mfem::Vector& b, double delta, int num_leftmost, mfem::Vector& workspace); +TrustRegionSubspaceResult solveSubspaceProblem(const std::vector& directions, + const std::vector& A_directions, + const mfem::Vector& b, double delta, int num_leftmost, + mfem::Vector& workspace); #if defined(SMITH_USE_SLEPC) && defined(SMITH_TRUST_REGION_USE_PETSC_SUBSPACE) -TrustRegionSubspaceResult solveSubspaceProblemPetsc( - const std::vector& directions, const std::vector& A_directions, - const mfem::Vector& b, double delta, int num_leftmost, mfem::Vector& workspace); +TrustRegionSubspaceResult solveSubspaceProblemPetsc(const std::vector& directions, + const std::vector& A_directions, + const mfem::Vector& b, double delta, int num_leftmost, + mfem::Vector& workspace); #endif -TrustRegionSubspaceResult solveSubspaceProblemMfem( - const std::vector& directions, const std::vector& A_directions, - const mfem::Vector& b, double delta, int num_leftmost, mfem::Vector& workspace); +TrustRegionSubspaceResult solveSubspaceProblemMfem(const std::vector& directions, + const std::vector& A_directions, + const mfem::Vector& b, double delta, int num_leftmost, + mfem::Vector& workspace); } // namespace smith From 
8106675616df2240b2d8d1682c36cc9e9a83a16a Mon Sep 17 00:00:00 2001 From: Michael Tupek Date: Sat, 9 May 2026 17:48:38 -0600 Subject: [PATCH 17/27] Simplify solver changes, use SLIC in test. --- src/smith/numerics/equation_solver.cpp | 31 ++----------------- .../numerics/mfem_trust_region_subspace.cpp | 25 +++++++-------- .../numerics/petsc_trust_region_subspace.cpp | 11 +++---- src/smith/numerics/steihaug_toint_cg.cpp | 4 +-- src/smith/numerics/steihaug_toint_cg.hpp | 6 +--- .../tests/test_trust_region_solver_mfem.cpp | 11 +++---- .../tests/test_trust_region_solver_petsc.cpp | 8 ++--- src/smith/numerics/trust_region_solver.hpp | 20 ++++-------- .../physics/tests/shallow_arch_buckling.cpp | 22 ++++++------- 9 files changed, 43 insertions(+), 95 deletions(-) diff --git a/src/smith/numerics/equation_solver.cpp b/src/smith/numerics/equation_solver.cpp index 0780abc23b..6f34db7fc2 100644 --- a/src/smith/numerics/equation_solver.cpp +++ b/src/smith/numerics/equation_solver.cpp @@ -275,8 +275,6 @@ class TrustRegion : public mfem::NewtonSolver, public SteihaugTointDelegate { mutable std::vector> left_mosts; /// the action of the stiffness/hessian (H) on the left most eigenvectors mutable std::vector> H_left_mosts; - /// previous accepted-iteration Hessian actions on the retained left most eigenvectors - mutable std::vector> previous_H_left_mosts; /// accepted TrustRegion steps, newest first mutable std::deque> accepted_step_history; /// initial state for this nonlinear solve, used as an optional history direction @@ -284,9 +282,6 @@ class TrustRegion : public mfem::NewtonSolver, public SteihaugTointDelegate { mutable mfem::Vector min_residual_x; mutable double min_residual_norm = -1.0; - /// Workspace vector for exact subspace solver to avoid small allocations - mutable mfem::Vector exact_solver_workspace; - /// nonlinear solution options NonlinearSolverOptions nonlinear_options; /// linear solution options @@ -461,11 +456,8 @@ class TrustRegion : public mfem::NewtonSolver, 
public SteihaugTointDelegate { double energy_change; try { - if (exact_solver_workspace.Size() < 2000) { - exact_solver_workspace.SetSize(2000); - } std::tie(sol, leftvecs, leftvals, energy_change) = - solveSubspaceProblem(directions, H_directions, b, delta, num_leftmost, exact_solver_workspace); + solveSubspaceProblem(directions, H_directions, b, delta, num_leftmost); } catch (const std::exception& e) { if (print_level >= 1) { mfem::out << "subspace solve failed with " << e.what() << std::endl; @@ -547,20 +539,6 @@ class TrustRegion : public mfem::NewtonSolver, public SteihaugTointDelegate { steihaugTointCG(r0, rCurrent, H, P, settings, trSize, results, r0_norm_squared, *this); } - std::unique_ptr cloneAssembledOperator(const mfem::Operator& op) const - { - if (const auto* hypre_matrix = dynamic_cast(&op)) { - return std::make_unique(*hypre_matrix); - } - if (const auto* sparse_matrix = dynamic_cast(&op)) { - return std::make_unique(*sparse_matrix); - } - if (const auto* block_operator = dynamic_cast(&op)) { - return buildMonolithicMatrix(*block_operator); - } - return nullptr; - } - /// assemble the jacobian void assembleJacobian(const mfem::Vector& x) const { @@ -615,7 +593,6 @@ class TrustRegion : public mfem::NewtonSolver, public SteihaugTointDelegate { solve_start_x = X; min_residual_x.SetSize(X.Size()); min_residual_x = X; - previous_H_left_mosts.clear(); real_t norm, norm_goal = 0.0; norm = initial_norm = computeResidual(X, r); @@ -723,7 +700,7 @@ class TrustRegion : public mfem::NewtonSolver, public SteihaugTointDelegate { trResults.interior_status = TrustRegionResults::Status::OnBoundary; } else { settings.cg_tol = std::max(0.5 * norm_goal, 5e-5 * norm); - solveModelProblem(r, scratch, *this->oper, &this->tr_precond, settings, tr_size, trResults, norm * norm); + solveModelProblem(r, scratch, *grad, &this->tr_precond, settings, tr_size, trResults, norm * norm); } cumulative_cg_iters_from_last_precond_update += trResults.cg_iterations_count; @@ -761,7 
+738,6 @@ class TrustRegion : public mfem::NewtonSolver, public SteihaugTointDelegate { if (!have_computed_H_left_mosts) { have_computed_H_left_mosts = true; - previous_H_left_mosts = H_left_mosts; H_left_mosts.clear(); std::vector leftmost_inputs; std::vector leftmost_outputs; @@ -827,7 +803,6 @@ class TrustRegion : public mfem::NewtonSolver, public SteihaugTointDelegate { H_min_residual_direction.SetSize(X.Size()); std::vector min_res_inputs{&min_residual_direction}; std::vector min_res_outputs{&H_min_residual_direction}; - // Reusing solve_start counters for now batchedSubspaceHessVec(hess_vec_func, min_res_inputs, min_res_outputs); ds.push_back(&min_residual_direction); H_ds.push_back(&H_min_residual_direction); @@ -861,7 +836,6 @@ class TrustRegion : public mfem::NewtonSolver, public SteihaugTointDelegate { if (normPred <= norm_goal) { trResults.d_old = trResults.d; - trResults.H_d_old_at_accept = trResults.H_d; trResults.has_d_old = true; pushAcceptedStepHistory(trResults.d); if (!candidate_left_mosts.empty()) { @@ -918,7 +892,6 @@ class TrustRegion : public mfem::NewtonSolver, public SteihaugTointDelegate { if (willAccept) { trResults.d_old = trResults.d; - trResults.H_d_old_at_accept = trResults.H_d; trResults.has_d_old = true; pushAcceptedStepHistory(trResults.d); if (!candidate_left_mosts.empty()) { diff --git a/src/smith/numerics/mfem_trust_region_subspace.cpp b/src/smith/numerics/mfem_trust_region_subspace.cpp index 716a2700ab..ac66e814c3 100644 --- a/src/smith/numerics/mfem_trust_region_subspace.cpp +++ b/src/smith/numerics/mfem_trust_region_subspace.cpp @@ -31,10 +31,9 @@ double innerProduct(const mfem::Vector& a, const mfem::Vector& b, const MPI_Comm TrustRegionSubspaceResult solveSubspaceProblem(const std::vector& directions, const std::vector& A_directions, - const mfem::Vector& b, double delta, int num_leftmost, - mfem::Vector& workspace) + const mfem::Vector& b, double delta, int num_leftmost) { - return solveSubspaceProblemMfem(directions, 
A_directions, b, delta, num_leftmost, workspace); + return solveSubspaceProblemMfem(directions, A_directions, b, delta, num_leftmost); } namespace { @@ -211,16 +210,17 @@ mfem::DenseMatrix columnsToMatrix(const std::vector& cols) * - A boolean indicating success. */ std::tuple, std::vector, bool> exactTrustRegionSolve( - mfem::DenseMatrix A, const mfem::Vector& b, double delta, int num_leftmost, mfem::Vector& workspace) + mfem::DenseMatrix A, const mfem::Vector& b, double delta, int num_leftmost) { if (A.Height() != A.Width()) { - throw PetscException("Exact trust region solver requires square matrices"); + throw TrustRegionException("Exact trust region solver requires square matrices"); } if (A.Height() != b.Size()) { - throw PetscException( + throw TrustRegionException( "The right hand size for exact trust region solve must be consistent with the input matrix size"); } + mfem::Vector workspace(b.Size() * b.Size() + 8 * b.Size()); int offset = 0; auto alloc_vector = [&](int size) { mfem::Vector v(workspace.GetData() + offset, size); @@ -387,8 +387,7 @@ mfem::Vector combineDirections(const std::vector& states, c TrustRegionSubspaceResult solveSubspaceProblemMfem(const std::vector& states, const std::vector& Astates, - const mfem::Vector& b, double delta, int num_leftmost, - mfem::Vector& workspace) + const mfem::Vector& b, double delta, int num_leftmost) { SMITH_MARK_FUNCTION; SubspaceProjections projections = projectSubspaceGlobally(states, Astates, b); @@ -398,7 +397,7 @@ TrustRegionSubspaceResult solveSubspaceProblemMfem(const std::vector>{}, std::vector{}, 0.0); } if (T.Width() == 0) { - throw PetscException("No independent directions in MFEM subspace solve."); + throw TrustRegionException("No independent directions in MFEM subspace solve."); } mfem::DenseMatrix pAp = tripleProduct(T, sAs, T); symmetrize(pAp); @@ -422,7 +421,7 @@ TrustRegionSubspaceResult solveSubspaceProblemMfem(const std::vector>{}, std::vector{}, 0.0); #endif } @@ -456,7 +455,7 @@ 
TrustRegionSubspaceResult solveSubspaceProblemMfem(const std::vector&, const mfem::Vector& b, double, int) { - throw PetscException("MFEM trust-region subspace solve requires MFEM LAPACK support."); + throw TrustRegionException("MFEM trust-region subspace solve requires MFEM LAPACK support."); return std::make_tuple(b, std::vector>{}, std::vector{}, 0.0); } diff --git a/src/smith/numerics/petsc_trust_region_subspace.cpp b/src/smith/numerics/petsc_trust_region_subspace.cpp index bb82215528..6883f2571c 100644 --- a/src/smith/numerics/petsc_trust_region_subspace.cpp +++ b/src/smith/numerics/petsc_trust_region_subspace.cpp @@ -141,7 +141,7 @@ auto qr(const std::vector& states) MatCreateSeqDense(PETSC_COMM_SELF, num_cols, num_cols, NULL, &R); auto error = BVOrthogonalize(Q, R); - if (error) throw PetscException("BVOrthogonalize failed."); + if (error) throw TrustRegionException("BVOrthogonalize failed."); return std::make_pair(Q, DenseMat(R)); } @@ -290,21 +290,20 @@ std::vector remove_at(const std::vector& states, const std::vector& Astates, - const mfem::Vector& b, double delta, int num_leftmost, - mfem::Vector& workspace) + const mfem::Vector& b, double delta, int num_leftmost) { SMITH_MARK_FUNCTION; DenseMat sAs1 = dot(states, Astates); DenseMat sAs = sym(sAs1); if (sAs.hasNan()) { - throw PetscException("States in subspace solve contain NaNs."); + throw TrustRegionException("States in subspace solve contain NaNs."); } auto [Q_parallel, R] = qr(states); if (R.hasNan()) { - throw PetscException("R from qr returning with a NaN."); + throw TrustRegionException("R from qr returning with a NaN."); } auto [rows, cols] = R.size(); @@ -319,7 +318,7 @@ TrustRegionSubspaceResult solveSubspaceProblemPetsc(const std::vector #include #include #include @@ -24,10 +25,10 @@ namespace smith { -class PetscException : public std::exception { +class TrustRegionException : public std::exception { public: /// constructor - PetscException(const std::string& message) : msg(message) {} + 
TrustRegionException(const std::string& message) : msg(message) {} /// what is message const char* what() const noexcept override { return msg.c_str(); } @@ -37,12 +38,6 @@ class PetscException : public std::exception { std::string msg; }; -enum class TrustRegionSubspaceBackend -{ - Petsc, - Mfem -}; - using TrustRegionSubspaceResult = std::tuple>, std::vector, double>; @@ -56,19 +51,16 @@ double innerProduct(const mfem::Vector& a, const mfem::Vector& b, const MPI_Comm /// and their eigenvalues, and the predicted model energy change TrustRegionSubspaceResult solveSubspaceProblem(const std::vector& directions, const std::vector& A_directions, - const mfem::Vector& b, double delta, int num_leftmost, - mfem::Vector& workspace); + const mfem::Vector& b, double delta, int num_leftmost); #if defined(SMITH_USE_SLEPC) && defined(SMITH_TRUST_REGION_USE_PETSC_SUBSPACE) TrustRegionSubspaceResult solveSubspaceProblemPetsc(const std::vector& directions, const std::vector& A_directions, - const mfem::Vector& b, double delta, int num_leftmost, - mfem::Vector& workspace); + const mfem::Vector& b, double delta, int num_leftmost); #endif TrustRegionSubspaceResult solveSubspaceProblemMfem(const std::vector& directions, const std::vector& A_directions, - const mfem::Vector& b, double delta, int num_leftmost, - mfem::Vector& workspace); + const mfem::Vector& b, double delta, int num_leftmost); } // namespace smith diff --git a/src/smith/physics/tests/shallow_arch_buckling.cpp b/src/smith/physics/tests/shallow_arch_buckling.cpp index 00ca99d7fa..b554a11efd 100644 --- a/src/smith/physics/tests/shallow_arch_buckling.cpp +++ b/src/smith/physics/tests/shallow_arch_buckling.cpp @@ -4,6 +4,7 @@ // // SPDX-License-Identifier: (BSD-3-Clause) +#include #include #include #include @@ -14,6 +15,7 @@ #include "mfem.hpp" #include "smith/infrastructure/application_manager.hpp" +#include "smith/infrastructure/logger.hpp" #include "smith/numerics/functional/domain.hpp" #include 
"smith/numerics/functional/tensor.hpp" #include "smith/numerics/solver_config.hpp" @@ -86,13 +88,11 @@ void parseCommandLine(int& argc, char** argv) TEST(ShallowArchBuckling, CompressedThinBeamSnapThrough) { MPI_Barrier(MPI_COMM_WORLD); - int rank = 0; - MPI_Comm_rank(MPI_COMM_WORLD, &rank); constexpr int p = 1; constexpr int dim = 2; - constexpr int nx = 150; - constexpr int ny = 6; + constexpr int nx = 120; + constexpr int ny = 5; axom::sidre::DataStore datastore; smith::StateManager::initialize(datastore, "shallow_arch_buckling"); @@ -161,19 +161,15 @@ TEST(ShallowArchBuckling, CompressedThinBeamSnapThrough) solid.completeSetup(); solid.outputStateToDisk("shallow_arch_buckling"); - if (rank == 0) { - mfem::out << "Compressed thin beam snap-through run: solver = " << solver_name - << ", trust_subspace_option = " << trust_subspace_option - << ", trust_num_leftmost = " << trust_num_leftmost << ", trust_num_past_steps = " << trust_num_past_steps - << '\n'; - } + SLIC_INFO_ROOT( + std::format("Compressed thin beam snap-through run: solver = {}, trust_subspace_option = {}, " + "trust_num_leftmost = {}, trust_num_past_steps = {}", + solver_name, trust_subspace_option, trust_num_leftmost, trust_num_past_steps)); constexpr int num_steps = 5; for (int step = 0; step < num_steps; ++step) { solid.advanceTimestep(1.0 / num_steps); - if (rank == 0) { - mfem::out << "Load step " << step + 1 << "/" << num_steps << '\n'; - } + SLIC_INFO_ROOT(std::format("Load step {}/{}", step + 1, num_steps)); solid.outputStateToDisk("shallow_arch_buckling"); } } From dae08a6cda222f9f47229fca46b31cf55b39fb76 Mon Sep 17 00:00:00 2001 From: Michael Tupek Date: Sat, 9 May 2026 18:16:42 -0600 Subject: [PATCH 18/27] Docs and style. 
--- src/smith/numerics/equation_solver.cpp | 8 ++++++++ src/smith/numerics/functional/differentiate_wrt.hpp | 8 ++++---- src/smith/numerics/mfem_trust_region_subspace.cpp | 1 + src/smith/numerics/steihaug_toint_cg.hpp | 5 +++++ src/smith/numerics/trust_region_solver.hpp | 2 ++ 5 files changed, 20 insertions(+), 4 deletions(-) diff --git a/src/smith/numerics/equation_solver.cpp b/src/smith/numerics/equation_solver.cpp index 6f34db7fc2..f5f205c9fe 100644 --- a/src/smith/numerics/equation_solver.cpp +++ b/src/smith/numerics/equation_solver.cpp @@ -279,7 +279,9 @@ class TrustRegion : public mfem::NewtonSolver, public SteihaugTointDelegate { mutable std::deque> accepted_step_history; /// initial state for this nonlinear solve, used as an optional history direction mutable mfem::Vector solve_start_x; + /// state with the lowest residual norm seen in this nonlinear solve mutable mfem::Vector min_residual_x; + /// lowest residual norm seen in this nonlinear solve mutable double min_residual_norm = -1.0; /// nonlinear solution options @@ -304,6 +306,7 @@ class TrustRegion : public mfem::NewtonSolver, public SteihaugTointDelegate { } #endif + /// compute several vector inner products with a single MPI reduction when possible template std::array dot_many(const Args&... 
args) const { @@ -371,6 +374,7 @@ class TrustRegion : public mfem::NewtonSolver, public SteihaugTointDelegate { return products; } + /// apply Hessian-vector products for all supplied subspace directions template void batchedSubspaceHessVec(HessVecFunc hess_vec_func, const std::vector& inputs, const std::vector& outputs) const @@ -385,6 +389,7 @@ class TrustRegion : public mfem::NewtonSolver, public SteihaugTointDelegate { } } + /// store accepted steps for optional later subspace enrichment void pushAcceptedStepHistory(const mfem::Vector& step) const { if (nonlinear_options.trust_num_past_steps <= 0) { @@ -399,6 +404,7 @@ class TrustRegion : public mfem::NewtonSolver, public SteihaugTointDelegate { } } + /// SteihaugTointDelegate implementation for four inner products. std::array dot_many_4(const mfem::Vector& a0, const mfem::Vector& b0, const mfem::Vector& a1, const mfem::Vector& b1, const mfem::Vector& a2, const mfem::Vector& b2, const mfem::Vector& a3, const mfem::Vector& b3) const override @@ -406,12 +412,14 @@ class TrustRegion : public mfem::NewtonSolver, public SteihaugTointDelegate { return dot_many(a0, b0, a1, b1, a2, b2, a3, b3); } + /// SteihaugTointDelegate implementation for two inner products. std::array dot_many_2(const mfem::Vector& a0, const mfem::Vector& b0, const mfem::Vector& a1, const mfem::Vector& b1) const override { return dot_many(a0, b0, a1, b1); } + /// SteihaugTointDelegate implementation for projecting to the trust-region boundary. 
void projectToBoundaryWithCoefs(mfem::Vector& z, const mfem::Vector& d, double delta, double zz, double zd, double dd) const override { diff --git a/src/smith/numerics/functional/differentiate_wrt.hpp b/src/smith/numerics/functional/differentiate_wrt.hpp index d72a0bfdd0..a3c71102bc 100644 --- a/src/smith/numerics/functional/differentiate_wrt.hpp +++ b/src/smith/numerics/functional/differentiate_wrt.hpp @@ -15,19 +15,19 @@ template struct DifferentiateWRT {}; /** - * @brief this type exists solely as a way to signal to `smith::Functional` that the function - * smith::Functional::operator()` should differentiate w.r.t. a specific argument + * @brief this type exists solely as a way to signal to smith::Functional that the function + * smith::Functional::operator() should differentiate w.r.t. a specific argument */ struct differentiate_wrt_this { const mfem::Vector& ref; ///< the actual data wrapped by this type - /// @brief implicitly convert back to `mfem::Vector` to extract the actual data + /// @brief implicitly convert back to mfem::Vector to extract the actual data operator const mfem::Vector&() const { return ref; } }; /** * @brief this function is intended to only be used in combination with - * `smith::Functional::operator()`, as a way for the user to express that + * smith::Functional::operator(), as a way for the user to express that * it should both evaluate and differentiate w.r.t. 
a specific argument (only 1 argument at a time) * * For example: diff --git a/src/smith/numerics/mfem_trust_region_subspace.cpp b/src/smith/numerics/mfem_trust_region_subspace.cpp index ac66e814c3..0f2eddb32a 100644 --- a/src/smith/numerics/mfem_trust_region_subspace.cpp +++ b/src/smith/numerics/mfem_trust_region_subspace.cpp @@ -451,6 +451,7 @@ TrustRegionSubspaceResult solveSubspaceProblem(const std::vector&, const std::vector&, const mfem::Vector& b, double, int) diff --git a/src/smith/numerics/steihaug_toint_cg.hpp b/src/smith/numerics/steihaug_toint_cg.hpp index 8ace278a61..c44316b637 100644 --- a/src/smith/numerics/steihaug_toint_cg.hpp +++ b/src/smith/numerics/steihaug_toint_cg.hpp @@ -104,17 +104,22 @@ struct TrustRegionResults { size_t cg_iterations_count = 0; }; +/// Delegate for operations Steihaug-Toint CG needs from its caller. class SteihaugTointDelegate { public: + /// destructor virtual ~SteihaugTointDelegate() = default; + /// compute four inner products with one global reduction. virtual std::array dot_many_4(const mfem::Vector& a0, const mfem::Vector& b0, const mfem::Vector& a1, const mfem::Vector& b1, const mfem::Vector& a2, const mfem::Vector& b2, const mfem::Vector& a3, const mfem::Vector& b3) const = 0; + /// compute two inner products with one global reduction. virtual std::array dot_many_2(const mfem::Vector& a0, const mfem::Vector& b0, const mfem::Vector& a1, const mfem::Vector& b1) const = 0; + /// project z along d to the trust-region boundary using precomputed dot products. 
virtual void projectToBoundaryWithCoefs(mfem::Vector& z, const mfem::Vector& d, double delta, double zz, double zd, double dd) const = 0; }; diff --git a/src/smith/numerics/trust_region_solver.hpp b/src/smith/numerics/trust_region_solver.hpp index 7cbd1463aa..98dc9486b8 100644 --- a/src/smith/numerics/trust_region_solver.hpp +++ b/src/smith/numerics/trust_region_solver.hpp @@ -25,6 +25,7 @@ namespace smith { +/// Exception type for trust-region subspace solve failures. class TrustRegionException : public std::exception { public: /// constructor @@ -38,6 +39,7 @@ class TrustRegionException : public std::exception { std::string msg; }; +/// Subspace solution, leftmost eigenvectors, leftmost eigenvalues, and predicted model energy change. using TrustRegionSubspaceResult = std::tuple>, std::vector, double>; From 03dbbfddf14eddd039770e301a9436432dfe9475 Mon Sep 17 00:00:00 2001 From: Michael Tupek Date: Tue, 12 May 2026 10:45:21 -0700 Subject: [PATCH 19/27] Completely remove petsc subspace options, use pure mfem one only going forward. Try to simplify some of the testing and changes. 
--- cmake/thirdparty/FindMFEM.cmake | 17 - src/smith/numerics/CMakeLists.txt | 2 - src/smith/numerics/dense_petsc.hpp | 380 ------------------ src/smith/numerics/equation_solver.cpp | 208 ++-------- .../numerics/functional/differentiate_wrt.hpp | 8 +- .../tests/functional_comparisons.cpp | 32 -- .../numerics/mfem_trust_region_subspace.cpp | 4 - .../numerics/petsc_trust_region_subspace.cpp | 362 ----------------- src/smith/numerics/solver_config.hpp | 10 - src/smith/numerics/steihaug_toint_cg.cpp | 29 +- src/smith/numerics/steihaug_toint_cg.hpp | 29 +- src/smith/numerics/tests/CMakeLists.txt | 1 - .../numerics/tests/test_steihaug_toint_cg.cpp | 60 +-- .../tests/test_trust_region_solver_petsc.cpp | 225 ----------- src/smith/numerics/trust_region_solver.hpp | 6 - src/smith/physics/functional_weak_form.hpp | 1 - src/smith/physics/solid_mechanics.hpp | 2 - .../physics/tests/shallow_arch_buckling.cpp | 20 +- 18 files changed, 83 insertions(+), 1313 deletions(-) delete mode 100644 src/smith/numerics/dense_petsc.hpp delete mode 100644 src/smith/numerics/petsc_trust_region_subspace.cpp delete mode 100644 src/smith/numerics/tests/test_trust_region_solver_petsc.cpp diff --git a/cmake/thirdparty/FindMFEM.cmake b/cmake/thirdparty/FindMFEM.cmake index bb7d37037d..cd7d45c8af 100644 --- a/cmake/thirdparty/FindMFEM.cmake +++ b/cmake/thirdparty/FindMFEM.cmake @@ -107,17 +107,6 @@ else() # filter out items containing "Xlinker" set(_mfem_tpl_list ${mfem_tpl_lnk_flags}) separate_arguments(_mfem_tpl_list) - foreach(_link_flag ${_mfem_tpl_list}) - if(_link_flag MATCHES "^-L(.+)") - set(_link_dir "${CMAKE_MATCH_1}") - if(EXISTS "${_link_dir}/liblapack.dylib" OR EXISTS "${_link_dir}/libblas.dylib") - list(APPEND _mfem_tpl_link_dirs "${_link_dir}") - endif() - endif() - endforeach() - if(_mfem_tpl_link_dirs) - list(REMOVE_DUPLICATES _mfem_tpl_link_dirs) - endif() list(FILTER _mfem_tpl_list EXCLUDE REGEX Xlinker) # On Apple, -Wl,-rpath,... 
entries duplicate CMake's own rpath management # (CMAKE_INSTALL_RPATH_USE_LINK_PATH) and cause ld "duplicate -rpath" warnings @@ -149,12 +138,6 @@ else() TREAT_INCLUDES_AS_SYSTEM ON EXPORTABLE ON) - if(APPLE AND _mfem_tpl_link_dirs) - foreach(_link_dir ${_mfem_tpl_link_dirs}) - target_link_options(mfem INTERFACE "LINKER:-rpath,${_link_dir}") - endforeach() - endif() - install(TARGETS mfem EXPORT smith-targets DESTINATION lib diff --git a/src/smith/numerics/CMakeLists.txt b/src/smith/numerics/CMakeLists.txt index f3031866ef..500453a808 100644 --- a/src/smith/numerics/CMakeLists.txt +++ b/src/smith/numerics/CMakeLists.txt @@ -14,14 +14,12 @@ set(numerics_headers stdfunction_operator.hpp petsc_solvers.hpp trust_region_solver.hpp - dense_petsc.hpp block_preconditioner.hpp ) set(numerics_sources equation_solver.cpp steihaug_toint_cg.cpp - petsc_trust_region_subspace.cpp mfem_trust_region_subspace.cpp odes.cpp petsc_solvers.cpp diff --git a/src/smith/numerics/dense_petsc.hpp b/src/smith/numerics/dense_petsc.hpp deleted file mode 100644 index 516b3ee01d..0000000000 --- a/src/smith/numerics/dense_petsc.hpp +++ /dev/null @@ -1,380 +0,0 @@ -// Copyright (c) Lawrence Livermore National Security, LLC and -// other Smith Project Developers. See the top-level LICENSE file for -// details. 
-// -// SPDX-License-Identifier: (BSD-3-Clause) - -#pragma once - -#ifdef SMITH_USE_SLEPC - -#include -#include -#include - -struct DenseVec; - -/// Dense Matrix class which wraps petsc matrix for the case of a SeqDense matrix (on 1 processor) -struct DenseMat { - /// @brief copy constructor - /// @param a matrix - DenseMat(const Mat& a) : A(a) {} - - /// @brief constructor - /// @param a matrix - DenseMat(const DenseMat& a) - { - MatDuplicate(a.A, MAT_COPY_VALUES, &A); - MatCopy(a.A, A, SAME_NONZERO_PATTERN); - MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY); - MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY); - } - - /// @brief destructor - ~DenseMat() { MatDestroy(&A); } - - /// @brief size - auto size() const - { - int isize; - int jsize; - MatGetSize(A, &isize, &jsize); - return std::make_pair(isize, jsize); - } - - /// @brief index into - double operator()(int i, int j) const - { - double val; - MatGetValue(A, i, j, &val); - return val; - } - - /// @brief set value - void setValue(int i, int j, double val) { MatSetValues(A, 1, &i, 1, &j, &val, INSERT_VALUES); } - - /// @brief matrix-vector multiply - DenseVec operator*(const DenseVec& v) const; - - /// @brief solve - DenseVec solve(const DenseVec& v) const; - - /// @brief multiply this by P transpose on left and P on the right - DenseMat PtAP(const DenseMat& P) const; - - /// @brief print utility - void print(std::string first = "") const - { - if (first.size()) { - std::cout << first << ": "; - } - MatView(A, PETSC_VIEWER_STDOUT_SELF); - } - - /// @brief check for nans - bool hasNan() const - { - auto [rows, cols] = size(); - for (int i = 0; i < rows; ++i) { - for (int j = 0; j < cols; ++j) { - double val = (*this)(i, j); - if (val != val) return true; - } - } - return false; - } - - /// @brief reassemble petsc dense matrix after values have been modified - void reassemble() - { - MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY); - MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY); - } - - /// petsc matrix - Mat A; -}; - -/// matrix inverse -/// 
@param a matrix -DenseMat inverse(const DenseMat& a) -{ - Mat inv; - MatDuplicate(a.A, MAT_COPY_VALUES, &inv); - MatSeqDenseInvert(inv); - return inv; -} - -/// compute the symmetric part -/// @param a matrix -DenseMat sym(const DenseMat& a) -{ - DenseMat b = a; - auto [rows, cols] = b.size(); - SLIC_ERROR_IF(rows != cols, "Calling sym on a non-square DenseMat"); - - for (int i = 0; i < rows; ++i) { - for (int j = 0; j < i; ++j) { - auto val = 0.5 * a(i, j) + 0.5 * a(j, i); - b.setValue(i, j, val); - b.setValue(j, i, val); - } - } - - b.reassemble(); - - return b; -} - -/// Dense Vector class which wraps petsc vector for the case of a SeqDense vector (on 1 processor) -struct DenseVec { - /// @brief constructor - DenseVec(const Vec& vin) : v(vin) {} - - /// @brief constructor - DenseVec(const DenseVec& vin) - { - VecDuplicate(vin.v, &v); - VecCopy(vin.v, v); - } - - /// @brief constructor from size - DenseVec(size_t size) { VecCreateSeq(PETSC_COMM_SELF, static_cast(size), &v); } - - /// @brief constructor from size - DenseVec(int size) { VecCreateSeq(PETSC_COMM_SELF, size, &v); } - - /// @brief constructor standard vector - DenseVec(const std::vector vin) - { - const auto sz = vin.size(); - std::vector allints(sz); - for (size_t i = 0; i < sz; ++i) { - allints[i] = static_cast(i); - } - int sz_int = static_cast(sz); - VecCreateSeq(PETSC_COMM_SELF, sz_int, &v); - VecSetValues(v, sz_int, &allints[0], &vin[0], INSERT_VALUES); - } - - /// @brief assignment - DenseVec& operator=(const DenseVec& vin) - { - VecCopy(vin.v, v); - return *this; - } - - /// @brief assignment from scalar - DenseVec& operator=(const double val) - { - VecSet(v, val); - return *this; - } - - /// @brief destructor - ~DenseVec() - { - if (v) VecDestroy(&v); - } - - /// @brief negate - DenseVec operator-() const - { - Vec minus; - VecDuplicate(v, &minus); - VecCopy(v, minus); - VecScale(minus, -1.0); - return minus; - } - - /// @brief scale - DenseVec& operator*=(double scale) - { - VecScale(v, 
scale); - return *this; - } - - /// @brief size - int size() const - { - int isize; - VecGetSize(v, &isize); - return isize; - } - - /// @brief index into - double operator[](int i) const - { - double val; - VecGetValues(v, 1, &i, &val); - return val; - } - - /// @brief index into - double operator[](size_t i) const { return (*this)[int(i)]; } - - /// @brief set value - void setValue(int i, double val) { VecSetValues(v, 1, &i, &val, INSERT_VALUES); } - - /// @brief set value - void setValue(size_t i, double val) { setValue(int(i), val); } - - /// @brief add scaled vector - void add(double val, const DenseVec& w) { VecAXPY(v, val, w.v); } - - /// @brief convert to standard vector - std::vector getValues() const - { - size_t sz = static_cast(size()); - std::vector vout(sz); - std::vector allints(sz); - for (size_t i = 0; i < sz; ++i) { - allints[i] = static_cast(i); - } - int sz_int = static_cast(sz); - VecGetValues(v, sz_int, &allints[0], &vout[0]); - return vout; - } - - /// @brief print utility - void print(std::string first = "") const - { - if (first.size()) { - std::cout << first << ": "; - } - VecView(v, PETSC_VIEWER_STDOUT_SELF); - } - - /// petsc vector - Vec v; -}; - -/// @brief matrix vector multiply -DenseVec DenseMat::operator*(const DenseVec& v) const -{ - Vec out; - auto [rows, cols] = size(); - SLIC_ERROR_IF(cols != v.size(), "Column size of dense matrix and length of multiplied vector do not match"); - VecCreateSeq(PETSC_COMM_SELF, rows, &out); - MatMult(A, v.v, out); - return out; -} - -/// @brief matrix linear solve -DenseVec DenseMat::solve(const DenseVec& v) const -{ - Vec out; - VecDuplicate(v.v, &out); - MatLUFactor(A, NULL, NULL, NULL); // not efficient if done a lot - MatSolve(A, v.v, out); - return out; -} - -/// @brief multiply matrix by P-transpose on left, P on right -DenseMat DenseMat::PtAP(const DenseMat& P) const -{ - Mat pAp; - MatPtAP(A, P.A, MAT_INITIAL_MATRIX, PETSC_DETERMINE, &pAp); - return pAp; -} - -/// @brief vector dot 
product -double dot(const DenseVec& a, const DenseVec& b) -{ - double d; - VecDot(a.v, b.v, &d); - return d; -} - -/// @brief add a scalar to a vector -DenseVec operator+(const DenseVec& a, double b) -{ - Vec c; - VecDuplicate(a.v, &c); - VecSet(c, b); - VecAXPY(c, 1.0, a.v); - return c; -} - -DenseVec operator+(double b, const DenseVec& a) { return a + b; } - -/// @brief component-wise multiplication of vectors -DenseVec operator*(const DenseVec& a, const DenseVec& b) -{ - Vec c; - VecDuplicate(a.v, &c); - VecPointwiseMult(c, a.v, b.v); - return c; -} - -/// @brief component-wise vector divide -DenseVec operator/(const DenseVec& a, const DenseVec& b) -{ - Vec c; - VecDuplicate(a.v, &c); - VecPointwiseDivide(c, a.v, b.v); - return c; -} - -/// @brief component-wise vector absolute value -DenseVec abs(const DenseVec& a) -{ - Vec absa; - VecDuplicate(a.v, &absa); - VecCopy(a.v, absa); - VecAbs(absa); - return absa; -} - -/// @brief sum values in a vector -double sum(const DenseVec& a) -{ - double s; - VecSum(a.v, &s); - return s; -} - -/// @brief l2-norm of vector -double norm(const DenseVec& a) -{ - double n; - VecNorm(a.v, NORM_2, &n); - return n; -} - -/// @brief computes the eigenvectors and eigenvalues of a dense symmetric matrix -auto eigh(const DenseMat& Adense) -{ - auto [isize, jsize] = Adense.size(); - SLIC_ERROR_IF(isize != jsize, "Eig must be called for symmetric matrices"); - - const Mat& A = Adense.A; - - EPS eps; - EPSCreate(PETSC_COMM_SELF, &eps); - EPSSetOperators(eps, A, NULL); - EPSSetProblemType(eps, EPS_HEP); - EPSSetWhichEigenpairs(eps, EPS_SMALLEST_REAL); - EPSSetDimensions(eps, isize, PETSC_DETERMINE, PETSC_DETERMINE); - EPSSetFromOptions(eps); - - EPSSolve(eps); - - EPSType type; - EPSGetType(eps, &type); - EPSGetDimensions(eps, &jsize, NULL, NULL); - - DenseVec eigenvalues(isize); - std::vector eigenvectors; - for (int i = 0; i < isize; ++i) { - eigenvectors.emplace_back(isize); - double eigenvalue; - EPSGetEigenpair(eps, i, &eigenvalue, 
PETSC_NULLPTR, eigenvectors[static_cast(i)].v, PETSC_NULLPTR); - eigenvalues.setValue(i, eigenvalue); - } - - EPSDestroy(&eps); - return std::make_pair(std::move(eigenvalues), std::move(eigenvectors)); -} - -#endif // SMITH_USE_SLEPC diff --git a/src/smith/numerics/equation_solver.cpp b/src/smith/numerics/equation_solver.cpp index f5f205c9fe..9d83657269 100644 --- a/src/smith/numerics/equation_solver.cpp +++ b/src/smith/numerics/equation_solver.cpp @@ -9,7 +9,6 @@ #include #include -#include #include #include #include @@ -263,7 +262,7 @@ void printTrustRegionInfo(double realWork, double modelObjective, size_t cgIters * rely on an incremental work approximation: 0.5 (f^n + f^{n+1}) dot (u^{n+1} - u^n). While less theoretically sound, * it appears to be very effective in practice. */ -class TrustRegion : public mfem::NewtonSolver, public SteihaugTointDelegate { +class TrustRegion : public mfem::NewtonSolver { protected: /// predicted solution mutable mfem::Vector x_pred; @@ -275,14 +274,6 @@ class TrustRegion : public mfem::NewtonSolver, public SteihaugTointDelegate { mutable std::vector> left_mosts; /// the action of the stiffness/hessian (H) on the left most eigenvectors mutable std::vector> H_left_mosts; - /// accepted TrustRegion steps, newest first - mutable std::deque> accepted_step_history; - /// initial state for this nonlinear solve, used as an optional history direction - mutable mfem::Vector solve_start_x; - /// state with the lowest residual norm seen in this nonlinear solve - mutable mfem::Vector min_residual_x; - /// lowest residual norm seen in this nonlinear solve - mutable double min_residual_norm = -1.0; /// nonlinear solution options NonlinearSolverOptions nonlinear_options; @@ -307,53 +298,43 @@ class TrustRegion : public mfem::NewtonSolver, public SteihaugTointDelegate { #endif /// compute several vector inner products with a single MPI reduction when possible - template - std::array dot_many(const Args&... 
args) const + std::vector dot_many(const std::vector& pairs) const { - static_assert(sizeof...(Args) % 2 == 0, "dot_many requires an even number of arguments"); - constexpr size_t num_pairs = sizeof...(Args) / 2; - std::array products; - products.fill(0.0); + std::vector products(pairs.size(), 0.0); if (dot_oper) { - auto tuple_args = std::tie(args...); - auto do_dots = [&](std::index_sequence) { - ((products[I] = Dot(std::get<2 * I>(tuple_args), std::get<2 * I + 1>(tuple_args))), ...); - }; - do_dots(std::make_index_sequence{}); + for (size_t i = 0; i < pairs.size(); ++i) { + products[i] = Dot(*pairs[i].first, *pairs[i].second); + } return products; } - auto tuple_args = std::tie(args...); - std::array sizes; - std::array ptr_a; - std::array ptr_b; - - auto populate_arrays = [&](std::index_sequence) { - (( - sizes[I] = std::get<2 * I>(tuple_args).Size(), - [&]() { MFEM_ASSERT(sizes[I] == std::get<2 * I + 1>(tuple_args).Size(), "Incompatible vector sizes."); }(), - ptr_a[I] = std::get<2 * I>(tuple_args).GetData(), ptr_b[I] = std::get<2 * I + 1>(tuple_args).GetData()), - ...); - }; - populate_arrays(std::make_index_sequence{}); + std::vector sizes(pairs.size()); + std::vector ptr_a(pairs.size()); + std::vector ptr_b(pairs.size()); + for (size_t i = 0; i < pairs.size(); ++i) { + sizes[i] = pairs[i].first->Size(); + MFEM_ASSERT(sizes[i] == pairs[i].second->Size(), "Incompatible vector sizes."); + ptr_a[i] = pairs[i].first->GetData(); + ptr_b[i] = pairs[i].second->GetData(); + } bool all_same_size = true; - for (size_t i = 1; i < num_pairs; ++i) { + for (size_t i = 1; i < pairs.size(); ++i) { if (sizes[i] != sizes[0]) { all_same_size = false; break; } } - if (all_same_size && num_pairs > 0) { + if (all_same_size && !pairs.empty()) { for (int j = 0; j < sizes[0]; ++j) { - for (size_t i = 0; i < num_pairs; ++i) { + for (size_t i = 0; i < pairs.size(); ++i) { products[i] += ptr_a[i][j] * ptr_b[i][j]; } } } else { - for (size_t i = 0; i < num_pairs; ++i) { + for (size_t i 
= 0; i < pairs.size(); ++i) { for (int j = 0; j < sizes[i]; ++j) { products[i] += ptr_a[i][j] * ptr_b[i][j]; } @@ -363,72 +344,16 @@ class TrustRegion : public mfem::NewtonSolver, public SteihaugTointDelegate { #ifdef MFEM_USE_MPI const MPI_Comm dot_comm = GetComm(); if (dot_comm != MPI_COMM_NULL) { - std::array global_products; - MPI_Allreduce(products.data(), global_products.data(), num_pairs, MFEM_MPI_REAL_T, MPI_SUM, dot_comm); - for (size_t i = 0; i < num_pairs; ++i) { - products[i] = global_products[i]; - } + std::vector global_products(pairs.size()); + MPI_Allreduce(products.data(), global_products.data(), static_cast(pairs.size()), MFEM_MPI_REAL_T, MPI_SUM, + dot_comm); + products.assign(global_products.begin(), global_products.end()); } #endif return products; } - /// apply Hessian-vector products for all supplied subspace directions - template - void batchedSubspaceHessVec(HessVecFunc hess_vec_func, const std::vector& inputs, - const std::vector& outputs) const - { - MFEM_VERIFY(inputs.size() == outputs.size(), "Subspace Hessian-vector batch input/output size mismatch"); - if (inputs.empty()) { - return; - } - - for (size_t i = 0; i < inputs.size(); ++i) { - hess_vec_func(*inputs[i], *outputs[i]); - } - } - - /// store accepted steps for optional later subspace enrichment - void pushAcceptedStepHistory(const mfem::Vector& step) const - { - if (nonlinear_options.trust_num_past_steps <= 0) { - accepted_step_history.clear(); - return; - } - - accepted_step_history.push_front(std::make_shared(step)); - const size_t max_size = static_cast(nonlinear_options.trust_num_past_steps) + 1; - while (accepted_step_history.size() > max_size) { - accepted_step_history.pop_back(); - } - } - - /// SteihaugTointDelegate implementation for four inner products. 
- std::array dot_many_4(const mfem::Vector& a0, const mfem::Vector& b0, const mfem::Vector& a1, - const mfem::Vector& b1, const mfem::Vector& a2, const mfem::Vector& b2, - const mfem::Vector& a3, const mfem::Vector& b3) const override - { - return dot_many(a0, b0, a1, b1, a2, b2, a3, b3); - } - - /// SteihaugTointDelegate implementation for two inner products. - std::array dot_many_2(const mfem::Vector& a0, const mfem::Vector& b0, const mfem::Vector& a1, - const mfem::Vector& b1) const override - { - return dot_many(a0, b0, a1, b1); - } - - /// SteihaugTointDelegate implementation for projecting to the trust-region boundary. - void projectToBoundaryWithCoefs(mfem::Vector& z, const mfem::Vector& d, double delta, double zz, double zd, - double dd) const override - { - double deltadelta_m_zz = delta * delta - zz; - if (deltadelta_m_zz == 0) return; - double tau = (std::sqrt(deltadelta_m_zz * dd + zd * zd) - zd) / dd; - z.Add(tau, d); - } - /// solve the exact trust-region subspace problem with directions ds, and the leftmosts template void solveTheSubspaceProblem([[maybe_unused]] mfem::Vector& z, [[maybe_unused]] const HessVecFunc& hess_vec_func, @@ -507,7 +432,9 @@ class TrustRegion : public mfem::NewtonSolver, public SteihaugTointDelegate { void doglegStep(const mfem::Vector& cp, const mfem::Vector& newtonP, double trSize, mfem::Vector& s) const { SMITH_MARK_FUNCTION; - auto [cc, nn] = dot_many(cp, cp, newtonP, newtonP); + const auto dots = dot_many({{&cp, &cp}, {&newtonP, &newtonP}}); + const double cc = dots[0]; + const double nn = dots[1]; double tt = trSize * trSize; s = 0.0; @@ -544,7 +471,8 @@ class TrustRegion : public mfem::NewtonSolver, public SteihaugTointDelegate { const TrustRegionSettings& settings, double& trSize, TrustRegionResults& results, double r0_norm_squared) const { - steihaugTointCG(r0, rCurrent, H, P, settings, trSize, results, r0_norm_squared, *this); + auto dot_many_lambda = [this](const std::vector& pairs) { return dot_many(pairs); }; + 
steihaugTointCG(r0, rCurrent, H, P, settings, trSize, results, r0_norm_squared, dot_many_lambda); } /// assemble the jacobian @@ -597,14 +525,8 @@ class TrustRegion : public mfem::NewtonSolver, public SteihaugTointDelegate { using real_t = mfem::real_t; - solve_start_x.SetSize(X.Size()); - solve_start_x = X; - min_residual_x.SetSize(X.Size()); - min_residual_x = X; - real_t norm, norm_goal = 0.0; norm = initial_norm = computeResidual(X, r); - min_residual_norm = initial_norm; if (norm == 0.0) return; norm_goal = std::max(rel_tol * initial_norm, abs_tol); @@ -741,7 +663,11 @@ class TrustRegion : public mfem::NewtonSolver, public SteihaugTointDelegate { subspace_hess_outputs.push_back(&trResults.H_d_old); } - batchedSubspaceHessVec(hess_vec_func, subspace_hess_inputs, subspace_hess_outputs); + MFEM_VERIFY(subspace_hess_inputs.size() == subspace_hess_outputs.size(), + "Subspace Hessian-vector batch input/output size mismatch"); + for (size_t i = 0; i < subspace_hess_inputs.size(); ++i) { + hess_vec_func(*subspace_hess_inputs[i], *subspace_hess_outputs[i]); + } } if (!have_computed_H_left_mosts) { @@ -754,7 +680,11 @@ class TrustRegion : public mfem::NewtonSolver, public SteihaugTointDelegate { leftmost_inputs.push_back(left.get()); leftmost_outputs.push_back(H_left_mosts.back().get()); } - batchedSubspaceHessVec(hess_vec_func, leftmost_inputs, leftmost_outputs); + MFEM_VERIFY(leftmost_inputs.size() == leftmost_outputs.size(), + "Subspace Hessian-vector batch input/output size mismatch"); + for (size_t i = 0; i < leftmost_inputs.size(); ++i) { + hess_vec_func(*leftmost_inputs[i], *leftmost_outputs[i]); + } } std::vector ds{&trResults.z, &trResults.cauchy_point}; @@ -764,65 +694,15 @@ class TrustRegion : public mfem::NewtonSolver, public SteihaugTointDelegate { H_ds.push_back(&trResults.H_d_old); } - std::vector H_past_steps; - std::vector past_step_inputs; - std::vector past_step_outputs; - const size_t max_past_steps = 
static_cast(std::max(nonlinear_options.trust_num_past_steps, 0)); - const size_t num_past_steps = - accepted_step_history.size() > 1 ? std::min(max_past_steps, accepted_step_history.size() - 1) : 0; - H_past_steps.reserve(num_past_steps); - past_step_inputs.reserve(num_past_steps); - past_step_outputs.reserve(num_past_steps); - for (size_t i = 0; i < num_past_steps; ++i) { - const auto& past_step = accepted_step_history[i + 1]; - H_past_steps.emplace_back(past_step->Size()); - past_step_inputs.push_back(past_step.get()); - past_step_outputs.push_back(&H_past_steps.back()); - } - if (!past_step_inputs.empty()) { - batchedSubspaceHessVec(hess_vec_func, past_step_inputs, past_step_outputs); - for (size_t i = 0; i < past_step_inputs.size(); ++i) { - ds.push_back(past_step_inputs[i]); - H_ds.push_back(past_step_outputs[i]); - } - } - - mfem::Vector solve_start_direction; - mfem::Vector H_solve_start_direction; - if (nonlinear_options.trust_use_solve_start_direction && solve_start_x.Size() == X.Size()) { - solve_start_direction.SetSize(X.Size()); - subtract(solve_start_x, X, solve_start_direction); - if (solve_start_direction.Norml2() > 0.0) { - H_solve_start_direction.SetSize(X.Size()); - std::vector solve_start_inputs{&solve_start_direction}; - std::vector solve_start_outputs{&H_solve_start_direction}; - batchedSubspaceHessVec(hess_vec_func, solve_start_inputs, solve_start_outputs); - ds.push_back(&solve_start_direction); - H_ds.push_back(&H_solve_start_direction); - } - } - - mfem::Vector min_residual_direction; - mfem::Vector H_min_residual_direction; - if (nonlinear_options.trust_use_min_residual_direction && min_residual_x.Size() == X.Size()) { - min_residual_direction.SetSize(X.Size()); - subtract(min_residual_x, X, min_residual_direction); - if (min_residual_direction.Norml2() > 0.0) { - H_min_residual_direction.SetSize(X.Size()); - std::vector min_res_inputs{&min_residual_direction}; - std::vector min_res_outputs{&H_min_residual_direction}; - 
batchedSubspaceHessVec(hess_vec_func, min_res_inputs, min_res_outputs); - ds.push_back(&min_residual_direction); - H_ds.push_back(&H_min_residual_direction); - } - } solveTheSubspaceProblem(trResults.d, hess_vec_func, ds, H_ds, r, tr_size, num_leftmost, candidate_left_mosts); } static constexpr double roundOffTol = 0.0; // 1e-14; hess_vec_func(trResults.d, trResults.H_d); - const auto [dHd, rd] = dot_many(trResults.d, trResults.H_d, r, trResults.d); + const auto dots = dot_many({{&trResults.d, &trResults.H_d}, {&r, &trResults.d}}); + const double dHd = dots[0]; + const double rd = dots[1]; double modelObjective = rd + 0.5 * dHd - roundOffTol; add(X, trResults.d, x_pred); @@ -831,10 +711,6 @@ class TrustRegion : public mfem::NewtonSolver, public SteihaugTointDelegate { double normPred = std::numeric_limits::max(); try { normPred = computeResidual(x_pred, r_pred); - if (normPred < min_residual_norm) { - min_residual_norm = normPred; - min_residual_x = x_pred; - } double obj1 = 0.5 * (rd + Dot(r_pred, trResults.d)) - roundOffTol; realObjective = obj1; } catch (const std::exception&) { @@ -845,7 +721,6 @@ class TrustRegion : public mfem::NewtonSolver, public SteihaugTointDelegate { if (normPred <= norm_goal) { trResults.d_old = trResults.d; trResults.has_d_old = true; - pushAcceptedStepHistory(trResults.d); if (!candidate_left_mosts.empty()) { left_mosts = std::move(candidate_left_mosts); } @@ -901,7 +776,6 @@ class TrustRegion : public mfem::NewtonSolver, public SteihaugTointDelegate { if (willAccept) { trResults.d_old = trResults.d; trResults.has_d_old = true; - pushAcceptedStepHistory(trResults.d); if (!candidate_left_mosts.empty()) { left_mosts = std::move(candidate_left_mosts); } diff --git a/src/smith/numerics/functional/differentiate_wrt.hpp b/src/smith/numerics/functional/differentiate_wrt.hpp index a3c71102bc..d72a0bfdd0 100644 --- a/src/smith/numerics/functional/differentiate_wrt.hpp +++ b/src/smith/numerics/functional/differentiate_wrt.hpp @@ -15,19 +15,19 
@@ template struct DifferentiateWRT {}; /** - * @brief this type exists solely as a way to signal to smith::Functional that the function - * smith::Functional::operator() should differentiate w.r.t. a specific argument + * @brief this type exists solely as a way to signal to `smith::Functional` that the function + * smith::Functional::operator()` should differentiate w.r.t. a specific argument */ struct differentiate_wrt_this { const mfem::Vector& ref; ///< the actual data wrapped by this type - /// @brief implicitly convert back to mfem::Vector to extract the actual data + /// @brief implicitly convert back to `mfem::Vector` to extract the actual data operator const mfem::Vector&() const { return ref; } }; /** * @brief this function is intended to only be used in combination with - * smith::Functional::operator(), as a way for the user to express that + * `smith::Functional::operator()`, as a way for the user to express that * it should both evaluate and differentiate w.r.t. a specific argument (only 1 argument at a time) * * For example: diff --git a/src/smith/numerics/functional/tests/functional_comparisons.cpp b/src/smith/numerics/functional/tests/functional_comparisons.cpp index eab6ca2bb4..a272498955 100644 --- a/src/smith/numerics/functional/tests/functional_comparisons.cpp +++ b/src/smith/numerics/functional/tests/functional_comparisons.cpp @@ -5,7 +5,6 @@ // SPDX-License-Identifier: (BSD-3-Clause) #include -#include #include #include #include @@ -33,8 +32,6 @@ using namespace smith; int nsamples = 1; // because mfem doesn't take in unsigned int -bool run_diagonal_benchmark = false; -int diagonal_benchmark_samples = 5; constexpr bool verbose = false; std::unique_ptr mesh2D; @@ -462,31 +459,6 @@ TEST(Elasticity, 3DLinear) { functional_test(*mesh3D, H1<1, 3>{}, H1<1, 3>{}, Di TEST(Elasticity, 3DQuadratic) { functional_test(*mesh3D, H1<2, 3>{}, H1<2, 3>{}, Dimension<3>{}); } TEST(Elasticity, 3DCubic) { functional_test(*mesh3D, H1<3, 3>{}, H1<3, 3>{}, 
Dimension<3>{}); } -namespace { - -template -double time_on_slowest_rank(Function&& function) -{ - auto [num_ranks, rank] = smith::getMPIInfo(); - (void)rank; - if (num_ranks > 1) { - MPI_Barrier(MPI_COMM_WORLD); - } - - auto start = std::chrono::steady_clock::now(); - function(); - auto stop = std::chrono::steady_clock::now(); - - double elapsed = std::chrono::duration_cast>(stop - start).count(); - double max_elapsed = elapsed; - if (num_ranks > 1) { - MPI_Allreduce(&elapsed, &max_elapsed, 1, MPI_DOUBLE, MPI_MAX, MPI_COMM_WORLD); - } - return max_elapsed; -} - -} // namespace - // TODO: reenable these once hcurl implements of simplex elements is finished // TEST(Hcurl, 2DLinear) { functional_test(*mesh2D, Hcurl<1>{}, Hcurl<1>{}, Dimension<2>{}); } // TEST(Hcurl, 2DQuadratic) { functional_test(*mesh2D, Hcurl<2>{}, Hcurl<2>{}, Dimension<2>{}); } @@ -509,10 +481,6 @@ int main(int argc, char* argv[]) args.AddOption(&serial_refinement, "-r", "--ref", ""); args.AddOption(¶llel_refinement, "-pr", "--pref", ""); args.AddOption(&nsamples, "-n", "--n-samples", "Samples per test"); - args.AddOption(&run_diagonal_benchmark, "-rdb", "--run-diagonal-benchmark", "-sdb", "--skip-diagonal-benchmark", - "Run direct diagonal vs sparse assemble+GetDiag timing benchmark."); - args.AddOption(&diagonal_benchmark_samples, "-dbs", "--diagonal-benchmark-samples", - "Samples for the diagonal assembly benchmark."); args.Parse(); if (!args.Good()) { diff --git a/src/smith/numerics/mfem_trust_region_subspace.cpp b/src/smith/numerics/mfem_trust_region_subspace.cpp index 0f2eddb32a..9edb69756e 100644 --- a/src/smith/numerics/mfem_trust_region_subspace.cpp +++ b/src/smith/numerics/mfem_trust_region_subspace.cpp @@ -443,12 +443,8 @@ TrustRegionSubspaceResult solveSubspaceProblem(const std::vector& A_directions, const mfem::Vector& b, double delta, int num_leftmost) { -#if defined(SMITH_USE_SLEPC) && defined(SMITH_TRUST_REGION_USE_PETSC_SUBSPACE) - return solveSubspaceProblemPetsc(directions, 
A_directions, b, delta, num_leftmost); -#else throw TrustRegionException("Trust-region subspace solve requires MFEM LAPACK support."); return std::make_tuple(b, std::vector>{}, std::vector{}, 0.0); -#endif } /// @brief report unavailable MFEM subspace solve when MFEM was built without LAPACK. diff --git a/src/smith/numerics/petsc_trust_region_subspace.cpp b/src/smith/numerics/petsc_trust_region_subspace.cpp deleted file mode 100644 index 6883f2571c..0000000000 --- a/src/smith/numerics/petsc_trust_region_subspace.cpp +++ /dev/null @@ -1,362 +0,0 @@ -// Copyright (c) Lawrence Livermore National Security, LLC and -// other Smith Project Developers. See the top-level LICENSE file for -// details. -// -// SPDX-License-Identifier: (BSD-3-Clause) - -#include "smith/numerics/trust_region_solver.hpp" - -#if defined(SMITH_USE_SLEPC) && defined(SMITH_TRUST_REGION_USE_PETSC_SUBSPACE) - -#include - -#include "smith/infrastructure/profiling.hpp" -#include "smith/numerics/dense_petsc.hpp" - -namespace smith { -namespace { - -/// @brief struct which aids in moving between mfem::Vector and petsc BV -struct BasisVectors { - BasisVectors(const mfem::Vector& state) : local_rows(state.Size()), global_rows(globalSize(state, PETSC_COMM_WORLD)) - { - VecCreateMPI(PETSC_COMM_WORLD, local_rows, global_rows, &v); - - PetscInt iStart, iEnd; - VecGetOwnershipRange(v, &iStart, &iEnd); - - col_indices.reserve(size_t(local_rows)); - for (int i = iStart; i < iEnd; ++i) { - col_indices.push_back(i); - } - } - - ~BasisVectors() { VecDestroy(&v); } - - BV constructBases(const std::vector& states) const - { - size_t num_cols = states.size(); - BV Q; - BVCreate(PETSC_COMM_SELF, &Q); - BVSetType(Q, BVVECS); - BVSetSizesFromVec(Q, v, static_cast(num_cols)); - for (size_t c = 0; c < num_cols; ++c) { - VecSetValues(v, local_rows, &col_indices[0], &(*states[c])[0], INSERT_VALUES); - VecAssemblyBegin(v); - VecAssemblyEnd(v); - int c_int = static_cast(c); - BVInsertVec(Q, c_int, v); - } - return Q; - } - - 
private: - const int local_rows; - const int global_rows; - - std::vector col_indices; - Vec v; -}; - -Vec petscVec(const mfem::Vector& state) -{ - const int local_rows = state.Size(); - const int global_rows = globalSize(state, PETSC_COMM_WORLD); - - Vec v; - VecCreateMPI(PETSC_COMM_WORLD, local_rows, global_rows, &v); - - PetscInt iStart, iEnd; - VecGetOwnershipRange(v, &iStart, &iEnd); - - std::vector col_indices; - col_indices.reserve(static_cast(local_rows)); - for (int i = iStart; i < iEnd; ++i) { - col_indices.push_back(i); - } - - VecSetValues(v, local_rows, &col_indices[0], &state[0], INSERT_VALUES); - - VecAssemblyBegin(v); - VecAssemblyEnd(v); - - return v; -} - -void copy(const Vec& v, mfem::Vector& s) -{ - const int local_rows = s.Size(); - PetscInt iStart, iEnd; - VecGetOwnershipRange(v, &iStart, &iEnd); - - SLIC_ERROR_IF(local_rows != iEnd - iStart, - "Inconsistency between local t-dof vector size and petsc start and end indices"); - - std::vector col_indices; - col_indices.reserve(static_cast(local_rows)); - for (int i = iStart; i < iEnd; ++i) { - col_indices.push_back(i); - } - - VecGetValues(v, local_rows, &col_indices[0], &s[0]); -} - -Mat dot(const std::vector& s, const std::vector& As) -{ - SLIC_ERROR_IF(s.size() != As.size(), - "Search directions and their linear operator result must have same number of columns"); - size_t num_cols = s.size(); - int num_cols_int = static_cast(num_cols); - Mat sAs; - MatCreateSeqDense(PETSC_COMM_SELF, num_cols_int, num_cols_int, NULL, &sAs); - for (size_t i = 0; i < num_cols; ++i) { - for (size_t j = 0; j < num_cols; ++j) { - MatSetValue(sAs, static_cast(i), static_cast(j), mfem::InnerProduct(PETSC_COMM_WORLD, *s[i], *As[j]), - INSERT_VALUES); - } - } - MatAssemblyBegin(sAs, MAT_FINAL_ASSEMBLY); - MatAssemblyEnd(sAs, MAT_FINAL_ASSEMBLY); - return sAs; -} - -Vec dot(const std::vector& s, const mfem::Vector& b) -{ - size_t num_cols = s.size(); - Vec sb; - VecCreateSeq(PETSC_COMM_SELF, static_cast(num_cols), &sb); 
- for (size_t i = 0; i < num_cols; ++i) { - VecSetValue(sb, static_cast(i), mfem::InnerProduct(PETSC_COMM_WORLD, *s[i], b), INSERT_VALUES); - } - return sb; -} - -auto qr(const std::vector& states) -{ - BasisVectors bvs(*states[0]); - BV Q = bvs.constructBases(states); - - Mat R; - int num_cols = static_cast(states.size()); - MatCreateSeqDense(PETSC_COMM_SELF, num_cols, num_cols, NULL, &R); - auto error = BVOrthogonalize(Q, R); - - if (error) throw TrustRegionException("BVOrthogonalize failed."); - - return std::make_pair(Q, DenseMat(R)); -} - -double quadraticEnergy(const DenseMat& A, const DenseVec& b, const DenseVec& x) -{ - DenseVec Ax = A * x; - double xAx = dot(x, Ax); - double xb = dot(x, b); - return 0.5 * xAx - xb; -} - -double pnorm_squared(const DenseVec& bvv, const DenseVec& sig) -{ - auto bvv_div_sig_squared = bvv / (sig * sig); - return sum(bvv_div_sig_squared); -} - -double qnorm_squared(const DenseVec& bvv, const DenseVec& sig) -{ - auto bvv_div_sig_cubed = bvv / (sig * sig * sig); - return sum(bvv_div_sig_cubed); -} - -auto exactTrustRegionSolve(DenseMat A, const DenseVec& b, double delta, int num_leftmost) -{ - auto [isize, jsize] = A.size(); - auto isize2 = b.size(); - SLIC_ERROR_IF(isize != jsize, "Exact trust region solver requires square matrices"); - SLIC_ERROR_IF(isize != isize2, - "The right hand size for exact trust region solve must be consistent with the input matrix size"); - - auto [sigs, V] = eigh(A); - std::vector leftmosts; - std::vector minsigs; - size_t num_leftmost_possible(size_t(std::min(num_leftmost, isize))); - for (size_t i = 0; i < num_leftmost_possible; ++i) { - leftmosts.emplace_back(V[i]); - minsigs.emplace_back(sigs[i]); - } - - const auto& leftMost = V[0]; - double minSig = sigs[0]; - - DenseVec bv(isize); - for (size_t i = 0; i < size_t(isize); ++i) { - bv.setValue(i, dot(V[i], b)); - } - - DenseVec bvOverSigs = bv / sigs; - double sigScale = sum(abs(sigs)) / isize; - double eps = 1e-12 * sigScale; - - if ((minSig >= 
eps) && (norm(bvOverSigs) <= delta)) { - return std::make_tuple(A.solve(b), leftmosts, minsigs, true); - } - - double lam = minSig < eps ? -minSig + eps : 0.0; - - DenseVec sigsPlusLam = sigs + lam; - - bvOverSigs = bv / sigsPlusLam; - - if ((minSig < eps) && (norm(bvOverSigs) < delta)) { - DenseVec p(isize); - p = 0.0; - for (int i = 0; i < isize; ++i) { - p.add(bv[i], V[size_t(i)]); - } - - const auto& z = leftMost; - double pz = dot(p, z); - double pp = dot(p, p); - double ddmpp = std::max(delta * delta - pp, 0.0); - - double tau1 = -pz + std::sqrt(pz * pz + ddmpp); - double tau2 = -pz - std::sqrt(pz * pz + ddmpp); - - DenseVec x1(p); - DenseVec x2(p); - x1.add(tau1, z); - x2.add(tau2, z); - - double e1 = quadraticEnergy(A, b, x1); - double e2 = quadraticEnergy(A, b, x2); - - DenseVec x = e1 < e2 ? x1 : x2; - - return std::make_tuple(x, leftmosts, minsigs, true); - } - DenseVec bvbv = bv * bv; - sigsPlusLam = sigs + lam; - - double pNormSq = pnorm_squared(bvbv, sigsPlusLam); - double pNorm = std::sqrt(pNormSq); - double bError = (pNorm - delta) / delta; - - size_t iters = 0; - size_t maxIters = 30; - while ((std::abs(bError) > 1e-9) && (iters++ < maxIters)) { - double qNormSq = qnorm_squared(bvbv, sigsPlusLam); - lam += (pNormSq / qNormSq) * bError; - sigsPlusLam = sigs + lam; - pNormSq = pnorm_squared(bvbv, sigsPlusLam); - pNorm = std::sqrt(pNormSq); - bError = (pNorm - delta) / delta; - } - - bool success = true; - if (iters >= maxIters) { - success = false; - } - - bvOverSigs = bv / sigsPlusLam; - - DenseVec x(isize); - x = 0.0; - for (int i = 0; i < isize; ++i) { - x.add(bvOverSigs[i], V[size_t(i)]); - } - - double e1 = quadraticEnergy(A, b, x); - double e2 = quadraticEnergy(A, b, -x); - - if (e2 < e1) { - x *= -delta / norm(x); - } else { - x *= delta / norm(x); - } - - return std::make_tuple(x, leftmosts, minsigs, success); -} - -std::vector remove_at(const std::vector& a, size_t j) -{ - std::vector b; - for (size_t i = 0; i < a.size(); ++i) { - if (i != 
j) { - b.emplace_back(a[i]); - } - } - return b; -} - -} // namespace - -TrustRegionSubspaceResult solveSubspaceProblemPetsc(const std::vector& states, - const std::vector& Astates, - const mfem::Vector& b, double delta, int num_leftmost) -{ - SMITH_MARK_FUNCTION; - DenseMat sAs1 = dot(states, Astates); - DenseMat sAs = sym(sAs1); - - if (sAs.hasNan()) { - throw TrustRegionException("States in subspace solve contain NaNs."); - } - - auto [Q_parallel, R] = qr(states); - - if (R.hasNan()) { - throw TrustRegionException("R from qr returning with a NaN."); - } - - auto [rows, cols] = R.size(); - SLIC_ERROR_IF(rows != cols, "R matrix is not square in subspace problem solve\n"); - - double trace_mag = 0.0; - for (int i = 0; i < rows; ++i) { - trace_mag += std::abs(R(i, i)); - } - - for (int i = 0; i < rows; ++i) { - if (R(i, i) < 1e-9 * trace_mag) { - auto statesNew = remove_at(states, size_t(i)); - auto AstatesNew = remove_at(Astates, size_t(i)); - return solveSubspaceProblemPetsc(statesNew, AstatesNew, b, delta, num_leftmost); - } - } - - auto Rinv = inverse(R); - DenseMat pAp = sAs.PtAP(Rinv); - - Vec b_parallel = petscVec(b); - std::vector pb_vec(states.size()); - BVDotVec(Q_parallel, b_parallel, &pb_vec[0]); - DenseVec pb(pb_vec); - - auto [reduced_x, leftvecs, leftvals, success] = exactTrustRegionSolve(pAp, pb, delta, num_leftmost); - (void)success; - - double energy = quadraticEnergy(pAp, pb, reduced_x); - - Vec x_parallel; - VecDuplicate(b_parallel, &x_parallel); - - std::vector reduced_x_vec = reduced_x.getValues(); - BVMultVec(Q_parallel, 1.0, 0.0, x_parallel, &reduced_x_vec[0]); - mfem::Vector sol(b); - copy(x_parallel, sol); - - std::vector> leftmosts; - for (size_t i = 0; i < leftvecs.size(); ++i) { - auto reduced_leftvec = leftvecs[i].getValues(); - BVMultVec(Q_parallel, 1.0, 0.0, x_parallel, &reduced_leftvec[0]); - leftmosts.emplace_back(std::make_shared(b)); - copy(x_parallel, *leftmosts[i]); - } - - BVDestroy(&Q_parallel); - VecDestroy(&b_parallel); - 
VecDestroy(&x_parallel); - return std::make_tuple(sol, leftmosts, leftvals, energy); -} - -} // namespace smith - -#endif // SMITH_USE_SLEPC diff --git a/src/smith/numerics/solver_config.hpp b/src/smith/numerics/solver_config.hpp index dc031c4d85..aebf795305 100644 --- a/src/smith/numerics/solver_config.hpp +++ b/src/smith/numerics/solver_config.hpp @@ -467,16 +467,6 @@ struct NonlinearSolverOptions { /// Number of extra leftmost eigenvector to be stored between solves int num_leftmost = 1; - /// Number of additional older accepted TrustRegion steps to include in subspace solves. - int trust_num_past_steps = 0; - - /// Include the displacement from current nonlinear-solve state back to the nonlinear-solve initial state. - bool trust_use_solve_start_direction = false; - - /// Include the displacement from current nonlinear-solve state to the state with the minimum residual seen so far in - /// this nonlinear solve. - bool trust_use_min_residual_direction = false; - /// Should the gradient be converted to a monolithic matrix bool force_monolithic = false; }; diff --git a/src/smith/numerics/steihaug_toint_cg.cpp b/src/smith/numerics/steihaug_toint_cg.cpp index 546fcd81e5..4964d3a641 100644 --- a/src/smith/numerics/steihaug_toint_cg.cpp +++ b/src/smith/numerics/steihaug_toint_cg.cpp @@ -10,22 +10,19 @@ namespace smith { namespace { -void smith_add(const mfem::Vector& a, double b, const mfem::Vector& c, mfem::Vector& out) +void projectToBoundaryWithCoefs(mfem::Vector& z, const mfem::Vector& d, double delta, double zz, double zd, double dd) { - if (out.GetData() == c.GetData()) { - out = a; - out.Add(b, c); - } else { - out = a; - out.Add(b, c); - } + const double deltadelta_m_zz = delta * delta - zz; + if (deltadelta_m_zz <= 0.0) return; + const double tau = (std::sqrt(deltadelta_m_zz * dd + zd * zd) - zd) / dd; + z.Add(tau, d); } } // namespace void steihaugTointCG(const mfem::Vector& r0, mfem::Vector& rCurrent, const mfem::Operator& H, const mfem::Solver* P, const 
TrustRegionSettings& settings, double& trSize, TrustRegionResults& results, - double r0_norm_squared, const SteihaugTointDelegate& delegate) + double r0_norm_squared, const DotManyFunction& dot_many) { // minimize r0@z + 0.5*z@J@z results.interior_status = TrustRegionResults::Status::Interior; @@ -58,13 +55,12 @@ void steihaugTointCG(const mfem::Vector& r0, mfem::Vector& rCurrent, const mfem: double zz = 0.; // rPr = dot(rCurrent, Pr) - auto rPr_arr = delegate.dot_many_2(rCurrent, Pr, rCurrent, Pr); // We only need the first - double rPr = rPr_arr[0]; + double rPr = dot_many({{&rCurrent, &Pr}, {&rCurrent, &Pr}})[0]; for (cgIter = 1; cgIter <= settings.max_cg_iterations; ++cgIter) { H.Mult(d, Hd); - auto dots = delegate.dot_many_4(d, rCurrent, d, Hd, z, d, d, d); + auto dots = dot_many({{&d, &rCurrent}, {&d, &Hd}, {&z, &d}, {&d, &d}}); double descent_check = dots[0]; double curvature = dots[1]; double zd = dots[2]; @@ -84,7 +80,7 @@ void steihaugTointCG(const mfem::Vector& r0, mfem::Vector& rCurrent, const mfem: const bool go_to_boundary = curvature <= 0 || zzNp1 >= trSize * trSize; if (go_to_boundary) { - delegate.projectToBoundaryWithCoefs(z, d, trSize, zz, zd, dd); + projectToBoundaryWithCoefs(z, d, trSize, zz, zd, dd); if (curvature <= 0) { results.interior_status = TrustRegionResults::Status::NegativeCurvature; } else { @@ -94,14 +90,15 @@ void steihaugTointCG(const mfem::Vector& r0, mfem::Vector& rCurrent, const mfem: } auto& zPred = Pr; - smith_add(z, alphaCg, d, zPred); + zPred = z; + zPred.Add(alphaCg, d); z = zPred; if (results.interior_status == TrustRegionResults::Status::NonDescentDirection) { return; } - smith_add(rCurrent, alphaCg, Hd, rCurrent); + rCurrent.Add(alphaCg, Hd); if (P) { P->Mult(rCurrent, Pr); @@ -109,7 +106,7 @@ void steihaugTointCG(const mfem::Vector& r0, mfem::Vector& rCurrent, const mfem: Pr = rCurrent; } - auto dots2 = delegate.dot_many_2(rCurrent, Pr, rCurrent, rCurrent); + auto dots2 = dot_many({{&rCurrent, &Pr}, {&rCurrent, 
&rCurrent}}); double rPrNp1 = dots2[0]; double r_current_norm_squared = dots2[1]; diff --git a/src/smith/numerics/steihaug_toint_cg.hpp b/src/smith/numerics/steihaug_toint_cg.hpp index c44316b637..d519590920 100644 --- a/src/smith/numerics/steihaug_toint_cg.hpp +++ b/src/smith/numerics/steihaug_toint_cg.hpp @@ -6,8 +6,12 @@ #pragma once -#include "mfem.hpp" #include +#include +#include +#include + +#include "mfem.hpp" namespace smith { @@ -104,25 +108,8 @@ struct TrustRegionResults { size_t cg_iterations_count = 0; }; -/// Delegate for operations Steihaug-Toint CG needs from its caller. -class SteihaugTointDelegate { - public: - /// destructor - virtual ~SteihaugTointDelegate() = default; - - /// compute four inner products with one global reduction. - virtual std::array dot_many_4(const mfem::Vector& a0, const mfem::Vector& b0, const mfem::Vector& a1, - const mfem::Vector& b1, const mfem::Vector& a2, const mfem::Vector& b2, - const mfem::Vector& a3, const mfem::Vector& b3) const = 0; - - /// compute two inner products with one global reduction. - virtual std::array dot_many_2(const mfem::Vector& a0, const mfem::Vector& b0, const mfem::Vector& a1, - const mfem::Vector& b1) const = 0; - - /// project z along d to the trust-region boundary using precomputed dot products. 
- virtual void projectToBoundaryWithCoefs(mfem::Vector& z, const mfem::Vector& d, double delta, double zz, double zd, - double dd) const = 0; -}; +using DotPair = std::pair; +using DotManyFunction = std::function(const std::vector&)>; /** * @brief Minimize quadratic sub-problem given residual vector, the action of the stiffness and a preconditioner @@ -133,6 +120,6 @@ class SteihaugTointDelegate { */ void steihaugTointCG(const mfem::Vector& r0, mfem::Vector& rCurrent, const mfem::Operator& H, const mfem::Solver* P, const TrustRegionSettings& settings, double& trSize, TrustRegionResults& results, - double r0_norm_squared, const SteihaugTointDelegate& delegate); + double r0_norm_squared, const DotManyFunction& dot_many); } // namespace smith diff --git a/src/smith/numerics/tests/CMakeLists.txt b/src/smith/numerics/tests/CMakeLists.txt index 617a0a4f11..aa2ea5e011 100644 --- a/src/smith/numerics/tests/CMakeLists.txt +++ b/src/smith/numerics/tests/CMakeLists.txt @@ -32,7 +32,6 @@ if(PETSC_FOUND) if(SLEPC_FOUND) set(slepc_solver_tests test_eigensolver.cpp - test_trust_region_solver_petsc.cpp ) smith_add_tests(SOURCES ${slepc_solver_tests} DEPENDS_ON ${numerics_test_dependencies} diff --git a/src/smith/numerics/tests/test_steihaug_toint_cg.cpp b/src/smith/numerics/tests/test_steihaug_toint_cg.cpp index bd48fcbba4..03c212aa0d 100644 --- a/src/smith/numerics/tests/test_steihaug_toint_cg.cpp +++ b/src/smith/numerics/tests/test_steihaug_toint_cg.cpp @@ -9,44 +9,15 @@ namespace { -class TestDelegate : public smith::SteihaugTointDelegate { - public: - std::array dot_many_4(const mfem::Vector& a0, const mfem::Vector& b0, const mfem::Vector& a1, - const mfem::Vector& b1, const mfem::Vector& a2, const mfem::Vector& b2, - const mfem::Vector& a3, const mfem::Vector& b3) const override - { - return {a0 * b0, a1 * b1, a2 * b2, a3 * b3}; - } - - std::array dot_many_2(const mfem::Vector& a0, const mfem::Vector& b0, const mfem::Vector& a1, - const mfem::Vector& b1) const override - { - 
return {a0 * b0, a1 * b1}; - } - - void projectToBoundaryWithCoefs(mfem::Vector& z, const mfem::Vector& d, double delta, double zz, double zd, - double dd) const override - { - double deltadelta_m_zz = delta * delta - zz; - if (deltadelta_m_zz <= 0) return; - double tau = (std::sqrt(deltadelta_m_zz * dd + zd * zd) - zd) / dd; - z.Add(tau, d); - } -}; - -class DiagonalOperator : public mfem::Operator { - public: - DiagonalOperator(const mfem::Vector& diag) : mfem::Operator(diag.Size()), diag_(diag) {} - void Mult(const mfem::Vector& x, mfem::Vector& y) const override - { - for (int i = 0; i < height; ++i) { - y[i] = diag_[i] * x[i]; - } +std::vector dot_many(const std::vector& pairs) +{ + std::vector out; + out.reserve(pairs.size()); + for (const auto& [a, b] : pairs) { + out.push_back((*a) * (*b)); } - - private: - const mfem::Vector& diag_; -}; + return out; +} } // namespace @@ -56,7 +27,7 @@ TEST(SteihaugTointCG, SolvesSPDInsideBoundary) mfem::Vector diag(size); diag[0] = 2.0; diag[1] = 4.0; - DiagonalOperator H(diag); + mfem::SparseMatrix H(diag); mfem::Vector r0(size); r0[0] = 1.0; @@ -70,9 +41,8 @@ TEST(SteihaugTointCG, SolvesSPDInsideBoundary) smith::TrustRegionResults results(size); mfem::Vector rCurrent(size); - TestDelegate delegate; - smith::steihaugTointCG(r0, rCurrent, H, nullptr, settings, trSize, results, r0.Norml2() * r0.Norml2(), delegate); + smith::steihaugTointCG(r0, rCurrent, H, nullptr, settings, trSize, results, r0.Norml2() * r0.Norml2(), dot_many); // Solution should be H^{-1} (-r0) // x = -0.5, y = -0.25 @@ -86,7 +56,7 @@ TEST(SteihaugTointCG, HitsBoundary) int size = 1; mfem::Vector diag(size); diag[0] = 1.0; - DiagonalOperator H(diag); + mfem::SparseMatrix H(diag); mfem::Vector r0(size); r0[0] = 1.0; @@ -98,9 +68,8 @@ TEST(SteihaugTointCG, HitsBoundary) smith::TrustRegionResults results(size); mfem::Vector rCurrent(size); - TestDelegate delegate; - smith::steihaugTointCG(r0, rCurrent, H, nullptr, settings, trSize, results, r0.Norml2() * 
r0.Norml2(), delegate); + smith::steihaugTointCG(r0, rCurrent, H, nullptr, settings, trSize, results, r0.Norml2() * r0.Norml2(), dot_many); EXPECT_NEAR(results.z.Norml2(), 0.5, 1e-9); EXPECT_EQ(results.interior_status, smith::TrustRegionResults::Status::OnBoundary); @@ -111,7 +80,7 @@ TEST(SteihaugTointCG, DetectsNegativeCurvature) int size = 1; mfem::Vector diag(size); diag[0] = -1.0; // Negative curvature - DiagonalOperator H(diag); + mfem::SparseMatrix H(diag); mfem::Vector r0(size); r0[0] = 1.0; @@ -123,9 +92,8 @@ TEST(SteihaugTointCG, DetectsNegativeCurvature) smith::TrustRegionResults results(size); mfem::Vector rCurrent(size); - TestDelegate delegate; - smith::steihaugTointCG(r0, rCurrent, H, nullptr, settings, trSize, results, r0.Norml2() * r0.Norml2(), delegate); + smith::steihaugTointCG(r0, rCurrent, H, nullptr, settings, trSize, results, r0.Norml2() * r0.Norml2(), dot_many); // For negative curvature, it should go to boundary EXPECT_NEAR(results.z.Norml2(), 2.0, 1e-9); diff --git a/src/smith/numerics/tests/test_trust_region_solver_petsc.cpp b/src/smith/numerics/tests/test_trust_region_solver_petsc.cpp deleted file mode 100644 index 55c7a16f77..0000000000 --- a/src/smith/numerics/tests/test_trust_region_solver_petsc.cpp +++ /dev/null @@ -1,225 +0,0 @@ -// Copyright (c) Lawrence Livermore National Security, LLC and -// other Smith Project Developers. See the top-level LICENSE file for -// details. 
-// -// SPDX-License-Identifier: (BSD-3-Clause) - -#include -#include -#include -#include -#include - -#include "gtest/gtest.h" -#include "mfem.hpp" - -#include "smith/physics/state/state_manager.hpp" -#include "smith/infrastructure/application_manager.hpp" -#include "smith/numerics/trust_region_solver.hpp" -#include "smith/infrastructure/profiling.hpp" -#include "smith/mesh_utils/mesh_utils.hpp" -#include "smith/numerics/functional/finite_element.hpp" -#include "smith/physics/state/finite_element_state.hpp" -#include "smith/physics/state/finite_element_vector.hpp" -#include "smith/numerics/petsc_solvers.hpp" - -#ifdef SMITH_TRUST_REGION_USE_PETSC_SUBSPACE - -const std::string MESHTAG = "mesh"; - -static constexpr int scalar_field_order = 1; - -struct MeshFixture : public testing::Test { - void SetUp() - { - smith::StateManager::initialize(datastore_, "solver_test"); - - auto mfem_shape = mfem::Element::QUADRILATERAL; - - double length = 0.5; - double width = 2.0; - auto meshtmp = - smith::mesh::refineAndDistribute(mfem::Mesh::MakeCartesian2D(2, 1, mfem_shape, true, length, width), 0, 0); - mesh_ = &smith::StateManager::setMesh(std::move(meshtmp), MESHTAG); - } - - axom::sidre::DataStore datastore_; - mfem::ParMesh* mesh_; -}; - -std::vector applyLinearOperator(const Mat& A, const std::vector& states) -{ - std::vector Astates; - for (auto s : states) { - Astates.emplace_back(*s); - } - - int local_rows(states[0]->Size()); - int global_rows(smith::globalSize(*states[0], PETSC_COMM_WORLD)); - - Vec x; - Vec y; - - VecCreateMPI(PETSC_COMM_WORLD, local_rows, global_rows, &x); - VecCreateMPI(PETSC_COMM_WORLD, local_rows, global_rows, &y); - - PetscInt iStart, iEnd; - VecGetOwnershipRange(x, &iStart, &iEnd); - - std::vector col_indices; - col_indices.reserve(static_cast(local_rows)); - for (int i = iStart; i < iEnd; ++i) { - col_indices.push_back(i); - } - - size_t num_cols = states.size(); - for (size_t c = 0; c < num_cols; ++c) { - VecSetValues(x, local_rows, 
&col_indices[0], &(*states[c])[0], INSERT_VALUES); - VecAssemblyBegin(x); - VecAssemblyEnd(x); - MatMult(A, x, y); - VecGetValues(y, local_rows, &col_indices[0], &Astates[c][0]); - } - - VecDestroy(&x); - VecDestroy(&y); - - return Astates; -} - -auto createDiagonalTestMatrix(mfem::Vector& x) -{ - const int local_rows = x.Size(); - const int global_rows = smith::globalSize(x, PETSC_COMM_WORLD); - - Vec b; - VecCreateMPI(PETSC_COMM_WORLD, local_rows, global_rows, &b); - - PetscInt iStart, iEnd; - VecGetOwnershipRange(b, &iStart, &iEnd); - VecDestroy(&b); - - std::vector col_indices; - col_indices.reserve(static_cast(local_rows)); - for (int i = iStart; i < iEnd; ++i) { - col_indices.push_back(i); - } - - std::vector row_offsets(static_cast(local_rows) + 1); - for (int i = 0; i < local_rows + 1; ++i) { - row_offsets[static_cast(i)] = i; - } - - Mat A; - MatCreateMPIAIJWithArrays(PETSC_COMM_WORLD, local_rows, local_rows, global_rows, global_rows, &row_offsets[0], - &col_indices[0], &x[0], &A); - - return A; -} - -void expectNearVector(const mfem::Vector& a, const mfem::Vector& b, double tol) -{ - ASSERT_EQ(a.Size(), b.Size()); - for (int i = 0; i < a.Size(); ++i) { - EXPECT_NEAR(a[i], b[i], tol); - } -} - -TEST_F(MeshFixture, PetscSubspaceSolveHitsTrustRegionBoundary) -{ - SMITH_MARK_FUNCTION; - - auto u1 = smith::StateManager::newState(smith::H1{}, "u1", MESHTAG); - auto u2 = smith::StateManager::newState(smith::H1{}, "u2", MESHTAG); - auto u3 = smith::StateManager::newState(smith::H1{}, "u3", MESHTAG); - auto a = smith::StateManager::newState(smith::H1{}, "a", MESHTAG); - auto b = smith::StateManager::newState(smith::H1{}, "b", MESHTAG); - - u1 = 1.0; - for (int i = 0; i < u2.Size(); ++i) { - u2[i] = i + 2; - u3[i] = i * i - 15.0; - a[i] = 2 * i + 0.01 * i * i + 1.25; - b[i] = -i + 0.02 * i * i + 0.1; - } - std::vector states = {&u1, &u2, &u3}; - - auto A_parallel = createDiagonalTestMatrix(a); - std::vector Astates = applyLinearOperator(A_parallel, states); - - 
std::vector AstatePtrs; - for (size_t i = 0; i < Astates.size(); ++i) { - AstatePtrs.push_back(&Astates[i]); - } - - double delta = 0.001; - auto [sol, leftvecs, leftvals, energy] = smith::solveSubspaceProblemPetsc(states, AstatePtrs, b, delta, 1); - - EXPECT_NEAR(sol.Norml2(), delta, 1e-12); - EXPECT_FALSE(leftvecs.empty()); - EXPECT_EQ(leftvals.size(), 1); - EXPECT_LT(energy, 0.0); - - MatDestroy(&A_parallel); -} - -TEST_F(MeshFixture, MfemSubspaceSolveMatchesPetsc) -{ - SMITH_MARK_FUNCTION; - - auto u1 = smith::StateManager::newState(smith::H1{}, "u1", MESHTAG); - auto u2 = smith::StateManager::newState(smith::H1{}, "u2", MESHTAG); - auto u3 = smith::StateManager::newState(smith::H1{}, "u3", MESHTAG); - auto a = smith::StateManager::newState(smith::H1{}, "a", MESHTAG); - auto b = smith::StateManager::newState(smith::H1{}, "b", MESHTAG); - - u1 = 1.0; - for (int i = 0; i < u2.Size(); ++i) { - u2[i] = i + 2; - u3[i] = i * i - 15.0; - a[i] = 2 * i + 0.01 * i * i + 1.25; - b[i] = -i + 0.02 * i * i + 0.1; - } - - std::vector states = {&u1, &u2, &u3, &u2}; - auto A_parallel = createDiagonalTestMatrix(a); - std::vector Astates = applyLinearOperator(A_parallel, states); - - std::vector AstatePtrs; - for (size_t i = 0; i < Astates.size(); ++i) { - AstatePtrs.push_back(&Astates[i]); - } - - auto [petsc_sol, petsc_leftvecs, petsc_leftvals, petsc_energy] = - smith::solveSubspaceProblemPetsc(states, AstatePtrs, b, 0.001, 2); - auto [mfem_sol, mfem_leftvecs, mfem_leftvals, mfem_energy] = - smith::solveSubspaceProblemMfem(states, AstatePtrs, b, 0.001, 2); - - expectNearVector(mfem_sol, petsc_sol, 1e-10); - ASSERT_EQ(mfem_leftvecs.size(), petsc_leftvecs.size()); - ASSERT_EQ(mfem_leftvals.size(), petsc_leftvals.size()); - for (size_t i = 0; i < mfem_leftvecs.size(); ++i) { - const double same = smith::innerProduct(*mfem_leftvecs[i], *petsc_leftvecs[i], MPI_COMM_WORLD); - mfem::Vector neg(*petsc_leftvecs[i]); - neg *= -1.0; - const double flipped = 
smith::innerProduct(*mfem_leftvecs[i], neg, MPI_COMM_WORLD); - if (std::abs(flipped) > std::abs(same)) { - expectNearVector(*mfem_leftvecs[i], neg, 1e-9); - } else { - expectNearVector(*mfem_leftvecs[i], *petsc_leftvecs[i], 1e-9); - } - EXPECT_NEAR(mfem_leftvals[i], petsc_leftvals[i], 1e-10); - } - EXPECT_NEAR(mfem_energy, petsc_energy, 1e-12); - - MatDestroy(&A_parallel); -} - -#endif // SMITH_TRUST_REGION_USE_PETSC_SUBSPACE - -int main(int argc, char* argv[]) -{ - ::testing::InitGoogleTest(&argc, argv); - smith::ApplicationManager applicationManager(argc, argv); - return RUN_ALL_TESTS(); -} diff --git a/src/smith/numerics/trust_region_solver.hpp b/src/smith/numerics/trust_region_solver.hpp index 98dc9486b8..1a024e5032 100644 --- a/src/smith/numerics/trust_region_solver.hpp +++ b/src/smith/numerics/trust_region_solver.hpp @@ -55,12 +55,6 @@ TrustRegionSubspaceResult solveSubspaceProblem(const std::vector& A_directions, const mfem::Vector& b, double delta, int num_leftmost); -#if defined(SMITH_USE_SLEPC) && defined(SMITH_TRUST_REGION_USE_PETSC_SUBSPACE) -TrustRegionSubspaceResult solveSubspaceProblemPetsc(const std::vector& directions, - const std::vector& A_directions, - const mfem::Vector& b, double delta, int num_leftmost); -#endif - TrustRegionSubspaceResult solveSubspaceProblemMfem(const std::vector& directions, const std::vector& A_directions, const mfem::Vector& b, double delta, int num_leftmost); diff --git a/src/smith/physics/functional_weak_form.hpp b/src/smith/physics/functional_weak_form.hpp index 5852a0388a..18c292dcab 100644 --- a/src/smith/physics/functional_weak_form.hpp +++ b/src/smith/physics/functional_weak_form.hpp @@ -15,7 +15,6 @@ #include "smith/physics/weak_form.hpp" #include "smith/physics/mesh.hpp" -#include "smith/numerics/equation_solver.hpp" #include "smith/numerics/functional/shape_aware_functional.hpp" #include "smith/physics/state/finite_element_state.hpp" #include "smith/physics/state/finite_element_dual.hpp" diff --git 
a/src/smith/physics/solid_mechanics.hpp b/src/smith/physics/solid_mechanics.hpp index a2d66cf8d5..f7d2706114 100644 --- a/src/smith/physics/solid_mechanics.hpp +++ b/src/smith/physics/solid_mechanics.hpp @@ -12,14 +12,12 @@ #pragma once -#include #include #include #include #include #include #include -#include #include #include #include diff --git a/src/smith/physics/tests/shallow_arch_buckling.cpp b/src/smith/physics/tests/shallow_arch_buckling.cpp index b554a11efd..258332731f 100644 --- a/src/smith/physics/tests/shallow_arch_buckling.cpp +++ b/src/smith/physics/tests/shallow_arch_buckling.cpp @@ -36,9 +36,6 @@ int print_level = 2; int nonlinear_max_iterations = 300000; int trust_subspace_option = static_cast(SubSpaceOptions::NEVER); int trust_num_leftmost = 1; -int trust_num_past_steps = 0; -bool trust_use_solve_start_direction = false; -bool trust_use_min_residual_direction = false; NonlinearSolver selectedNonlinearSolver() { @@ -67,14 +64,6 @@ void parseCommandLine(int& argc, char** argv) trust_subspace_option = std::stoi(arg.substr(std::string("--trust-subspace-option=").size())); } else if (arg.rfind("--trust-num-leftmost=", 0) == 0) { trust_num_leftmost = std::stoi(arg.substr(std::string("--trust-num-leftmost=").size())); - } else if (arg.rfind("--trust-num-past-steps=", 0) == 0) { - trust_num_past_steps = std::stoi(arg.substr(std::string("--trust-num-past-steps=").size())); - } else if (arg.rfind("--trust-use-solve-start-direction=", 0) == 0) { - const std::string value = arg.substr(std::string("--trust-use-solve-start-direction=").size()); - trust_use_solve_start_direction = (value == "1" || value == "true" || value == "on"); - } else if (arg.rfind("--trust-use-min-residual-direction=", 0) == 0) { - const std::string value = arg.substr(std::string("--trust-use-min-residual-direction=").size()); - trust_use_min_residual_direction = (value == "1" || value == "true" || value == "on"); } else { argv[write_arg] = argv[read_arg]; ++write_arg; @@ -130,10 +119,7 @@ 
TEST(ShallowArchBuckling, CompressedThinBeamSnapThrough) .max_iterations = nonlinear_max_iterations, .print_level = print_level, .subspace_option = static_cast(trust_subspace_option), - .num_leftmost = trust_num_leftmost, - .trust_num_past_steps = trust_num_past_steps, - .trust_use_solve_start_direction = trust_use_solve_start_direction, - .trust_use_min_residual_direction = trust_use_min_residual_direction}; + .num_leftmost = trust_num_leftmost}; SolidMechanics solid(nonlinear_options, linear_options, solid_mechanics::default_quasistatic_options, "compressed_beam", mesh); @@ -163,8 +149,8 @@ TEST(ShallowArchBuckling, CompressedThinBeamSnapThrough) SLIC_INFO_ROOT( std::format("Compressed thin beam snap-through run: solver = {}, trust_subspace_option = {}, " - "trust_num_leftmost = {}, trust_num_past_steps = {}", - solver_name, trust_subspace_option, trust_num_leftmost, trust_num_past_steps)); + "trust_num_leftmost = {}", + solver_name, trust_subspace_option, trust_num_leftmost)); constexpr int num_steps = 5; for (int step = 0; step < num_steps; ++step) { From 7d0edd35731ac3dd072b193038a56e09e52efc88 Mon Sep 17 00:00:00 2001 From: Michael Tupek Date: Tue, 12 May 2026 20:07:04 -0700 Subject: [PATCH 20/27] Add some docs. 
--- src/smith/numerics/steihaug_toint_cg.hpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/smith/numerics/steihaug_toint_cg.hpp b/src/smith/numerics/steihaug_toint_cg.hpp index d519590920..24b5a43801 100644 --- a/src/smith/numerics/steihaug_toint_cg.hpp +++ b/src/smith/numerics/steihaug_toint_cg.hpp @@ -108,8 +108,8 @@ struct TrustRegionResults { size_t cg_iterations_count = 0; }; -using DotPair = std::pair; -using DotManyFunction = std::function(const std::vector&)>; +using DotPair = std::pair; ///< using +using DotManyFunction = std::function(const std::vector&)>; ///< using /** * @brief Minimize quadratic sub-problem given residual vector, the action of the stiffness and a preconditioner From b4117acfe49e063de96603e347c95f17e7c034e8 Mon Sep 17 00:00:00 2001 From: Michael Tupek Date: Wed, 13 May 2026 12:45:57 -0700 Subject: [PATCH 21/27] Trying to simplify the left most logic on linesearch a bit. --- src/smith/numerics/equation_solver.cpp | 160 ++++----- .../numerics/mfem_trust_region_subspace.cpp | 324 ++++++++++-------- src/smith/numerics/steihaug_toint_cg.cpp | 10 + src/smith/numerics/steihaug_toint_cg.hpp | 3 + .../numerics/tests/test_steihaug_toint_cg.cpp | 23 +- .../tests/test_trust_region_solver_mfem.cpp | 43 +-- src/smith/numerics/trust_region_solver.hpp | 30 ++ 7 files changed, 294 insertions(+), 299 deletions(-) diff --git a/src/smith/numerics/equation_solver.cpp b/src/smith/numerics/equation_solver.cpp index 9d83657269..0e873fc155 100644 --- a/src/smith/numerics/equation_solver.cpp +++ b/src/smith/numerics/equation_solver.cpp @@ -29,9 +29,10 @@ namespace smith { namespace { -#ifdef MFEM_USE_MPI -size_t rootOnlyPrintLevel(size_t level, MPI_Comm comm) +size_t rootOnlyPrintLevel(const mfem::NewtonSolver& solver, size_t level) { +#ifdef MFEM_USE_MPI + const MPI_Comm comm = solver.GetComm(); if (level > 0 && comm != MPI_COMM_NULL) { int rank = 0; MPI_Comm_rank(comm, &rank); @@ -39,11 +40,9 @@ size_t rootOnlyPrintLevel(size_t 
level, MPI_Comm comm) return 0; } } +#endif return level; } -#else -size_t rootOnlyPrintLevel(size_t level) { return level; } -#endif } // namespace @@ -119,12 +118,7 @@ class NewtonSolver : public mfem::NewtonSolver { print_level = static_cast(std::max(nonlinear_options.print_level, 0)); print_level = print_options.iterations ? std::max(1, print_level) : print_level; print_level = print_options.summary ? std::max(2, print_level) : print_level; - print_level = rootOnlyPrintLevel(print_level -#ifdef MFEM_USE_MPI - , - GetComm() -#endif - ); + print_level = rootOnlyPrintLevel(*this, print_level); using real_t = mfem::real_t; @@ -300,46 +294,15 @@ class TrustRegion : public mfem::NewtonSolver { /// compute several vector inner products with a single MPI reduction when possible std::vector dot_many(const std::vector& pairs) const { - std::vector products(pairs.size(), 0.0); - if (dot_oper) { + std::vector products(pairs.size(), 0.0); for (size_t i = 0; i < pairs.size(); ++i) { products[i] = Dot(*pairs[i].first, *pairs[i].second); } return products; } - std::vector sizes(pairs.size()); - std::vector ptr_a(pairs.size()); - std::vector ptr_b(pairs.size()); - for (size_t i = 0; i < pairs.size(); ++i) { - sizes[i] = pairs[i].first->Size(); - MFEM_ASSERT(sizes[i] == pairs[i].second->Size(), "Incompatible vector sizes."); - ptr_a[i] = pairs[i].first->GetData(); - ptr_b[i] = pairs[i].second->GetData(); - } - - bool all_same_size = true; - for (size_t i = 1; i < pairs.size(); ++i) { - if (sizes[i] != sizes[0]) { - all_same_size = false; - break; - } - } - - if (all_same_size && !pairs.empty()) { - for (int j = 0; j < sizes[0]; ++j) { - for (size_t i = 0; i < pairs.size(); ++i) { - products[i] += ptr_a[i][j] * ptr_b[i][j]; - } - } - } else { - for (size_t i = 0; i < pairs.size(); ++i) { - for (int j = 0; j < sizes[i]; ++j) { - products[i] += ptr_a[i][j] * ptr_b[i][j]; - } - } - } + std::vector products = smith::dotMany(pairs); #ifdef MFEM_USE_MPI const MPI_Comm dot_comm = 
GetComm(); @@ -354,43 +317,47 @@ class TrustRegion : public mfem::NewtonSolver { return products; } - /// solve the exact trust-region subspace problem with directions ds, and the leftmosts - template - void solveTheSubspaceProblem([[maybe_unused]] mfem::Vector& z, [[maybe_unused]] const HessVecFunc& hess_vec_func, - [[maybe_unused]] const std::vector ds, - [[maybe_unused]] const std::vector Hds, - [[maybe_unused]] const mfem::Vector& g, [[maybe_unused]] double delta, - [[maybe_unused]] int num_leftmost, - [[maybe_unused]] std::vector>& candidate_left_mosts) const + /// build reusable subspace data for line-search retries + void prepareSubspaceProblemCache([[maybe_unused]] const std::vector& ds, + [[maybe_unused]] const std::vector& Hds, + [[maybe_unused]] const mfem::Vector& g, [[maybe_unused]] int num_leftmost, + [[maybe_unused]] CachedTrustRegionSubspaceProblem& prepared_subspace) const { +#ifdef MFEM_USE_LAPACK SMITH_MARK_FUNCTION; - std::vector directions; - for (auto& d : ds) { - directions.emplace_back(d); - } - for (auto& left : left_mosts) { - directions.emplace_back(left.get()); - } - - std::vector H_directions; - for (auto& Hd : Hds) { - H_directions.emplace_back(Hd); - } - for (auto& H_left : H_left_mosts) { - H_directions.emplace_back(H_left.get()); - } + std::vector directions(ds.begin(), ds.end()); + std::vector H_directions(Hds.begin(), Hds.end()); + for (auto& left : left_mosts) directions.emplace_back(left.get()); + for (auto& H_left : H_left_mosts) H_directions.emplace_back(H_left.get()); mfem::Vector b(g); b *= -1; + try { + prepared_subspace = smith::prepareSubspaceProblem(directions, H_directions, b, num_leftmost); + } catch (const std::exception& e) { + if (print_level >= 1) { + mfem::out << "subspace solve failed with " << e.what() << std::endl; + } + return; + } +#endif + } + + /// solve cached exact trust-region subspace problem for current trust-region size + template + void solvePreparedSubspaceProblem([[maybe_unused]] mfem::Vector& z, 
[[maybe_unused]] const HessVecFunc& hess_vec_func, + [[maybe_unused]] const CachedTrustRegionSubspaceProblem& prepared_subspace, + [[maybe_unused]] const mfem::Vector& g, [[maybe_unused]] double delta) const + { +#ifdef MFEM_USE_LAPACK + SMITH_MARK_FUNCTION; mfem::Vector sol; - std::vector> leftvecs; - std::vector leftvals; double energy_change; try { - std::tie(sol, leftvecs, leftvals, energy_change) = - solveSubspaceProblem(directions, H_directions, b, delta, num_leftmost); + std::tie(sol, std::ignore, std::ignore, energy_change) = + smith::solvePreparedSubspaceProblem(prepared_subspace, delta); } catch (const std::exception& e) { if (print_level >= 1) { mfem::out << "subspace solve failed with " << e.what() << std::endl; @@ -398,16 +365,11 @@ class TrustRegion : public mfem::NewtonSolver { return; } - candidate_left_mosts.clear(); - for (auto& lv : leftvecs) { - candidate_left_mosts.emplace_back(std::move(lv)); - } - double base_energy = computeEnergy(g, hess_vec_func, z); double subspace_energy = computeEnergy(g, hess_vec_func, sol); if (print_level >= 2) { - double leftval = leftvals.size() ? leftvals[0] : 1.0; + double leftval = prepared_subspace.leftvals.size() ? prepared_subspace.leftvals[0] : 1.0; mfem::out << "Energy using subspace solver from: " << base_energy << ", to: " << subspace_energy << " / " << energy_change << ". Min eig: " << leftval << std::endl; } @@ -415,6 +377,7 @@ class TrustRegion : public mfem::NewtonSolver { if (subspace_energy < base_energy) { z = sol; } +#endif } /// finds tau s.t. (z + tau*(y-z))^2 = trSize^2 @@ -516,12 +479,7 @@ class TrustRegion : public mfem::NewtonSolver { print_level = static_cast(std::max(nonlinear_options.print_level, 0)); print_level = print_options.iterations ? std::max(1, print_level) : print_level; print_level = print_options.summary ? 
std::max(2, print_level) : print_level; - print_level = rootOnlyPrintLevel(print_level -#ifdef MFEM_USE_MPI - , - GetComm() -#endif - ); + print_level = rootOnlyPrintLevel(*this, print_level); using real_t = mfem::real_t; @@ -636,7 +594,13 @@ class TrustRegion : public mfem::NewtonSolver { bool have_computed_Hvs = false; bool have_computed_H_left_mosts = false; - std::vector> candidate_left_mosts; + bool have_prepared_subspace = false; + CachedTrustRegionSubspaceProblem prepared_subspace; +#ifdef MFEM_USE_LAPACK + constexpr bool can_use_subspace_solver = true; +#else + constexpr bool can_use_subspace_solver = false; +#endif int lineSearchIter = 0; while (lineSearchIter <= nonlinear_options.max_line_search_iterations) { @@ -652,7 +616,7 @@ class TrustRegion : public mfem::NewtonSolver { bool use_with_option2 = (subspace_option >= 2) && (d_norm > (1.0 - 1.0e-6) * tr_size); bool use_with_option3 = (subspace_option >= 3); - if (use_with_option1 || use_with_option2 || use_with_option3) { + if (can_use_subspace_solver && (use_with_option1 || use_with_option2 || use_with_option3)) { if (!have_computed_Hvs) { have_computed_Hvs = true; @@ -687,14 +651,20 @@ class TrustRegion : public mfem::NewtonSolver { } } - std::vector ds{&trResults.z, &trResults.cauchy_point}; - std::vector H_ds{&trResults.H_z, &trResults.H_cauchy_point}; - if (trResults.has_d_old) { - ds.push_back(&trResults.d_old); - H_ds.push_back(&trResults.H_d_old); + if (!have_prepared_subspace) { + have_prepared_subspace = true; + + std::vector ds{&trResults.z, &trResults.cauchy_point}; + std::vector H_ds{&trResults.H_z, &trResults.H_cauchy_point}; + if (trResults.has_d_old) { + ds.push_back(&trResults.d_old); + H_ds.push_back(&trResults.H_d_old); + } + + prepareSubspaceProblemCache(ds, H_ds, r, num_leftmost, prepared_subspace); } - solveTheSubspaceProblem(trResults.d, hess_vec_func, ds, H_ds, r, tr_size, num_leftmost, candidate_left_mosts); + solvePreparedSubspaceProblem(trResults.d, hess_vec_func, 
prepared_subspace, r, tr_size); } static constexpr double roundOffTol = 0.0; // 1e-14; @@ -721,8 +691,8 @@ class TrustRegion : public mfem::NewtonSolver { if (normPred <= norm_goal) { trResults.d_old = trResults.d; trResults.has_d_old = true; - if (!candidate_left_mosts.empty()) { - left_mosts = std::move(candidate_left_mosts); + if (!prepared_subspace.leftmosts.empty()) { + left_mosts = prepared_subspace.leftmosts; } X = x_pred; r = r_pred; @@ -776,8 +746,8 @@ class TrustRegion : public mfem::NewtonSolver { if (willAccept) { trResults.d_old = trResults.d; trResults.has_d_old = true; - if (!candidate_left_mosts.empty()) { - left_mosts = std::move(candidate_left_mosts); + if (!prepared_subspace.leftmosts.empty()) { + left_mosts = prepared_subspace.leftmosts; } X = x_pred; r = r_pred; diff --git a/src/smith/numerics/mfem_trust_region_subspace.cpp b/src/smith/numerics/mfem_trust_region_subspace.cpp index 9edb69756e..b725dcb53c 100644 --- a/src/smith/numerics/mfem_trust_region_subspace.cpp +++ b/src/smith/numerics/mfem_trust_region_subspace.cpp @@ -29,13 +29,6 @@ double innerProduct(const mfem::Vector& a, const mfem::Vector& b, const MPI_Comm #ifdef MFEM_USE_LAPACK -TrustRegionSubspaceResult solveSubspaceProblem(const std::vector& directions, - const std::vector& A_directions, - const mfem::Vector& b, double delta, int num_leftmost) -{ - return solveSubspaceProblemMfem(directions, A_directions, b, delta, num_leftmost); -} - namespace { double dot(const mfem::Vector& a, const mfem::Vector& b) { return a * b; } @@ -133,15 +126,6 @@ SubspaceProjections projectSubspaceGlobally(const std::vector& cols) return A; } -/** - * @brief Solves the exact trust region subproblem: - * min 1/2 x^T A x - b^T x, subject to ||x|| <= delta. - * - * Implements a variant of the Moore-Sorensen algorithm: - * 1. Computes the eigensystem of A. - * 2. Checks if the unconstrained minimum lies strictly inside the trust region. - * 3. 
Checks for the "hard case" where the minimum eigenvalue is near zero or negative, - * and the Newton step points outside the trust region, requiring a shift along the leftmost eigenvector. - * 4. Otherwise, performs a Newton iteration on the secular equation (1/||p(\lambda)|| - 1/delta = 0) - * to find the optimal Lagrange multiplier \lambda. - * - * @param A The reduced Hessian matrix (square). - * @param b The reduced gradient vector. - * @param delta The trust region radius. - * @param num_leftmost The number of leftmost eigenvectors/values to return. - * @return A tuple containing: - * - The optimal solution vector. - * - A list of the leftmost eigenvectors. - * - A list of the corresponding leftmost eigenvalues. - * - A boolean indicating success. - */ -std::tuple, std::vector, bool> exactTrustRegionSolve( - mfem::DenseMatrix A, const mfem::Vector& b, double delta, int num_leftmost) +mfem::DenseMatrix orthonormalBasisTransform(const mfem::DenseMatrix& gram, double& trace_mag) +{ + mfem::DenseMatrix gram_copy(gram); + mfem::Vector evals; + mfem::DenseMatrix evecs; + gram_copy.Eigensystem(evals, evecs); + + trace_mag = 0.0; + for (int i = 0; i < evals.Size(); ++i) { + trace_mag += std::abs(evals[i]); + } + + std::vector kept_columns; + for (int i = 0; i < evals.Size(); ++i) { + if (evals[i] > 1e-9 * trace_mag) { + mfem::Vector col = matrixColumn(evecs, i); + col /= std::sqrt(evals[i]); + kept_columns.emplace_back(std::move(col)); + } + } + + return columnsToMatrix(kept_columns); +} + +mfem::DenseMatrix tripleProduct(const mfem::DenseMatrix& L, const mfem::DenseMatrix& A, const mfem::DenseMatrix& R) +{ + mfem::DenseMatrix tmp(A.Height(), R.Width()); + mfem::Mult(A, R, tmp); + mfem::DenseMatrix out(L.Width(), R.Width()); + mfem::MultAtB(L, tmp, out); + return out; +} + +mfem::Vector projectWithTranspose(const mfem::DenseMatrix& A, const mfem::Vector& x) +{ + mfem::Vector out(A.Width()); + A.MultTranspose(x, out); + return out; +} + +mfem::Vector 
combineDirections(const std::vector& states, const mfem::Vector& coeffs) +{ + mfem::Vector out(*states[0]); + out = 0.0; + for (int i = 0; i < coeffs.Size(); ++i) { + out.Add(coeffs[i], *states[size_t(i)]); + } + return out; +} + +std::vector toPointers(const std::vector>& vectors) +{ + std::vector ptrs; + ptrs.reserve(vectors.size()); + for (const auto& vector : vectors) { + ptrs.push_back(vector.get()); + } + return ptrs; +} + +std::vector prepareExactTrustRegionLeftmosts(CachedTrustRegionSubspaceProblem& prepared, int num_leftmost) { - if (A.Height() != A.Width()) { - throw TrustRegionException("Exact trust region solver requires square matrices"); + prepared.eigenvalues.SetSize(prepared.projected_rhs.Size()); + prepared.eigenvectors.SetSize(prepared.projected_hessian.Height(), prepared.projected_hessian.Width()); + + mfem::DenseMatrix projected_hessian_copy(prepared.projected_hessian); + projected_hessian_copy.Eigensystem(prepared.eigenvalues, prepared.eigenvectors); + + prepared.eigen_rhs.SetSize(prepared.eigenvalues.Size()); + for (int i = 0; i < prepared.eigenvalues.Size(); ++i) { + const mfem::Vector vi = matrixColumn(prepared.eigenvectors, i); + prepared.eigen_rhs[i] = dot(vi, prepared.projected_rhs); } - if (A.Height() != b.Size()) { - throw TrustRegionException( - "The right hand size for exact trust region solve must be consistent with the input matrix size"); + + std::vector reduced_leftmosts; + const int num_leftmost_possible = std::min(num_leftmost, prepared.eigenvalues.Size()); + reduced_leftmosts.reserve(static_cast(num_leftmost_possible)); + prepared.leftvals.clear(); + prepared.leftvals.reserve(static_cast(num_leftmost_possible)); + for (int i = 0; i < num_leftmost_possible; ++i) { + reduced_leftmosts.emplace_back(matrixColumn(prepared.eigenvectors, i)); + prepared.leftvals.emplace_back(prepared.eigenvalues[i]); } + return reduced_leftmosts; +} - mfem::Vector workspace(b.Size() * b.Size() + 8 * b.Size()); +std::pair 
solvePreparedExactTrustRegionProblem(const CachedTrustRegionSubspaceProblem& prepared, + double delta) +{ + const mfem::DenseMatrix& A = prepared.projected_hessian; + const mfem::Vector& b = prepared.projected_rhs; + const mfem::Vector& sigs = prepared.eigenvalues; + const mfem::DenseMatrix& V = prepared.eigenvectors; + const mfem::Vector& bv = prepared.eigen_rhs; + + mfem::Vector workspace(6 * b.Size()); int offset = 0; auto alloc_vector = [&](int size) { mfem::Vector v(workspace.GetData() + offset, size); @@ -228,35 +274,23 @@ std::tuple, std::vector, bool> e return v; }; - mfem::Vector sigs = alloc_vector(b.Size()); - mfem::DenseMatrix V(workspace.GetData() + offset, b.Size(), b.Size()); - offset += b.Size() * b.Size(); - - A.Eigensystem(sigs, V); - std::vector leftmosts; - std::vector minsigs; - const int num_leftmost_possible = std::min(num_leftmost, sigs.Size()); - for (int i = 0; i < num_leftmost_possible; ++i) { - leftmosts.emplace_back(matrixColumn(V, i)); - minsigs.emplace_back(sigs[i]); - } - - const mfem::Vector leftMost = matrixColumn(V, 0); - const double minSig = sigs[0]; - - mfem::Vector bv = alloc_vector(sigs.Size()); + mfem::Vector bvOverSigs = alloc_vector(sigs.Size()); for (int i = 0; i < sigs.Size(); ++i) { - const mfem::Vector vi = matrixColumn(V, i); - bv[i] = dot(vi, b); + bvOverSigs[i] = bv[i] / sigs[i]; } - - mfem::Vector bvOverSigs = alloc_vector(sigs.Size()); - for (int i = 0; i < sigs.Size(); ++i) bvOverSigs[i] = bv[i] / sigs[i]; const double sigScale = sumAbs(sigs) / sigs.Size(); const double eps = 1e-12 * sigScale; + const mfem::Vector leftMost = matrixColumn(V, 0); + const double minSig = sigs[0]; if ((minSig >= eps) && (norm(bvOverSigs) <= delta)) { - return std::make_tuple(solveDense(A, b), leftmosts, minsigs, true); + mfem::Vector x = alloc_vector(b.Size()); + x = 0.0; + for (int i = 0; i < b.Size(); ++i) { + const mfem::Vector vi = matrixColumn(V, i); + x.Add(bvOverSigs[i], vi); + } + return std::make_pair(x, true); } double lam 
= minSig < eps ? -minSig + eps : 0.0; @@ -289,7 +323,7 @@ std::tuple, std::vector, bool> e const double e1 = quadraticEnergy(A, b, x1); const double e2 = quadraticEnergy(A, b, x2); - return std::make_tuple(e1 < e2 ? x1 : x2, leftmosts, minsigs, true); + return std::make_pair(e1 < e2 ? x1 : x2, true); } mfem::Vector bvbv = alloc_vector(bv.Size()); @@ -330,67 +364,22 @@ std::tuple, std::vector, bool> e x *= (e2 < e1 ? -delta : delta) / norm(x); - return std::make_tuple(x, leftmosts, minsigs, success); -} - -mfem::DenseMatrix orthonormalBasisTransform(const mfem::DenseMatrix& gram, double& trace_mag) -{ - mfem::DenseMatrix gram_copy(gram); - mfem::Vector evals; - mfem::DenseMatrix evecs; - gram_copy.Eigensystem(evals, evecs); - - trace_mag = 0.0; - for (int i = 0; i < evals.Size(); ++i) { - trace_mag += std::abs(evals[i]); - } - - std::vector kept_columns; - for (int i = 0; i < evals.Size(); ++i) { - if (evals[i] > 1e-9 * trace_mag) { - mfem::Vector col = matrixColumn(evecs, i); - col /= std::sqrt(evals[i]); - kept_columns.emplace_back(std::move(col)); - } - } - - return columnsToMatrix(kept_columns); -} - -mfem::DenseMatrix tripleProduct(const mfem::DenseMatrix& L, const mfem::DenseMatrix& A, const mfem::DenseMatrix& R) -{ - mfem::DenseMatrix tmp(A.Height(), R.Width()); - mfem::Mult(A, R, tmp); - mfem::DenseMatrix out(L.Width(), R.Width()); - mfem::MultAtB(L, tmp, out); - return out; -} - -mfem::Vector projectWithTranspose(const mfem::DenseMatrix& A, const mfem::Vector& x) -{ - mfem::Vector out(A.Width()); - A.MultTranspose(x, out); - return out; -} - -mfem::Vector combineDirections(const std::vector& states, const mfem::Vector& coeffs) -{ - mfem::Vector out(*states[0]); - out = 0.0; - for (int i = 0; i < coeffs.Size(); ++i) { - out.Add(coeffs[i], *states[size_t(i)]); - } - return out; + return std::make_pair(x, success); } } // namespace -TrustRegionSubspaceResult solveSubspaceProblemMfem(const std::vector& states, - const std::vector& Astates, - const mfem::Vector& 
b, double delta, int num_leftmost) +/// @brief prepares reduced trust-region subspace data reusable across trust-region radius updates +CachedTrustRegionSubspaceProblem prepareSubspaceProblem(const std::vector& directions, + const std::vector& A_directions, + const mfem::Vector& b, int num_leftmost) { SMITH_MARK_FUNCTION; - SubspaceProjections projections = projectSubspaceGlobally(states, Astates, b); + CachedTrustRegionSubspaceProblem prepared; + prepared.zero_solution = b; + prepared.zero_solution = 0.0; + + SubspaceProjections projections = projectSubspaceGlobally(directions, A_directions, b); mfem::DenseMatrix& sAs = projections.sAs; symmetrize(sAs); @@ -408,33 +397,61 @@ TrustRegionSubspaceResult solveSubspaceProblemMfem(const std::vector>{}, std::vector{}, 0.0); + return prepared; } if (T.Width() == 0) { throw TrustRegionException("No independent directions in MFEM subspace solve."); } - mfem::DenseMatrix pAp = tripleProduct(T, sAs, T); - symmetrize(pAp); + prepared.projected_hessian = tripleProduct(T, sAs, T); + symmetrize(prepared.projected_hessian); const mfem::Vector& sb = projections.sb; - const mfem::Vector pb = projectWithTranspose(T, sb); - - auto [reduced_x, leftvecs, leftvals, success] = exactTrustRegionSolve(pAp, pb, delta, num_leftmost); - (void)success; - const double energy = quadraticEnergy(pAp, pb, reduced_x); - - mfem::Vector coeffs(T.Height()); - T.Mult(reduced_x, coeffs); - mfem::Vector sol = combineDirections(states, coeffs); - std::vector> leftmosts; - for (const auto& leftvec : leftvecs) { - mfem::Vector left_coeffs(T.Height()); - T.Mult(leftvec, left_coeffs); - leftmosts.emplace_back(std::make_shared(combineDirections(states, left_coeffs))); + prepared.projected_rhs = projectWithTranspose(T, sb); + + for (int j = 0; j < T.Width(); ++j) { + prepared.basis.emplace_back(std::make_shared(combineDirections(directions, matrixColumn(T, j)))); + } + const auto reduced_leftmosts = prepareExactTrustRegionLeftmosts(prepared, num_leftmost); + const 
auto basis_ptrs = toPointers(prepared.basis); + prepared.leftmosts.clear(); + prepared.leftmosts.reserve(reduced_leftmosts.size()); + for (const auto& leftvec : reduced_leftmosts) { + prepared.leftmosts.emplace_back(std::make_shared(combineDirections(basis_ptrs, leftvec))); + } + + return prepared; +} + +/// @brief solves cached reduced trust-region problem for given trust-region radius +TrustRegionSubspaceResult solvePreparedSubspaceProblem(const CachedTrustRegionSubspaceProblem& prepared, double delta) +{ + SMITH_MARK_FUNCTION; + if (prepared.basis.empty()) { + mfem::Vector sol(prepared.zero_solution); + sol = 0.0; + return std::make_tuple(sol, prepared.leftmosts, prepared.leftvals, 0.0); } - return std::make_tuple(sol, leftmosts, leftvals, energy); + + auto [reduced_x, success] = solvePreparedExactTrustRegionProblem(prepared, delta); + const double energy = quadraticEnergy(prepared.projected_hessian, prepared.projected_rhs, reduced_x); + + const auto basis_ptrs = toPointers(prepared.basis); + mfem::Vector sol = combineDirections(basis_ptrs, reduced_x); + return std::make_tuple(sol, prepared.leftmosts, prepared.leftvals, energy); +} + +TrustRegionSubspaceResult solveSubspaceProblem(const std::vector& directions, + const std::vector& A_directions, + const mfem::Vector& b, double delta, int num_leftmost) +{ + return solvePreparedSubspaceProblem(prepareSubspaceProblem(directions, A_directions, b, num_leftmost), delta); +} + +TrustRegionSubspaceResult solveSubspaceProblemMfem(const std::vector& directions, + const std::vector& A_directions, + const mfem::Vector& b, double delta, int num_leftmost) +{ + return solvePreparedSubspaceProblem(prepareSubspaceProblem(directions, A_directions, b, num_leftmost), delta); } #else @@ -447,6 +464,16 @@ TrustRegionSubspaceResult solveSubspaceProblem(const std::vector>{}, std::vector{}, 0.0); } +CachedTrustRegionSubspaceProblem prepareSubspaceProblem(const std::vector& directions, + const std::vector& A_directions, + const 
mfem::Vector& b, int) +{ + throw TrustRegionException("Trust-region subspace solve requires MFEM LAPACK support."); + CachedTrustRegionSubspaceProblem prepared; + prepared.zero_solution = b; + return prepared; +} + /// @brief report unavailable MFEM subspace solve when MFEM was built without LAPACK. TrustRegionSubspaceResult solveSubspaceProblemMfem(const std::vector&, const std::vector&, const mfem::Vector& b, @@ -456,6 +483,13 @@ TrustRegionSubspaceResult solveSubspaceProblemMfem(const std::vector>{}, std::vector{}, 0.0); } +TrustRegionSubspaceResult solvePreparedSubspaceProblem(const CachedTrustRegionSubspaceProblem& prepared, double) +{ + throw TrustRegionException("Trust-region subspace solve requires MFEM LAPACK support."); + return std::make_tuple(prepared.zero_solution, std::vector>{}, std::vector{}, + 0.0); +} + #endif // MFEM_USE_LAPACK } // namespace smith diff --git a/src/smith/numerics/steihaug_toint_cg.cpp b/src/smith/numerics/steihaug_toint_cg.cpp index 4964d3a641..784dc0ac89 100644 --- a/src/smith/numerics/steihaug_toint_cg.cpp +++ b/src/smith/numerics/steihaug_toint_cg.cpp @@ -20,6 +20,16 @@ void projectToBoundaryWithCoefs(mfem::Vector& z, const mfem::Vector& d, double d } // namespace +std::vector dotMany(const std::vector& pairs) +{ + std::vector products(pairs.size(), 0.0); + for (size_t i = 0; i < pairs.size(); ++i) { + MFEM_ASSERT(pairs[i].first->Size() == pairs[i].second->Size(), "Incompatible vector sizes."); + products[i] = (*pairs[i].first) * (*pairs[i].second); + } + return products; +} + void steihaugTointCG(const mfem::Vector& r0, mfem::Vector& rCurrent, const mfem::Operator& H, const mfem::Solver* P, const TrustRegionSettings& settings, double& trSize, TrustRegionResults& results, double r0_norm_squared, const DotManyFunction& dot_many) diff --git a/src/smith/numerics/steihaug_toint_cg.hpp b/src/smith/numerics/steihaug_toint_cg.hpp index 24b5a43801..672bd26dc2 100644 --- a/src/smith/numerics/steihaug_toint_cg.hpp +++ 
b/src/smith/numerics/steihaug_toint_cg.hpp @@ -111,6 +111,9 @@ struct TrustRegionResults { using DotPair = std::pair; ///< using using DotManyFunction = std::function(const std::vector&)>; ///< using +/// compute local dot products for many vector pairs +std::vector dotMany(const std::vector& pairs); + /** * @brief Minimize quadratic sub-problem given residual vector, the action of the stiffness and a preconditioner * diff --git a/src/smith/numerics/tests/test_steihaug_toint_cg.cpp b/src/smith/numerics/tests/test_steihaug_toint_cg.cpp index 03c212aa0d..860e6d3192 100644 --- a/src/smith/numerics/tests/test_steihaug_toint_cg.cpp +++ b/src/smith/numerics/tests/test_steihaug_toint_cg.cpp @@ -7,20 +7,6 @@ #include #include "smith/numerics/steihaug_toint_cg.hpp" -namespace { - -std::vector dot_many(const std::vector& pairs) -{ - std::vector out; - out.reserve(pairs.size()); - for (const auto& [a, b] : pairs) { - out.push_back((*a) * (*b)); - } - return out; -} - -} // namespace - TEST(SteihaugTointCG, SolvesSPDInsideBoundary) { int size = 2; @@ -42,7 +28,8 @@ TEST(SteihaugTointCG, SolvesSPDInsideBoundary) mfem::Vector rCurrent(size); - smith::steihaugTointCG(r0, rCurrent, H, nullptr, settings, trSize, results, r0.Norml2() * r0.Norml2(), dot_many); + smith::steihaugTointCG(r0, rCurrent, H, nullptr, settings, trSize, results, r0.Norml2() * r0.Norml2(), + smith::dotMany); // Solution should be H^{-1} (-r0) // x = -0.5, y = -0.25 @@ -69,7 +56,8 @@ TEST(SteihaugTointCG, HitsBoundary) mfem::Vector rCurrent(size); - smith::steihaugTointCG(r0, rCurrent, H, nullptr, settings, trSize, results, r0.Norml2() * r0.Norml2(), dot_many); + smith::steihaugTointCG(r0, rCurrent, H, nullptr, settings, trSize, results, r0.Norml2() * r0.Norml2(), + smith::dotMany); EXPECT_NEAR(results.z.Norml2(), 0.5, 1e-9); EXPECT_EQ(results.interior_status, smith::TrustRegionResults::Status::OnBoundary); @@ -93,7 +81,8 @@ TEST(SteihaugTointCG, DetectsNegativeCurvature) mfem::Vector rCurrent(size); - 
smith::steihaugTointCG(r0, rCurrent, H, nullptr, settings, trSize, results, r0.Norml2() * r0.Norml2(), dot_many); + smith::steihaugTointCG(r0, rCurrent, H, nullptr, settings, trSize, results, r0.Norml2() * r0.Norml2(), + smith::dotMany); // For negative curvature, it should go to boundary EXPECT_NEAR(results.z.Norml2(), 2.0, 1e-9); diff --git a/src/smith/numerics/tests/test_trust_region_solver_mfem.cpp b/src/smith/numerics/tests/test_trust_region_solver_mfem.cpp index cb5fc328af..f2e46a06c2 100644 --- a/src/smith/numerics/tests/test_trust_region_solver_mfem.cpp +++ b/src/smith/numerics/tests/test_trust_region_solver_mfem.cpp @@ -4,7 +4,6 @@ // // SPDX-License-Identifier: (BSD-3-Clause) -#include #include #include "gtest/gtest.h" @@ -32,14 +31,6 @@ std::vector applyDiagonalOperator(const mfem::Vector& diag, return out; } -void expectNearVector(const mfem::Vector& a, const mfem::Vector& b, double tol) -{ - ASSERT_EQ(a.Size(), b.Size()); - for (int i = 0; i < a.Size(); ++i) { - EXPECT_NEAR(a[i], b[i], tol); - } -} - std::vector toPointers(const std::vector& vectors) { std::vector ptrs; @@ -79,8 +70,7 @@ TEST(TrustRegionSubspaceMfem, SolveHitsTrustRegionBoundary) const auto astates = applyDiagonalOperator(fixture.diag, states); const auto astate_ptrs = toPointers(astates); - auto [sol, leftvecs, leftvals, energy] = - smith::solveSubspaceProblemMfem(states, astate_ptrs, fixture.b, test_delta, 1); + auto [sol, leftvecs, leftvals, energy] = smith::solveSubspaceProblem(states, astate_ptrs, fixture.b, test_delta, 1); EXPECT_NEAR(sol.Norml2(), test_delta, 1.0e-12); EXPECT_FALSE(leftvecs.empty()); @@ -88,37 +78,6 @@ TEST(TrustRegionSubspaceMfem, SolveHitsTrustRegionBoundary) EXPECT_LT(energy, 0.0); } -TEST(TrustRegionSubspaceMfem, GenericSolveUsesMfemBackend) -{ - DiagonalSubspaceFixture fixture(test_size); - - const std::vector states = {&fixture.u1, &fixture.u2, &fixture.u3, &fixture.u2}; - const auto astates = applyDiagonalOperator(fixture.diag, states); - const auto 
astate_ptrs = toPointers(astates); - - auto [generic_sol, generic_leftvecs, generic_leftvals, generic_energy] = - smith::solveSubspaceProblem(states, astate_ptrs, fixture.b, test_delta, 2); - auto [mfem_sol, mfem_leftvecs, mfem_leftvals, mfem_energy] = - smith::solveSubspaceProblemMfem(states, astate_ptrs, fixture.b, test_delta, 2); - - expectNearVector(generic_sol, mfem_sol, 1.0e-12); - ASSERT_EQ(generic_leftvecs.size(), mfem_leftvecs.size()); - ASSERT_EQ(generic_leftvals.size(), mfem_leftvals.size()); - for (size_t i = 0; i < generic_leftvecs.size(); ++i) { - const double same = smith::innerProduct(*generic_leftvecs[i], *mfem_leftvecs[i], MPI_COMM_WORLD); - mfem::Vector neg(*mfem_leftvecs[i]); - neg *= -1.0; - const double flipped = smith::innerProduct(*generic_leftvecs[i], neg, MPI_COMM_WORLD); - if (std::abs(flipped) > std::abs(same)) { - expectNearVector(*generic_leftvecs[i], neg, 1.0e-10); - } else { - expectNearVector(*generic_leftvecs[i], *mfem_leftvecs[i], 1.0e-10); - } - EXPECT_NEAR(generic_leftvals[i], mfem_leftvals[i], 1.0e-12); - } - EXPECT_NEAR(generic_energy, mfem_energy, 1.0e-12); -} - TEST(TrustRegionSubspaceMfem, SolveHandlesZeroDirection) { mfem::Vector u1(4); diff --git a/src/smith/numerics/trust_region_solver.hpp b/src/smith/numerics/trust_region_solver.hpp index 1a024e5032..11fff281c3 100644 --- a/src/smith/numerics/trust_region_solver.hpp +++ b/src/smith/numerics/trust_region_solver.hpp @@ -43,6 +43,28 @@ class TrustRegionException : public std::exception { using TrustRegionSubspaceResult = std::tuple>, std::vector, double>; +/// Cached reduced trust-region subspace data reusable across trust-region radius updates. 
+struct CachedTrustRegionSubspaceProblem { + /// zero vector with correct size/layout for empty-subspace returns + mfem::Vector zero_solution; + /// orthonormalized physical-space basis spanning reduced subspace + std::vector> basis; + /// reduced Hessian in cached subspace basis + mfem::DenseMatrix projected_hessian; + /// reduced right-hand side in cached subspace basis + mfem::Vector projected_rhs; + /// eigenvalues of reduced Hessian + mfem::Vector eigenvalues; + /// eigenvectors of reduced Hessian + mfem::DenseMatrix eigenvectors; + /// reduced right-hand side projected onto reduced Hessian eigenvectors + mfem::Vector eigen_rhs; + /// cached leftmost eigenvectors lifted back to physical space + std::vector> leftmosts; + /// eigenvalues corresponding to cached leftmost eigenvectors + std::vector leftvals; +}; + /// @brief computes the global size of mfem::Vector int globalSize(const mfem::Vector& parallel_v, const MPI_Comm& comm); @@ -59,4 +81,12 @@ TrustRegionSubspaceResult solveSubspaceProblemMfem(const std::vector& A_directions, const mfem::Vector& b, double delta, int num_leftmost); +/// @brief prepares reduced trust-region subspace data reusable across trust-region radius updates +CachedTrustRegionSubspaceProblem prepareSubspaceProblem(const std::vector& directions, + const std::vector& A_directions, + const mfem::Vector& b, int num_leftmost); + +/// @brief solves cached reduced trust-region problem for given trust-region radius +TrustRegionSubspaceResult solvePreparedSubspaceProblem(const CachedTrustRegionSubspaceProblem& prepared, double delta); + } // namespace smith From ca4242b818fbda37a2ce5e592a1dab7bcc3067e3 Mon Sep 17 00:00:00 2001 From: chapman39 Date: Tue, 12 May 2026 16:06:29 -0700 Subject: [PATCH 22/27] remove version from `conf.py` --- src/docs/conf.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/docs/conf.py b/src/docs/conf.py index 770b0304db..6769080598 100644 --- a/src/docs/conf.py +++ b/src/docs/conf.py @@ 
-81,9 +81,9 @@ # built documents. # # The short X.Y version. -version = '0.1' +# version = '0.1' # The full version, including alpha/beta/rc tags. -release = '0.1' +# release = '0.1' # The language for content autogenerated by Sphinx. Refer to documentation # for a list of supported languages. From 632d41e16164d427ce45f0f2cd503b68cfaa47e0 Mon Sep 17 00:00:00 2001 From: chapman39 Date: Tue, 12 May 2026 16:32:35 -0700 Subject: [PATCH 23/27] add explaination for adding new readthedocs release version --- src/docs/sphinx/dev_guide/release.rst | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/src/docs/sphinx/dev_guide/release.rst b/src/docs/sphinx/dev_guide/release.rst index fcdc7812a1..23dc1cef7e 100644 --- a/src/docs/sphinx/dev_guide/release.rst +++ b/src/docs/sphinx/dev_guide/release.rst @@ -198,6 +198,21 @@ the history. After merging, the release candidate branch can be deleted. #. Click the ``Update release`` button. +7: Add Version to ReadTheDocs +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +A new version our ReadTheDocs should be created so that users can refer to older snapshots of Smith's documentation. + +#. Go to this page `https://app.readthedocs.org/projects/llnlsmith/_`. + +#. Click "Add version". + +#. Find the version tag that was just created in the previous steps. (It might take a couple minutes to show up.) + +#. Toggle the "Activate" to be on. + +#. Select "Update version". + 8: Merge Main to Develop ^^^^^^^^^^^^^^^^^^^^^^^^^^^ From 95b170d29d9deedf47ac8947552cf2727f3cd258 Mon Sep 17 00:00:00 2001 From: chapman39 Date: Tue, 12 May 2026 16:34:42 -0700 Subject: [PATCH 24/27] syntax error --- src/docs/sphinx/dev_guide/release.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/docs/sphinx/dev_guide/release.rst b/src/docs/sphinx/dev_guide/release.rst index 23dc1cef7e..296194b5fb 100644 --- a/src/docs/sphinx/dev_guide/release.rst +++ b/src/docs/sphinx/dev_guide/release.rst @@ -203,7 +203,7 @@ the history. 
After merging, the release candidate branch can be deleted. A new version our ReadTheDocs should be created so that users can refer to older snapshots of Smith's documentation. -#. Go to this page `https://app.readthedocs.org/projects/llnlsmith/_`. +#. Go to this page `https://app.readthedocs.org/projects/llnlsmith/`_. #. Click "Add version". From 2c2297d7f8d5a36f13a0965a21b2d417dfb5f8dc Mon Sep 17 00:00:00 2001 From: chapman39 Date: Tue, 12 May 2026 16:36:27 -0700 Subject: [PATCH 25/27] syntax.. again! --- src/docs/sphinx/dev_guide/release.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/docs/sphinx/dev_guide/release.rst b/src/docs/sphinx/dev_guide/release.rst index 296194b5fb..7894f5512e 100644 --- a/src/docs/sphinx/dev_guide/release.rst +++ b/src/docs/sphinx/dev_guide/release.rst @@ -203,7 +203,7 @@ the history. After merging, the release candidate branch can be deleted. A new version our ReadTheDocs should be created so that users can refer to older snapshots of Smith's documentation. -#. Go to this page `https://app.readthedocs.org/projects/llnlsmith/`_. +#. Go to `our ReadTheDocs app page `_. #. Click "Add version". From 974c23c5932f076732976c1a2b72d6df9fae5392 Mon Sep 17 00:00:00 2001 From: Michael Tupek Date: Fri, 22 May 2026 15:01:14 -0600 Subject: [PATCH 26/27] Fix some comm issues, better fallback when trust region fails, use common mfem functions, pull out more free functions. 
--- src/smith/numerics/equation_solver.cpp | 115 +++++++++++++----- .../numerics/mfem_trust_region_subspace.cpp | 90 +++++--------- src/smith/numerics/steihaug_toint_cg.cpp | 22 ++-- src/smith/numerics/steihaug_toint_cg.hpp | 3 + .../numerics/tests/test_steihaug_toint_cg.cpp | 15 +++ .../tests/test_trust_region_solver_mfem.cpp | 54 +++++++- src/smith/numerics/trust_region_solver.hpp | 10 +- 7 files changed, 202 insertions(+), 107 deletions(-) diff --git a/src/smith/numerics/equation_solver.cpp b/src/smith/numerics/equation_solver.cpp index e7c59c0760..a197d7884e 100644 --- a/src/smith/numerics/equation_solver.cpp +++ b/src/smith/numerics/equation_solver.cpp @@ -133,6 +133,24 @@ ConvergenceStatus scalarConvergenceStatus(double residual_norm, double initial_n return status; } +bool shouldUseSubspaceStep(int subspace_option, TrustRegionResults::Status status, double step_norm, double tr_size, + int line_search_iter) +{ + const bool failed_or_indefinite = status == TrustRegionResults::Status::NonDescentDirection || + status == TrustRegionResults::Status::NegativeCurvature || + ((step_norm > (1.0 - 1.0e-6) * tr_size) && line_search_iter > 1); + const bool on_boundary = step_norm > (1.0 - 1.0e-6) * tr_size; + return ((subspace_option >= 1) && failed_or_indefinite) || ((subspace_option >= 2) && on_boundary) || + (subspace_option >= 3); +} + +enum class SubspaceStepStatus +{ + Unavailable, + Unchanged, + Replaced +}; + } // namespace /// @cond @@ -460,7 +478,7 @@ class TrustRegion : public mfem::NewtonSolver, public ConvergenceManagedNonlinea } /// build reusable subspace data for line-search retries - void prepareSubspaceProblemCache([[maybe_unused]] const std::vector& ds, + bool prepareSubspaceProblemCache([[maybe_unused]] const std::vector& ds, [[maybe_unused]] const std::vector& Hds, [[maybe_unused]] const mfem::Vector& g, [[maybe_unused]] int num_leftmost, [[maybe_unused]] CachedTrustRegionSubspaceProblem& prepared_subspace) const @@ -476,21 +494,25 @@ class 
TrustRegion : public mfem::NewtonSolver, public ConvergenceManagedNonlinea b *= -1; try { - prepared_subspace = smith::prepareSubspaceProblem(directions, H_directions, b, num_leftmost); + prepared_subspace = smith::prepareSubspaceProblem(directions, H_directions, b, num_leftmost, GetComm()); } catch (const std::exception& e) { if (print_level >= 1) { - mfem::out << "subspace solve failed with " << e.what() << std::endl; + mfem::out << "subspace preparation failed with " << e.what() << "; using dogleg fallback." << std::endl; } - return; + return false; } + return true; +#else + return false; #endif } /// solve cached exact trust-region subspace problem for current trust-region size template - void solvePreparedSubspaceProblem([[maybe_unused]] mfem::Vector& z, [[maybe_unused]] const HessVecFunc& hess_vec_func, - [[maybe_unused]] const CachedTrustRegionSubspaceProblem& prepared_subspace, - [[maybe_unused]] const mfem::Vector& g, [[maybe_unused]] double delta) const + SubspaceStepStatus solvePreparedSubspaceProblem( + [[maybe_unused]] mfem::Vector& z, [[maybe_unused]] const HessVecFunc& hess_vec_func, + [[maybe_unused]] const CachedTrustRegionSubspaceProblem& prepared_subspace, + [[maybe_unused]] const mfem::Vector& g, [[maybe_unused]] double delta) const { #ifdef MFEM_USE_LAPACK SMITH_MARK_FUNCTION; @@ -502,9 +524,9 @@ class TrustRegion : public mfem::NewtonSolver, public ConvergenceManagedNonlinea smith::solvePreparedSubspaceProblem(prepared_subspace, delta); } catch (const std::exception& e) { if (print_level >= 1) { - mfem::out << "subspace solve failed with " << e.what() << std::endl; + mfem::out << "subspace solve failed with " << e.what() << "; using dogleg fallback." 
<< std::endl; } - return; + return SubspaceStepStatus::Unavailable; } double base_energy = computeEnergy(g, hess_vec_func, z); @@ -518,7 +540,11 @@ class TrustRegion : public mfem::NewtonSolver, public ConvergenceManagedNonlinea if (subspace_energy < base_energy) { z = sol; + return SubspaceStepStatus::Replaced; } + return SubspaceStepStatus::Unchanged; +#else + return SubspaceStepStatus::Unavailable; #endif } @@ -528,7 +554,9 @@ class TrustRegion : public mfem::NewtonSolver, public ConvergenceManagedNonlinea { double dd = yy - 2 * zy + zz; double zd = zy - zz; - double tau = (std::sqrt((trSize * trSize - zz) * dd + zd * zd) - zd) / dd; + double boundary_gap = std::max(trSize * trSize - zz, 0.0); + if (boundary_gap == 0.0) return; + double tau = (std::sqrt(boundary_gap * dd + zd * zd) - zd) / dd; z.Add(-tau, z); z.Add(tau, y); } @@ -580,6 +608,30 @@ class TrustRegion : public mfem::NewtonSolver, public ConvergenceManagedNonlinea steihaugTointCG(r0, rCurrent, H, P, settings, trSize, results, r0_norm_squared, dot_many_lambda); } + void fallbackToCauchyPoint(TrustRegionResults& results, const char* reason) const + { + if (print_level >= 2) { + mfem::out << reason << "; using cauchy point fallback." 
<< std::endl; + } + results.d = results.cauchy_point; + } + + bool isDescentStep(const mfem::Vector& step, const mfem::Vector& residual) const + { + auto dot_many_lambda = [this](const std::vector& pairs) { return dot_many(pairs); }; + return smith::isDescentDirection(step, residual, dot_many_lambda); + } + + template + void computeHessianActions(const std::vector& inputs, const std::vector& outputs, + const HessVecFunc& hess_vec_func) const + { + MFEM_VERIFY(inputs.size() == outputs.size(), "Subspace Hessian-vector batch input/output size mismatch"); + for (size_t i = 0; i < inputs.size(); ++i) { + hess_vec_func(*inputs[i], *outputs[i]); + } + } + /// assemble the jacobian void assembleJacobian(const mfem::Vector& x) const { @@ -771,16 +823,13 @@ class TrustRegion : public mfem::NewtonSolver, public ConvergenceManagedNonlinea ++lineSearchIter; doglegStep(trResults.cauchy_point, trResults.z, tr_size, trResults.d); - const bool check_subspace_boundary = subspace_option >= 1; - const double d_norm = check_subspace_boundary ? std::sqrt(Dot(trResults.d, trResults.d)) : 0.0; - bool use_with_option1 = - (subspace_option >= 1) && (trResults.interior_status == TrustRegionResults::Status::NonDescentDirection || - trResults.interior_status == TrustRegionResults::Status::NegativeCurvature || - ((d_norm > (1.0 - 1.0e-6) * tr_size) && lineSearchIter > 1)); - bool use_with_option2 = (subspace_option >= 2) && (d_norm > (1.0 - 1.0e-6) * tr_size); - bool use_with_option3 = (subspace_option >= 3); - - if (can_use_subspace_solver && (use_with_option1 || use_with_option2 || use_with_option3)) { + const double d_norm = subspace_option >= 1 ? 
std::sqrt(Dot(trResults.d, trResults.d)) : 0.0; + const bool use_subspace = + can_use_subspace_solver && + shouldUseSubspaceStep(subspace_option, trResults.interior_status, d_norm, tr_size, lineSearchIter); + + bool subspace_unavailable = false; + if (use_subspace) { if (!have_computed_Hvs) { have_computed_Hvs = true; @@ -791,11 +840,7 @@ class TrustRegion : public mfem::NewtonSolver, public ConvergenceManagedNonlinea subspace_hess_outputs.push_back(&trResults.H_d_old); } - MFEM_VERIFY(subspace_hess_inputs.size() == subspace_hess_outputs.size(), - "Subspace Hessian-vector batch input/output size mismatch"); - for (size_t i = 0; i < subspace_hess_inputs.size(); ++i) { - hess_vec_func(*subspace_hess_inputs[i], *subspace_hess_outputs[i]); - } + computeHessianActions(subspace_hess_inputs, subspace_hess_outputs, hess_vec_func); } if (!have_computed_H_left_mosts) { @@ -808,11 +853,7 @@ class TrustRegion : public mfem::NewtonSolver, public ConvergenceManagedNonlinea leftmost_inputs.push_back(left.get()); leftmost_outputs.push_back(H_left_mosts.back().get()); } - MFEM_VERIFY(leftmost_inputs.size() == leftmost_outputs.size(), - "Subspace Hessian-vector batch input/output size mismatch"); - for (size_t i = 0; i < leftmost_inputs.size(); ++i) { - hess_vec_func(*leftmost_inputs[i], *leftmost_outputs[i]); - } + computeHessianActions(leftmost_inputs, leftmost_outputs, hess_vec_func); } if (!have_prepared_subspace) { @@ -825,10 +866,20 @@ class TrustRegion : public mfem::NewtonSolver, public ConvergenceManagedNonlinea H_ds.push_back(&trResults.H_d_old); } - prepareSubspaceProblemCache(ds, H_ds, r, num_leftmost, prepared_subspace); + have_prepared_subspace = prepareSubspaceProblemCache(ds, H_ds, r, num_leftmost, prepared_subspace); + subspace_unavailable = !have_prepared_subspace; } - solvePreparedSubspaceProblem(trResults.d, hess_vec_func, prepared_subspace, r, tr_size); + if (have_prepared_subspace) { + const SubspaceStepStatus subspace_status = + 
solvePreparedSubspaceProblem(trResults.d, hess_vec_func, prepared_subspace, r, tr_size); + subspace_unavailable = subspace_status == SubspaceStepStatus::Unavailable; + } + } + + if (subspace_unavailable || !isDescentStep(trResults.d, r)) { + fallbackToCauchyPoint( + trResults, subspace_unavailable ? "Subspace step unavailable" : "Fallback step is not a descent step"); } static constexpr double roundOffTol = 0.0; // 1e-14; diff --git a/src/smith/numerics/mfem_trust_region_subspace.cpp b/src/smith/numerics/mfem_trust_region_subspace.cpp index b725dcb53c..453d8351a3 100644 --- a/src/smith/numerics/mfem_trust_region_subspace.cpp +++ b/src/smith/numerics/mfem_trust_region_subspace.cpp @@ -44,18 +44,6 @@ double sumAbs(const mfem::Vector& x) return total; } -void symmetrize(mfem::DenseMatrix& A) -{ - MFEM_VERIFY(A.Height() == A.Width(), "symmetrize requires square matrix"); - for (int i = 0; i < A.Height(); ++i) { - for (int j = 0; j < i; ++j) { - const double value = 0.5 * (A(i, j) + A(j, i)); - A(i, j) = value; - A(j, i) = value; - } - } -} - struct SubspaceProjections { mfem::DenseMatrix sAs; mfem::DenseMatrix ss; @@ -80,7 +68,7 @@ void checkProjectionInputs(const std::vector& states, SubspaceProjections globalSubspaceProjectionFromLocalInnerProducts(const std::vector& states, const std::vector& Astates, - const mfem::Vector& b) + const mfem::Vector& b, MPI_Comm comm) { const int n = static_cast(states.size()); const int triangular_size = n * (n + 1) / 2; @@ -102,7 +90,7 @@ SubspaceProjections globalSubspaceProjectionFromLocalInnerProducts(const std::ve } MPI_Allreduce(local_projection_entries.data(), global_projection_entries.data(), buffer_size, MFEM_MPI_REAL_T, - MPI_SUM, MPI_COMM_WORLD); + MPI_SUM, comm); SubspaceProjections projections{mfem::DenseMatrix(n), mfem::DenseMatrix(n), mfem::Vector(n)}; for (int i = 0; i < n; ++i) { @@ -120,10 +108,11 @@ SubspaceProjections globalSubspaceProjectionFromLocalInnerProducts(const std::ve } SubspaceProjections 
projectSubspaceGlobally(const std::vector& states, - const std::vector& Astates, const mfem::Vector& b) + const std::vector& Astates, const mfem::Vector& b, + MPI_Comm comm) { checkProjectionInputs(states, Astates, b); - return globalSubspaceProjectionFromLocalInnerProducts(states, Astates, b); + return globalSubspaceProjectionFromLocalInnerProducts(states, Astates, b, comm); } double quadraticEnergy(const mfem::DenseMatrix& A, const mfem::Vector& b, const mfem::Vector& x) @@ -211,6 +200,17 @@ mfem::Vector projectWithTranspose(const mfem::DenseMatrix& A, const mfem::Vector return out; } +mfem::Vector combineColumns(const mfem::DenseMatrix& basis, const mfem::Vector& coeffs) +{ + mfem::Vector out(basis.Height()); + out = 0.0; + for (int i = 0; i < coeffs.Size(); ++i) { + const mfem::Vector vi = matrixColumn(basis, i); + out.Add(coeffs[i], vi); + } + return out; +} + mfem::Vector combineDirections(const std::vector& states, const mfem::Vector& coeffs) { mfem::Vector out(*states[0]); @@ -284,12 +284,7 @@ std::pair solvePreparedExactTrustRegionProblem(const CachedT const double minSig = sigs[0]; if ((minSig >= eps) && (norm(bvOverSigs) <= delta)) { - mfem::Vector x = alloc_vector(b.Size()); - x = 0.0; - for (int i = 0; i < b.Size(); ++i) { - const mfem::Vector vi = matrixColumn(V, i); - x.Add(bvOverSigs[i], vi); - } + mfem::Vector x = combineColumns(V, bvOverSigs); return std::make_pair(x, true); } @@ -299,12 +294,7 @@ std::pair solvePreparedExactTrustRegionProblem(const CachedT for (int i = 0; i < sigs.Size(); ++i) bvOverSigs[i] = bv[i] / sigsPlusLam[i]; if ((minSig < eps) && (norm(bvOverSigs) < delta)) { - mfem::Vector p = alloc_vector(b.Size()); - p = 0.0; - for (int i = 0; i < b.Size(); ++i) { - const mfem::Vector vi = matrixColumn(V, i); - p.Add(bv[i], vi); - } + mfem::Vector p = combineColumns(V, bvOverSigs); const double pz = dot(p, leftMost); const double pp = dot(p, p); @@ -349,12 +339,7 @@ std::pair solvePreparedExactTrustRegionProblem(const CachedT for (int 
i = 0; i < sigs.Size(); ++i) bvOverSigs[i] = bv[i] / sigsPlusLam[i]; - mfem::Vector x = alloc_vector(b.Size()); - x = 0.0; - for (int i = 0; i < b.Size(); ++i) { - const mfem::Vector vi = matrixColumn(V, i); - x.Add(bvOverSigs[i], vi); - } + mfem::Vector x = combineColumns(V, bvOverSigs); const double e1 = quadraticEnergy(A, b, x); mfem::Vector neg_x = alloc_vector(x.Size()); @@ -372,16 +357,16 @@ std::pair solvePreparedExactTrustRegionProblem(const CachedT /// @brief prepares reduced trust-region subspace data reusable across trust-region radius updates CachedTrustRegionSubspaceProblem prepareSubspaceProblem(const std::vector& directions, const std::vector& A_directions, - const mfem::Vector& b, int num_leftmost) + const mfem::Vector& b, int num_leftmost, MPI_Comm comm) { SMITH_MARK_FUNCTION; CachedTrustRegionSubspaceProblem prepared; prepared.zero_solution = b; prepared.zero_solution = 0.0; - SubspaceProjections projections = projectSubspaceGlobally(directions, A_directions, b); + SubspaceProjections projections = projectSubspaceGlobally(directions, A_directions, b, comm); mfem::DenseMatrix& sAs = projections.sAs; - symmetrize(sAs); + sAs.Symmetrize(); for (int i = 0; i < sAs.Height(); ++i) { for (int j = 0; j < sAs.Width(); ++j) { @@ -392,7 +377,7 @@ CachedTrustRegionSubspaceProblem prepareSubspaceProblem(const std::vector& directions, const std::vector& A_directions, - const mfem::Vector& b, double delta, int num_leftmost) -{ - return solvePreparedSubspaceProblem(prepareSubspaceProblem(directions, A_directions, b, num_leftmost), delta); -} - -TrustRegionSubspaceResult solveSubspaceProblemMfem(const std::vector& directions, - const std::vector& A_directions, - const mfem::Vector& b, double delta, int num_leftmost) + const mfem::Vector& b, double delta, int num_leftmost, MPI_Comm comm) { - return solvePreparedSubspaceProblem(prepareSubspaceProblem(directions, A_directions, b, num_leftmost), delta); + return 
solvePreparedSubspaceProblem(prepareSubspaceProblem(directions, A_directions, b, num_leftmost, comm), delta); } #else TrustRegionSubspaceResult solveSubspaceProblem(const std::vector& directions, const std::vector& A_directions, - const mfem::Vector& b, double delta, int num_leftmost) + const mfem::Vector& b, double delta, int num_leftmost, MPI_Comm) { throw TrustRegionException("Trust-region subspace solve requires MFEM LAPACK support."); return std::make_tuple(b, std::vector>{}, std::vector{}, 0.0); @@ -466,7 +447,7 @@ TrustRegionSubspaceResult solveSubspaceProblem(const std::vector& directions, const std::vector& A_directions, - const mfem::Vector& b, int) + const mfem::Vector& b, int, MPI_Comm) { throw TrustRegionException("Trust-region subspace solve requires MFEM LAPACK support."); CachedTrustRegionSubspaceProblem prepared; @@ -474,15 +455,6 @@ CachedTrustRegionSubspaceProblem prepareSubspaceProblem(const std::vector&, - const std::vector&, const mfem::Vector& b, - double, int) -{ - throw TrustRegionException("MFEM trust-region subspace solve requires MFEM LAPACK support."); - return std::make_tuple(b, std::vector>{}, std::vector{}, 0.0); -} - TrustRegionSubspaceResult solvePreparedSubspaceProblem(const CachedTrustRegionSubspaceProblem& prepared, double) { throw TrustRegionException("Trust-region subspace solve requires MFEM LAPACK support."); diff --git a/src/smith/numerics/steihaug_toint_cg.cpp b/src/smith/numerics/steihaug_toint_cg.cpp index 784dc0ac89..b33fae7864 100644 --- a/src/smith/numerics/steihaug_toint_cg.cpp +++ b/src/smith/numerics/steihaug_toint_cg.cpp @@ -10,10 +10,12 @@ namespace smith { namespace { +bool isDescentDirection(double directional_derivative) { return directional_derivative < 0.0; } + void projectToBoundaryWithCoefs(mfem::Vector& z, const mfem::Vector& d, double delta, double zz, double zd, double dd) { - const double deltadelta_m_zz = delta * delta - zz; - if (deltadelta_m_zz <= 0.0) return; + const double deltadelta_m_zz = 
std::max(delta * delta - zz, 0.0); + if (deltadelta_m_zz == 0.0) return; const double tau = (std::sqrt(deltadelta_m_zz * dd + zd * zd) - zd) / dd; z.Add(tau, d); } @@ -30,6 +32,11 @@ std::vector dotMany(const std::vector& pairs) return products; } +bool isDescentDirection(const mfem::Vector& direction, const mfem::Vector& residual, const DotManyFunction& dot_many) +{ + return isDescentDirection(dot_many({{&direction, &residual}})[0]); +} + void steihaugTointCG(const mfem::Vector& r0, mfem::Vector& rCurrent, const mfem::Operator& H, const mfem::Solver* P, const TrustRegionSettings& settings, double& trSize, TrustRegionResults& results, double r0_norm_squared, const DotManyFunction& dot_many) @@ -65,7 +72,7 @@ void steihaugTointCG(const mfem::Vector& r0, mfem::Vector& rCurrent, const mfem: double zz = 0.; // rPr = dot(rCurrent, Pr) - double rPr = dot_many({{&rCurrent, &Pr}, {&rCurrent, &Pr}})[0]; + double rPr = dot_many({{&rCurrent, &Pr}})[0]; for (cgIter = 1; cgIter <= settings.max_cg_iterations; ++cgIter) { H.Mult(d, Hd); @@ -76,13 +83,9 @@ void steihaugTointCG(const mfem::Vector& r0, mfem::Vector& rCurrent, const mfem: double zd = dots[2]; double dd = dots[3]; - if (descent_check > 0) { - d *= -1; - Hd *= -1; + if (!isDescentDirection(descent_check)) { results.interior_status = TrustRegionResults::Status::NonDescentDirection; - descent_check *= -1.0; - curvature *= -1.0; - zd *= -1.0; + return; } const double alphaCg = curvature != 0.0 ? 
rPr / curvature : 0.0; @@ -99,6 +102,7 @@ void steihaugTointCG(const mfem::Vector& r0, mfem::Vector& rCurrent, const mfem: return; } + // Alias Pr as temporary workspace 'zPred' to avoid allocation auto& zPred = Pr; zPred = z; zPred.Add(alphaCg, d); diff --git a/src/smith/numerics/steihaug_toint_cg.hpp b/src/smith/numerics/steihaug_toint_cg.hpp index 672bd26dc2..60c3d2c371 100644 --- a/src/smith/numerics/steihaug_toint_cg.hpp +++ b/src/smith/numerics/steihaug_toint_cg.hpp @@ -114,6 +114,9 @@ using DotManyFunction = std::function(const std::vector dotMany(const std::vector& pairs); +/// true when direction is locally downhill for the quadratic model's linear term +bool isDescentDirection(const mfem::Vector& direction, const mfem::Vector& residual, const DotManyFunction& dot_many); + /** * @brief Minimize quadratic sub-problem given residual vector, the action of the stiffness and a preconditioner * diff --git a/src/smith/numerics/tests/test_steihaug_toint_cg.cpp b/src/smith/numerics/tests/test_steihaug_toint_cg.cpp index 860e6d3192..5522144763 100644 --- a/src/smith/numerics/tests/test_steihaug_toint_cg.cpp +++ b/src/smith/numerics/tests/test_steihaug_toint_cg.cpp @@ -88,3 +88,18 @@ TEST(SteihaugTointCG, DetectsNegativeCurvature) EXPECT_NEAR(results.z.Norml2(), 2.0, 1e-9); EXPECT_EQ(results.interior_status, smith::TrustRegionResults::Status::NegativeCurvature); } + +TEST(SteihaugTointCG, DetectsDirectlyFlippedAscentDirection) +{ + mfem::Vector residual(2); + residual[0] = 1.0; + residual[1] = -2.0; + + mfem::Vector descent_direction(residual); + descent_direction *= -1.0; + EXPECT_TRUE(smith::isDescentDirection(descent_direction, residual, smith::dotMany)); + + mfem::Vector ascent_direction(descent_direction); + ascent_direction *= -1.0; + EXPECT_FALSE(smith::isDescentDirection(ascent_direction, residual, smith::dotMany)); +} diff --git a/src/smith/numerics/tests/test_trust_region_solver_mfem.cpp b/src/smith/numerics/tests/test_trust_region_solver_mfem.cpp index 
f2e46a06c2..46cbe0e39b 100644 --- a/src/smith/numerics/tests/test_trust_region_solver_mfem.cpp +++ b/src/smith/numerics/tests/test_trust_region_solver_mfem.cpp @@ -4,6 +4,7 @@ // // SPDX-License-Identifier: (BSD-3-Clause) +#include #include #include "gtest/gtest.h" @@ -98,7 +99,7 @@ TEST(TrustRegionSubspaceMfem, SolveHandlesZeroDirection) const auto astates = applyDiagonalOperator(diag, states); const auto astate_ptrs = toPointers(astates); - auto [sol, leftvecs, leftvals, energy] = smith::solveSubspaceProblemMfem(states, astate_ptrs, b, 0.25, 1); + auto [sol, leftvecs, leftvals, energy] = smith::solveSubspaceProblem(states, astate_ptrs, b, 0.25, 1); EXPECT_LE(sol.Norml2(), 0.25 + 1.0e-12); EXPECT_FALSE(leftvecs.empty()); @@ -106,6 +107,57 @@ TEST(TrustRegionSubspaceMfem, SolveHandlesZeroDirection) EXPECT_LT(energy, 0.0); } +TEST(TrustRegionSubspaceMfem, SolveIndefiniteHardCaseUsesShiftedNewtonPoint) +{ + mfem::Vector e0(2); + mfem::Vector e1(2); + mfem::Vector Ae0(2); + mfem::Vector Ae1(2); + mfem::Vector b(2); + + e0 = 0.0; + e1 = 0.0; + Ae0 = 0.0; + Ae1 = 0.0; + b = 0.0; + + e0[0] = 1.0; + e1[1] = 1.0; + Ae0[0] = -1.0; + Ae1[1] = 2.0; + b[1] = 1.0; + + const std::vector states = {&e0, &e1}; + const std::vector astates = {&Ae0, &Ae1}; + + auto [sol, leftvecs, leftvals, energy] = smith::solveSubspaceProblem(states, astates, b, 1.0, 1); + + EXPECT_NEAR(sol.Norml2(), 1.0, 1.0e-12); + EXPECT_NEAR(std::abs(sol[0]), std::sqrt(8.0 / 9.0), 1.0e-10); + EXPECT_NEAR(sol[1], 1.0 / 3.0, 1.0e-10); + EXPECT_EQ(leftvecs.size(), 1); + EXPECT_EQ(leftvals.size(), 1); + EXPECT_NEAR(leftvals[0], -1.0, 1.0e-12); + EXPECT_NEAR(energy, -2.0 / 3.0, 1.0e-10); +} + +TEST(TrustRegionSubspaceMfem, SolveThrowsOnNanProjection) +{ + mfem::Vector state(2); + mfem::Vector astate(2); + mfem::Vector b(2); + + state = 1.0; + astate = 1.0; + b = 0.0; + astate[1] = std::numeric_limits::quiet_NaN(); + + const std::vector states = {&state}; + const std::vector astates = {&astate}; + + 
EXPECT_THROW(smith::solveSubspaceProblem(states, astates, b, 1.0, 1), smith::TrustRegionException); +} + int main(int argc, char* argv[]) { ::testing::InitGoogleTest(&argc, argv); diff --git a/src/smith/numerics/trust_region_solver.hpp b/src/smith/numerics/trust_region_solver.hpp index 11fff281c3..efd7f27a8b 100644 --- a/src/smith/numerics/trust_region_solver.hpp +++ b/src/smith/numerics/trust_region_solver.hpp @@ -75,16 +75,14 @@ double innerProduct(const mfem::Vector& a, const mfem::Vector& b, const MPI_Comm /// and their eigenvalues, and the predicted model energy change TrustRegionSubspaceResult solveSubspaceProblem(const std::vector& directions, const std::vector& A_directions, - const mfem::Vector& b, double delta, int num_leftmost); - -TrustRegionSubspaceResult solveSubspaceProblemMfem(const std::vector& directions, - const std::vector& A_directions, - const mfem::Vector& b, double delta, int num_leftmost); + const mfem::Vector& b, double delta, int num_leftmost, + MPI_Comm comm = MPI_COMM_WORLD); /// @brief prepares reduced trust-region subspace data reusable across trust-region radius updates CachedTrustRegionSubspaceProblem prepareSubspaceProblem(const std::vector& directions, const std::vector& A_directions, - const mfem::Vector& b, int num_leftmost); + const mfem::Vector& b, int num_leftmost, + MPI_Comm comm = MPI_COMM_WORLD); /// @brief solves cached reduced trust-region problem for given trust-region radius TrustRegionSubspaceResult solvePreparedSubspaceProblem(const CachedTrustRegionSubspaceProblem& prepared, double delta); From 72dfe74a37c77cd48ad4d2a66fc996b1c59ee6cc Mon Sep 17 00:00:00 2001 From: Michael Tupek Date: Tue, 26 May 2026 15:37:02 -0700 Subject: [PATCH 27/27] be a bit more careful about updating contact state when doing finite differencing. 
--- src/smith/physics/tests/contact_finite_diff.cpp | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/src/smith/physics/tests/contact_finite_diff.cpp b/src/smith/physics/tests/contact_finite_diff.cpp index 531b7f2eaa..87f141330f 100644 --- a/src/smith/physics/tests/contact_finite_diff.cpp +++ b/src/smith/physics/tests/contact_finite_diff.cpp @@ -162,9 +162,6 @@ TEST_P(ContactFiniteDiff, patch) merged_sol.SetVector(u, 0); merged_sol.SetVector(pressure, u.Size()); mfem::Vector f(merged_sol.Size()); - f = 0.0; - oper->Mult(merged_sol, f); - auto* J_op = &oper->GetGradient(merged_sol); mfem::Vector u_dot(merged_sol.Size()); u_dot = 0.0; // wiggle displacement (col = j) @@ -174,6 +171,9 @@ TEST_P(ContactFiniteDiff, patch) ++dof_ct; continue; } + f = 0.0; + oper->Mult(merged_sol, f); + auto* J_op = &oper->GetGradient(merged_sol); u_dot[j] = 1.0; mfem::Vector J_exact(merged_sol.Size()); J_exact = 0.0; @@ -206,6 +206,10 @@ TEST_P(ContactFiniteDiff, patch) } std::cout << "Max diff = " << std::setprecision(15) << max_diff << std::endl; + // Restore the contact state after the finite-difference probes before advancing the timestep. + f = 0.0; + oper->Mult(merged_sol, f); + solid_solver.advanceTimestep(dt); } }