diff --git a/src/smith/numerics/CMakeLists.txt b/src/smith/numerics/CMakeLists.txt index cc16a2ea04..a39370fdd1 100644 --- a/src/smith/numerics/CMakeLists.txt +++ b/src/smith/numerics/CMakeLists.txt @@ -5,23 +5,23 @@ # SPDX-License-Identifier: (BSD-3-Clause) add_subdirectory(functional) - set(numerics_headers equation_solver.hpp + steihaug_toint_cg.hpp nonlinear_convergence.hpp odes.hpp solver_config.hpp stdfunction_operator.hpp petsc_solvers.hpp trust_region_solver.hpp - dense_petsc.hpp block_preconditioner.hpp ) set(numerics_sources equation_solver.cpp + steihaug_toint_cg.cpp + mfem_trust_region_subspace.cpp nonlinear_convergence.cpp - trust_region_solver.cpp odes.cpp petsc_solvers.cpp block_preconditioner.cpp diff --git a/src/smith/numerics/dense_petsc.hpp b/src/smith/numerics/dense_petsc.hpp deleted file mode 100644 index 516b3ee01d..0000000000 --- a/src/smith/numerics/dense_petsc.hpp +++ /dev/null @@ -1,380 +0,0 @@ -// Copyright (c) Lawrence Livermore National Security, LLC and -// other Smith Project Developers. See the top-level LICENSE file for -// details. -// -// SPDX-License-Identifier: (BSD-3-Clause) - -#pragma once - -#ifdef SMITH_USE_SLEPC - -#include -#include -#include - -struct DenseVec; - -/// Dense Matrix class which wraps petsc matrix for the case of a SeqDense matrix (on 1 processor) -struct DenseMat { - /// @brief copy constructor - /// @param a matrix - DenseMat(const Mat& a) : A(a) {} - - /// @brief constructor - /// @param a matrix - DenseMat(const DenseMat& a) - { - MatDuplicate(a.A, MAT_COPY_VALUES, &A); - MatCopy(a.A, A, SAME_NONZERO_PATTERN); - MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY); - MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY); - } - - /// @brief destructor - ~DenseMat() { MatDestroy(&A); } - - /// @brief size - auto size() const - { - int isize; - int jsize; - MatGetSize(A, &isize, &jsize); - return std::make_pair(isize, jsize); - } - - /// @brief index into - double operator()(int i, int j) const - { - double val; - MatGetValue(A, i, j, &val); - return val; - } - - /// @brief set value - void setValue(int i, int j, double val) { MatSetValues(A, 1, &i, 1, &j, &val, INSERT_VALUES); } - - /// @brief matrix-vector multiply - DenseVec operator*(const DenseVec& v) const; - - /// @brief solve - DenseVec solve(const DenseVec& v) const; - - /// @brief multiply this by P transpose on left and P on the right - DenseMat PtAP(const DenseMat& P) const; - - /// @brief print utility - void print(std::string first = "") const - { - if (first.size()) { - std::cout << first << ": "; - } - MatView(A, PETSC_VIEWER_STDOUT_SELF); - } - - /// @brief check for nans - bool hasNan() const - { - auto [rows, cols] = size(); - for (int i = 0; i < rows; ++i) { - for (int j = 0; j < cols; ++j) { - double val = (*this)(i, j); - if (val != val) return true; - } - } - return false; - } - - /// @brief reassemble petsc dense matrix after values have been modified - void reassemble() - { - MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY); - MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY); - } - - /// petsc matrix - Mat A; -}; - -/// matrix inverse -/// @param a matrix -DenseMat inverse(const DenseMat& a) -{ - Mat inv; - MatDuplicate(a.A, MAT_COPY_VALUES, &inv); - MatSeqDenseInvert(inv); - return inv; -} - -/// compute the symmetric part -/// @param a matrix -DenseMat sym(const DenseMat& a) -{ - DenseMat b = a; - auto [rows, cols] = b.size(); - SLIC_ERROR_IF(rows != cols, "Calling sym on a non-square DenseMat"); - - for (int i = 0; i < rows; ++i) { - for (int j = 0; j < i; ++j) { - auto val = 0.5 * a(i, j) + 0.5 * a(j, i); - b.setValue(i, j, val); - b.setValue(j, i, val); - } - } - - b.reassemble(); - - return b; -} - -/// Dense Vector class which wraps petsc vector for the case of a SeqDense vector (on 1 processor) -struct DenseVec { - /// @brief constructor - DenseVec(const Vec& vin) : v(vin) {} - - /// @brief constructor - DenseVec(const DenseVec& vin) - { - VecDuplicate(vin.v, &v); - VecCopy(vin.v, v); - } - - /// @brief constructor from size - DenseVec(size_t size) { VecCreateSeq(PETSC_COMM_SELF, static_cast(size), &v); } - - /// @brief constructor from size - DenseVec(int size) { VecCreateSeq(PETSC_COMM_SELF, size, &v); } - - /// @brief constructor standard vector - DenseVec(const std::vector vin) - { - const auto sz = vin.size(); - std::vector allints(sz); - for (size_t i = 0; i < sz; ++i) { - allints[i] = static_cast(i); - } - int sz_int = static_cast(sz); - VecCreateSeq(PETSC_COMM_SELF, sz_int, &v); - VecSetValues(v, sz_int, &allints[0], &vin[0], INSERT_VALUES); - } - - /// @brief assignment - DenseVec& operator=(const DenseVec& vin) - { - VecCopy(vin.v, v); - return *this; - } - - /// @brief assignment from scalar - DenseVec& operator=(const double val) - { - VecSet(v, val); - return *this; - } - - /// @brief destructor - ~DenseVec() - { - if (v) VecDestroy(&v); - } - - /// @brief negate - DenseVec operator-() const - { - Vec minus; - VecDuplicate(v, &minus); - VecCopy(v, minus); - VecScale(minus, -1.0); - return minus; - } - - /// @brief scale - DenseVec& operator*=(double scale) - { - VecScale(v, scale); - return *this; - } - - /// @brief size - int size() const - { - int isize; - VecGetSize(v, &isize); - return isize; - } - - /// @brief index into - double operator[](int i) const - { - double val; - VecGetValues(v, 1, &i, &val); - return val; - } - - /// @brief index into - double operator[](size_t i) const { return (*this)[int(i)]; } - - /// @brief set value - void setValue(int i, double val) { VecSetValues(v, 1, &i, &val, INSERT_VALUES); } - - /// @brief set value - void setValue(size_t i, double val) { setValue(int(i), val); } - - /// @brief add scaled vector - void add(double val, const DenseVec& w) { VecAXPY(v, val, w.v); } - - /// @brief convert to standard vector - std::vector getValues() const - { - size_t sz = static_cast(size()); - std::vector vout(sz); - std::vector allints(sz); - for (size_t i = 0; i < sz; ++i) { - allints[i] = static_cast(i); - } - int sz_int = static_cast(sz); - VecGetValues(v, sz_int, &allints[0], &vout[0]); - return vout; - } - - /// @brief print utility - void print(std::string first = "") const - { - if (first.size()) { - std::cout << first << ": "; - } - VecView(v, PETSC_VIEWER_STDOUT_SELF); - } - - /// petsc vector - Vec v; -}; - -/// @brief matrix vector multiply -DenseVec DenseMat::operator*(const DenseVec& v) const -{ - Vec out; - auto [rows, cols] = size(); - SLIC_ERROR_IF(cols != v.size(), "Column size of dense matrix and length of multiplied vector do not match"); - VecCreateSeq(PETSC_COMM_SELF, rows, &out); - MatMult(A, v.v, out); - return out; -} - -/// @brief matrix linear solve -DenseVec DenseMat::solve(const DenseVec& v) const -{ - Vec out; - VecDuplicate(v.v, &out); - MatLUFactor(A, NULL, NULL, NULL); // not efficient if done a lot - MatSolve(A, v.v, out); - return out; -} - -/// @brief multiply matrix by P-transpose on left, P on right -DenseMat DenseMat::PtAP(const DenseMat& P) const -{ - Mat pAp; - MatPtAP(A, P.A, MAT_INITIAL_MATRIX, PETSC_DETERMINE, &pAp); - return pAp; -} - -/// @brief vector dot product -double dot(const DenseVec& a, const DenseVec& b) -{ - double d; - VecDot(a.v, b.v, &d); - return d; -} - -/// @brief add a scalar to a vector -DenseVec operator+(const DenseVec& a, double b) -{ - Vec c; - VecDuplicate(a.v, &c); - VecSet(c, b); - VecAXPY(c, 1.0, a.v); - return c; -} - -DenseVec operator+(double b, const DenseVec& a) { return a + b; } - -/// @brief component-wise multiplication of vectors -DenseVec operator*(const DenseVec& a, const DenseVec& b) -{ - Vec c; - VecDuplicate(a.v, &c); - VecPointwiseMult(c, a.v, b.v); - return c; -} - -/// @brief component-wise vector divide -DenseVec operator/(const DenseVec& a, const DenseVec& b) -{ - Vec c; - VecDuplicate(a.v, &c); - VecPointwiseDivide(c, a.v, b.v); - return c; -} - -/// @brief component-wise vector absolute value -DenseVec abs(const DenseVec& a) -{ - Vec absa; - VecDuplicate(a.v, &absa); - VecCopy(a.v, absa); - VecAbs(absa); - return absa; -} - -/// @brief sum values in a vector -double sum(const DenseVec& a) -{ - double s; - VecSum(a.v, &s); - return s; -} - -/// @brief l2-norm of vector -double norm(const DenseVec& a) -{ - double n; - VecNorm(a.v, NORM_2, &n); - return n; -} - -/// @brief computes the eigenvectors and eigenvalues of a dense symmetric matrix -auto eigh(const DenseMat& Adense) -{ - auto [isize, jsize] = Adense.size(); - SLIC_ERROR_IF(isize != jsize, "Eig must be called for symmetric matrices"); - - const Mat& A = Adense.A; - - EPS eps; - EPSCreate(PETSC_COMM_SELF, &eps); - EPSSetOperators(eps, A, NULL); - EPSSetProblemType(eps, EPS_HEP); - EPSSetWhichEigenpairs(eps, EPS_SMALLEST_REAL); - EPSSetDimensions(eps, isize, PETSC_DETERMINE, PETSC_DETERMINE); - EPSSetFromOptions(eps); - - EPSSolve(eps); - - EPSType type; - EPSGetType(eps, &type); - EPSGetDimensions(eps, &jsize, NULL, NULL); - - DenseVec eigenvalues(isize); - std::vector eigenvectors; - for (int i = 0; i < isize; ++i) { - eigenvectors.emplace_back(isize); - double eigenvalue; - EPSGetEigenpair(eps, i, &eigenvalue, PETSC_NULLPTR, eigenvectors[static_cast(i)].v, PETSC_NULLPTR); - eigenvalues.setValue(i, eigenvalue); - } - - EPSDestroy(&eps); - return std::make_pair(std::move(eigenvalues), std::move(eigenvectors)); -} - -#endif // SMITH_USE_SLEPC diff --git a/src/smith/numerics/equation_solver.cpp b/src/smith/numerics/equation_solver.cpp index 70ebd6a41c..a197d7884e 100644 --- a/src/smith/numerics/equation_solver.cpp +++ b/src/smith/numerics/equation_solver.cpp @@ -5,9 +5,12 @@ // SPDX-License-Identifier: (BSD-3-Clause) #include "smith/numerics/equation_solver.hpp" +#include "smith/numerics/steihaug_toint_cg.hpp" #include "smith/numerics/block_preconditioner.hpp" +#include #include +#include #include #include #include @@ -16,6 +19,7 @@ #include #include #include +#include #include "smith/smith_config.hpp" #include "smith/infrastructure/profiling.hpp" @@ -26,6 +30,21 @@ namespace smith { namespace { +size_t rootOnlyPrintLevel(const mfem::NewtonSolver& solver, size_t level) +{ +#ifdef MFEM_USE_MPI + const MPI_Comm comm = solver.GetComm(); + if (level > 0 && comm != MPI_COMM_NULL) { + int rank = 0; + MPI_Comm_rank(comm, &rank); + if (rank != 0) { + return 0; + } + } +#endif + return level; +} + class SolverWithPreconditioner : public mfem::Solver { public: SolverWithPreconditioner(std::unique_ptr linear_solver, std::unique_ptr preconditioner) @@ -102,6 +121,36 @@ bool monolithicizeOperatorIfNeeded(const LinearSolverOptions& linear_options, mf return true; } +ConvergenceStatus scalarConvergenceStatus(double residual_norm, double initial_norm, double abs_tol, double rel_tol) +{ + ConvergenceStatus status; + status.block_norms = {residual_norm}; + status.global_norm = residual_norm; + const double relative_base = initial_norm > 0.0 ? initial_norm : residual_norm; + status.global_goal = std::max(abs_tol, rel_tol * relative_base); + status.global_converged = status.global_norm <= status.global_goal; + status.converged = status.global_converged; + return status; +} + +bool shouldUseSubspaceStep(int subspace_option, TrustRegionResults::Status status, double step_norm, double tr_size, + int line_search_iter) +{ + const bool failed_or_indefinite = status == TrustRegionResults::Status::NonDescentDirection || + status == TrustRegionResults::Status::NegativeCurvature || + ((step_norm > (1.0 - 1.0e-6) * tr_size) && line_search_iter > 1); + const bool on_boundary = step_norm > (1.0 - 1.0e-6) * tr_size; + return ((subspace_option >= 1) && failed_or_indefinite) || ((subspace_option >= 2) && on_boundary) || + (subspace_option >= 3); +} + +enum class SubspaceStepStatus +{ + Unavailable, + Unchanged, + Replaced +}; + } // namespace /// @cond @@ -163,11 +212,7 @@ class NewtonSolver : public mfem::NewtonSolver, public ConvergenceManagedNonline if (convergence_manager_) { status = convergence_manager_->evaluate(1.0, rOut); } else { - status.block_norms = {Norm(rOut)}; - status.global_norm = status.block_norms.front(); - status.global_goal = std::max(rel_tol * initial_norm, abs_tol); - status.global_converged = status.global_norm <= status.global_goal; - status.converged = status.global_converged; + status = scalarConvergenceStatus(Norm(rOut), initial_norm, abs_tol, rel_tol); } } catch (const std::exception&) { status.global_norm = std::numeric_limits::max(); @@ -209,8 +254,10 @@ class NewtonSolver : public mfem::NewtonSolver, public ConvergenceManagedNonline MFEM_ASSERT(oper != NULL, "the Operator is not set (use SetOperator)."); MFEM_ASSERT(prec != NULL, "the Solver is not set (use SetSolver)."); - print_level = print_options.iterations ? 1 : print_level; - print_level = print_options.summary ? 2 : print_level; + print_level = static_cast(std::max(nonlinear_options.print_level, 0)); + print_level = print_options.iterations ? std::max(1, print_level) : print_level; + print_level = print_options.summary ? std::max(2, print_level) : print_level; + print_level = rootOnlyPrintLevel(*this, print_level); using real_t = mfem::real_t; @@ -336,94 +383,12 @@ class NewtonSolver : public mfem::NewtonSolver, public ConvergenceManagedNonline } }; -/// Internal structure for storing trust region settings -struct TrustRegionSettings { - /// cg tol - double cg_tol = 1e-8; - /// min cg iters - size_t min_cg_iterations = 0; // - /// max cg iters should be around # of system dofs - size_t max_cg_iterations = 10000; // - /// max cumulative iterations - size_t max_cumulative_iteration = 1; - /// minimum trust region size - double min_tr_size = 1e-13; - /// trust region decrease factor - double t1 = 0.25; - /// trust region increase factor - double t2 = 1.75; - /// worse case energy drop ratio. trust region accepted if energy drop is better than this. - double eta1 = 1e-9; - /// non-ideal energy drop ratio. trust region decreases if energy drop is worse than this. - double eta2 = 0.1; - /// ideal energy drop ratio. trust region increases if energy drop is better than this. - double eta3 = 0.6; - /// parameter limiting how fast the energy can drop relative to the prediction (in case the energy surrogate is poor) - double eta4 = 4.2; -}; - -/// Internal structure for storing trust region stateful data -struct TrustRegionResults { - /// Constructor takes the size of the solution vector - TrustRegionResults(int size) - { - z.SetSize(size); - H_z.SetSize(size); - d_old.SetSize(size); - H_d_old.SetSize(size); - d.SetSize(size); - H_d.SetSize(size); - Pr.SetSize(size); - cauchy_point.SetSize(size); - H_cauchy_point.SetSize(size); - } - - /// resets trust region results for a new outer iteration - void reset() - { - z = 0.0; - cauchy_point = 0.0; - } - - /// enumerates the possible final status of the trust region steps - enum class Status - { - Interior, - NegativeCurvature, - OnBoundary, - NonDescentDirection - }; - - /// step direction - mfem::Vector z; - /// action of hessian on current step z - mfem::Vector H_z; - /// old step direction - mfem::Vector d_old; - /// action of hessian on previous step z_old - mfem::Vector H_d_old; - /// incrementalCG direction - mfem::Vector d; - /// action of hessian on direction d - mfem::Vector H_d; - /// preconditioned residual - mfem::Vector Pr; - /// cauchy point - mfem::Vector cauchy_point; - /// action of hessian on direction of cauchy point - mfem::Vector H_cauchy_point; - /// specifies if step is interior, exterior, negative curvature, etc. - Status interior_status = Status::Interior; - /// iteration counter - size_t cg_iterations_count = 0; -}; - /// trust region printing utility function -void printTrustRegionInfo(double realObjective, double modelObjective, size_t cgIters, double trSize, bool willAccept) +void printTrustRegionInfo(double realWork, double modelObjective, size_t cgIters, double trSize, bool willAccept) { - mfem::out << "real energy = " << std::setw(13) << realObjective << ", model energy = " << std::setw(13) - << modelObjective << ", cg iter = " << std::setw(7) << cgIters << ", next tr size = " << std::setw(8) - << trSize << ", accepting = " << willAccept << std::endl; + mfem::out << "real work = " << std::setw(13) << realWork << ", model energy = " << std::setw(13) << modelObjective + << ", cg iter = " << std::setw(7) << cgIters << ", next tr size = " << std::setw(8) << trSize + << ", accepting = " << willAccept << std::endl; } /** @@ -466,17 +431,6 @@ class TrustRegion : public mfem::NewtonSolver, public ConvergenceManagedNonlinea std::shared_ptr convergence_manager_ = nullptr; public: - /// internal counter for hess-vecs - mutable size_t num_hess_vecs = 0; - /// internal counter for preconditions - mutable size_t num_preconds = 0; - /// internal counter for residuals - mutable size_t num_residuals = 0; - /// internal counter for subspace solves - mutable size_t num_subspace_solves = 0; - /// internal counter for matrix assembles - mutable size_t num_jacobian_assembles = 0; - #ifdef MFEM_USE_MPI /// constructor TrustRegion(MPI_Comm comm_, const NonlinearSolverOptions& nonlinear_opts, const LinearSolverOptions& linear_opts, @@ -497,89 +451,100 @@ class TrustRegion : public mfem::NewtonSolver, public ConvergenceManagedNonlinea convergence_manager_ = std::move(convergence_manager); } - /// finds tau s.t. (z + tau*d)^2 = trSize^2 - void projectToBoundaryWithCoefs(mfem::Vector& z, const mfem::Vector& d, double delta, double zz, double zd, - double dd) const + /// compute several vector inner products with a single MPI reduction when possible + std::vector dot_many(const std::vector& pairs) const { - // find z + tau d - double deltadelta_m_zz = delta * delta - zz; - if (deltadelta_m_zz == 0) return; // already on boundary - double tau = (std::sqrt(deltadelta_m_zz * dd + zd * zd) - zd) / dd; - z.Add(tau, d); + if (dot_oper) { + std::vector products(pairs.size(), 0.0); + for (size_t i = 0; i < pairs.size(); ++i) { + products[i] = Dot(*pairs[i].first, *pairs[i].second); + } + return products; + } + + std::vector products = smith::dotMany(pairs); + +#ifdef MFEM_USE_MPI + const MPI_Comm dot_comm = GetComm(); + if (dot_comm != MPI_COMM_NULL) { + std::vector global_products(pairs.size()); + MPI_Allreduce(products.data(), global_products.data(), static_cast(pairs.size()), MFEM_MPI_REAL_T, MPI_SUM, + dot_comm); + products.assign(global_products.begin(), global_products.end()); + } +#endif + + return products; } - /// solve the exact trust-region subspace problem with directions ds, and the leftmosts - template - void solveTheSubspaceProblem([[maybe_unused]] mfem::Vector& z, [[maybe_unused]] const HessVecFunc& hess_vec_func, - [[maybe_unused]] const std::vector ds, - [[maybe_unused]] const std::vector Hds, - [[maybe_unused]] const mfem::Vector& g, [[maybe_unused]] double delta, - [[maybe_unused]] int num_leftmost) const + /// build reusable subspace data for line-search retries + bool prepareSubspaceProblemCache([[maybe_unused]] const std::vector& ds, + [[maybe_unused]] const std::vector& Hds, + [[maybe_unused]] const mfem::Vector& g, [[maybe_unused]] int num_leftmost, + [[maybe_unused]] CachedTrustRegionSubspaceProblem& prepared_subspace) const { -#ifdef SMITH_USE_SLEPC +#ifdef MFEM_USE_LAPACK SMITH_MARK_FUNCTION; - ++num_subspace_solves; - - std::vector directions; - for (auto& d : ds) { - directions.emplace_back(d); - } - for (auto& left : left_mosts) { - directions.emplace_back(left.get()); - } + std::vector directions(ds.begin(), ds.end()); + std::vector H_directions(Hds.begin(), Hds.end()); + for (auto& left : left_mosts) directions.emplace_back(left.get()); + for (auto& H_left : H_left_mosts) H_directions.emplace_back(H_left.get()); - std::vector H_directions; - for (auto& Hd : Hds) { - H_directions.emplace_back(Hd); - } - for (auto& H_left : H_left_mosts) { - H_directions.emplace_back(H_left.get()); - } + mfem::Vector b(g); + b *= -1; try { - std::tie(directions, H_directions) = removeDependentDirections(directions, H_directions); + prepared_subspace = smith::prepareSubspaceProblem(directions, H_directions, b, num_leftmost, GetComm()); } catch (const std::exception& e) { - if (print_level >= 2) { - mfem::out << "remove dependent directions failed with " << e.what() << std::endl; + if (print_level >= 1) { + mfem::out << "subspace preparation failed with " << e.what() << "; using dogleg fallback." << std::endl; } - return; + return false; } + return true; +#else + return false; +#endif + } - mfem::Vector b(g); - b *= -1; - + /// solve cached exact trust-region subspace problem for current trust-region size + template + SubspaceStepStatus solvePreparedSubspaceProblem( + [[maybe_unused]] mfem::Vector& z, [[maybe_unused]] const HessVecFunc& hess_vec_func, + [[maybe_unused]] const CachedTrustRegionSubspaceProblem& prepared_subspace, + [[maybe_unused]] const mfem::Vector& g, [[maybe_unused]] double delta) const + { +#ifdef MFEM_USE_LAPACK + SMITH_MARK_FUNCTION; mfem::Vector sol; - std::vector> leftvecs; - std::vector leftvals; double energy_change; try { - std::tie(sol, leftvecs, leftvals, energy_change) = - solveSubspaceProblem(directions, H_directions, b, delta, num_leftmost); + std::tie(sol, std::ignore, std::ignore, energy_change) = + smith::solvePreparedSubspaceProblem(prepared_subspace, delta); } catch (const std::exception& e) { - if (print_level == 1) { - mfem::out << "subspace solve failed with " << e.what() << std::endl; + if (print_level >= 1) { + mfem::out << "subspace solve failed with " << e.what() << "; using dogleg fallback." << std::endl; } - return; - } - - left_mosts.clear(); - for (auto& lv : leftvecs) { - left_mosts.emplace_back(std::move(lv)); + return SubspaceStepStatus::Unavailable; } double base_energy = computeEnergy(g, hess_vec_func, z); double subspace_energy = computeEnergy(g, hess_vec_func, sol); if (print_level >= 2) { - double leftval = leftvals.size() ? leftvals[0] : 1.0; + double leftval = prepared_subspace.leftvals.size() ? prepared_subspace.leftvals[0] : 1.0; mfem::out << "Energy using subspace solver from: " << base_energy << ", to: " << subspace_energy << " / " << energy_change << ". Min eig: " << leftval << std::endl; } if (subspace_energy < base_energy) { z = sol; + return SubspaceStepStatus::Replaced; } + return SubspaceStepStatus::Unchanged; +#else + return SubspaceStepStatus::Unavailable; #endif } @@ -589,7 +554,9 @@ class TrustRegion : public mfem::NewtonSolver, public ConvergenceManagedNonlinea { double dd = yy - 2 * zy + zz; double zd = zy - zz; - double tau = (std::sqrt((trSize * trSize - zz) * dd + zd * zd) - zd) / dd; + double boundary_gap = std::max(trSize * trSize - zz, 0.0); + if (boundary_gap == 0.0) return; + double tau = (std::sqrt(boundary_gap * dd + zd * zd) - zd) / dd; z.Add(-tau, z); z.Add(tau, y); } @@ -598,9 +565,9 @@ class TrustRegion : public mfem::NewtonSolver, public ConvergenceManagedNonlinea void doglegStep(const mfem::Vector& cp, const mfem::Vector& newtonP, double trSize, mfem::Vector& s) const { SMITH_MARK_FUNCTION; - // MRT, could optimize some of these eventually, compute on the outside and save - double cc = Dot(cp, cp); - double nn = Dot(newtonP, newtonP); + const auto dots = dot_many({{&cp, &cp}, {&newtonP, &newtonP}}); + const double cc = dots[0]; + const double nn = dots[1]; double tt = trSize * trSize; s = 0.0; @@ -633,109 +600,42 @@ class TrustRegion : public mfem::NewtonSolver, public ConvergenceManagedNonlinea } /// Minimize quadratic sub-problem given residual vector, the action of the stiffness and a preconditioner - template - void solveTrustRegionModelProblem(const mfem::Vector& r0, mfem::Vector& rCurrent, HessVecFunc hess_vec_func, - PrecondFunc precond, const TrustRegionSettings& settings, double& trSize, - TrustRegionResults& results) const + void solveModelProblem(const mfem::Vector& r0, mfem::Vector& rCurrent, const mfem::Operator& H, const mfem::Solver* P, + const TrustRegionSettings& settings, double& trSize, TrustRegionResults& results, + double r0_norm_squared) const { - SMITH_MARK_FUNCTION; - // minimize r0@z + 0.5*z@J@z - results.interior_status = TrustRegionResults::Status::Interior; - results.cg_iterations_count = 0; - - auto& z = results.z; - auto& cgIter = results.cg_iterations_count; - auto& d = results.d; - auto& Pr = results.Pr; - auto& Hd = results.H_d; - - const double cg_tol_squared = settings.cg_tol * settings.cg_tol; + auto dot_many_lambda = [this](const std::vector& pairs) { return dot_many(pairs); }; + steihaugTointCG(r0, rCurrent, H, P, settings, trSize, results, r0_norm_squared, dot_many_lambda); + } - if (Dot(r0, r0) <= cg_tol_squared && settings.min_cg_iterations == 0) { - if (print_level >= 2) { - mfem::out << "Trust region solution state within tolerance on first iteration." - << "\n"; - } - return; + void fallbackToCauchyPoint(TrustRegionResults& results, const char* reason) const + { + if (print_level >= 2) { + mfem::out << reason << "; using cauchy point fallback." << std::endl; } + results.d = results.cauchy_point; + } - rCurrent = r0; - precond(rCurrent, Pr); - - // d = -Pr - d = Pr; - d *= -1.0; - - z = 0.0; - double zz = 0.; - double rPr = Dot(rCurrent, Pr); - double zd = 0.0; - double dd = Dot(d, d); - - // std::cout << "initial energy = " << computeEnergy(r0, hess_vec_func, z) << std::endl; - - for (cgIter = 1; cgIter <= settings.max_cg_iterations; ++cgIter) { - // check if this is a descent direction - if (Dot(d, rCurrent) > 0) { - d *= -1; - results.interior_status = TrustRegionResults::Status::NonDescentDirection; - } - - hess_vec_func(d, Hd); - const double curvature = Dot(d, Hd); - const double alphaCg = curvature != 0.0 ? rPr / curvature : 0.0; - - auto& zPred = Pr; // re-use Pr memory. - // This predicted step will no longer be used by the time Pr is, so we can avoid an extra - // vector floating around - add(z, alphaCg, d, zPred); - double zzNp1 = Dot(zPred, zPred); - - const bool go_to_boundary = curvature <= 0 || zzNp1 >= trSize * trSize; - if (go_to_boundary) { - projectToBoundaryWithCoefs(z, d, trSize, zz, zd, dd); - if (curvature <= 0) { - results.interior_status = TrustRegionResults::Status::NegativeCurvature; - } else { - results.interior_status = TrustRegionResults::Status::OnBoundary; - } - return; - } - - z = zPred; - - if (results.interior_status == TrustRegionResults::Status::NonDescentDirection) { - if (print_level >= 2) { - mfem::out << "Found a non descent direction\n"; - } - return; - } - - add(rCurrent, alphaCg, Hd, rCurrent); - - precond(rCurrent, Pr); - double rPrNp1 = Dot(rCurrent, Pr); - - if (Dot(rCurrent, rCurrent) <= cg_tol_squared && cgIter >= settings.min_cg_iterations) { - return; - } - - double beta = rPrNp1 / rPr; - rPr = rPrNp1; - add(-1.0, Pr, beta, d, d); + bool isDescentStep(const mfem::Vector& step, const mfem::Vector& residual) const + { + auto dot_many_lambda = [this](const std::vector& pairs) { return dot_many(pairs); }; + return smith::isDescentDirection(step, residual, dot_many_lambda); + } - zz = zzNp1; - zd = Dot(z, d); - dd = Dot(d, d); + template + void computeHessianActions(const std::vector& inputs, const std::vector& outputs, + const HessVecFunc& hess_vec_func) const + { + MFEM_VERIFY(inputs.size() == outputs.size(), "Subspace Hessian-vector batch input/output size mismatch"); + for (size_t i = 0; i < inputs.size(); ++i) { + hess_vec_func(*inputs[i], *outputs[i]); } - cgIter--; // if all cg iterations are taken, correct for output } /// assemble the jacobian void assembleJacobian(const mfem::Vector& x) const { SMITH_MARK_FUNCTION; - ++num_jacobian_assembles; if (grad_monolithic) { delete grad; grad = nullptr; @@ -749,11 +649,11 @@ class TrustRegion : public mfem::NewtonSolver, public ConvergenceManagedNonlinea mfem::real_t computeResidual(const mfem::Vector& x_, mfem::Vector& r_) const { SMITH_MARK_FUNCTION; - ++num_residuals; oper->Mult(x_, r_); return Norm(r_); } + /// apply the action of the current Jacobian representation to a vector ConvergenceStatus evaluateConvergence(const mfem::Vector& x_, mfem::Vector& r_) const { ConvergenceStatus status; @@ -764,10 +664,7 @@ class TrustRegion : public mfem::NewtonSolver, public ConvergenceManagedNonlinea if (convergence_manager_) { status = convergence_manager_->evaluate(1.0, r_); } else { - status.block_norms = {status.global_norm}; - status.global_goal = std::max(rel_tol * initial_norm, abs_tol); - status.global_converged = status.global_norm <= status.global_goal; - status.converged = status.global_converged; + status = scalarConvergenceStatus(status.global_norm, initial_norm, abs_tol, rel_tol); } } catch (const std::exception&) { status.global_norm = std::numeric_limits::max(); @@ -780,7 +677,6 @@ class TrustRegion : public mfem::NewtonSolver, public ConvergenceManagedNonlinea void hessVec(const mfem::Vector& x_, mfem::Vector& v_) const { SMITH_MARK_FUNCTION; - ++num_hess_vecs; grad->Mult(x_, v_); } @@ -788,7 +684,6 @@ class TrustRegion : public mfem::NewtonSolver, public ConvergenceManagedNonlinea void precond(const mfem::Vector& x_, mfem::Vector& v_) const { SMITH_MARK_FUNCTION; - ++num_preconds; tr_precond.Mult(x_, v_); }; @@ -797,18 +692,13 @@ class TrustRegion : public mfem::NewtonSolver, public ConvergenceManagedNonlinea { MFEM_ASSERT(oper != NULL, "the Operator is not set (use SetOperator)."); MFEM_ASSERT(prec != NULL, "the Solver is not set (use SetSolver)."); - - print_level = print_options.iterations ? 1 : print_level; - print_level = print_options.summary ? 2 : print_level; + print_level = static_cast(std::max(nonlinear_options.print_level, 0)); + print_level = print_options.iterations ? std::max(1, print_level) : print_level; + print_level = print_options.summary ? std::max(2, print_level) : print_level; + print_level = rootOnlyPrintLevel(*this, print_level); using real_t = mfem::real_t; - num_hess_vecs = 0; - num_preconds = 0; - num_residuals = 0; - num_subspace_solves = 0; - num_jacobian_assembles = 0; - ConvergenceStatus status = evaluateConvergence(X, r); real_t norm = status.global_norm; real_t norm_goal = status.global_goal; @@ -880,23 +770,25 @@ class TrustRegion : public mfem::NewtonSolver, public ConvergenceManagedNonlinea } auto hess_vec_func = [&](const mfem::Vector& x_, mfem::Vector& v_) { hessVec(x_, v_); }; - auto precond_func = [&](const mfem::Vector& x_, mfem::Vector& v_) { precond(x_, v_); }; double cauchyPointNormSquared = tr_size * tr_size; trResults.reset(); - hess_vec_func(r, trResults.H_d); - const double gKg = Dot(r, trResults.H_d); - if (gKg > 0) { - const double alphaCp = -Dot(r, r) / gKg; - add(trResults.cauchy_point, alphaCp, r, trResults.cauchy_point); - cauchyPointNormSquared = Dot(trResults.cauchy_point, trResults.cauchy_point); - } else { - const double alphaTr = -tr_size / std::sqrt(Dot(r, r)); - add(trResults.cauchy_point, alphaTr, r, trResults.cauchy_point); - if (print_level >= 2) { - mfem::out << "Negative curvature un-preconditioned cauchy point direction found." - << "\n"; + { + hess_vec_func(r, trResults.H_d); + const double gKg = Dot(r, trResults.H_d); + const double residual_norm_squared = norm * norm; + if (gKg > 0) { + const double alphaCp = -residual_norm_squared / gKg; + add(trResults.cauchy_point, alphaCp, r, trResults.cauchy_point); + cauchyPointNormSquared = Dot(trResults.cauchy_point, trResults.cauchy_point); + } else { + const double alphaTr = -tr_size / norm; + add(trResults.cauchy_point, alphaTr, r, trResults.cauchy_point); + if (print_level >= 2) { + mfem::out << "Negative curvature un-preconditioned cauchy point direction found." + << "\n"; + } } } @@ -912,49 +804,91 @@ class TrustRegion : public mfem::NewtonSolver, public ConvergenceManagedNonlinea trResults.interior_status = TrustRegionResults::Status::OnBoundary; } else { settings.cg_tol = std::max(0.5 * norm_goal, 5e-5 * norm); - solveTrustRegionModelProblem(r, scratch, hess_vec_func, precond_func, settings, tr_size, trResults); + solveModelProblem(r, scratch, *grad, &this->tr_precond, settings, tr_size, trResults, norm * norm); } cumulative_cg_iters_from_last_precond_update += trResults.cg_iterations_count; bool have_computed_Hvs = false; + bool have_computed_H_left_mosts = false; + bool have_prepared_subspace = false; + CachedTrustRegionSubspaceProblem prepared_subspace; +#ifdef MFEM_USE_LAPACK + constexpr bool can_use_subspace_solver = true; +#else + constexpr bool can_use_subspace_solver = false; +#endif int lineSearchIter = 0; while (lineSearchIter <= nonlinear_options.max_line_search_iterations) { ++lineSearchIter; doglegStep(trResults.cauchy_point, trResults.z, tr_size, trResults.d); + const double d_norm = subspace_option >= 1 ? std::sqrt(Dot(trResults.d, trResults.d)) : 0.0; + const bool use_subspace = + can_use_subspace_solver && + shouldUseSubspaceStep(subspace_option, trResults.interior_status, d_norm, tr_size, lineSearchIter); - bool use_with_option1 = - (subspace_option >= 1) && (trResults.interior_status == TrustRegionResults::Status::NonDescentDirection || - trResults.interior_status == TrustRegionResults::Status::NegativeCurvature || - ((Norm(trResults.d) > (1.0 - 1.0e-6) * tr_size) && lineSearchIter > 1)); - bool use_with_option2 = (subspace_option >= 2) && (Norm(trResults.d) > (1.0 - 1.0e-6) * tr_size); - bool use_with_option3 = (subspace_option >= 3); - - if (use_with_option1 || use_with_option2 || use_with_option3) { + bool subspace_unavailable = false; + if (use_subspace) { if (!have_computed_Hvs) { have_computed_Hvs = true; - hess_vec_func(trResults.z, trResults.H_z); - hess_vec_func(trResults.d_old, trResults.H_d_old); - hess_vec_func(trResults.cauchy_point, trResults.H_cauchy_point); + + std::vector subspace_hess_inputs{&trResults.z, &trResults.cauchy_point}; + std::vector subspace_hess_outputs{&trResults.H_z, &trResults.H_cauchy_point}; + if (trResults.has_d_old) { + subspace_hess_inputs.push_back(&trResults.d_old); + subspace_hess_outputs.push_back(&trResults.H_d_old); + } + + computeHessianActions(subspace_hess_inputs, subspace_hess_outputs, hess_vec_func); + } + + if (!have_computed_H_left_mosts) { + have_computed_H_left_mosts = true; + H_left_mosts.clear(); + std::vector leftmost_inputs; + std::vector leftmost_outputs; + for (auto& left : left_mosts) { + H_left_mosts.emplace_back(std::make_shared(*left)); + leftmost_inputs.push_back(left.get()); + leftmost_outputs.push_back(H_left_mosts.back().get()); + } + computeHessianActions(leftmost_inputs, leftmost_outputs, hess_vec_func); + } + + if (!have_prepared_subspace) { + have_prepared_subspace = true; + + std::vector ds{&trResults.z, &trResults.cauchy_point}; + std::vector H_ds{&trResults.H_z, &trResults.H_cauchy_point}; + if (trResults.has_d_old) { + ds.push_back(&trResults.d_old); + H_ds.push_back(&trResults.H_d_old); + } + + have_prepared_subspace = prepareSubspaceProblemCache(ds, H_ds, r, num_leftmost, prepared_subspace); + subspace_unavailable = !have_prepared_subspace; } - H_left_mosts.clear(); - for (auto& left : left_mosts) { - H_left_mosts.emplace_back(std::make_shared(*left)); - hess_vec_func(*left, *H_left_mosts.back()); + if (have_prepared_subspace) { + const SubspaceStepStatus subspace_status = + solvePreparedSubspaceProblem(trResults.d, hess_vec_func, prepared_subspace, r, tr_size); + subspace_unavailable = subspace_status == SubspaceStepStatus::Unavailable; } + } - std::vector ds{&trResults.z, &trResults.d_old, &trResults.cauchy_point}; - std::vector H_ds{&trResults.H_z, &trResults.H_d_old, &trResults.H_cauchy_point}; - solveTheSubspaceProblem(trResults.d, hess_vec_func, ds, H_ds, r, tr_size, num_leftmost); + if (subspace_unavailable || !isDescentStep(trResults.d, r)) { + fallbackToCauchyPoint( + trResults, subspace_unavailable ? "Subspace step unavailable" : "Fallback step is not a descent step"); } static constexpr double roundOffTol = 0.0; // 1e-14; hess_vec_func(trResults.d, trResults.H_d); - double dHd = Dot(trResults.d, trResults.H_d); - double modelObjective = Dot(r, trResults.d) + 0.5 * dHd - roundOffTol; + const auto dots = dot_many({{&trResults.d, &trResults.H_d}, {&r, &trResults.d}}); + const double dHd = dots[0]; + const double rd = dots[1]; + double modelObjective = rd + 0.5 * dHd - roundOffTol; add(X, trResults.d, x_pred); @@ -982,6 +916,23 @@ class TrustRegion : public mfem::NewtonSolver, public ConvergenceManagedNonlinea normPred = std::numeric_limits::max(); } + if (normPred <= norm_goal) { + trResults.d_old = trResults.d; + trResults.has_d_old = true; + if (!prepared_subspace.leftmosts.empty()) { + left_mosts = prepared_subspace.leftmosts; + } + X = x_pred; + r = r_pred; + norm = normPred; + if (print_level >= 2) { + printTrustRegionInfo(realObjective, modelObjective, trResults.cg_iterations_count, tr_size, true); + trResults.cg_iterations_count = + 0; // zero this output so it doesn't look like the linesearch is doing cg iterations + } + break; + } + double modelImprove = -modelObjective; double realImprove = -realObjective; @@ -1012,7 +963,7 @@ class TrustRegion : public mfem::NewtonSolver, public ConvergenceManagedNonlinea // modelRes = g + Jd // modelResNorm = np.linalg.norm(modelRes) // realResNorm = np.linalg.norm(gy) - bool willAccept = rho >= settings.eta1 && rho <= settings.eta4; // or (rho >= -0 and realResNorm <= gNorm) + const bool willAccept = rho >= settings.eta1 && rho <= settings.eta4; if (print_level >= 2) { printTrustRegionInfo(realObjective, modelObjective, trResults.cg_iterations_count, tr_size, willAccept); @@ -1022,9 +973,14 @@ class TrustRegion : public mfem::NewtonSolver, public ConvergenceManagedNonlinea if (willAccept) { trResults.d_old = trResults.d; + trResults.has_d_old = true; + if (!prepared_subspace.leftmosts.empty()) { + left_mosts = prepared_subspace.leftmosts; + } X = x_pred; r = r_pred; - status = convergence_manager_ ? convergence_manager_->evaluate(1.0, r_pred) : status; + status = convergence_manager_ ? convergence_manager_->evaluate(1.0, r_pred) + : scalarConvergenceStatus(normPred, initial_norm, abs_tol, rel_tol); norm = normPred; break; } @@ -1041,14 +997,6 @@ class TrustRegion : public mfem::NewtonSolver, public ConvergenceManagedNonlinea if (!converged && print_level >= 1) { // (print_options.summary || print_options.warnings)) { mfem::out << "TrustRegion: No convergence!\n"; } - - if (false && print_level >= 2) { - mfem::out << "num hess vecs = " << num_hess_vecs << "\n"; - mfem::out << "num preconds = " << num_preconds << "\n"; - mfem::out << "num residuals = " << num_residuals << "\n"; - mfem::out << "num subspace solves = " << num_subspace_solves << "\n"; - mfem::out << "num jacobian_assembles = " << num_jacobian_assembles << "\n"; - } } }; /// @endcond @@ -1540,7 +1488,9 @@ void EquationSolver::defineInputFileSchema(axom::inlet::Container& container) nonlinear_container.addDouble("abs_tol", "Absolute tolerance for the Newton solve.").defaultValue(1.0e-4); nonlinear_container.addInt("max_iter", "Maximum iterations for the Newton solve.").defaultValue(500); nonlinear_container.addInt("print_level", "Nonlinear print level.").defaultValue(0); - nonlinear_container.addString("solver_type", "Solver type (Newton|KINFullStep|KINLineSearch)").defaultValue("Newton"); + nonlinear_container + .addString("solver_type", "Solver type (Newton|NewtonLineSearch|TrustRegion|KINFullStep|KINLineSearch)") + .defaultValue("Newton"); } } // namespace smith @@ -1615,6 +1565,10 @@ smith::NonlinearSolverOptions FromInlet::operator const std::string solver_type = base["solver_type"]; if (solver_type == "Newton") { options.nonlin_solver = smith::NonlinearSolver::Newton; + } else if (solver_type == "NewtonLineSearch") { + options.nonlin_solver = smith::NonlinearSolver::NewtonLineSearch; + } else if (solver_type == "TrustRegion") { + options.nonlin_solver = smith::NonlinearSolver::TrustRegion; } else if (solver_type == "KINFullStep") { options.nonlin_solver = smith::NonlinearSolver::KINFullStep; } else if (solver_type == "KINLineSearch") { diff --git a/src/smith/numerics/equation_solver.hpp b/src/smith/numerics/equation_solver.hpp index baac9936e1..17c5112626 100644 --- a/src/smith/numerics/equation_solver.hpp +++ b/src/smith/numerics/equation_solver.hpp @@ -12,6 +12,7 @@ #pragma once +#include #include #include #include @@ -21,6 +22,7 @@ #include "mfem.hpp" #include "smith/infrastructure/input.hpp" +#include "smith/infrastructure/logger.hpp" #include "smith/numerics/nonlinear_convergence.hpp" #include "smith/numerics/solver_config.hpp" #include "smith/numerics/petsc_solvers.hpp" diff --git a/src/smith/numerics/functional/functional.hpp b/src/smith/numerics/functional/functional.hpp index f5a40d8259..dddeadc4d0 100644 --- a/src/smith/numerics/functional/functional.hpp +++ b/src/smith/numerics/functional/functional.hpp @@ -828,7 +828,7 @@ class Functional { } }; - uint64_t max_buffer_size() + uint64_t max_buffer_size() const { uint64_t max_entries = 0; for (auto& integral : form_.integrals_) { diff --git a/src/smith/numerics/functional/tests/functional_comparisons.cpp b/src/smith/numerics/functional/tests/functional_comparisons.cpp index 12314e45d0..a272498955 100644 --- a/src/smith/numerics/functional/tests/functional_comparisons.cpp +++ b/src/smith/numerics/functional/tests/functional_comparisons.cpp @@ -4,9 +4,9 @@ // // SPDX-License-Identifier: (BSD-3-Clause) +#include #include #include -#include #include #include #include diff --git a/src/smith/numerics/mfem_trust_region_subspace.cpp b/src/smith/numerics/mfem_trust_region_subspace.cpp new file mode 100644 index 0000000000..453d8351a3 --- /dev/null +++ b/src/smith/numerics/mfem_trust_region_subspace.cpp @@ -0,0 +1,467 @@ +// Copyright (c) Lawrence Livermore National Security, LLC and +// other Smith Project Developers. See the top-level LICENSE file for +// details. +// +// SPDX-License-Identifier: (BSD-3-Clause) + +#include "smith/numerics/trust_region_solver.hpp" + +#include +#include +#include + +#include "smith/infrastructure/profiling.hpp" + +namespace smith { + +int globalSize(const mfem::Vector& parallel_v, const MPI_Comm& comm) +{ + int local_size = parallel_v.Size(); + int global_size; + MPI_Allreduce(&local_size, &global_size, 1, MPI_INT, MPI_SUM, comm); + return global_size; +} + +double innerProduct(const mfem::Vector& a, const mfem::Vector& b, const MPI_Comm& comm) +{ + return mfem::InnerProduct(comm, a, b); +} + +#ifdef MFEM_USE_LAPACK + +namespace { + +double dot(const mfem::Vector& a, const mfem::Vector& b) { return a * b; } + +double norm(const mfem::Vector& x) { return x.Norml2(); } + +double sumAbs(const mfem::Vector& x) +{ + double total = 0.0; + for (int i = 0; i < x.Size(); ++i) { + total += std::abs(x[i]); + } + return total; +} + +struct SubspaceProjections { + mfem::DenseMatrix sAs; + mfem::DenseMatrix ss; + mfem::Vector sb; +}; + +void checkProjectionInputs(const std::vector& states, + const std::vector& Astates, const mfem::Vector& b) +{ + MFEM_VERIFY(states.size() == Astates.size(), + "Search directions and their linear operator result must have same number of columns"); + MFEM_VERIFY(!states.empty(), "Subspace projections require at least one direction."); + + const int n = static_cast(states.size()); + const int vector_size = states[0]->Size(); + for (int j = 0; j < n; ++j) { + MFEM_VERIFY(states[size_t(j)]->Size() == vector_size, "Subspace direction sizes differ."); + MFEM_VERIFY(Astates[size_t(j)]->Size() == vector_size, "Subspace Hessian-vector sizes differ."); + } + MFEM_VERIFY(b.Size() == vector_size, "Subspace right-hand-side size differs."); +} + +SubspaceProjections globalSubspaceProjectionFromLocalInnerProducts(const std::vector& states, + const std::vector& Astates, + const mfem::Vector& b, MPI_Comm comm) +{ + const int n = static_cast(states.size()); + const int triangular_size = n * (n + 1) / 2; + const auto triangular_index = [n](int i, int j) { return i * n - (i * (i - 1)) / 2 + (j - i); }; + const int sAs_offset = 0; + const int ss_offset = triangular_size; + const int sb_offset = 2 * triangular_size; + const int buffer_size = 2 * triangular_size + n; + std::vector local_projection_entries(size_t(buffer_size), 0.0); + std::vector global_projection_entries(size_t(buffer_size), 0.0); + + for (int i = 0; i < n; ++i) { + local_projection_entries[size_t(sb_offset + i)] = mfem::InnerProduct(*states[size_t(i)], b); + for (int j = i; j < n; ++j) { + const size_t ij = size_t(triangular_index(i, j)); + local_projection_entries[size_t(sAs_offset) + ij] = mfem::InnerProduct(*states[size_t(i)], *Astates[size_t(j)]); + local_projection_entries[size_t(ss_offset) + ij] = mfem::InnerProduct(*states[size_t(i)], *states[size_t(j)]); + } + } + + MPI_Allreduce(local_projection_entries.data(), global_projection_entries.data(), buffer_size, MFEM_MPI_REAL_T, + MPI_SUM, comm); + + SubspaceProjections projections{mfem::DenseMatrix(n), mfem::DenseMatrix(n), mfem::Vector(n)}; + for (int i = 0; i < n; ++i) { + projections.sb[i] = global_projection_entries[size_t(sb_offset + i)]; + for (int j = i; j < n; ++j) { + const size_t ij = size_t(triangular_index(i, j)); + projections.sAs(i, j) = global_projection_entries[size_t(sAs_offset) + ij]; + projections.sAs(j, i) = projections.sAs(i, j); + projections.ss(i, j) = global_projection_entries[size_t(ss_offset) + ij]; + projections.ss(j, i) = projections.ss(i, j); + } + } + + return projections; +} + +SubspaceProjections projectSubspaceGlobally(const std::vector& states, + const std::vector& Astates, const mfem::Vector& b, + MPI_Comm comm) +{ + checkProjectionInputs(states, Astates, b); + return globalSubspaceProjectionFromLocalInnerProducts(states, Astates, b, comm); +} + +double quadraticEnergy(const mfem::DenseMatrix& A, const mfem::Vector& b, const mfem::Vector& x) +{ + mfem::Vector Ax(x.Size()); + A.Mult(x, Ax); + return 0.5 * dot(x, Ax) - dot(x, b); +} + +double pnormSquared(const mfem::Vector& bvv, const mfem::Vector& sig) +{ + double total = 0.0; + for (int i = 0; i < bvv.Size(); ++i) { + total += bvv[i] / (sig[i] * sig[i]); + } + return total; +} + +double qnormSquared(const mfem::Vector& bvv, const mfem::Vector& sig) +{ + double total = 0.0; + for (int i = 0; i < bvv.Size(); ++i) { + total += bvv[i] / (sig[i] * sig[i] * sig[i]); + } + return total; +} + +mfem::Vector matrixColumn(const mfem::DenseMatrix& A, int j) +{ + mfem::Vector col(A.Height()); + for (int i = 0; i < A.Height(); ++i) { + col[i] = A(i, j); + } + return col; +} + +mfem::DenseMatrix columnsToMatrix(const std::vector& cols) +{ + mfem::DenseMatrix A(cols.empty() ? 0 : cols[0].Size(), static_cast(cols.size())); + for (int j = 0; j < A.Width(); ++j) { + for (int i = 0; i < A.Height(); ++i) { + A(i, j) = cols[size_t(j)][i]; + } + } + return A; +} + +mfem::DenseMatrix orthonormalBasisTransform(const mfem::DenseMatrix& gram, double& trace_mag) +{ + mfem::DenseMatrix gram_copy(gram); + mfem::Vector evals; + mfem::DenseMatrix evecs; + gram_copy.Eigensystem(evals, evecs); + + trace_mag = 0.0; + for (int i = 0; i < evals.Size(); ++i) { + trace_mag += std::abs(evals[i]); + } + + std::vector kept_columns; + for (int i = 0; i < evals.Size(); ++i) { + if (evals[i] > 1e-9 * trace_mag) { + mfem::Vector col = matrixColumn(evecs, i); + col /= std::sqrt(evals[i]); + kept_columns.emplace_back(std::move(col)); + } + } + + return columnsToMatrix(kept_columns); +} + +mfem::DenseMatrix tripleProduct(const mfem::DenseMatrix& L, const mfem::DenseMatrix& A, const mfem::DenseMatrix& R) +{ + mfem::DenseMatrix tmp(A.Height(), R.Width()); + mfem::Mult(A, R, tmp); + mfem::DenseMatrix out(L.Width(), R.Width()); + mfem::MultAtB(L, tmp, out); + return out; +} + +mfem::Vector projectWithTranspose(const mfem::DenseMatrix& A, const mfem::Vector& x) +{ + mfem::Vector out(A.Width()); + A.MultTranspose(x, out); + return out; +} + +mfem::Vector combineColumns(const mfem::DenseMatrix& basis, const mfem::Vector& coeffs) +{ + mfem::Vector out(basis.Height()); + out = 0.0; + for (int i = 0; i < coeffs.Size(); ++i) { + const mfem::Vector vi = matrixColumn(basis, i); + out.Add(coeffs[i], vi); + } + return out; +} + +mfem::Vector combineDirections(const std::vector& states, const mfem::Vector& coeffs) +{ + mfem::Vector out(*states[0]); + out = 0.0; + for (int i = 0; i < coeffs.Size(); ++i) { + out.Add(coeffs[i], *states[size_t(i)]); + } + return out; +} + +std::vector toPointers(const std::vector>& vectors) +{ + std::vector ptrs; + ptrs.reserve(vectors.size()); + for (const auto& vector : vectors) { + ptrs.push_back(vector.get()); + } + return ptrs; +} + +std::vector prepareExactTrustRegionLeftmosts(CachedTrustRegionSubspaceProblem& prepared, int num_leftmost) +{ + prepared.eigenvalues.SetSize(prepared.projected_rhs.Size()); + prepared.eigenvectors.SetSize(prepared.projected_hessian.Height(), prepared.projected_hessian.Width()); + + mfem::DenseMatrix projected_hessian_copy(prepared.projected_hessian); + projected_hessian_copy.Eigensystem(prepared.eigenvalues, prepared.eigenvectors); + + prepared.eigen_rhs.SetSize(prepared.eigenvalues.Size()); + for (int i = 0; i < prepared.eigenvalues.Size(); ++i) { + const mfem::Vector vi = matrixColumn(prepared.eigenvectors, i); + prepared.eigen_rhs[i] = dot(vi, prepared.projected_rhs); + } + + std::vector reduced_leftmosts; + const int num_leftmost_possible = std::min(num_leftmost, prepared.eigenvalues.Size()); + reduced_leftmosts.reserve(static_cast(num_leftmost_possible)); + prepared.leftvals.clear(); + prepared.leftvals.reserve(static_cast(num_leftmost_possible)); + for (int i = 0; i < num_leftmost_possible; ++i) { + reduced_leftmosts.emplace_back(matrixColumn(prepared.eigenvectors, i)); + prepared.leftvals.emplace_back(prepared.eigenvalues[i]); + } + return reduced_leftmosts; +} + +std::pair solvePreparedExactTrustRegionProblem(const CachedTrustRegionSubspaceProblem& prepared, + double delta) +{ + const mfem::DenseMatrix& A = prepared.projected_hessian; + const mfem::Vector& b = prepared.projected_rhs; + const mfem::Vector& sigs = prepared.eigenvalues; + const mfem::DenseMatrix& V = prepared.eigenvectors; + const mfem::Vector& bv = prepared.eigen_rhs; + + mfem::Vector workspace(6 * b.Size()); + int offset = 0; + auto alloc_vector = [&](int size) { + mfem::Vector v(workspace.GetData() + offset, size); + offset += size; + return v; + }; + + mfem::Vector bvOverSigs = alloc_vector(sigs.Size()); + for (int i = 0; i < sigs.Size(); ++i) { + bvOverSigs[i] = bv[i] / sigs[i]; + } + const double sigScale = sumAbs(sigs) / sigs.Size(); + const double eps = 1e-12 * sigScale; + const mfem::Vector leftMost = matrixColumn(V, 0); + const double minSig = sigs[0]; + + if ((minSig >= eps) && (norm(bvOverSigs) <= delta)) { + mfem::Vector x = combineColumns(V, bvOverSigs); + return std::make_pair(x, true); + } + + double lam = minSig < eps ? -minSig + eps : 0.0; + mfem::Vector sigsPlusLam = alloc_vector(sigs.Size()); + for (int i = 0; i < sigs.Size(); ++i) sigsPlusLam[i] = sigs[i] + lam; + for (int i = 0; i < sigs.Size(); ++i) bvOverSigs[i] = bv[i] / sigsPlusLam[i]; + + if ((minSig < eps) && (norm(bvOverSigs) < delta)) { + mfem::Vector p = combineColumns(V, bvOverSigs); + + const double pz = dot(p, leftMost); + const double pp = dot(p, p); + const double ddmpp = std::max(delta * delta - pp, 0.0); + + const double tau1 = -pz + std::sqrt(pz * pz + ddmpp); + const double tau2 = -pz - std::sqrt(pz * pz + ddmpp); + + mfem::Vector x1 = alloc_vector(p.Size()); + x1 = p; + mfem::Vector x2 = alloc_vector(p.Size()); + x2 = p; + x1.Add(tau1, leftMost); + x2.Add(tau2, leftMost); + + const double e1 = quadraticEnergy(A, b, x1); + const double e2 = quadraticEnergy(A, b, x2); + + return std::make_pair(e1 < e2 ? x1 : x2, true); + } + + mfem::Vector bvbv = alloc_vector(bv.Size()); + for (int i = 0; i < bv.Size(); ++i) bvbv[i] = bv[i] * bv[i]; + for (int i = 0; i < sigs.Size(); ++i) sigsPlusLam[i] = sigs[i] + lam; + + double pNormSq = pnormSquared(bvbv, sigsPlusLam); + double pNorm = std::sqrt(pNormSq); + double bError = (pNorm - delta) / delta; + + size_t iters = 0; + constexpr size_t maxIters = 30; + while ((std::abs(bError) > 1e-9) && (iters++ < maxIters)) { + const double qNormSq = qnormSquared(bvbv, sigsPlusLam); + lam += (pNormSq / qNormSq) * bError; + for (int i = 0; i < sigs.Size(); ++i) sigsPlusLam[i] = sigs[i] + lam; + pNormSq = pnormSquared(bvbv, sigsPlusLam); + pNorm = std::sqrt(pNormSq); + bError = (pNorm - delta) / delta; + } + + const bool success = iters < maxIters; + + for (int i = 0; i < sigs.Size(); ++i) bvOverSigs[i] = bv[i] / sigsPlusLam[i]; + + mfem::Vector x = combineColumns(V, bvOverSigs); + + const double e1 = quadraticEnergy(A, b, x); + mfem::Vector neg_x = alloc_vector(x.Size()); + neg_x = x; + neg_x *= -1.0; + const double e2 = quadraticEnergy(A, b, neg_x); + + x *= (e2 < e1 ? -delta : delta) / norm(x); + + return std::make_pair(x, success); +} + +} // namespace + +/// @brief prepares reduced trust-region subspace data reusable across trust-region radius updates +CachedTrustRegionSubspaceProblem prepareSubspaceProblem(const std::vector& directions, + const std::vector& A_directions, + const mfem::Vector& b, int num_leftmost, MPI_Comm comm) +{ + SMITH_MARK_FUNCTION; + CachedTrustRegionSubspaceProblem prepared; + prepared.zero_solution = b; + prepared.zero_solution = 0.0; + + SubspaceProjections projections = projectSubspaceGlobally(directions, A_directions, b, comm); + mfem::DenseMatrix& sAs = projections.sAs; + sAs.Symmetrize(); + + for (int i = 0; i < sAs.Height(); ++i) { + for (int j = 0; j < sAs.Width(); ++j) { + if (std::isnan(sAs(i, j))) { + throw TrustRegionException("States in subspace solve contain NaNs."); + } + } + } + + mfem::DenseMatrix& ss = projections.ss; + ss.Symmetrize(); + + double trace_mag = 0.0; + mfem::DenseMatrix T = orthonormalBasisTransform(ss, trace_mag); + if (trace_mag == 0.0) { + return prepared; + } + if (T.Width() == 0) { + throw TrustRegionException("No independent directions in MFEM subspace solve."); + } + prepared.projected_hessian = tripleProduct(T, sAs, T); + prepared.projected_hessian.Symmetrize(); + + const mfem::Vector& sb = projections.sb; + prepared.projected_rhs = projectWithTranspose(T, sb); + + for (int j = 0; j < T.Width(); ++j) { + prepared.basis.emplace_back(std::make_shared(combineDirections(directions, matrixColumn(T, j)))); + } + const auto reduced_leftmosts = prepareExactTrustRegionLeftmosts(prepared, num_leftmost); + const auto basis_ptrs = toPointers(prepared.basis); + prepared.leftmosts.clear(); + prepared.leftmosts.reserve(reduced_leftmosts.size()); + for (const auto& leftvec : reduced_leftmosts) { + prepared.leftmosts.emplace_back(std::make_shared(combineDirections(basis_ptrs, leftvec))); + } + + return prepared; +} + +/// @brief solves cached reduced trust-region problem for given trust-region radius +TrustRegionSubspaceResult solvePreparedSubspaceProblem(const CachedTrustRegionSubspaceProblem& prepared, double delta) +{ + SMITH_MARK_FUNCTION; + if (prepared.basis.empty()) { + mfem::Vector sol(prepared.zero_solution); + sol = 0.0; + return std::make_tuple(sol, prepared.leftmosts, prepared.leftvals, 0.0); + } + + auto [reduced_x, success] = solvePreparedExactTrustRegionProblem(prepared, delta); + if (!success) { + throw TrustRegionException("Trust-region subspace solve failed to converge."); + } + const double energy = quadraticEnergy(prepared.projected_hessian, prepared.projected_rhs, reduced_x); + + const auto basis_ptrs = toPointers(prepared.basis); + mfem::Vector sol = combineDirections(basis_ptrs, reduced_x); + return std::make_tuple(sol, prepared.leftmosts, prepared.leftvals, energy); +} + +TrustRegionSubspaceResult solveSubspaceProblem(const std::vector& directions, + const std::vector& A_directions, + const mfem::Vector& b, double delta, int num_leftmost, MPI_Comm comm) +{ + return solvePreparedSubspaceProblem(prepareSubspaceProblem(directions, A_directions, b, num_leftmost, comm), delta); +} + +#else + +TrustRegionSubspaceResult solveSubspaceProblem(const std::vector& directions, + const std::vector& A_directions, + const mfem::Vector& b, double delta, int num_leftmost, MPI_Comm) +{ + throw TrustRegionException("Trust-region subspace solve requires MFEM LAPACK support."); + return std::make_tuple(b, std::vector>{}, std::vector{}, 0.0); +} + +CachedTrustRegionSubspaceProblem prepareSubspaceProblem(const std::vector& directions, + const std::vector& A_directions, + const mfem::Vector& b, int, MPI_Comm) +{ + throw TrustRegionException("Trust-region subspace solve requires MFEM LAPACK support."); + CachedTrustRegionSubspaceProblem prepared; + prepared.zero_solution = b; + return prepared; +} + +TrustRegionSubspaceResult solvePreparedSubspaceProblem(const CachedTrustRegionSubspaceProblem& prepared, double) +{ + throw TrustRegionException("Trust-region subspace solve requires MFEM LAPACK support."); + return std::make_tuple(prepared.zero_solution, std::vector>{}, std::vector{}, + 0.0); +} + +#endif // MFEM_USE_LAPACK + +} // namespace smith diff --git a/src/smith/numerics/steihaug_toint_cg.cpp b/src/smith/numerics/steihaug_toint_cg.cpp new file mode 100644 index 0000000000..b33fae7864 --- /dev/null +++ b/src/smith/numerics/steihaug_toint_cg.cpp @@ -0,0 +1,141 @@ +// Copyright (c) Lawrence Livermore National Security, LLC and +// other Smith Project Developers. See the top-level LICENSE file for +// details. +// +// SPDX-License-Identifier: (BSD-3-Clause) + +#include "smith/numerics/steihaug_toint_cg.hpp" + +namespace smith { + +namespace { + +bool isDescentDirection(double directional_derivative) { return directional_derivative < 0.0; } + +void projectToBoundaryWithCoefs(mfem::Vector& z, const mfem::Vector& d, double delta, double zz, double zd, double dd) +{ + const double deltadelta_m_zz = std::max(delta * delta - zz, 0.0); + if (deltadelta_m_zz == 0.0) return; + const double tau = (std::sqrt(deltadelta_m_zz * dd + zd * zd) - zd) / dd; + z.Add(tau, d); +} + +} // namespace + +std::vector dotMany(const std::vector& pairs) +{ + std::vector products(pairs.size(), 0.0); + for (size_t i = 0; i < pairs.size(); ++i) { + MFEM_ASSERT(pairs[i].first->Size() == pairs[i].second->Size(), "Incompatible vector sizes."); + products[i] = (*pairs[i].first) * (*pairs[i].second); + } + return products; +} + +bool isDescentDirection(const mfem::Vector& direction, const mfem::Vector& residual, const DotManyFunction& dot_many) +{ + return isDescentDirection(dot_many({{&direction, &residual}})[0]); +} + +void steihaugTointCG(const mfem::Vector& r0, mfem::Vector& rCurrent, const mfem::Operator& H, const mfem::Solver* P, + const TrustRegionSettings& settings, double& trSize, TrustRegionResults& results, + double r0_norm_squared, const DotManyFunction& dot_many) +{ + // minimize r0@z + 0.5*z@J@z + results.interior_status = TrustRegionResults::Status::Interior; + results.cg_iterations_count = 0; + + auto& z = results.z; + auto& cgIter = results.cg_iterations_count; + auto& d = results.d; + auto& Pr = results.Pr; + auto& Hd = results.H_d; + + const double cg_tol_squared = settings.cg_tol * settings.cg_tol; + + if (r0_norm_squared <= cg_tol_squared && settings.min_cg_iterations == 0) { + return; + } + + rCurrent = r0; + if (P) { + P->Mult(rCurrent, Pr); + } else { + Pr = rCurrent; + } + + // d = -Pr + d = Pr; + d *= -1.0; + + z = 0.0; + double zz = 0.; + + // rPr = dot(rCurrent, Pr) + double rPr = dot_many({{&rCurrent, &Pr}})[0]; + + for (cgIter = 1; cgIter <= settings.max_cg_iterations; ++cgIter) { + H.Mult(d, Hd); + + auto dots = dot_many({{&d, &rCurrent}, {&d, &Hd}, {&z, &d}, {&d, &d}}); + double descent_check = dots[0]; + double curvature = dots[1]; + double zd = dots[2]; + double dd = dots[3]; + + if (!isDescentDirection(descent_check)) { + results.interior_status = TrustRegionResults::Status::NonDescentDirection; + return; + } + + const double alphaCg = curvature != 0.0 ? rPr / curvature : 0.0; + const double zzNp1 = zz + 2.0 * alphaCg * zd + alphaCg * alphaCg * dd; + + const bool go_to_boundary = curvature <= 0 || zzNp1 >= trSize * trSize; + if (go_to_boundary) { + projectToBoundaryWithCoefs(z, d, trSize, zz, zd, dd); + if (curvature <= 0) { + results.interior_status = TrustRegionResults::Status::NegativeCurvature; + } else { + results.interior_status = TrustRegionResults::Status::OnBoundary; + } + return; + } + + // Alias Pr as temporary workspace 'zPred' to avoid allocation + auto& zPred = Pr; + zPred = z; + zPred.Add(alphaCg, d); + z = zPred; + + if (results.interior_status == TrustRegionResults::Status::NonDescentDirection) { + return; + } + + rCurrent.Add(alphaCg, Hd); + + if (P) { + P->Mult(rCurrent, Pr); + } else { + Pr = rCurrent; + } + + auto dots2 = dot_many({{&rCurrent, &Pr}, {&rCurrent, &rCurrent}}); + double rPrNp1 = dots2[0]; + double r_current_norm_squared = dots2[1]; + + if (r_current_norm_squared <= cg_tol_squared && cgIter >= settings.min_cg_iterations) { + return; + } + + double beta = rPrNp1 / rPr; + rPr = rPrNp1; + d *= beta; + d.Add(-1.0, Pr); + + zz = zzNp1; + } + cgIter--; +} + +} // namespace smith diff --git a/src/smith/numerics/steihaug_toint_cg.hpp b/src/smith/numerics/steihaug_toint_cg.hpp new file mode 100644 index 0000000000..60c3d2c371 --- /dev/null +++ b/src/smith/numerics/steihaug_toint_cg.hpp @@ -0,0 +1,131 @@ +// Copyright (c) Lawrence Livermore National Security, LLC and +// other Smith Project Developers. See the top-level LICENSE file for +// details. +// +// SPDX-License-Identifier: (BSD-3-Clause) + +#pragma once + +#include +#include +#include +#include + +#include "mfem.hpp" + +namespace smith { + +/// Internal structure for storing trust region settings +struct TrustRegionSettings { + /// cg tol + double cg_tol = 1e-8; + /// min cg iters + size_t min_cg_iterations = 0; // + /// max cg iters should be around # of system dofs + size_t max_cg_iterations = 10000; // + /// max cumulative iterations + size_t max_cumulative_iteration = 1; + /// minimum trust region size + double min_tr_size = 1e-13; + /// trust region decrease factor + double t1 = 0.25; + /// trust region increase factor + double t2 = 1.75; + /// worse case energy drop ratio. trust region accepted if energy drop is better than this. + double eta1 = 1e-9; + /// non-ideal energy drop ratio. trust region decreases if energy drop is worse than this. + double eta2 = 0.1; + /// ideal energy drop ratio. trust region increases if energy drop is better than this. + double eta3 = 0.6; + /// parameter limiting how fast the energy can drop relative to the prediction (in case the energy surrogate is poor) + double eta4 = 4.2; +}; + +/// Internal structure for storing trust region stateful data +struct TrustRegionResults { + /// Constructor takes the size of the solution vector + TrustRegionResults(int size) + { + z.SetSize(size); + H_z.SetSize(size); + d_old.SetSize(size); + H_d_old.SetSize(size); + d.SetSize(size); + H_d.SetSize(size); + Pr.SetSize(size); + cauchy_point.SetSize(size); + H_cauchy_point.SetSize(size); + z = 0.0; + H_z = 0.0; + d_old = 0.0; + H_d_old = 0.0; + d = 0.0; + H_d = 0.0; + Pr = 0.0; + cauchy_point = 0.0; + H_cauchy_point = 0.0; + } + + /// resets trust region results for a new outer iteration + void reset() + { + z = 0.0; + cauchy_point = 0.0; + } + + /// enumerates the possible final status of the trust region steps + enum class Status + { + Interior, + NegativeCurvature, + OnBoundary, + NonDescentDirection + }; + + /// step direction + mfem::Vector z; + /// action of hessian on current step z + mfem::Vector H_z; + /// old step direction + mfem::Vector d_old; + /// action of hessian on previous step z_old + mfem::Vector H_d_old; + /// true after at least one accepted line-search step has populated d_old + bool has_d_old = false; + /// incrementalCG direction + mfem::Vector d; + /// action of hessian on direction d + mfem::Vector H_d; + /// preconditioned residual + mfem::Vector Pr; + /// cauchy point + mfem::Vector cauchy_point; + /// action of hessian on direction of cauchy point + mfem::Vector H_cauchy_point; + /// specifies if step is interior, exterior, negative curvature, etc. + Status interior_status = Status::Interior; + /// iteration counter + size_t cg_iterations_count = 0; +}; + +using DotPair = std::pair; ///< using +using DotManyFunction = std::function(const std::vector&)>; ///< using + +/// compute local dot products for many vector pairs +std::vector dotMany(const std::vector& pairs); + +/// true when direction is locally downhill for the quadratic model's linear term +bool isDescentDirection(const mfem::Vector& direction, const mfem::Vector& residual, const DotManyFunction& dot_many); + +/** + * @brief Minimize quadratic sub-problem given residual vector, the action of the stiffness and a preconditioner + * + * This is a standard implementation of 'The Conjugate Gradient Method and Trust Regions in Large Scale Optimization' + * by T. Steihaug. It is also called the Steihaug-Toint CG trust region algorithm (see also Trust Region Methods + * by Conn, Gould, and Toint). + */ +void steihaugTointCG(const mfem::Vector& r0, mfem::Vector& rCurrent, const mfem::Operator& H, const mfem::Solver* P, + const TrustRegionSettings& settings, double& trSize, TrustRegionResults& results, + double r0_norm_squared, const DotManyFunction& dot_many); + +} // namespace smith diff --git a/src/smith/numerics/tests/CMakeLists.txt b/src/smith/numerics/tests/CMakeLists.txt index 10e693b21a..aa2ea5e011 100644 --- a/src/smith/numerics/tests/CMakeLists.txt +++ b/src/smith/numerics/tests/CMakeLists.txt @@ -10,9 +10,11 @@ set(numerics_serial_test_sources test_equationsolver.cpp test_operator.cpp test_odes.cpp + test_steihaug_toint_cg.cpp test_block_preconditioner.cpp test_block_preconditioner_backend.cpp test_block_preconditioner_custom_operators.cpp + test_trust_region_solver_mfem.cpp ) smith_add_tests( SOURCES ${numerics_serial_test_sources} @@ -30,7 +32,6 @@ if(PETSC_FOUND) if(SLEPC_FOUND) set(slepc_solver_tests test_eigensolver.cpp - test_trust_region_solver.cpp ) smith_add_tests(SOURCES ${slepc_solver_tests} DEPENDS_ON ${numerics_test_dependencies} diff --git a/src/smith/numerics/tests/test_steihaug_toint_cg.cpp b/src/smith/numerics/tests/test_steihaug_toint_cg.cpp new file mode 100644 index 0000000000..5522144763 --- /dev/null +++ b/src/smith/numerics/tests/test_steihaug_toint_cg.cpp @@ -0,0 +1,105 @@ +// Copyright (c) Lawrence Livermore National Security, LLC and +// other Smith Project Developers. See the top-level LICENSE file for +// details. +// +// SPDX-License-Identifier: (BSD-3-Clause) + +#include +#include "smith/numerics/steihaug_toint_cg.hpp" + +TEST(SteihaugTointCG, SolvesSPDInsideBoundary) +{ + int size = 2; + mfem::Vector diag(size); + diag[0] = 2.0; + diag[1] = 4.0; + mfem::SparseMatrix H(diag); + + mfem::Vector r0(size); + r0[0] = 1.0; + r0[1] = 1.0; + + smith::TrustRegionSettings settings; + settings.cg_tol = 1e-10; + settings.max_cg_iterations = 10; + + double trSize = 100.0; // Huge trust region + smith::TrustRegionResults results(size); + + mfem::Vector rCurrent(size); + + smith::steihaugTointCG(r0, rCurrent, H, nullptr, settings, trSize, results, r0.Norml2() * r0.Norml2(), + smith::dotMany); + + // Solution should be H^{-1} (-r0) + // x = -0.5, y = -0.25 + EXPECT_NEAR(results.z[0], -0.5, 1e-9); + EXPECT_NEAR(results.z[1], -0.25, 1e-9); + EXPECT_EQ(results.interior_status, smith::TrustRegionResults::Status::Interior); +} + +TEST(SteihaugTointCG, HitsBoundary) +{ + int size = 1; + mfem::Vector diag(size); + diag[0] = 1.0; + mfem::SparseMatrix H(diag); + + mfem::Vector r0(size); + r0[0] = 1.0; + + smith::TrustRegionSettings settings; + settings.max_cg_iterations = 10; + + double trSize = 0.5; // Small trust region, solution would be -1.0 + smith::TrustRegionResults results(size); + + mfem::Vector rCurrent(size); + + smith::steihaugTointCG(r0, rCurrent, H, nullptr, settings, trSize, results, r0.Norml2() * r0.Norml2(), + smith::dotMany); + + EXPECT_NEAR(results.z.Norml2(), 0.5, 1e-9); + EXPECT_EQ(results.interior_status, smith::TrustRegionResults::Status::OnBoundary); +} + +TEST(SteihaugTointCG, DetectsNegativeCurvature) +{ + int size = 1; + mfem::Vector diag(size); + diag[0] = -1.0; // Negative curvature + mfem::SparseMatrix H(diag); + + mfem::Vector r0(size); + r0[0] = 1.0; + + smith::TrustRegionSettings settings; + settings.max_cg_iterations = 10; + + double trSize = 2.0; + smith::TrustRegionResults results(size); + + mfem::Vector rCurrent(size); + + smith::steihaugTointCG(r0, rCurrent, H, nullptr, settings, trSize, results, r0.Norml2() * r0.Norml2(), + smith::dotMany); + + // For negative curvature, it should go to boundary + EXPECT_NEAR(results.z.Norml2(), 2.0, 1e-9); + EXPECT_EQ(results.interior_status, smith::TrustRegionResults::Status::NegativeCurvature); +} + +TEST(SteihaugTointCG, DetectsDirectlyFlippedAscentDirection) +{ + mfem::Vector residual(2); + residual[0] = 1.0; + residual[1] = -2.0; + + mfem::Vector descent_direction(residual); + descent_direction *= -1.0; + EXPECT_TRUE(smith::isDescentDirection(descent_direction, residual, smith::dotMany)); + + mfem::Vector ascent_direction(descent_direction); + ascent_direction *= -1.0; + EXPECT_FALSE(smith::isDescentDirection(ascent_direction, residual, smith::dotMany)); +} diff --git a/src/smith/numerics/tests/test_trust_region_solver.cpp b/src/smith/numerics/tests/test_trust_region_solver.cpp deleted file mode 100644 index af030fc4c3..0000000000 --- a/src/smith/numerics/tests/test_trust_region_solver.cpp +++ /dev/null @@ -1,165 +0,0 @@ -// Copyright (c) Lawrence Livermore National Security, LLC and -// other Smith Project Developers. See the top-level LICENSE file for -// details. -// -// SPDX-License-Identifier: (BSD-3-Clause) - -#include -#include -#include -#include - -#include "gtest/gtest.h" -#include "mfem.hpp" - -#include "smith/physics/state/state_manager.hpp" -#include "smith/infrastructure/application_manager.hpp" -#include "smith/numerics/trust_region_solver.hpp" -#include "smith/infrastructure/profiling.hpp" -#include "smith/mesh_utils/mesh_utils.hpp" -#include "smith/numerics/functional/finite_element.hpp" -#include "smith/physics/state/finite_element_state.hpp" -#include "smith/physics/state/finite_element_vector.hpp" -#include "smith/numerics/petsc_solvers.hpp" - -const std::string MESHTAG = "mesh"; - -static constexpr int scalar_field_order = 1; - -struct MeshFixture : public testing::Test { - void SetUp() - { - smith::StateManager::initialize(datastore_, "solver_test"); - - auto mfem_shape = mfem::Element::QUADRILATERAL; - - double length = 0.5; - double width = 2.0; - auto meshtmp = - smith::mesh::refineAndDistribute(mfem::Mesh::MakeCartesian2D(2, 1, mfem_shape, true, length, width), 0, 0); - mesh_ = &smith::StateManager::setMesh(std::move(meshtmp), MESHTAG); - } - - axom::sidre::DataStore datastore_; - mfem::ParMesh* mesh_; -}; - -std::vector applyLinearOperator(const Mat& A, const std::vector& states) -{ - std::vector Astates; - for (auto s : states) { - Astates.emplace_back(*s); - } - - int local_rows(states[0]->Size()); - int global_rows(smith::globalSize(*states[0], PETSC_COMM_WORLD)); - - Vec x; - Vec y; - - VecCreateMPI(PETSC_COMM_WORLD, local_rows, global_rows, &x); - VecCreateMPI(PETSC_COMM_WORLD, local_rows, global_rows, &y); - - PetscInt iStart, iEnd; - VecGetOwnershipRange(x, &iStart, &iEnd); - - std::vector col_indices; - col_indices.reserve(static_cast(local_rows)); - for (int i = iStart; i < iEnd; ++i) { - col_indices.push_back(i); - } - - size_t num_cols = states.size(); - for (size_t c = 0; c < num_cols; ++c) { - VecSetValues(x, local_rows, &col_indices[0], &(*states[c])[0], INSERT_VALUES); - VecAssemblyBegin(x); - VecAssemblyEnd(x); - MatMult(A, x, y); - VecGetValues(y, local_rows, &col_indices[0], &Astates[c][0]); - } - - VecDestroy(&x); - VecDestroy(&y); - - return Astates; -} - -// auto createDiagonalTestMatrix(smith::FiniteElementState& x) -auto createDiagonalTestMatrix(mfem::Vector& x) -{ - const int local_rows = x.Size(); - mfem::Vector one = x; - one = 1.0; - const int global_rows = smith::globalSize(x, PETSC_COMM_WORLD); - - Vec b; - VecCreateMPI(PETSC_COMM_WORLD, local_rows, global_rows, &b); - - PetscInt iStart, iEnd; - VecGetOwnershipRange(b, &iStart, &iEnd); - VecDestroy(&b); - - std::vector col_indices; - col_indices.reserve(static_cast(local_rows)); - for (int i = iStart; i < iEnd; ++i) { - col_indices.push_back(i); - } - - std::vector row_offsets(static_cast(local_rows) + 1); - for (int i = 0; i < local_rows + 1; ++i) { - row_offsets[static_cast(i)] = i; - } - - Mat A; - MatCreateMPIAIJWithArrays(PETSC_COMM_WORLD, local_rows, local_rows, global_rows, global_rows, &row_offsets[0], - &col_indices[0], &x[0], &A); - - return A; -} - -TEST_F(MeshFixture, QR) -{ - SMITH_MARK_FUNCTION; - - auto u1 = smith::StateManager::newState(smith::H1{}, "u1", MESHTAG); - auto u2 = smith::StateManager::newState(smith::H1{}, "u2", MESHTAG); - auto u3 = smith::StateManager::newState(smith::H1{}, "u3", MESHTAG); - auto u4 = smith::StateManager::newState(smith::H1{}, "u4", MESHTAG); - auto a = smith::StateManager::newState(smith::H1{}, "a", MESHTAG); - auto b = smith::StateManager::newState(smith::H1{}, "b", MESHTAG); - - u1 = 1.0; - for (int i = 0; i < u2.Size(); ++i) { - u2[i] = i + 2; - u3[i] = i * i - 15.0; - u4[i] = -i + 0.1 * i * i * i - 1.0; - a[i] = 2 * i + 0.01 * i * i + 1.25; - b[i] = -i + 0.02 * i * i + 0.1; - } - std::vector states = {&u1, &u2, &u3}; //,u4}; - - auto A_parallel = createDiagonalTestMatrix(a); - std::vector Astates = applyLinearOperator(A_parallel, states); - - std::vector AstatePtrs; - for (size_t i = 0; i < Astates.size(); ++i) { - AstatePtrs.push_back(&Astates[i]); - } - - double delta = 0.001; - auto [sol, leftvecs, leftvals, energy] = smith::solveSubspaceProblem(states, AstatePtrs, b, delta, 1); - - smith::FiniteElementState smith_sol(b); - smith_sol = sol; - - EXPECT_NEAR(std::sqrt(smith::innerProduct(smith_sol, smith_sol)), delta, 1e-12); - - MatDestroy(&A_parallel); -} - -int main(int argc, char* argv[]) -{ - ::testing::InitGoogleTest(&argc, argv); - smith::ApplicationManager applicationManager(argc, argv); - return RUN_ALL_TESTS(); -} diff --git a/src/smith/numerics/tests/test_trust_region_solver_mfem.cpp b/src/smith/numerics/tests/test_trust_region_solver_mfem.cpp new file mode 100644 index 0000000000..46cbe0e39b --- /dev/null +++ b/src/smith/numerics/tests/test_trust_region_solver_mfem.cpp @@ -0,0 +1,166 @@ +// Copyright (c) Lawrence Livermore National Security, LLC and +// other Smith Project Developers. See the top-level LICENSE file for +// details. +// +// SPDX-License-Identifier: (BSD-3-Clause) + +#include +#include + +#include "gtest/gtest.h" +#include "mfem.hpp" + +#include "smith/infrastructure/application_manager.hpp" +#include "smith/numerics/trust_region_solver.hpp" + +namespace { + +constexpr int test_size = 5; +constexpr double test_delta = 1.0e-3; + +std::vector applyDiagonalOperator(const mfem::Vector& diag, + const std::vector& states) +{ + std::vector out; + out.reserve(states.size()); + for (const auto* state : states) { + out.emplace_back(state->Size()); + for (int i = 0; i < state->Size(); ++i) { + out.back()[i] = diag[i] * (*state)[i]; + } + } + return out; +} + +std::vector toPointers(const std::vector& vectors) +{ + std::vector ptrs; + ptrs.reserve(vectors.size()); + for (const auto& v : vectors) { + ptrs.push_back(&v); + } + return ptrs; +} + +struct DiagonalSubspaceFixture { + DiagonalSubspaceFixture(int size) : u1(size), u2(size), u3(size), diag(size), b(size) + { + u1 = 1.0; + for (int i = 0; i < size; ++i) { + u2[i] = i + 2.0; + u3[i] = i * i - 15.0; + diag[i] = 2.0 * i + 0.01 * i * i + 1.25; + b[i] = -i + 0.02 * i * i + 0.1; + } + } + + mfem::Vector u1; + mfem::Vector u2; + mfem::Vector u3; + mfem::Vector diag; + mfem::Vector b; +}; + +} // namespace + +TEST(TrustRegionSubspaceMfem, SolveHitsTrustRegionBoundary) +{ + DiagonalSubspaceFixture fixture(test_size); + + const std::vector states = {&fixture.u1, &fixture.u2, &fixture.u3}; + const auto astates = applyDiagonalOperator(fixture.diag, states); + const auto astate_ptrs = toPointers(astates); + + auto [sol, leftvecs, leftvals, energy] = smith::solveSubspaceProblem(states, astate_ptrs, fixture.b, test_delta, 1); + + EXPECT_NEAR(sol.Norml2(), test_delta, 1.0e-12); + EXPECT_FALSE(leftvecs.empty()); + EXPECT_EQ(leftvals.size(), 1); + EXPECT_LT(energy, 0.0); +} + +TEST(TrustRegionSubspaceMfem, SolveHandlesZeroDirection) +{ + mfem::Vector u1(4); + mfem::Vector u2(4); + mfem::Vector zero(4); + mfem::Vector diag(4); + mfem::Vector b(4); + + zero = 0.0; + for (int i = 0; i < 4; ++i) { + u1[i] = 1.0 + i; + u2[i] = 0.25 * i - 0.5; + diag[i] = 1.0 + i; + b[i] = 0.5 - 0.1 * i; + } + + const std::vector states = {&u1, &zero, &u2}; + const auto astates = applyDiagonalOperator(diag, states); + const auto astate_ptrs = toPointers(astates); + + auto [sol, leftvecs, leftvals, energy] = smith::solveSubspaceProblem(states, astate_ptrs, b, 0.25, 1); + + EXPECT_LE(sol.Norml2(), 0.25 + 1.0e-12); + EXPECT_FALSE(leftvecs.empty()); + EXPECT_EQ(leftvals.size(), 1); + EXPECT_LT(energy, 0.0); +} + +TEST(TrustRegionSubspaceMfem, SolveIndefiniteHardCaseUsesShiftedNewtonPoint) +{ + mfem::Vector e0(2); + mfem::Vector e1(2); + mfem::Vector Ae0(2); + mfem::Vector Ae1(2); + mfem::Vector b(2); + + e0 = 0.0; + e1 = 0.0; + Ae0 = 0.0; + Ae1 = 0.0; + b = 0.0; + + e0[0] = 1.0; + e1[1] = 1.0; + Ae0[0] = -1.0; + Ae1[1] = 2.0; + b[1] = 1.0; + + const std::vector states = {&e0, &e1}; + const std::vector astates = {&Ae0, &Ae1}; + + auto [sol, leftvecs, leftvals, energy] = smith::solveSubspaceProblem(states, astates, b, 1.0, 1); + + EXPECT_NEAR(sol.Norml2(), 1.0, 1.0e-12); + EXPECT_NEAR(std::abs(sol[0]), std::sqrt(8.0 / 9.0), 1.0e-10); + EXPECT_NEAR(sol[1], 1.0 / 3.0, 1.0e-10); + EXPECT_EQ(leftvecs.size(), 1); + EXPECT_EQ(leftvals.size(), 1); + EXPECT_NEAR(leftvals[0], -1.0, 1.0e-12); + EXPECT_NEAR(energy, -2.0 / 3.0, 1.0e-10); +} + +TEST(TrustRegionSubspaceMfem, SolveThrowsOnNanProjection) +{ + mfem::Vector state(2); + mfem::Vector astate(2); + mfem::Vector b(2); + + state = 1.0; + astate = 1.0; + b = 0.0; + astate[1] = std::numeric_limits::quiet_NaN(); + + const std::vector states = {&state}; + const std::vector astates = {&astate}; + + EXPECT_THROW(smith::solveSubspaceProblem(states, astates, b, 1.0, 1), smith::TrustRegionException); +} + +int main(int argc, char* argv[]) +{ + ::testing::InitGoogleTest(&argc, argv); + smith::ApplicationManager applicationManager(argc, argv); + return RUN_ALL_TESTS(); +} diff --git a/src/smith/numerics/trust_region_solver.cpp b/src/smith/numerics/trust_region_solver.cpp deleted file mode 100644 index 8d8d04a9cc..0000000000 --- a/src/smith/numerics/trust_region_solver.cpp +++ /dev/null @@ -1,496 +0,0 @@ -// Copyright (c) Lawrence Livermore National Security, LLC and -// other Smith Project Developers. See the top-level LICENSE file for -// details. -// -// SPDX-License-Identifier: (BSD-3-Clause) - -#include "smith/numerics/trust_region_solver.hpp" - -#ifdef SMITH_USE_SLEPC - -#include - -#include "smith/infrastructure/profiling.hpp" -#include "smith/numerics/dense_petsc.hpp" - -namespace smith { - -/** - * @brief Get the global size of a mfem vector - * @param parallel_v Vector to check global size - * @param comm Parallel communicator - */ -int globalSize(const mfem::Vector& parallel_v, const MPI_Comm& comm) -{ - int local_size = parallel_v.Size(); - int global_size; - MPI_Allreduce(&local_size, &global_size, 1, MPI_INT, MPI_SUM, comm); - return global_size; -} - -/// @brief struct which aids in moving between mfem::Vector and petsc BV -struct BasisVectors { - /** - * @brief Construct with a representative state to set sizes - * @param state The state which is used to set sizes for basis vectors - */ - BasisVectors(const mfem::Vector& state) : local_rows(state.Size()), global_rows(globalSize(state, PETSC_COMM_WORLD)) - { - VecCreateMPI(PETSC_COMM_WORLD, local_rows, global_rows, &v); - - PetscInt iStart, iEnd; - VecGetOwnershipRange(v, &iStart, &iEnd); - - col_indices.reserve(size_t(local_rows)); - for (int i = iStart; i < iEnd; ++i) { - col_indices.push_back(i); - } - } - - /** - * @brief Destructor - */ - ~BasisVectors() { VecDestroy(&v); } - - /** - * @brief Construct petsc BV from vector of mfem::Vector - * @param states The states used to construct basis vectors - */ - BV constructBases(const std::vector& states) const - { - size_t num_cols = states.size(); - BV Q; - BVCreate(PETSC_COMM_SELF, &Q); - BVSetType(Q, BVVECS); - BVSetSizesFromVec(Q, v, static_cast(num_cols)); - for (size_t c = 0; c < num_cols; ++c) { - VecSetValues(v, local_rows, &col_indices[0], &(*states[c])[0], INSERT_VALUES); - VecAssemblyBegin(v); - VecAssemblyEnd(v); - int c_int = static_cast(c); - BVInsertVec(Q, c_int, v); - } - return Q; - } - - private: - const int local_rows; - const int global_rows; - - std::vector col_indices; - Vec v; -}; - -/** - * @brief Create a petsc vector from a mfem::Vector - * @param state The state used to create an mfem::Vector - */ -Vec petscVec(const mfem::Vector& state) -{ - const int local_rows = state.Size(); - const int global_rows = globalSize(state, PETSC_COMM_WORLD); - - Vec v; - VecCreateMPI(PETSC_COMM_WORLD, local_rows, global_rows, &v); - - PetscInt iStart, iEnd; - VecGetOwnershipRange(v, &iStart, &iEnd); - - std::vector col_indices; - col_indices.reserve(static_cast(local_rows)); - for (int i = iStart; i < iEnd; ++i) { - col_indices.push_back(i); - } - - VecSetValues(v, local_rows, &col_indices[0], &state[0], INSERT_VALUES); - - VecAssemblyBegin(v); - VecAssemblyEnd(v); - - return v; -} - -/** - * @brief Copy a petsc vector to an mfem::Vector - * @param v The petsc vector - * @param s The mfem vector - */ -void copy(const Vec& v, mfem::Vector& s) -{ - const int local_rows = s.Size(); - PetscInt iStart, iEnd; - VecGetOwnershipRange(v, &iStart, &iEnd); - - SLIC_ERROR_IF(local_rows != iEnd - iStart, - "Inconsistency between local t-dof vector size and petsc start and end indices"); - - std::vector col_indices; - col_indices.reserve(static_cast(local_rows)); - for (int i = iStart; i < iEnd; ++i) { - col_indices.push_back(i); - } - - VecGetValues(v, local_rows, &col_indices[0], &s[0]); -} - -/** - * @brief The reduced matrix in the space of {s} - * @param s The vector of mfem::Vector of directions - * @param As The vector of mfem::Vector of a global matrix A operated on directions - */ -Mat dot(const std::vector& s, const std::vector& As) -{ - SLIC_ERROR_IF(s.size() != As.size(), - "Search directions and their linear operator result must have same number of columns"); - size_t num_cols = s.size(); - int num_cols_int = static_cast(num_cols); - Mat sAs; - MatCreateSeqDense(PETSC_COMM_SELF, num_cols_int, num_cols_int, NULL, &sAs); - for (size_t i = 0; i < num_cols; ++i) { - for (size_t j = 0; j < num_cols; ++j) { - MatSetValue(sAs, static_cast(i), static_cast(j), mfem::InnerProduct(PETSC_COMM_WORLD, *s[i], *As[j]), - INSERT_VALUES); - } - } - MatAssemblyBegin(sAs, MAT_FINAL_ASSEMBLY); - MatAssemblyEnd(sAs, MAT_FINAL_ASSEMBLY); - return sAs; -} - -/** - * @brief The reduced vector s.T*b - * @param s The vector of mfem::vector of directions - * @param b The right hand size vector to be reduced - */ -Vec dot(const std::vector& s, const mfem::Vector& b) -{ - size_t num_cols = s.size(); - Vec sb; - VecCreateSeq(PETSC_COMM_SELF, static_cast(num_cols), &sb); - for (size_t i = 0; i < num_cols; ++i) { - VecSetValue(sb, static_cast(i), mfem::InnerProduct(PETSC_COMM_WORLD, *s[i], b), INSERT_VALUES); - } - return sb; -} - -/** - * @brief The qr decomposition of the state vectors - * @param states The vector of mfem::vectors of directions - * @return Pair of BV Q and DenseMat R - */ -auto qr(const std::vector& states) -{ - BasisVectors bvs(*states[0]); - BV Q = bvs.constructBases(states); - - Mat R; - int num_cols = static_cast(states.size()); - MatCreateSeqDense(PETSC_COMM_SELF, num_cols, num_cols, NULL, &R); - auto error = BVOrthogonalize(Q, R); - - if (error) throw PetscException("BVOrthogonalize failed."); - - return std::make_pair(Q, DenseMat(R)); -} - -/** - * @brief compute the quadratic energy from small dense matrices and vectors - * @param A The stiffness matrix - * @param b The rhs vector - * @param x The current solution vector - * @return The quadratic, linearized energy approximation - */ -double quadraticEnergy(const DenseMat& A, const DenseVec& b, const DenseVec& x) -{ - DenseVec Ax = A * x; - double xAx = dot(x, Ax); - double xb = dot(x, b); - return 0.5 * xAx - xb; -} - -/** - * @brief compute the pnorm_squared - * @param bvv input vector - * @param sig eigenvectors - */ -double pnorm_squared(const DenseVec& bvv, const DenseVec& sig) -{ - auto bvv_div_sig_squared = bvv / (sig * sig); - return sum(bvv_div_sig_squared); -} - -/** - * @brief compute the qnorm_squared - * @param bvv input vector - * @param sig eigenvectors - */ -double qnorm_squared(const DenseVec& bvv, const DenseVec& sig) -{ - auto bvv_div_sig_cubed = bvv / (sig * sig * sig); - return sum(bvv_div_sig_cubed); - // return bvv.dot((1.0 / (sig * sig * sig)).matrix()); -} - -// returns: -// minimum energy solution within delta -// N leftmost eigenvectors -// N smallest eigenvalue -// success status - -/** - * @brief solve the trust region problem exactly using a variant of the Moore Sorensen algorithm - * @param A matrix - * @param b rhs - * @param delta trust region radius - * @param num_leftmost the number of leftmost eigenvector/values to output - * returns the solution vector, a std::vector of leftmost vectors - * a std::vector of leftmost eigenvalues and the energy change (relative to x=0) - */ -auto exactTrustRegionSolve(DenseMat A, const DenseVec& b, double delta, int num_leftmost) -{ - // minimize 1/2 x^T A x - b^T x, s.t. norm(x) <= delta - auto [isize, jsize] = A.size(); - auto isize2 = b.size(); - SLIC_ERROR_IF(isize != jsize, "Exact trust region solver requires square matrices"); - SLIC_ERROR_IF(isize != isize2, - "The right hand size for exact trust region solve must be consistent with the input matrix size"); - - auto [sigs, V] = eigh(A); - std::vector leftmosts; - std::vector minsigs; - size_t num_leftmost_possible(size_t(std::min(num_leftmost, isize))); - for (size_t i = 0; i < num_leftmost_possible; ++i) { - leftmosts.emplace_back(V[i]); - minsigs.emplace_back(sigs[i]); - } - - const auto& leftMost = V[0]; - double minSig = sigs[0]; - - // bv = V.T b, V has columns which are eigenvectors - DenseVec bv(isize); - for (size_t i = 0; i < size_t(isize); ++i) { - bv.setValue(i, dot(V[i], b)); - } - - DenseVec bvOverSigs = bv / sigs; - double sigScale = sum(abs(sigs)) / isize; - double eps = 1e-12 * sigScale; - - // Check if solution is inside the trust region - if ((minSig >= eps) && (norm(bvOverSigs) <= delta)) { - return std::make_tuple(A.solve(b), leftmosts, minsigs, true); - } - - // if we get here, the solution must be on the tr boundary - // consider bounding the initial guess, see More' Sorenson paper - double lam = minSig < eps ? -minSig + eps : 0.0; - - // try to solve this for lam: - // (A + lam I)p = b, such that norm(p) = Delta - DenseVec sigsPlusLam = sigs + lam; - - bvOverSigs = bv / sigsPlusLam; - - // Check for the hard case - if ((minSig < eps) && (norm(bvOverSigs) < delta)) { - DenseVec p(isize); - p = 0.0; - for (int i = 0; i < isize; ++i) { - p.add(bv[i], V[size_t(i)]); - } - - const auto& z = leftMost; - double pz = dot(p, z); - double pp = dot(p, p); - double ddmpp = std::max(delta * delta - pp, 0.0); - - double tau1 = -pz + std::sqrt(pz * pz + ddmpp); - double tau2 = -pz - std::sqrt(pz * pz + ddmpp); - - DenseVec x1(p); - DenseVec x2(p); - x1.add(tau1, z); - x2.add(tau2, z); - - double e1 = quadraticEnergy(A, b, x1); - double e2 = quadraticEnergy(A, b, x2); - - DenseVec x = e1 < e2 ? x1 : x2; - - return std::make_tuple(x, leftmosts, minsigs, true); - } - DenseVec bvbv = bv * bv; - sigsPlusLam = sigs + lam; - - double pNormSq = pnorm_squared(bvbv, sigsPlusLam); - double pNorm = std::sqrt(pNormSq); - double bError = (pNorm - delta) / delta; - - // consider an out if it doesn't converge, or use a better initial guess, or bound the lam from below and above. - size_t iters = 0; - size_t maxIters = 30; - while ((std::abs(bError) > 1e-9) && (iters++ < maxIters)) { - double qNormSq = qnorm_squared(bvbv, sigsPlusLam); - lam += (pNormSq / qNormSq) * bError; - sigsPlusLam = sigs + lam; - pNormSq = pnorm_squared(bvbv, sigsPlusLam); - pNorm = std::sqrt(pNormSq); - bError = (pNorm - delta) / delta; - } - - bool success = true; - if (iters >= maxIters) { - success = false; - } - - bvOverSigs = bv / sigsPlusLam; - - DenseVec x(isize); - x = 0.0; - for (int i = 0; i < isize; ++i) { - x.add(bvOverSigs[i], V[size_t(i)]); - } - - double e1 = quadraticEnergy(A, b, x); - double e2 = quadraticEnergy(A, b, -x); - - if (e2 < e1) { - x *= -delta / norm(x); - } else { - x *= delta / norm(x); - } - - return std::make_tuple(x, leftmosts, minsigs, success); -} - -/// @brief remove the vector at location j and return what is left -std::vector remove_at(const std::vector& a, size_t j) -{ - std::vector b; - for (size_t i = 0; i < a.size(); ++i) { - if (i != j) { - b.emplace_back(a[i]); - } - } - return b; -} - -/// @brief returns the solution, as well as a list of the N leftmost eigenvectors -/// and their eigenvalues, and the predicted model energy change -std::tuple>, std::vector, double> solveSubspaceProblem( - const std::vector& states, const std::vector& Astates, - const mfem::Vector& b, double delta, int num_leftmost) -{ - SMITH_MARK_FUNCTION; - DenseMat sAs1 = dot(states, Astates); - DenseMat sAs = sym(sAs1); - - if (sAs.hasNan()) { - throw PetscException("States in subspace solve contain NaNs."); - return std::make_tuple(b, std::vector>{}, std::vector{}, 0); - } - - auto [Q_parallel, R] = qr(states); - - if (R.hasNan()) { - throw PetscException("R from qr returning with a NaN."); - return std::make_tuple(b, std::vector>{}, std::vector{}, 0); - } - - auto [rows, cols] = R.size(); - SLIC_ERROR_IF(rows != cols, "R matrix is not square in subspace problem solve\n"); - - double trace_mag = 0.0; - for (int i = 0; i < rows; ++i) { - trace_mag += std::abs(R(i, i)); - } - - // remove any nearly colinear state - for (int i = 0; i < rows; ++i) { - if (R(i, i) < 1e-9 * trace_mag) { - // printf("removing after QR state number %d\n", i); - auto statesNew = remove_at(states, size_t(i)); - auto AstatesNew = remove_at(Astates, size_t(i)); - return solveSubspaceProblem(statesNew, AstatesNew, b, delta, num_leftmost); - } - } - - auto Rinv = inverse(R); - DenseMat pAp = sAs.PtAP(Rinv); - - Vec b_parallel = petscVec(b); - std::vector pb_vec(states.size()); - BVDotVec(Q_parallel, b_parallel, &pb_vec[0]); - DenseVec pb(pb_vec); - - auto [reduced_x, leftvecs, leftvals, success] = exactTrustRegionSolve(pAp, pb, delta, num_leftmost); - - double energy = quadraticEnergy(pAp, pb, reduced_x); - - Vec x_parallel; - VecDuplicate(b_parallel, &x_parallel); - - std::vector reduced_x_vec = reduced_x.getValues(); - BVMultVec(Q_parallel, 1.0, 0.0, x_parallel, &reduced_x_vec[0]); - mfem::Vector sol(b); - copy(x_parallel, sol); - - std::vector> leftmosts; - for (size_t i = 0; i < leftvecs.size(); ++i) { - auto reduced_leftvec = leftvecs[i].getValues(); - BVMultVec(Q_parallel, 1.0, 0.0, x_parallel, &reduced_leftvec[0]); - leftmosts.emplace_back(std::make_shared(b)); - copy(x_parallel, *leftmosts[i]); - } - - BVDestroy(&Q_parallel); - VecDestroy(&b_parallel); - VecDestroy(&x_parallel); - return std::make_tuple(sol, leftmosts, leftvals, energy); -} - -/// @brief Remove any obvious dependent directions, namely ones which are scaled version of previous directions -/// The case where they are linear combinations of previous direction will be handled in the QR solver -std::pair, std::vector> removeDependentDirections( - std::vector directions, std::vector A_directions) -{ - SMITH_MARK_FUNCTION; - std::vector norms; - size_t num_dirs = directions.size(); - - for (size_t i = 0; i < num_dirs; ++i) { - norms.push_back(std::sqrt(mfem::InnerProduct(PETSC_COMM_WORLD, *directions[i], *directions[i]))); - } - - std::vector> kepts; - for (size_t i = 0; i < num_dirs; ++i) { - bool keepi = true; - if (norms[i] == 0) keepi = false; - for (auto&& kept_and_j : kepts) { - size_t j = kept_and_j.second; - double dot_ij = mfem::InnerProduct(PETSC_COMM_WORLD, *directions[i], *kept_and_j.first); - if (dot_ij > 0.999 * norms[i] * norms[j]) { - keepi = false; - } - } - // if (!keepi) printf("not keeping %zu\n",i); - if (keepi) { - kepts.emplace_back(std::make_pair(directions[i], i)); - } - } - - std::vector directions_new; - std::vector A_directions_new; - - for (auto kept_and_j : kepts) { - directions_new.push_back(directions[kept_and_j.second]); - A_directions_new.push_back(A_directions[kept_and_j.second]); - } - - return std::make_pair(directions_new, A_directions_new); -} - -} // namespace smith - -#endif // SMITH_USE_SLEPC diff --git a/src/smith/numerics/trust_region_solver.hpp b/src/smith/numerics/trust_region_solver.hpp index ad4b390f18..efd7f27a8b 100644 --- a/src/smith/numerics/trust_region_solver.hpp +++ b/src/smith/numerics/trust_region_solver.hpp @@ -14,21 +14,22 @@ #include "smith/smith_config.hpp" -#ifdef SMITH_USE_SLEPC - +#include #include -#include -#include +#include +#include +#include +#include -#include "smith/physics/state/finite_element_state.hpp" -#include "smith/physics/state/finite_element_dual.hpp" +#include "mfem.hpp" namespace smith { -class PetscException : public std::exception { +/// Exception type for trust-region subspace solve failures. +class TrustRegionException : public std::exception { public: /// constructor - PetscException(const std::string& message) : msg(message) {} + TrustRegionException(const std::string& message) : msg(message) {} /// what is message const char* what() const noexcept override { return msg.c_str(); } @@ -38,6 +39,32 @@ class PetscException : public std::exception { std::string msg; }; +/// Subspace solution, leftmost eigenvectors, leftmost eigenvalues, and predicted model energy change. +using TrustRegionSubspaceResult = + std::tuple>, std::vector, double>; + +/// Cached reduced trust-region subspace data reusable across trust-region radius updates. +struct CachedTrustRegionSubspaceProblem { + /// zero vector with correct size/layout for empty-subspace returns + mfem::Vector zero_solution; + /// orthonormalized physical-space basis spanning reduced subspace + std::vector> basis; + /// reduced Hessian in cached subspace basis + mfem::DenseMatrix projected_hessian; + /// reduced right-hand side in cached subspace basis + mfem::Vector projected_rhs; + /// eigenvalues of reduced Hessian + mfem::Vector eigenvalues; + /// eigenvectors of reduced Hessian + mfem::DenseMatrix eigenvectors; + /// reduced right-hand side projected onto reduced Hessian eigenvectors + mfem::Vector eigen_rhs; + /// cached leftmost eigenvectors lifted back to physical space + std::vector> leftmosts; + /// eigenvalues corresponding to cached leftmost eigenvectors + std::vector leftvals; +}; + /// @brief computes the global size of mfem::Vector int globalSize(const mfem::Vector& parallel_v, const MPI_Comm& comm); @@ -46,13 +73,18 @@ double innerProduct(const mfem::Vector& a, const mfem::Vector& b, const MPI_Comm /// @brief returns the solution, as well as a list of the N leftmost eigenvectors /// and their eigenvalues, and the predicted model energy change -std::tuple>, std::vector, double> solveSubspaceProblem( - const std::vector& directions, const std::vector& A_directions, - const mfem::Vector& b, double delta, int num_leftmost); +TrustRegionSubspaceResult solveSubspaceProblem(const std::vector& directions, + const std::vector& A_directions, + const mfem::Vector& b, double delta, int num_leftmost, + MPI_Comm comm = MPI_COMM_WORLD); -std::pair, std::vector> removeDependentDirections( - std::vector directions, std::vector A_directions); +/// @brief prepares reduced trust-region subspace data reusable across trust-region radius updates +CachedTrustRegionSubspaceProblem prepareSubspaceProblem(const std::vector& directions, + const std::vector& A_directions, + const mfem::Vector& b, int num_leftmost, + MPI_Comm comm = MPI_COMM_WORLD); -} // namespace smith +/// @brief solves cached reduced trust-region problem for given trust-region radius +TrustRegionSubspaceResult solvePreparedSubspaceProblem(const CachedTrustRegionSubspaceProblem& prepared, double delta); -#endif // SMITH_USE_SLEPC +} // namespace smith diff --git a/src/smith/physics/solid_mechanics.hpp b/src/smith/physics/solid_mechanics.hpp index f6867ee72b..f7d2706114 100644 --- a/src/smith/physics/solid_mechanics.hpp +++ b/src/smith/physics/solid_mechanics.hpp @@ -53,6 +53,7 @@ #include "smith/physics/state/finite_element_vector.hpp" namespace smith { + namespace solid_mechanics { namespace detail { diff --git a/src/smith/physics/tests/CMakeLists.txt b/src/smith/physics/tests/CMakeLists.txt index 81a9b32b86..911c3fe241 100644 --- a/src/smith/physics/tests/CMakeLists.txt +++ b/src/smith/physics/tests/CMakeLists.txt @@ -76,6 +76,7 @@ set(physics_parallel_test_sources dynamic_thermal_adjoint.cpp solid_reaction_adjoint.cpp thermal_nonlinear_solve.cpp + shallow_arch_buckling.cpp ) set(physics_parallel_tribol_test_sources contact_patch.cpp diff --git a/src/smith/physics/tests/contact_finite_diff.cpp b/src/smith/physics/tests/contact_finite_diff.cpp index 531b7f2eaa..87f141330f 100644 --- a/src/smith/physics/tests/contact_finite_diff.cpp +++ b/src/smith/physics/tests/contact_finite_diff.cpp @@ -162,9 +162,6 @@ TEST_P(ContactFiniteDiff, patch) merged_sol.SetVector(u, 0); merged_sol.SetVector(pressure, u.Size()); mfem::Vector f(merged_sol.Size()); - f = 0.0; - oper->Mult(merged_sol, f); - auto* J_op = &oper->GetGradient(merged_sol); mfem::Vector u_dot(merged_sol.Size()); u_dot = 0.0; // wiggle displacement (col = j) @@ -174,6 +171,9 @@ TEST_P(ContactFiniteDiff, patch) ++dof_ct; continue; } + f = 0.0; + oper->Mult(merged_sol, f); + auto* J_op = &oper->GetGradient(merged_sol); u_dot[j] = 1.0; mfem::Vector J_exact(merged_sol.Size()); J_exact = 0.0; @@ -206,6 +206,10 @@ TEST_P(ContactFiniteDiff, patch) } std::cout << "Max diff = " << std::setprecision(15) << max_diff << std::endl; + // Restore the contact state after the finite-difference probes before advancing the timestep. + f = 0.0; + oper->Mult(merged_sol, f); + solid_solver.advanceTimestep(dt); } } diff --git a/src/smith/physics/tests/shallow_arch_buckling.cpp b/src/smith/physics/tests/shallow_arch_buckling.cpp new file mode 100644 index 0000000000..258332731f --- /dev/null +++ b/src/smith/physics/tests/shallow_arch_buckling.cpp @@ -0,0 +1,171 @@ +// Copyright (c) Lawrence Livermore National Security, LLC and +// other Smith Project Developers. See the top-level LICENSE file for +// details. +// +// SPDX-License-Identifier: (BSD-3-Clause) + +#include +#include +#include +#include +#include + +#include "gtest/gtest.h" +#include "mpi.h" +#include "mfem.hpp" + +#include "smith/infrastructure/application_manager.hpp" +#include "smith/infrastructure/logger.hpp" +#include "smith/numerics/functional/domain.hpp" +#include "smith/numerics/functional/tensor.hpp" +#include "smith/numerics/solver_config.hpp" +#include "smith/physics/materials/solid_material.hpp" +#include "smith/physics/mesh.hpp" +#include "smith/physics/solid_mechanics.hpp" +#include "smith/physics/state/state_manager.hpp" + +namespace smith { +namespace { + +constexpr double length = 10.0; +constexpr double thickness = 0.025; +constexpr double end_tol = 1.0e-8; +constexpr double top_tol = 1.0e-8; +std::string solver_name = "TrustRegion"; +int print_level = 2; +int nonlinear_max_iterations = 300000; +int trust_subspace_option = static_cast(SubSpaceOptions::NEVER); +int trust_num_leftmost = 1; + +NonlinearSolver selectedNonlinearSolver() +{ + if (solver_name == "NewtonLineSearch") { + return NonlinearSolver::NewtonLineSearch; + } + if (solver_name == "TrustRegion") { + return NonlinearSolver::TrustRegion; + } + + throw std::runtime_error("Unknown --solver value '" + solver_name + "'. Use NewtonLineSearch or TrustRegion."); +} + +void parseCommandLine(int& argc, char** argv) +{ + int write_arg = 1; + for (int read_arg = 1; read_arg < argc; ++read_arg) { + const std::string arg = argv[read_arg]; + if (arg.rfind("--solver=", 0) == 0) { + solver_name = arg.substr(std::string("--solver=").size()); + } else if (arg.rfind("--print-level=", 0) == 0) { + print_level = std::stoi(arg.substr(std::string("--print-level=").size())); + } else if (arg.rfind("--nonlinear-max-iterations=", 0) == 0) { + nonlinear_max_iterations = std::stoi(arg.substr(std::string("--nonlinear-max-iterations=").size())); + } else if (arg.rfind("--trust-subspace-option=", 0) == 0) { + trust_subspace_option = std::stoi(arg.substr(std::string("--trust-subspace-option=").size())); + } else if (arg.rfind("--trust-num-leftmost=", 0) == 0) { + trust_num_leftmost = std::stoi(arg.substr(std::string("--trust-num-leftmost=").size())); + } else { + argv[write_arg] = argv[read_arg]; + ++write_arg; + } + } + argc = write_arg; +} + +} // namespace + +TEST(ShallowArchBuckling, CompressedThinBeamSnapThrough) +{ + MPI_Barrier(MPI_COMM_WORLD); + + constexpr int p = 1; + constexpr int dim = 2; + constexpr int nx = 120; + constexpr int ny = 5; + + axom::sidre::DataStore datastore; + smith::StateManager::initialize(datastore, "shallow_arch_buckling"); + + auto mesh = std::make_shared( + mfem::Mesh::MakeCartesian2D(nx, ny, mfem::Element::QUADRILATERAL, true, length, thickness), + "compressed_beam_mesh", 0, 0); + + mesh->addDomainOfBoundaryElements("left_end", + [](std::vector vertices, int) { return average(vertices)[0] < end_tol; }); + mesh->addDomainOfBoundaryElements( + "right_end", [](std::vector vertices, int) { return average(vertices)[0] > length - end_tol; }); + mesh->addDomainOfBoundaryElements( + "top_face", [](std::vector vertices, int) { return average(vertices)[1] > thickness - top_tol; }); + auto globalElementCount = [](int local_count) { + int global_count = 0; + MPI_Allreduce(&local_count, &global_count, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD); + return global_count; + }; + EXPECT_GT(globalElementCount(mesh->domain("left_end").total_elements()), 0); + EXPECT_GT(globalElementCount(mesh->domain("right_end").total_elements()), 0); + EXPECT_GT(globalElementCount(mesh->domain("top_face").total_elements()), 0); + + smith::LinearSolverOptions linear_options{.linear_solver = LinearSolver::CG, + .preconditioner = Preconditioner::HypreJacobi, + .relative_tol = 1.0e-8, + .absolute_tol = 1.0e-14, + .max_iterations = 100000, + .print_level = 0}; + + smith::NonlinearSolverOptions nonlinear_options{ + .nonlin_solver = selectedNonlinearSolver(), + .relative_tol = 1.0e-8, + .absolute_tol = 1.0e-10, + .max_iterations = nonlinear_max_iterations, + .print_level = print_level, + .subspace_option = static_cast(trust_subspace_option), + .num_leftmost = trust_num_leftmost}; + + SolidMechanics solid(nonlinear_options, linear_options, solid_mechanics::default_quasistatic_options, + "compressed_beam", mesh); + + solid_mechanics::NeoHookean mat{.density = 1.0, .K = 100.0, .G = 10.0}; + solid.setMaterial(mat, mesh->entireBody()); + solid.setFixedBCs(mesh->domain("left_end")); + + constexpr double final_compression = 0.2; + constexpr double seed_down_traction = 1.0e-5; + constexpr double final_snap_up_traction = 0.02; + solid.setDisplacementBCs([](auto, double t) { return vec2{{-final_compression * t, 0.0}}; }, + mesh->domain("right_end"), Component::X); + solid.setFixedBCs(mesh->domain("right_end"), Component::Y); + solid.setTraction( + [](auto, auto, double t) { + if (t < 0.5) { + return vec2{{0.0, -seed_down_traction * (t / 0.5)}}; + } + const double snap_ramp = (t - 0.5) / 0.5; + return vec2{{0.0, -seed_down_traction * (1.0 - snap_ramp) + final_snap_up_traction * snap_ramp}}; + }, + mesh->domain("top_face")); + + solid.completeSetup(); + solid.outputStateToDisk("shallow_arch_buckling"); + + SLIC_INFO_ROOT( + std::format("Compressed thin beam snap-through run: solver = {}, trust_subspace_option = {}, " + "trust_num_leftmost = {}", + solver_name, trust_subspace_option, trust_num_leftmost)); + + constexpr int num_steps = 5; + for (int step = 0; step < num_steps; ++step) { + solid.advanceTimestep(1.0 / num_steps); + SLIC_INFO_ROOT(std::format("Load step {}/{}", step + 1, num_steps)); + solid.outputStateToDisk("shallow_arch_buckling"); + } +} + +} // namespace smith + +int main(int argc, char* argv[]) +{ + smith::parseCommandLine(argc, argv); + ::testing::InitGoogleTest(&argc, argv); + smith::ApplicationManager applicationManager(argc, argv); + return RUN_ALL_TESTS(); +} diff --git a/src/smith/physics/weak_form.hpp b/src/smith/physics/weak_form.hpp index 2e766de4b3..8bd7e48e8d 100644 --- a/src/smith/physics/weak_form.hpp +++ b/src/smith/physics/weak_form.hpp @@ -12,6 +12,7 @@ #pragma once +#include #include #include #include