From a594e3ab4e4ed883962e4f83772ebddc9fbb054f Mon Sep 17 00:00:00 2001 From: Alejandro Mota Date: Sat, 6 Jun 2026 18:46:02 -0700 Subject: [PATCH] Add full-DOF matrix-free action entry point MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Introduces assemble_matrix_free_action_full!(asm, func, U_full, v_full, p), a sibling of the existing free-DOF assemble_matrix_free_action! that accepts full-DOF U_full and v_full vectors instead of the free-DOF Uu and Vu. Both vectors must have length equal to p.field.data. The existing free-DOF entry point writes Vu into p.hvp_scratch_field's free slots only, leaving the BC slots implicitly zero. This is the right semantics for Krylov-style J·v calls, where v is a perturbation ranging over unknowns and the BC component genuinely is zero. It is the wrong semantics for residual-style evaluations of c_M·M·dU in implicit Newmark with time-varying Dirichlet BCs: there, dU has a nonzero BC slice equal to g(t_{n+1}) − U_pred_BC, and the silently dropped M_{f,BC}·dU_BC cross-term biases the inertial residual at boundary-adjacent free rows. The full-DOF entry point gives the caller a direct way to inject the BC contribution: copyto! into p.field.data and p.hvp_scratch_field.data bypasses _update_for_assembly!'s "scatter free into unknowns, write BC from bc_cache.vals" logic; the element kernel then sees v_el including the BC component and produces the cross-term naturally. This mirrors Norma.jl's full-DOF-state design without changing how Carina-style free-DOF integrators store U/V/A elsewhere. Carefully restores the contract for the free-DOF entry point: after the action loop, BC slots of p.hvp_scratch_field are zeroed out. Were this skipped, a subsequent free-DOF Krylov J·v call would silently read leftover v_BC values and return a contaminated action. Test (TestAssemblers.jl) validates three invariants against a sibling no-BC assembler that produces the unrestricted element-scatter K_full: (a) On free rows, the full-DOF action equals (K_full · v_full)[free] for v_full with nonzero BC slots (rtol=1e-6 accommodates sparse SpMV vs per-element scatter floating-point reordering). (b) Zeroing v_full's BC slots collapses the full-DOF entry point onto the free-DOF entry point bit-for-bit. (c) The two answers in (a) and (b) genuinely differ — i.e., the cross-term is actually carrying signal on this mesh, so the validation is non-trivial. The condensed-assembler approach normally used elsewhere does NOT work as a reference: FEC's sparsity-pattern construction (SparsityPatterns.jl:200-249) drops entries whose row OR column is a Dirichlet DOF, even in condensed mode. Removing the BCs from the assembler entirely is the only way to retain K_{f,BC}. Co-Authored-By: Claude Opus 4.7 --- src/FiniteElementContainers.jl | 1 + src/Parameters.jl | 17 +++++++ src/assemblers/MatrixAction.jl | 72 +++++++++++++++++++++++++++ test/TestAssemblers.jl | 91 ++++++++++++++++++++++++++++++++++ 4 files changed, 181 insertions(+) diff --git a/src/FiniteElementContainers.jl b/src/FiniteElementContainers.jl index c8ee6ff8..51dd633f 100644 --- a/src/FiniteElementContainers.jl +++ b/src/FiniteElementContainers.jl @@ -14,6 +14,7 @@ export assemble_mass! export assemble_matrix! export assemble_matrix_action! export assemble_matrix_free_action! +export assemble_matrix_free_action_full! export assemble_scalar! export assemble_stiffness! export assemble_vector! diff --git a/src/Parameters.jl b/src/Parameters.jl index c92aebf4..f366edc4 100644 --- a/src/Parameters.jl +++ b/src/Parameters.jl @@ -367,6 +367,23 @@ function _update_for_assembly!(p::AbstractParameters, dof::DofManager, Uu, Vu) return nothing end +# Full-DOF flavor: caller is responsible for assembling the merged +# vectors U_full = [Uu; U_BC] and v_full = [v_free; v_BC] themselves. +# Unlike the free-DOF flavors above, we do NOT call +# update_field_dirichlet_bcs! — overwriting BC slots would silently +# erase the BC contribution the caller intentionally placed in U_full. +function _update_for_assembly_full!( + p::AbstractParameters, + U_full::AbstractVector{<:Number}, + v_full::AbstractVector{<:Number} +) + @assert length(U_full) == length(p.field.data) + @assert length(v_full) == length(p.hvp_scratch_field.data) + copyto!(p.field.data, U_full) + copyto!(p.hvp_scratch_field.data, v_full) + return nothing +end + """ $(TYPEDSIGNATURES) """ diff --git a/src/assemblers/MatrixAction.jl b/src/assemblers/MatrixAction.jl index 93970f24..0ac446a5 100644 --- a/src/assemblers/MatrixAction.jl +++ b/src/assemblers/MatrixAction.jl @@ -76,6 +76,78 @@ function _assemble_block_matrix_free_action!( end end +""" +$(TYPEDSIGNATURES) +Full-DOF variant of `assemble_matrix_free_action!`. Distinct from the +free-DOF variant in that the BC slots of `v_full` are honored as part +of the action — used for residual-style evaluations where `v_full` +carries a real BC contribution (e.g., the Newmark inertial residual +`c_M * M * dU` with `dU_BC ≠ 0` when the Dirichlet BC is time-varying). + +In the free-DOF variant `assemble_matrix_free_action!(..., Uu, Vu, p)`, +`Vu` is interpreted as a perturbation of the unknowns and the BC slots +of `p.hvp_scratch_field` are implicitly zero — correct for Krylov-style +Jacobian-action calls, where `v` ranges over unknowns only. + +For this full-DOF variant the caller is responsible for assembling +`U_full = [Uu; U_BC]` and `v_full = [v_free; v_BC]` themselves; both +vectors must have length equal to the underlying field data, and the +field's Dirichlet slots are NOT overwritten from `bc_cache.vals` (doing +so would clobber the BC contribution the caller deliberately placed +in `U_full`). +""" +function assemble_matrix_free_action_full!( + assembler, func_action::F, U_full, v_full, p +) where F <: Function + assemble_matrix_free_action_full!( + assembler.stiffness_action_storage, + assembler.vector_pattern, assembler.dof, + func_action, U_full, v_full, p + ) + return nothing +end + +""" +$(TYPEDSIGNATURES) +""" +function assemble_matrix_free_action_full!( + storage, pattern, dof, func_action, U_full, v_full, p +) + fill!(storage, zero(eltype(storage))) + fspace = function_space(dof) + X = coordinates(p) + t = current_time(p) + Δt = time_step(p) + U = p.field + U_old = p.field_old + V = p.hvp_scratch_field + _update_for_assembly_full!(p, U_full, v_full) + conns = fspace.elem_conns + foreach_block(fspace, p) do physics, props, ref_fe, b + _assemble_block_matrix_free_action!( + storage, + conns.data, conns.offsets[b], + func_action, + physics, ref_fe, + X, t, Δt, + U, U_old, V, + block_view(p.state_old, b), block_view(p.state_new, b), props + ) + end + # The free-DOF entry point above relies on `p.hvp_scratch_field`'s BC + # slots being zero (it never writes them, and the kernel reads them as + # part of v_el). Restore that invariant after we deliberately put + # v_BC into the scratch — otherwise a subsequent Krylov J·v call that + # comes through the free-DOF entry point silently gets contaminated + # by leftover v_BC values. + cache = p.dirichlet_bcs.bc_cache + data = p.hvp_scratch_field.data + Z = zero(eltype(data)) + fec_foreach(cache.dofs) do I + data[cache.dofs[I]] = Z + end +end + """ $(TYPEDSIGNATURES) """ diff --git a/test/TestAssemblers.jl b/test/TestAssemblers.jl index a74edc29..90c7e0d2 100644 --- a/test/TestAssemblers.jl +++ b/test/TestAssemblers.jl @@ -336,6 +336,97 @@ end end end +@testitem "Assemblers - test_matrix_free_action_full_mechanics" setup=[AssemblerHelperMechanics] begin + # The contract on assemble_matrix_free_action_full!: + # (a) Equivalence to the assembled gold reference: on free rows the + # result equals (K_full · v_full)[free] for any v_full, including + # nonzero BC slots — this is the K_{f,BC}·v_BC cross-term that + # the free-DOF entry point silently drops. (Tested with stiffness + # rather than mass only because the test Mechanics physics defines + # stiffness_action but not mass_action; the structural argument is + # identical for either operator.) + # (b) Reduction: when v_full has its BC slots zeroed, the full-DOF + # entry point reproduces the free-DOF entry point bit-for-bit. + # + # Reference path uses a sibling SparseMatrixAssembler constructed with + # NO Dirichlet BCs. In FEC, even the condensed sparsity pattern omits + # entries that touch a Dirichlet DOF, so an assembled K from a + # BC-equipped assembler has K_{f,BC} columns zeroed and matches the + # buggy free-DOF action. Removing the BCs forces the full element + # scatter into the sparse matrix, giving the true K_full whose matvec + # against v_full carries the cross-term we want to validate. This is + # the same trick used by test_lumped_mass_mechanics. + include("TestUtils.jl") + backends = _get_backends() + for dev in backends + # Reference assembler: no BCs → all DOFs are unknowns → full K + asm_ref = SparseMatrixAssembler(u) + p_ref = create_parameters(mesh, asm_ref, physics, props; + dirichlet_bcs = DirichletBC[], times = times) + FiniteElementContainers.initialize!(p_ref) + Uu_ref = create_unknowns(asm_ref) # length = n_dofs + + # Path under test: BC-equipped non-condensed assembler. + asm = SparseMatrixAssembler(u) + p = create_parameters(mesh, asm, physics, props; dirichlet_bcs = dbcs, times = times) + FiniteElementContainers.initialize!(p) + Uu = create_unknowns(asm) # length = n_unknown + + if dev != cpu + p_ref = p_ref |> dev + asm_ref = asm_ref |> dev + Uu_ref = Uu_ref |> dev + p = p |> dev + asm = asm |> dev + Uu = Uu |> dev + end + + # Deterministic v_full with NON-ZERO entries everywhere (in + # particular at BC slots). + n_dofs = length(p.field.data) + v_full_h = collect(eltype(p.field.data), (1:n_dofs) ./ n_dofs) + v_full = v_full_h |> dev + + # Reference: no-BC assembly of K, then plain matvec against v_full. + assemble_stiffness!(asm_ref, stiffness, Uu_ref, p_ref) + K_full = stiffness(asm_ref) + Kv_full_ref_h = Array(K_full * v_full) + free_dofs = Array(asm.dof.unknown_dofs) + Kv_free_ref = Kv_full_ref_h[free_dofs] + + # U_full is the current merged field state (BC slots populated by + # initialize!). Callers compute it as part of their integrator state. + U_full = copy(p.field.data) + + # (a) Cross-term-aware action matches the assembled reference. + assemble_matrix_free_action_full!(asm, stiffness_action, U_full, v_full, p) + Kv_free_mf = Array(hvp(asm, Uu)) + # rtol = 1e-6 accommodates floating-point reordering between sparse + # SpMV (reference) and per-element scatter (matrix-free). + @test isapprox(Kv_free_mf, Kv_free_ref; rtol = 1e-6) + + # (b) Zeroing v_full's BC slots collapses the full-DOF path onto + # the free-DOF path. + bc_dofs = Array(p.dirichlet_bcs.bc_cache.dofs) + v_full_zeroBC_h = copy(v_full_h) + v_full_zeroBC_h[bc_dofs] .= 0 + v_full_zeroBC = v_full_zeroBC_h |> dev + Vu = v_full_h[free_dofs] |> dev # same free-DOF entries as v_full + + assemble_matrix_free_action!(asm, stiffness_action, Uu, Vu, p) + Kv_free_legacy = copy(Array(hvp(asm, Uu))) + + assemble_matrix_free_action_full!(asm, stiffness_action, U_full, v_full_zeroBC, p) + Kv_free_full_zeroBC = Array(hvp(asm, Uu)) + @test Kv_free_full_zeroBC ≈ Kv_free_legacy + + # The difference between (a) and (b) is K_{f,BC}·v_BC: the + # cross-term the free-DOF path drops. Sanity-check it is nonzero + # (this mesh has a Dirichlet boundary, so K_{f,BC} is supported). + @test !isapprox(Kv_free_mf, Kv_free_full_zeroBC; atol=0) + end +end + @testitem "Assemblers - test_lumped_mass_mechanics" setup=[AssemblerHelperMechanics] begin # The contract on assemble_lumped_mass!: # (a) On a fully-free DOF space, it equals the row sums of the