From 2c7cd099a3fa0811c2b8a40ce36546c2d6ba9ce6 Mon Sep 17 00:00:00 2001 From: lmoresi Date: Fri, 5 Jun 2026 16:23:01 +0100 Subject: [PATCH 1/3] Add SNES_Stokes_Constrained: non-penalty free-slip + recoverable topography Enforce u.n = g on curved boundaries with a recoverable Lagrange multiplier instead of a tuned penalty. An augmented-Lagrangian (ALG2) outer loop carries both the existing penalty BC and the multiplier; the multiplier removes the penalty's accuracy bias, so a moderate, well-conditioned augmentation gives an exact constraint in 2-3 Stokes solves. The converged multiplier is the normal traction holding the boundary -- a direct dynamic-topography estimate (h = lambda / (drho g)), recoverable as a clean MeshVariable (interior exactly zero; boundary trace correlates with -n.sigma.n at 1.0000). Purely additive: the validated 2x2 saddle-point assembly and fieldsplit config are untouched. New class behind uw.systems.Stokes_Constrained. Hands-off solve(): relative constraint tolerance (RMS(u.n-g) < rtol*RMS|u|) and viscosity-weighted augmentation (1e3*mu(x)) by default -- no user tuning. On the SolCx benchmark this gives ~2e-4 accuracy in 3 outer iterations across viscosity contrast 1 -> 1e6. Validation: tests/test_1061_constrained_freeslip_annulus.py (5 tests). Design note and production roadmap in docs/developer/design/. Underworld development team with AI support from Claude Code --- .../design/CONSTRAINED_FREESLIP_MULTIPLIER.md | 204 +++++++++++ src/underworld3/systems/__init__.py | 1 + src/underworld3/systems/solvers.py | 342 ++++++++++++++++++ .../test_1061_constrained_freeslip_annulus.py | 141 ++++++++ 4 files changed, 688 insertions(+) create mode 100644 docs/developer/design/CONSTRAINED_FREESLIP_MULTIPLIER.md create mode 100644 tests/test_1061_constrained_freeslip_annulus.py diff --git a/docs/developer/design/CONSTRAINED_FREESLIP_MULTIPLIER.md b/docs/developer/design/CONSTRAINED_FREESLIP_MULTIPLIER.md new file mode 100644 index 00000000..cff96928 --- /dev/null +++ b/docs/developer/design/CONSTRAINED_FREESLIP_MULTIPLIER.md @@ -0,0 +1,204 @@ +# Constrained free-slip via a recoverable Lagrange multiplier (dynamic topography) + +**Status**: proof-of-concept (Phase 0 + Phase 1), serial. Branch +`feature/constrained-freeslip-topography`. + +## Motivation + +Free-slip / no-normal-flow on curved (annulus, spherical) boundaries is +currently enforced with **penalty-like** methods — a penalty natural BC +(`add_natural_bc(penalty · Γ·v · Γ, ...)`) or Nitsche. These are fragile: the +penalty magnitude must be tuned against the Rayleigh number and viscosity. Too +weak and a coherent radial throughflow appears (an under-scaled `1e4` natural BC +is ~100× too weak at Ra=1e6); too strong and the system ill-conditions and the +Stokes solve diverges in line search. + +This feature enforces `u·n = g` on a curved boundary with a **true Lagrange +multiplier** `λ` instead of a penalty. Because the converged multiplier *is* the +normal traction holding the boundary, it is simultaneously a direct estimate of +**dynamic surface topography**, `h = λ / (Δρ g)`. The equilibrium `λ` is also the +target end-state toward which a free surface can be integrated over a time +interval (connecting to the ETD free-surface work on +`feature/exp-integrator-freesurface`). + +## Formulation + +Stokes with a surface constraint `u·n = g` on Γ, multiplier `λ`: + +``` +[ A Bᵀ Cᵀ ] [u] [f] +[ B 0 0 ] [p] = [0] A = viscous, B = div, C = ∫_Γ (n·v) ψ +[ C 0 0 ] [λ] [g] (C couples only the boundary trace of u) +``` + +`C` is **co-dimension-1**: it touches only velocity DOFs on Γ. A monolithic +third FE field would therefore either waste interior DOFs or need a boundary +trace space PETSc/DMPlex does not provide on the same DM. We instead solve the +boundary Schur complement `S_λ = C K⁻¹ Cᵀ` by an **outer loop**, leaving the +validated 2×2 Stokes assembly untouched. + +### Augmented Lagrangian (ALG2) — the production algorithm + +Each Stokes solve carries a natural-BC traction with **both** the multiplier and +a penalty augmentation, reusing the existing penalty machinery: + +$$\mathbf{t} = \bigl[\lambda + r\,(\mathbf{u}\cdot\mathbf{n} - g)\bigr]\,\mathbf{n} +\quad\text{on } \Gamma,$$ + +and the multiplier is updated with the same `r`: + +$$\lambda \leftarrow \lambda + r\,(\mathbf{u}\cdot\mathbf{n} - g).$$ + +The penalty term `r(u·n)n` preconditions *every* boundary mode uniformly, so the +outer loop converges in a handful of iterations; the multiplier removes the +penalty's accuracy bias, so a **moderate, well-conditioned** `r` gives both fast +convergence *and* an exact constraint — unlike a pure penalty, which must be made +large and fragile. + +## Phase-0 spike findings (what shaped the design) + +Spikes (`/tmp/s3_*.py`) on a 2D annulus (no-slip inner boundary to remove the +rigid-rotation null space, multiplier free-slip on the outer boundary): + +- **Plain Uzawa works but is slow.** A damped-Richardson update + `λ ← λ + ρ(u·n)` converges and matches the penalty solution, but a single + scalar `ρ` cannot kill both the fast and slow boundary-Schur modes — the + residual contracts the dominant mode in ~5 iterations then crawls. +- **`ρ ∝ μ`, NOT `ρ ∝ μ/h`.** The optimal Richardson step is `ρ ≈ C·μ` with `C` + a geometry constant, **independent of mesh resolution** (`ρ=8μ` converged in 5 + iterations at cellSize 0.1/0.05/0.025). The naive `μ/h` scaling over-steps on + refinement and stalls. +- **CG is the wrong accelerator.** CG on `S_λ` diverged: each matvec is an + *inexact* iterative Stokes solve (plus pressure-null-space noise), and the + nodal Euclidean inner product is not the one in which `S_λ` is SPD. Krylov + acceleration needs an exact symmetric operator; this is neither. +- **Augmented Lagrangian is the right accelerator** (per L. Moresi). It converges + in **2 iterations** where plain Uzawa took 21, reusing the existing penalty BC. + This is the implemented algorithm. + +## Implementation + +`SNES_Stokes_Constrained(SNES_Stokes)` in `src/underworld3/systems/solvers.py`, +exported as `uw.systems.Stokes_Constrained`. **Purely additive** — the validated +2×2 saddle-point assembly and fieldsplit configuration are untouched, honouring +"solver stability is paramount". + +```python +stokes = uw.systems.Stokes_Constrained(mesh, velocityField=v, pressureField=p) +stokes.constitutive_model = uw.constitutive_models.ViscousFlowModel +stokes.constitutive_model.Parameters.shear_viscosity_0 = mu +stokes.bodyforce = buoyancy * unit_r +stokes.add_dirichlet_bc((0.0, 0.0), "Lower") # no-slip inner + +lam = stokes.add_constraint_bc("Upper", g=0.0) # free-slip outer (Gamma_P1) +stokes.solve() # no constraint tuning needed + +topo = stokes.topography("Upper", buoyancy_scale=delta_rho_g) # h = lambda/(drho g) +``` + +The outer loop is hidden behind ``solve()``: the tolerance is **relative** to the +velocity scale (``RMS(u.n-g) < rtol·RMS|u|``, default ``rtol=1e-3``) so it is +problem-independent, and the augmentation defaults to ``1e3·μ(x)`` (local- +viscosity-weighted). On SolCx this gives ~2e-4 accuracy in 3 outer iterations at +viscosity contrast 1 → 10⁶ with no user tuning. + +Key design points: + +- **Multiplier representation.** `λ` is an ordinary full-mesh scalar field at the + *velocity degree* (P2). Only its trace on Γ enters the weak form. Matching the + velocity degree means the multiplier reaches every velocity normal-trace DOF + (including P2 mid-edge), so there is no penalty floor on the constraint — a P1 + multiplier plateaus at ~`‖t‖/r` on the mid-edge component. +- **Clean topography field.** The dual update is restricted to boundary nodes via + a P1 boundary marker (`petsc_dm_find_labeled_points_local`) sampled at the + multiplier's nodes — boundary mid-edge nodes interpolate to 1, interior to + < 0.5. Interior `λ` therefore stays **exactly zero**, so `λ` is a directly + usable topography field (not interior garbage). +- **Coupling registered once.** `add_natural_bc([λ + r(u·n − g)]·n, boundary)` + is set up a single time; only `λ`'s boundary data changes between solves, so + nothing recompiles. +- **`add_constraint_bc(boundary, g=0, normal=None, augmentation=None)`** — + `normal` defaults to the smooth projected normals `mesh.Gamma_P1`; + `augmentation` defaults to a viscosity-scaled `r = 10³·μ`. + +## Validation + +`tests/test_1061_constrained_freeslip_annulus.py` (level_2 / tier_b), buoyancy- +driven annulus, no-slip inner + multiplier free-slip outer, vs a `1e6` penalty +reference. All pass (~14 s): + +| Check | Result | +|---|---| +| `RMS(u·n)` on outer boundary (no penalty coefficient) | 6.5e-5 | +| `relL2(v_multiplier vs v_penalty)` | 3.1e-3 | +| Outer iterations (augmented Lagrangian) | 2 | +| Interior `λ` (clean field) | exactly 0 | +| boundary `corr(λ, −n·σ·n)` (dynamic-topography stress) | 0.9999 | + +The consistent-boundary-flux identity `λ = −n·σ·n|_Γ` is the independent +cross-check: the multiplier's boundary trace equals the recovered normal Cauchy +stress (negative sign = the reaction traction holding the boundary), confirming +`λ` is the dynamic topography signal. + +## The augmentation parameter `r`: true-work trade-off + +`r` is a *speed* knob, not an *accuracy* knob — this is the key advantage over a +pure penalty. Sweep on the annulus (constraint tol 1e-4, wall time for one cold +solve; `tot_lin` = total outer Schur-KSP linear iterations across the loop): + +cellSize 0.1 (~2940 dof): + +| `r` | outer its | tot_lin | wall (s) | relL2 vs penalty | +|---:|---:|---:|---:|---:| +| 10 | 26 | 26 | 3.77 | 3.1e-3 | +| 100 | 4 | 4 | 0.61 | 3.2e-3 | +| 300 | 3 | 3 | 0.47 | 3.1e-3 | +| 1,000 | 3 | 3 | 0.53 | 3.1e-3 | +| 3,000 | 2 | 2 | **0.38** | 3.1e-3 | +| 10,000 | 2 | 2 | 0.51 | 2.9e-3 | +| 100,000 | 2 | 5 | 1.83 | 1.6e-3 | + +cellSize 0.05 (~10852 dof) shows the same shape (min wall ≈ 1.25 s at r=1e3; +6.98 s at r=10; 7.07 s at r=1e5). + +- **Outer iterations fall with `r`** (`26 → 4 → 3 → 2`) — bigger penalty, faster + dual convergence (`contraction ≈ ‖S_λ‖/(r+‖S_λ‖)`). +- **But the inner solve stiffens at large `r`.** Linear iterations per outer + solve stay at 1.0 up to `r=10⁴`, then rise (2.5 at `r=10⁵`), and wall time + balloons (the velocity sub-block conditioning degrades — visible in wall time + even before the outer KSP count moves). +- **True work is U-shaped**: both extremes are 3–8× slower than the optimum. The + efficient basin is `r ∈ [300, 10⁴]` (>1.5 decades) at both resolutions; the + default `r = 10³·μ` sits inside it. +- **Accuracy is `r`-independent** (relL2 ≈ 3.1e-3, flat across four decades of + `r`). So `r` is tuned for *speed* with a benign failure mode — too small just + costs iterations, too large just costs inner work; **the answer is never + wrong**. Contrast a pure penalty, where the magnitude must be tuned against + forcing strength and viscosity to get *accuracy* (too small ⇒ wrong), which is + the fragility this method removes. + +## Option trade-offs and what is deferred + +| Option | Verdict | +|---|---| +| (A) full-domain 3rd FE field + ε-screening | 3-way nested fieldsplit; ε re-introduces tuning. Rejected as primary. | +| (B1) boundary-stratum-only PetscFE field | No DMPlex support on the same DM. | +| (B2) co-dim-1 submesh + MATNEST | The honest monolithic form; deferred. | +| (C) reuse pressure / `_constraints` | `p` enforces `∇·u=0` interior, not `u·n=0` on Γ — not redundant. The CBF identity is a *validation* tool, not an implementation. | +| **(D) augmented-Lagrangian outer loop** | **Implemented.** Non-invasive, 2 iterations, exact, recoverable topography. | + +Deferred to follow-up PRs (Phase-0 spike S2 gathers the fieldsplit-feasibility +evidence): the monolithic 3-field / co-dim-1 representation; 3D spherical shells; +**parallel** (the boundary mask and the nodal update are serial); the +**both-boundaries-free-slip** annulus case (admits a rigid-rotation velocity null +space needing explicit removal); and live free-surface equilibrium integration +(pass `λ` as the target normal-stress end-state). + +## Files + +- `src/underworld3/systems/solvers.py` — `SNES_Stokes_Constrained`, `_ConstraintBC`. +- `src/underworld3/systems/__init__.py` — `Stokes_Constrained` export. +- `tests/test_1061_constrained_freeslip_annulus.py` — validation. + +Pre-existing (unrelated) failures noted: `tests/test_1060_nitsche_freeslip.py` +has two failing assertions on `development` independent of this work. diff --git a/src/underworld3/systems/__init__.py b/src/underworld3/systems/__init__.py index 0c66d57c..ddf22c92 100644 --- a/src/underworld3/systems/__init__.py +++ b/src/underworld3/systems/__init__.py @@ -48,6 +48,7 @@ from .solvers import SNES_Poisson as Poisson from .solvers import SNES_Darcy as SteadyStateDarcy from .solvers import SNES_Stokes as Stokes +from .solvers import SNES_Stokes_Constrained as Stokes_Constrained from .solvers import SNES_VE_Stokes as VE_Stokes from .solvers import SNES_Projection as Projection from .solvers import SNES_Vector_Projection as Vector_Projection diff --git a/src/underworld3/systems/solvers.py b/src/underworld3/systems/solvers.py index 4e2cf217..d0f9fe6c 100644 --- a/src/underworld3/systems/solvers.py +++ b/src/underworld3/systems/solvers.py @@ -1936,6 +1936,348 @@ def delta_t(self): return self.constitutive_model.Parameters.dt_elastic +class _ConstraintBC: + """Bookkeeping for one multiplier-enforced boundary constraint. + + Holds the multiplier field ``lam``, the prescribed normal velocity ``g``, + the (symbolic, row-vector) constraint normal, and the augmented-Lagrangian + parameter ``r`` (which is simultaneously the forward-problem penalty weight + and the multiplier-update step). + """ + + __slots__ = ("boundary", "g", "normal", "lam", "augmentation", "mask", "r_nodal") + + def __init__(self, boundary, g, normal, lam, augmentation, mask): + self.boundary = boundary + self.g = g + self.normal = normal + self.lam = lam + # augmentation r may be a scalar or a spatial sympy expression (e.g. + # viscosity-weighted). r_nodal holds it sampled at the multiplier nodes + # for the dual update; it is (re)computed at solve time. + self.augmentation = augmentation + self.mask = mask + self.r_nodal = None + + +class SNES_Stokes_Constrained(SNES_Stokes): + r""" + Stokes solver with boundary constraints enforced by a recoverable Lagrange + multiplier instead of a penalty. + + For each constraint boundary :math:`\Gamma` the no-normal-flow (free-slip) + or prescribed-normal-velocity condition + + .. math:: + + \mathbf{u} \cdot \mathbf{n} = g \quad \text{on } \Gamma + + is enforced by introducing a scalar multiplier field :math:`\lambda` and an + **augmented-Lagrangian** (Uzawa / ALG2) outer loop. Each Stokes solve carries + a natural-BC traction with both the multiplier and a penalty augmentation, + + .. math:: + + \mathbf{t} = \bigl[\lambda + r\,(\mathbf{u}\cdot\mathbf{n} - g)\bigr]\,\mathbf{n} + \quad \text{on } \Gamma , + + and the multiplier is updated with the same augmentation parameter, + + .. math:: + + \lambda \leftarrow \lambda + r\,(\mathbf{u}\cdot\mathbf{n} - g) . + + The penalty term :math:`r\,(\mathbf{u}\cdot\mathbf{n})\,\mathbf{n}` is exactly + the existing penalty free-slip BC: it preconditions every boundary mode + uniformly so the outer loop converges in a handful of iterations, while the + multiplier removes the penalty's accuracy bias — so a **moderate**, + well-conditioned :math:`r` gives both fast convergence and an *exact* + constraint (unlike a pure penalty, which must be made large and fragile). + + At convergence :math:`\lambda` is the normal traction holding the boundary, + giving a direct estimate of dynamic surface topography, + :math:`h = \lambda / (\Delta\rho\, g)`. Access it via :meth:`multiplier`. + + Notes + ----- + The multiplier is represented as an ordinary full-mesh scalar field of the + same degree as the velocity. Only its trace on :math:`\Gamma` enters the + weak form (interior values are inert), so no boundary trace space is + required. This is a proof-of-concept (serial; one full Stokes solve per + outer iteration). See the design note + ``docs/developer/design/CONSTRAINED_FREESLIP_MULTIPLIER.md``. + + See Also + -------- + SNES_Stokes : The unconstrained saddle-point solver this extends. + """ + + def __init__( + self, + mesh: uw.discretisation.Mesh, + velocityField: Optional[uw.discretisation.MeshVariable] = None, + pressureField: Optional[uw.discretisation.MeshVariable] = None, + degree: Optional[int] = 2, + p_continuous: Optional[bool] = True, + verbose: Optional[bool] = False, + DuDt: Union[SemiLagrangian_DDt, Lagrangian_DDt] = None, + DFDt: Union[SemiLagrangian_DDt, Lagrangian_DDt] = None, + ): + super().__init__( + mesh, + velocityField, + pressureField, + degree, + p_continuous, + verbose, + DuDt=DuDt, + DFDt=DFDt, + ) + + self._constraint_bcs = [] + # Diagnostics from the most recent constrained solve. + self.constraint_iterations = 0 + self.constraint_residual = None + self.constraint_total_linear_its = 0 + return + + def _viscosity_scale(self): + """A representative scalar viscosity, for sizing the initial Uzawa step.""" + try: + mu = self.constitutive_model.Parameters.shear_viscosity_0 + return float(mu) + except (TypeError, ValueError, AttributeError): + return 1.0 + + def add_constraint_bc(self, boundary, g=0.0, normal=None, augmentation=None, + augmentation_base=1.0e3): + r"""Register a multiplier-enforced normal-velocity constraint on ``boundary``. + + Parameters + ---------- + boundary : str + Mesh boundary label (e.g. ``"Upper"``). + g : float or sympy expression, default 0.0 + Prescribed normal velocity :math:`\mathbf{u}\cdot\mathbf{n} = g`. + The default ``0`` is free-slip / no-normal-flow. + normal : sympy matrix, optional + Row-vector constraint normal. Defaults to the mesh's smooth + projected boundary normals ``mesh.Gamma_P1``. + augmentation : float or sympy expression, optional + Augmented-Lagrangian parameter :math:`r` — simultaneously the + forward-problem penalty weight and the multiplier-update step. Any + :math:`r>0` converges; larger is faster but stiffens the linear + solve. **Defaults to ``augmentation_base · μ(x)``** — weighted by the + local viscosity so the dimensionless penalty ratio is uniform across + viscosity contrasts (essential for variable-viscosity problems; a + flat ``r`` under-constrains high-viscosity regions). May be passed as + a spatial sympy expression directly. + augmentation_base : float, default 1e3 + The base multiple used when ``augmentation`` is not given. + + Returns + ------- + lam : MeshVariable + The scalar multiplier field. After :meth:`solve`, its boundary + trace is the normal traction (topography proxy). + """ + if normal is None: + normal = self.mesh.Gamma_P1 + + normal = sympy.Matrix(normal) + if normal.shape[0] != 1: + normal = normal.reshape(1, self.mesh.dim) + + if augmentation is None: + # Viscosity-weighted augmentation r = augmentation_base * mu(x): + # keeps the penalty/viscous ratio uniform so high-viscosity boundary + # regions are constrained as well as low-viscosity ones. + try: + viscosity = self.constitutive_model.Parameters.shear_viscosity_0 + augmentation = augmentation_base * viscosity + except (AttributeError, TypeError): + augmentation = augmentation_base * self._viscosity_scale() + + idx = len(self._constraint_bcs) + # Multiplier at the velocity degree so its trace reaches every velocity + # normal-trace DOF (no penalty floor on the P2 mid-edge component). + lam = uw.discretisation.MeshVariable( + f"lambda_{self.instance_number}_{idx}", + self.mesh, + 1, + degree=self._degree, + ) + lam.data[:] = 0.0 + + # Boundary-node mask: restrict the multiplier update to the constraint + # boundary so interior values stay exactly zero and lambda is a clean, + # directly usable topography field. Build a P1 marker (1 on the boundary + # vertices, 0 elsewhere) and sample it at lambda's nodes: boundary + # mid-edge nodes interpolate to 1, interior nodes to < 0.5. + from underworld3.discretisation.discretisation_mesh import ( + petsc_dm_find_labeled_points_local, + ) + + marker = uw.discretisation.MeshVariable( + f"_bmarker_{self.instance_number}_{idx}", self.mesh, 1, degree=1, + ) + marker.data[:] = 0.0 + point_indices = petsc_dm_find_labeled_points_local( + self.mesh.dm, + "UW_Boundaries", + getattr(self.mesh.boundaries, boundary).value, + sectionIndex=False, + ) + if point_indices is not None: + marker.data[point_indices] = 1.0 + mask = ( + np.array(uw.function.evaluate(marker.sym, lam.coords)).reshape(-1) > 0.75 + ) + + # Augmented-Lagrangian natural BC, registered once: + # t = [ lambda + r (u.n - g) ] n + # The r(u.n)n part is the penalty BC (re-derived into the Jacobian each + # solve); the lambda part is the applied multiplier (fixed per solve). + nv = self.u.sym.dot(normal) + traction = (lam.sym[0] + augmentation * (nv - g)) * normal + self.add_natural_bc(traction, boundary) + + self._constraint_bcs.append( + _ConstraintBC(boundary, g, normal, lam, augmentation=augmentation, mask=mask) + ) + return lam + + def multiplier(self, boundary): + """Return the multiplier field for ``boundary`` (None if not constrained). + + After :meth:`solve`, the multiplier's boundary trace is the normal + traction holding the constraint; interior values are zero. Divide by + :math:`\\Delta\\rho\\,g` to obtain dynamic topography (see + :meth:`topography`). + """ + for cbc in self._constraint_bcs: + if cbc.boundary == boundary: + return cbc.lam + return None + + def topography(self, boundary, buoyancy_scale=1.0): + r"""Dynamic topography expression on ``boundary``. + + Returns the symbolic field :math:`\lambda / (\Delta\rho\, g)` for the + constraint multiplier on ``boundary`` (zero away from the boundary). + + Parameters + ---------- + boundary : str + A constrained boundary label. + buoyancy_scale : float or sympy expression, default 1.0 + The buoyancy scale :math:`\Delta\rho\, g` relating normal traction + to surface height. + """ + lam = self.multiplier(boundary) + if lam is None: + raise ValueError(f"No constraint registered on boundary '{boundary}'.") + return lam.sym[0] / buoyancy_scale + + def _constraint_rms(self, cbc): + """RMS of (u.n - g) over the constraint boundary, via boundary integral.""" + vn = self.u.sym.dot(cbc.normal) + num = float( + uw.maths.BdIntegral(self.mesh, fn=(vn - cbc.g) ** 2, + boundary=cbc.boundary).evaluate() + ) + length = float( + uw.maths.BdIntegral(self.mesh, fn=1.0, boundary=cbc.boundary).evaluate() + ) + return np.sqrt(num / length) if length > 0 else np.sqrt(num) + + def _nodal_constraint_residual(self, cbc): + """(u.n - g) evaluated at the multiplier's nodes.""" + expr = self.u.sym.dot(cbc.normal) - cbc.g + return np.array( + uw.function.evaluate(sympy.Matrix([[expr]]), cbc.lam.coords) + ).reshape(-1) + + def solve( + self, + zero_init_guess: bool = True, + *, + constraint_rtol: float = 1.0e-3, + constraint_atol: float = 1.0e-12, + constraint_max_iterations: int = 40, + constraint_verbose: bool = False, + **kwargs, + ): + """Solve the constrained Stokes system. + + With no constraint BCs registered this is an ordinary Stokes solve. + Otherwise it runs the augmented-Lagrangian outer loop until every + constraint boundary satisfies + ``RMS(u.n - g) < constraint_rtol · RMS|u| + constraint_atol`` (or the + iteration cap is hit). The tolerance is *relative* to the velocity scale + so it is problem-independent and needs no tuning. All other keyword + arguments are forwarded to the inner Stokes ``solve``. + """ + if not self._constraint_bcs: + return super().solve(zero_init_guess=zero_init_guess, **kwargs) + + # Sample the augmentation r at the multiplier nodes once (it may be a + # spatial / viscosity-weighted expression). Used for the dual update. + for cbc in self._constraint_bcs: + if isinstance(cbc.augmentation, (int, float)): + cbc.r_nodal = float(cbc.augmentation) * np.ones(cbc.lam.coords.shape[0]) + else: + cbc.r_nodal = np.array( + uw.function.evaluate( + sympy.Matrix([[cbc.augmentation]]), cbc.lam.coords + ) + ).reshape(-1) + + total_linear_its = 0 + for k in range(constraint_max_iterations): + super().solve(zero_init_guess=(zero_init_guess and k == 0), **kwargs) + try: + total_linear_its += int(self.snes.getLinearSolveIterations()) + except Exception: + pass + + # Velocity scale for the relative tolerance (RMS speed over nodes). + v_scale = float(np.sqrt(np.mean(np.sum(self.u.data**2, axis=1)))) + threshold = constraint_rtol * v_scale + constraint_atol + + all_converged = True + worst = 0.0 + for cbc in self._constraint_bcs: + rms = self._constraint_rms(cbc) + if rms >= threshold: + all_converged = False + worst = max(worst, rms) + if constraint_verbose: + uw.mpi.pprint( + f" [constraint {cbc.boundary}] iter {k}: " + f"RMS(u.n-g) = {rms:.3e} (rel {rms / max(v_scale, 1e-30):.2e}, " + f"mean r = {cbc.r_nodal.mean():.3g})" + ) + + self.constraint_iterations = k + 1 + self.constraint_residual = worst + self.constraint_total_linear_its = total_linear_its + + if all_converged: + if constraint_verbose: + uw.mpi.pprint(f"Constraint loop converged in {k + 1} iterations.") + break + + # Augmented-Lagrangian (ALG2) multiplier update, same r as the + # forward-problem penalty augmentation. Monotone-convergent for r>0. + # Restricted to boundary nodes so lambda stays a clean topography field. + for cbc in self._constraint_bcs: + resid = self._nodal_constraint_residual(cbc) + cbc.lam.data[cbc.mask, 0] += cbc.r_nodal[cbc.mask] * resid[cbc.mask] + + return + + class SNES_Projection(SNES_Scalar): r""" Scalar projection solver for mapping functions to mesh variables. diff --git a/tests/test_1061_constrained_freeslip_annulus.py b/tests/test_1061_constrained_freeslip_annulus.py new file mode 100644 index 00000000..4fba5316 --- /dev/null +++ b/tests/test_1061_constrained_freeslip_annulus.py @@ -0,0 +1,141 @@ +"""Constrained free-slip on an annulus via a recoverable Lagrange multiplier. + +Buoyancy-driven flow in an annulus with no-slip inner boundary and free-slip +outer boundary. The free-slip outer condition (u.n = 0) is enforced three ways +and compared: + + - penalty : the existing fragile penalty natural BC (reference) + - multiplier: SNES_Stokes_Constrained, augmented-Lagrangian multiplier + +The multiplier solver must (a) drive u.n -> 0 with NO penalty coefficient, +(b) match the penalty velocity field, and (c) yield a clean, recoverable +topography field whose boundary trace equals the consistent-boundary-flux +normal stress (-n.sigma.n), i.e. dynamic topography. + +Run with: pixi run python -m pytest tests/test_1061_constrained_freeslip_annulus.py -v +""" + +import pytest +import numpy as np +import sympy +import underworld3 as uw + +pytestmark = [pytest.mark.level_2, pytest.mark.tier_b] + +R_INNER, R_OUTER = 0.5, 1.0 +CELL = 0.1 +MU = 1.0 +RA = 1.0e2 + + +def _mesh_and_forcing(): + mesh = uw.meshing.Annulus(radiusInner=R_INNER, radiusOuter=R_OUTER, + cellSize=CELL, qdegree=3) + x, y = mesh.X + r = sympy.sqrt(x**2 + y**2) + unit_r = sympy.Matrix([[x / r, y / r]]) + theta = sympy.atan2(y, x) + buoy = RA * sympy.cos(3 * theta) * (r - R_INNER) / (R_OUTER - R_INNER) + return mesh, unit_r, buoy, theta + + +def _outer_rms_vn(solver, unit_r): + vn = solver.u.sym.dot(unit_r) + num = float(uw.maths.BdIntegral(solver.mesh, fn=vn**2, boundary="Upper").evaluate()) + length = float(uw.maths.BdIntegral(solver.mesh, fn=1.0, boundary="Upper").evaluate()) + return np.sqrt(num / length) + + +@pytest.fixture(scope="module") +def solutions(): + mesh, unit_r, buoy, theta = _mesh_and_forcing() + + # --- penalty reference --- + vp = uw.discretisation.MeshVariable("Up", mesh, mesh.dim, degree=2, vtype=uw.VarType.VECTOR) + pp = uw.discretisation.MeshVariable("Pp", mesh, 1, degree=1) + ref = uw.systems.Stokes(mesh, velocityField=vp, pressureField=pp) + ref.constitutive_model = uw.constitutive_models.ViscousFlowModel + ref.constitutive_model.Parameters.shear_viscosity_0 = MU + ref.saddle_preconditioner = 1.0 / MU + ref.bodyforce = buoy * unit_r + ref.add_dirichlet_bc((0.0, 0.0), "Lower") + ref.add_natural_bc(1e6 * MU * unit_r.dot(vp.sym) * unit_r, "Upper") + ref.tolerance = 1e-8 + ref.petsc_options["ksp_type"] = "fgmres" + ref.solve() + + # --- multiplier (augmented Lagrangian) --- + vc = uw.discretisation.MeshVariable("Uc", mesh, mesh.dim, degree=2, vtype=uw.VarType.VECTOR) + pc = uw.discretisation.MeshVariable("Pc", mesh, 1, degree=1) + con = uw.systems.Stokes_Constrained(mesh, velocityField=vc, pressureField=pc) + con.constitutive_model = uw.constitutive_models.ViscousFlowModel + con.constitutive_model.Parameters.shear_viscosity_0 = MU + con.saddle_preconditioner = 1.0 / MU + con.bodyforce = buoy * unit_r + con.add_dirichlet_bc((0.0, 0.0), "Lower") + lam = con.add_constraint_bc("Upper", g=0.0, normal=unit_r) + con.tolerance = 1e-8 + con.petsc_options["ksp_type"] = "fgmres" + con.solve() + + return { + "mesh": mesh, "unit_r": unit_r, "theta": theta, + "ref": ref, "con": con, "lam": lam, + "v_ref": vp.data.copy(), "v_con": vc.data.copy(), + } + + +def test_multiplier_enforces_free_slip(solutions): + """u.n -> 0 on the curved boundary with NO penalty coefficient.""" + rms = _outer_rms_vn(solutions["con"], solutions["unit_r"]) + print(f"multiplier RMS(u.n) on outer = {rms:.3e}") + assert rms < 2.0e-4 + + +def test_multiplier_matches_penalty(solutions): + """Constrained velocity matches the penalty reference field.""" + v_ref, v_con = solutions["v_ref"], solutions["v_con"] + rel = np.sqrt(np.sum((v_ref - v_con) ** 2)) / np.sqrt(np.sum(v_ref**2)) + print(f"relL2(v_multiplier vs v_penalty) = {rel:.3e}") + assert rel < 0.01 + + +def test_multiplier_api(solutions): + """The multiplier and topography are retrievable through the public API.""" + con, lam = solutions["con"], solutions["lam"] + assert con.multiplier("Upper") is lam + assert con.multiplier("Nonexistent") is None + # topography is lambda / (Delta_rho g) + topo_expr = con.topography("Upper", buoyancy_scale=2.0) + assert topo_expr == lam.sym[0] / 2.0 + + +def test_topography_field_is_clean(solutions): + """Multiplier interior is exactly zero; only the boundary trace is non-zero.""" + lam = solutions["lam"] + c = lam.coords + rr = np.sqrt(c[:, 0] ** 2 + c[:, 1] ** 2) + interior = rr < R_OUTER - 0.6 * CELL + boundary = rr > R_OUTER - 0.25 * CELL + assert np.max(np.abs(lam.data[interior, 0])) == 0.0 + assert np.max(np.abs(lam.data[boundary, 0])) > 1.0 + + +def test_topography_matches_dynamic_topography_stress(solutions): + """lambda on the boundary equals the CBF normal stress -n.sigma.n (dyn. topo.).""" + con, lam = solutions["con"], solutions["lam"] + unit_r = solutions["unit_r"] + c = lam.coords + rr = np.sqrt(c[:, 0] ** 2 + c[:, 1] ** 2) + bmask = rr > R_OUTER - 0.25 * CELL + + sigma = con.stress + nsn = (unit_r * sigma * unit_r.T)[0, 0] + nsn_b = np.array(uw.function.evaluate(sympy.Matrix([[nsn]]), c[bmask])).reshape(-1) + lam_b = lam.data[bmask, 0] + + a = lam_b - lam_b.mean() + b = -(nsn_b - nsn_b.mean()) + corr = np.dot(a, b) / np.sqrt(np.dot(a, a) * np.dot(b, b)) + print(f"boundary corr(lambda, -n.sigma.n) = {corr:.4f}") + assert corr > 0.99 From 583068dc049482b54ec7aebbe2cffdbe8c2cc0f4 Mon Sep 17 00:00:00 2001 From: lmoresi Date: Fri, 5 Jun 2026 16:30:16 +0100 Subject: [PATCH 2/3] docs: record P'=[p,h] monolithic fieldsplit feasibility spike Confirmed at the PETSc level that a 3-field DM (u, p, h) with p and h grouped yields a 2-way u | [p,h] Schur split (no nested fieldsplit), de-risking the monolithic "arbitrary saddle-point constraints" direction. Captures the bounded assembly touch-points and remaining caveats for a future block-Schur PR. Underworld development team with AI support from Claude Code --- .../design/CONSTRAINED_FREESLIP_MULTIPLIER.md | 36 +++++++++++++++++++ 1 file changed, 36 insertions(+) diff --git a/docs/developer/design/CONSTRAINED_FREESLIP_MULTIPLIER.md b/docs/developer/design/CONSTRAINED_FREESLIP_MULTIPLIER.md index cff96928..3b168e07 100644 --- a/docs/developer/design/CONSTRAINED_FREESLIP_MULTIPLIER.md +++ b/docs/developer/design/CONSTRAINED_FREESLIP_MULTIPLIER.md @@ -194,6 +194,42 @@ evidence): the monolithic 3-field / co-dim-1 representation; 3D spherical shells space needing explicit removal); and live free-surface equilibrium integration (pass `λ` as the target normal-stress end-state). +## Monolithic `P'=[p,h]` fieldsplit — feasibility spike + +A future direction (and a general "inject arbitrary constraints into the saddle +point" capability): rather than a third field forcing a nested 3-way Schur, group +pressure and the multiplier into a composite `P' = [p, h]` and keep a **2-way +`u | P'`** split. + +**Spike result (confirmed):** a 3-field DM `(u, p, h)` on a real mesh, with the `p` +and `h` index sets grouped (`pc_fieldsplit_1_fields 1,2`, or an explicit +concatenated IS), produces exactly a 2-block `u | [p,h]` Schur fieldsplit +(block sizes 84 | 84 = u | (p+h) on a coarse test). The nested-Schur / +KSP-reconfiguration concern is therefore moot — the split structure is identical to +the current `u | p` solver. (`/tmp/spike_pph.py`.) + +**Remaining work (bounded, ~2 weeks of Cython, behind a subclass):** +- Register `h` as field 2 (one `dm.setField`). +- `h`-equation residual: boundary part `∫_Γ ψ(n·u − g)` (the existing Nitsche path + already registers field-1 boundary residuals — same pattern) plus a small interior + screening `ε∫_Ω h ψ` to de-singularise the interior `h` block. +- Three new Jacobian blocks: `uh`, `hu` (boundary integrals — the `UW_PetscDSSetBdJacobian` + machinery already does arbitrary field pairs for `up`/`pu`) and `hh` (interior mass); + `ph`/`hp` are zero. +- Group `[p,h]` in the fieldsplit and extend the Schur PC (`p` keeps its `1/μ` mass PC; + `h` gets the screening diagonal). +- The 3-IS field decomposition / nullspace path (currently assumes 2 fields). + +**Caveats:** the interior screening reintroduces a small `ε` (benign — it does not bias +the boundary multiplier); the work touches the validated `uu/up/pu/pp` assembly, so it +must live behind a subclass and be regression-tested. The one genuine PETSc limitation +remains a *co-dimension-1* `h` (boundary-only DOFs) — avoided here by the full-domain + +screening representation. + +**Recommendation:** the architecture is de-risked, but since the outer loop converges in +2–3 iterations with no user-facing tuning, monolithic is scheduled work (a general +saddle-point-constraint capability), not an urgent replacement. + ## Files - `src/underworld3/systems/solvers.py` — `SNES_Stokes_Constrained`, `_ConstraintBC`. From 43e2b7e9c3e592246eb795cac85f128ef9b70fe8 Mon Sep 17 00:00:00 2001 From: lmoresi Date: Fri, 5 Jun 2026 16:44:24 +0100 Subject: [PATCH 3/3] Address Copilot review on #219: guards for label, non-convergence, MPI - add_constraint_bc validates the boundary name and the 'UW_Boundaries' label, and rejects empty/None point sets (petsc_dm_find_labeled_points_local returns the vertex-0 sentinel np.array([0]) when the label is absent, which an `is not None` check would silently accept and corrupt the mask). - The outer loop no longer applies a final, never-solved multiplier update when the iteration cap is hit (u/p stay consistent with lambda) and now warns loudly (RuntimeWarning) on non-convergence. - Raise NotImplementedError when run on more than one MPI rank (the mask and the node-wise update are serial-only), instead of silently producing wrong results. Adds a test that add_constraint_bc rejects an unknown boundary name. Underworld development team with AI support from Claude Code --- src/underworld3/systems/solvers.py | 50 ++++++++++++++++++- .../test_1061_constrained_freeslip_annulus.py | 7 +++ 2 files changed, 55 insertions(+), 2 deletions(-) diff --git a/src/underworld3/systems/solvers.py b/src/underworld3/systems/solvers.py index d0f9fe6c..84873abc 100644 --- a/src/underworld3/systems/solvers.py +++ b/src/underworld3/systems/solvers.py @@ -2081,6 +2081,20 @@ def add_constraint_bc(self, boundary, g=0.0, normal=None, augmentation=None, The scalar multiplier field. After :meth:`solve`, its boundary trace is the normal traction (topography proxy). """ + # This proof-of-concept is serial only: the boundary mask construction + # and the node-wise multiplier update below are not MPI-decomposed. + if uw.mpi.size > 1: + raise NotImplementedError( + "SNES_Stokes_Constrained is serial-only (the boundary mask and " + "multiplier update are not MPI-safe). Run on a single rank." + ) + + if not hasattr(self.mesh.boundaries, boundary): + raise ValueError( + f"'{boundary}' is not a boundary of this mesh. " + f"Available: {[b.name for b in self.mesh.boundaries]}." + ) + if normal is None: normal = self.mesh.Gamma_P1 @@ -2122,14 +2136,26 @@ def add_constraint_bc(self, boundary, g=0.0, normal=None, augmentation=None, f"_bmarker_{self.instance_number}_{idx}", self.mesh, 1, degree=1, ) marker.data[:] = 0.0 + if not self.mesh.dm.hasLabel("UW_Boundaries"): + raise RuntimeError( + "Mesh has no 'UW_Boundaries' label; cannot build the constraint " + "boundary mask." + ) + # NB: petsc_dm_find_labeled_points_local returns np.array([0]) (vertex 0) + # when the label is absent and None when the value has no points, so an + # `is not None` check alone could silently mark vertex 0. The hasLabel + # guard above plus the explicit empty/None check below close that gap. point_indices = petsc_dm_find_labeled_points_local( self.mesh.dm, "UW_Boundaries", getattr(self.mesh.boundaries, boundary).value, sectionIndex=False, ) - if point_indices is not None: - marker.data[point_indices] = 1.0 + if point_indices is None or len(point_indices) == 0: + raise ValueError( + f"Boundary '{boundary}' has no labelled points on this mesh." + ) + marker.data[point_indices] = 1.0 mask = ( np.array(uw.function.evaluate(marker.sym, lam.coords)).reshape(-1) > 0.75 ) @@ -2234,6 +2260,7 @@ def solve( ).reshape(-1) total_linear_its = 0 + all_converged = False for k in range(constraint_max_iterations): super().solve(zero_init_guess=(zero_init_guess and k == 0), **kwargs) try: @@ -2268,6 +2295,12 @@ def solve( uw.mpi.pprint(f"Constraint loop converged in {k + 1} iterations.") break + # On the final permitted iteration, do NOT apply another multiplier + # update: it would never be solved with, leaving u/p inconsistent + # with lambda. Stop here and warn loudly below instead. + if k == constraint_max_iterations - 1: + break + # Augmented-Lagrangian (ALG2) multiplier update, same r as the # forward-problem penalty augmentation. Monotone-convergent for r>0. # Restricted to boundary nodes so lambda stays a clean topography field. @@ -2275,6 +2308,19 @@ def solve( resid = self._nodal_constraint_residual(cbc) cbc.lam.data[cbc.mask, 0] += cbc.r_nodal[cbc.mask] * resid[cbc.mask] + if not all_converged: + import warnings + + warnings.warn( + f"Constrained Stokes solve did NOT converge: worst " + f"RMS(u.n-g) = {self.constraint_residual:.3e} after " + f"{self.constraint_iterations} iterations " + f"(constraint_max_iterations={constraint_max_iterations}). " + f"Increase constraint_max_iterations or the augmentation.", + RuntimeWarning, + stacklevel=2, + ) + return diff --git a/tests/test_1061_constrained_freeslip_annulus.py b/tests/test_1061_constrained_freeslip_annulus.py index 4fba5316..34c51869 100644 --- a/tests/test_1061_constrained_freeslip_annulus.py +++ b/tests/test_1061_constrained_freeslip_annulus.py @@ -110,6 +110,13 @@ def test_multiplier_api(solutions): assert topo_expr == lam.sym[0] / 2.0 +def test_constraint_bc_rejects_unknown_boundary(solutions): + """add_constraint_bc validates the boundary name up front.""" + con = solutions["con"] + with pytest.raises(ValueError): + con.add_constraint_bc("Nonexistent") + + def test_topography_field_is_clean(solutions): """Multiplier interior is exactly zero; only the boundary trace is non-zero.""" lam = solutions["lam"]