From 907747787eceb6f7cdc2d436ac1298566e942048 Mon Sep 17 00:00:00 2001 From: Tyagi Date: Tue, 28 Apr 2026 15:35:30 +1000 Subject: [PATCH 1/7] Fix XDMF mesh topology output Write visualization timesteps with PETSc HDF5_VIZ so generated XDMF references /viz/topology/cells rather than checkpoint-style DMPlex topology. Keep write_checkpoint on HDF5_PETSC for restart files. Harden checkpoint_xdmf against invalid topology datasets, use integer topology precision from the HDF5 dataset, and write mesh variable DataItems with their direct dataset dimensions instead of HyperSlab wrappers. Add regression tests covering write_timestep visualization topology, write_checkpoint restart topology, and 3D XDMF references. --- .../discretisation/discretisation_mesh.py | 79 ++++++++++++------- tests/test_0005_xdmf_compat.py | 66 ++++++++++++++++ 2 files changed, 118 insertions(+), 27 deletions(-) diff --git a/src/underworld3/discretisation/discretisation_mesh.py b/src/underworld3/discretisation/discretisation_mesh.py index da6020f1..dc01bd22 100644 --- a/src/underworld3/discretisation/discretisation_mesh.py +++ b/src/underworld3/discretisation/discretisation_mesh.py @@ -2775,11 +2775,11 @@ def write_timestep( mesh_file = output_base_name + ".mesh.00000.h5" path = Path(mesh_file) if not path.is_file(): - self.write(mesh_file) + self.write(mesh_file, petsc_format=False) else: mesh_file = output_base_name + f".mesh.{index:05}.h5" - self.write(mesh_file) + self.write(mesh_file, petsc_format=False) if create_xdmf: _write_mesh_viz_groups(self, mesh_file) @@ -2960,10 +2960,10 @@ def _write_checkpoint_file(checkpoint_file, variables): mesh_file = filename + f".mesh.{index:05}.h5" path = Path(mesh_file) if not path.is_file(): - self.write(mesh_file) + self.write(mesh_file, petsc_format=True) else: - self.write(filename + f".mesh.{index:05}.h5") + self.write(filename + f".mesh.{index:05}.h5", petsc_format=True) variables = [] if meshVars is not None: @@ -3082,7 +3082,12 @@ def apply_snapshot_payload(self, payload: dict) -> None: self._stale_lvec = True @timing.routine_timer_decorator - def write(self, filename: str, index: Optional[int] = None): + def write( + self, + filename: str, + index: Optional[int] = None, + petsc_format: bool = False, + ): """ Save mesh data to the specified hdf5 file. @@ -3094,11 +3099,13 @@ def write(self, filename: str, index: Optional[int] = None): index : Not yet implemented. An optional index which might correspond to the timestep (for example). + petsc_format : + If True, write PETSc DMPlex HDF5 checkpoint/restart topology. + If False, write PETSc HDF5_VIZ topology used by XDMF. """ - viewer = PETSc.ViewerHDF5().create(filename, "w", comm=PETSc.COMM_WORLD) - if index: + if index is not None: raise RuntimeError("Recording `index` not currently supported") ## JM:To enable timestep recording, the following needs to be called. ## I'm unsure if the corresponding xdmf functionality is enabled via @@ -3106,7 +3113,13 @@ def write(self, filename: str, index: Optional[int] = None): # viewer.pushTimestepping(viewer) # viewer.setTimestep(index) - viewer.pushFormat(PETSc.Viewer.Format.HDF5_PETSC) + viewer_format = ( + PETSc.Viewer.Format.HDF5_PETSC + if petsc_format + else PETSc.Viewer.Format.HDF5_VIZ + ) + viewer = PETSc.ViewerHDF5().create(filename, "w", comm=PETSc.COMM_WORLD) + viewer.pushFormat(viewer_format) try: viewer(self.dm) finally: @@ -4999,20 +5012,42 @@ def checkpoint_xdmf( geomPath = "geometry" geom = h5["geometry"] - if "viz" in h5 and "topology" in h5["viz"]: + if "viz" in h5 and "topology" in h5["viz"] and "cells" in h5["viz"]["topology"]: topoPath = "viz/topology" topo = h5["viz"]["topology"] - else: + elif "topology" in h5 and "cells" in h5["topology"]: topoPath = "topology" topo = h5["topology"] + else: + h5.close() + raise RuntimeError( + f"Cannot generate XDMF for {mesh_filename}: no direct cell " + "connectivity dataset found at /viz/topology/cells." + ) vertices = geom["vertices"] numVertices = vertices.shape[0] spaceDim = vertices.shape[1] cells = topo["cells"] + if len(cells.shape) != 2: + h5.close() + raise RuntimeError( + f"Cannot generate XDMF for {mesh_filename}: {topoPath}/cells has " + f"shape {cells.shape}. XDMF requires a 2D direct cell-to-vertex " + "connectivity dataset." + ) numCells = cells.shape[0] numCorners = cells.shape[1] cellDim = topo["cells"].attrs["cell_dim"] + topology_precision = cells.dtype.itemsize + + if numCorners <= 1: + h5.close() + raise RuntimeError( + f"Cannot generate XDMF for {mesh_filename}: {topoPath}/cells has " + f"shape {cells.shape}. XDMF requires direct cell-to-vertex " + "connectivity, not PETSc DMPlex internal topology." + ) if topoPath == "topology": warnings.warn( @@ -5078,7 +5113,7 @@ def checkpoint_xdmf( &MeshData;:/{topoPath}/cells @@ -5166,27 +5201,17 @@ def get_field_info(h5_filename, mesh_var, center): else: variable_type = "Vector" + data_dimensions = f"{numItems}" if numComponents == 1 else f"{numItems} {numComponents}" var_attribute = f""" - - - 0 0 0 - 1 1 1 - 1 {numItems} {numComponents} - - - &{var.clean_name+"_Data"};:/{dataset_path} - + + &{var.clean_name+"_Data"};:/{dataset_path} """ diff --git a/tests/test_0005_xdmf_compat.py b/tests/test_0005_xdmf_compat.py index d0222750..5a72b045 100644 --- a/tests/test_0005_xdmf_compat.py +++ b/tests/test_0005_xdmf_compat.py @@ -132,6 +132,59 @@ def test_xdmf_compat_2d(tmp_path): del mesh +def test_write_timestep_mesh_uses_viz_topology(tmp_path): + """write_timestep mesh output keeps direct topology for XDMF readers.""" + + mesh = uw.meshing.StructuredQuadBox(elementRes=(4, 4)) + s_var = uw.discretisation.MeshVariable("s", mesh, 1, degree=1) + s_var.data[:, 0] = mesh._coords[:, 0] + + mesh.write_timestep( + "viztopo", index=0, outputPath=str(tmp_path), meshVars=[s_var] + ) + + mesh_h5 = os.path.join(str(tmp_path), "viztopo.mesh.00000.h5") + xdmf_file = os.path.join(str(tmp_path), "viztopo.mesh.00000.xdmf") + + with h5py.File(mesh_h5, "r") as h5f: + assert "viz/topology/cells" in h5f + cells = h5f["viz/topology/cells"] + assert len(cells.shape) == 2 + assert cells.shape[1] > 1 + + with open(xdmf_file, "r") as f: + xdmf_text = f.read() + assert "&MeshData;:/viz/topology/cells" in xdmf_text + assert "&MeshData;:/topology/cells" not in xdmf_text + assert 'ItemType="HyperSlab"' not in xdmf_text + + del mesh + + +def test_write_checkpoint_mesh_uses_petsc_topology(tmp_path): + """write_checkpoint keeps PETSc DMPlex topology for restart output.""" + + mesh = uw.meshing.StructuredQuadBox(elementRes=(4, 4)) + s_var = uw.discretisation.MeshVariable("s", mesh, 1, degree=1) + s_var.data[:, 0] = mesh._coords[:, 0] + + mesh.write_checkpoint( + "restart", + outputPath=str(tmp_path), + meshUpdates=True, + meshVars=[s_var], + index=0, + ) + + mesh_h5 = os.path.join(str(tmp_path), "restart.mesh.00000.h5") + + with h5py.File(mesh_h5, "r") as h5f: + assert "topologies/uw_mesh/topology" in h5f + assert "viz/topology/cells" not in h5f + + del mesh + + # --------------------------------------------------------------------------- # Test: 3D mesh # --------------------------------------------------------------------------- @@ -164,6 +217,19 @@ def test_xdmf_compat_3d(tmp_path): xdmf_file = os.path.join(str(tmp_path), "test3d.mesh.00000.xdmf") _check_xdmf_refs(xdmf_file, str(tmp_path)) + mesh_h5 = os.path.join(str(tmp_path), "test3d.mesh.00000.h5") + with h5py.File(mesh_h5, "r") as h5f: + assert "viz/topology/cells" in h5f + cells = h5f["viz/topology/cells"] + assert len(cells.shape) == 2 + assert cells.shape[1] > 1 + + with open(xdmf_file, "r") as f: + xdmf_text = f.read() + assert "&MeshData;:/viz/topology/cells" in xdmf_text + assert "&MeshData;:/topology/cells" not in xdmf_text + assert f'Dimensions="{s_compat.shape[0]}"' in xdmf_text + del mesh From 47719277ab6d85a215056ed11df7d58a29bceda5 Mon Sep 17 00:00:00 2001 From: Tyagi Date: Wed, 29 Apr 2026 00:57:44 +1000 Subject: [PATCH 2/7] Keep restart topology in timestep mesh output Restore write_timestep mesh files to PETSc's default HDF5 layout instead of forcing HDF5_VIZ. This preserves labels and internal DMPlex restart datasets such as /topology/cells, /topology/cones, /topology/order, and /topology/orientation in output.mesh.00000.h5. Keep the XDMF writer guarded so visualization still references /viz/topology/cells and never the internal PETSc /topology/cells connectivity. This keeps ParaView-compatible direct cell connectivity while retaining restart-like mesh contents. Update the XDMF compatibility regression test to require both restart-style topology datasets and the /viz/topology/cells dataset from write_timestep output. --- .../discretisation/discretisation_mesh.py | 29 +++++++++++-------- tests/test_0005_xdmf_compat.py | 7 +++-- 2 files changed, 22 insertions(+), 14 deletions(-) diff --git a/src/underworld3/discretisation/discretisation_mesh.py b/src/underworld3/discretisation/discretisation_mesh.py index dc01bd22..f43265e9 100644 --- a/src/underworld3/discretisation/discretisation_mesh.py +++ b/src/underworld3/discretisation/discretisation_mesh.py @@ -2775,11 +2775,11 @@ def write_timestep( mesh_file = output_base_name + ".mesh.00000.h5" path = Path(mesh_file) if not path.is_file(): - self.write(mesh_file, petsc_format=False) + self.write(mesh_file) else: mesh_file = output_base_name + f".mesh.{index:05}.h5" - self.write(mesh_file, petsc_format=False) + self.write(mesh_file) if create_xdmf: _write_mesh_viz_groups(self, mesh_file) @@ -3086,7 +3086,7 @@ def write( self, filename: str, index: Optional[int] = None, - petsc_format: bool = False, + petsc_format: Optional[bool] = None, ): """ Save mesh data to the specified hdf5 file. @@ -3100,8 +3100,11 @@ def write( Not yet implemented. An optional index which might correspond to the timestep (for example). petsc_format : - If True, write PETSc DMPlex HDF5 checkpoint/restart topology. - If False, write PETSc HDF5_VIZ topology used by XDMF. + If True, force PETSc DMPlex HDF5 checkpoint/restart topology. + If False, force PETSc HDF5_VIZ topology only. + If None, use PETSc's default HDF5 layout, which includes the + restart-style topology and labels as well as visualization + topology for XDMF. """ @@ -3113,17 +3116,19 @@ def write( # viewer.pushTimestepping(viewer) # viewer.setTimestep(index) - viewer_format = ( - PETSc.Viewer.Format.HDF5_PETSC - if petsc_format - else PETSc.Viewer.Format.HDF5_VIZ - ) viewer = PETSc.ViewerHDF5().create(filename, "w", comm=PETSc.COMM_WORLD) - viewer.pushFormat(viewer_format) try: + if petsc_format is not None: + viewer_format = ( + PETSc.Viewer.Format.HDF5_PETSC + if petsc_format + else PETSc.Viewer.Format.HDF5_VIZ + ) + viewer.pushFormat(viewer_format) viewer(self.dm) finally: - viewer.popFormat() + if petsc_format is not None: + viewer.popFormat() viewer.destroy() ## Add boundary metadata to the file diff --git a/tests/test_0005_xdmf_compat.py b/tests/test_0005_xdmf_compat.py index 5a72b045..646e912b 100644 --- a/tests/test_0005_xdmf_compat.py +++ b/tests/test_0005_xdmf_compat.py @@ -132,8 +132,8 @@ def test_xdmf_compat_2d(tmp_path): del mesh -def test_write_timestep_mesh_uses_viz_topology(tmp_path): - """write_timestep mesh output keeps direct topology for XDMF readers.""" +def test_write_timestep_mesh_keeps_restart_and_viz_topology(tmp_path): + """write_timestep mesh output keeps restart data plus XDMF topology.""" mesh = uw.meshing.StructuredQuadBox(elementRes=(4, 4)) s_var = uw.discretisation.MeshVariable("s", mesh, 1, degree=1) @@ -147,6 +147,9 @@ def test_write_timestep_mesh_uses_viz_topology(tmp_path): xdmf_file = os.path.join(str(tmp_path), "viztopo.mesh.00000.xdmf") with h5py.File(mesh_h5, "r") as h5f: + assert "labels" in h5f + assert "topology/cells" in h5f + assert "topology/cones" in h5f assert "viz/topology/cells" in h5f cells = h5f["viz/topology/cells"] assert len(cells.shape) == 2 From 5cd8a1f83de787cb3d074ffcf34a606346fe5521 Mon Sep 17 00:00:00 2001 From: Tyagi Date: Sat, 30 May 2026 04:29:21 +0800 Subject: [PATCH 3/7] Use PETSc native XDMF viz topology --- .../discretisation/discretisation_mesh.py | 117 ------------------ .../test_0005_xdmf_viz_topology_mpi.py | 80 ++++++++++++ tests/test_0005_xdmf_compat.py | 20 +-- 3 files changed, 91 insertions(+), 126 deletions(-) create mode 100644 tests/parallel/test_0005_xdmf_viz_topology_mpi.py diff --git a/src/underworld3/discretisation/discretisation_mesh.py b/src/underworld3/discretisation/discretisation_mesh.py index f43265e9..b1b79678 100644 --- a/src/underworld3/discretisation/discretisation_mesh.py +++ b/src/underworld3/discretisation/discretisation_mesh.py @@ -2781,9 +2781,6 @@ def write_timestep( mesh_file = output_base_name + f".mesh.{index:05}.h5" self.write(mesh_file) - if create_xdmf: - _write_mesh_viz_groups(self, mesh_file) - if meshVars is not None: for var in meshVars: save_location = output_base_name + f".mesh.{var.clean_name}.{index:05}.h5" @@ -4821,120 +4818,6 @@ def mesh_update_callback(array, change_context): return -## This is a temporary replacement for the PETSc xdmf generator -## Simplified to allow us to decide how we want to checkpoint - - -def _petsc_numbering_to_global_ids(numbering): - """Convert PETSc numbering entries to non-negative global ids.""" - - gids = numpy.asarray(numbering, dtype=numpy.int64).copy() - negative = gids < 0 - gids[negative] = -gids[negative] - 1 - return gids - - -def _local_viz_cell_connectivity(mesh): - """Return local cell-to-vertex connectivity in global vertex ids.""" - - dm = mesh.dm - pStart, pEnd = dm.getDepthStratum(0) - cStart, cEnd = dm.getHeightStratum(0) - vertex_numbering = dm.getVertexNumbering().getIndices() - vertex_gids = _petsc_numbering_to_global_ids(vertex_numbering) - cell_num_points = mesh.element.entities[mesh.dim] - - cell_points_list = [] - for cell_id in range(cStart, cEnd): - closure = dm.getTransitiveClosure(cell_id)[0] - # Filter closure to strictly retain true vertices - points = numpy.asarray( - [p for p in closure if pStart <= p < pEnd], - dtype=numpy.int64, - ) - if len(points) != cell_num_points: - raise RuntimeError(f"Expected {cell_num_points} vertices for cell {cell_id}, got {len(points)}.") - cell_points_list.append(vertex_gids[points - pStart]) - - if not cell_points_list: - return numpy.empty((0, cell_num_points), dtype=numpy.int64) - - if mesh.dim == 3: - if dm.isSimplex(): - reorder = [0, 2, 1, 3] - else: - reorder = [0, 3, 2, 1, 4, 5, 6, 7] - cell_points_list = [pts[reorder] for pts in cell_points_list] - - return numpy.asarray(cell_points_list, dtype=numpy.int64) - - -def _write_mesh_viz_groups(mesh, mesh_h5_path): - """Write ParaView-safe ``/viz`` geometry/topology groups into a mesh HDF5.""" - - import underworld3 as uw - dm = mesh.dm - pStart, pEnd = dm.getDepthStratum(0) - vertex_numbering = dm.getVertexNumbering().getIndices() - vertex_gids = _petsc_numbering_to_global_ids(vertex_numbering) - - coords_local = numpy.asarray(dm.getCoordinatesLocal().array, dtype=numpy.float64).reshape(-1, mesh.dim) - n_local_vertices = pEnd - pStart - if coords_local.shape[0] != n_local_vertices: - coords_local = numpy.asarray(mesh.X.coords, dtype=numpy.float64) - if coords_local.shape[0] != n_local_vertices: - raise RuntimeError( - f"Could not match local coordinate rows ({coords_local.shape[0]}) " - f"to DMPlex vertex count ({n_local_vertices}) for {mesh_h5_path}." - ) - - local_cells = _local_viz_cell_connectivity(mesh) - - # Gather GIDs and coordinates separately to prevent float64 upcasting of integer GIDs - gathered_gids = uw.mpi.comm.gather(vertex_gids, root=0) - gathered_coords = uw.mpi.comm.gather(coords_local, root=0) - gathered_cells = uw.mpi.comm.gather(local_cells, root=0) - uw.mpi.barrier() - - if uw.mpi.rank == 0: - import h5py - - gid_blocks = [block for block in gathered_gids if block is not None and block.size > 0] - coord_blocks = [block for block in gathered_coords if block is not None and block.size > 0] - cell_blocks = [block for block in gathered_cells if block is not None and block.size > 0] - - if gid_blocks: - all_gids = numpy.concatenate(gid_blocks) - all_coords = numpy.vstack(coord_blocks) - - # Vectorized deduplication (numpy.unique returns sorted unique elements) - ordered_gids, unique_indices = numpy.unique(all_gids, return_index=True) - ordered_vertices = all_coords[unique_indices] - else: - ordered_gids = numpy.empty((0,), dtype=numpy.int64) - ordered_vertices = numpy.empty((0, mesh.dim), dtype=numpy.float64) - - if cell_blocks: - all_cells = numpy.vstack(cell_blocks) - # Vectorized remapping using searchsorted (O(N log M) instead of Python dict lookup) - dense_cells = numpy.searchsorted(ordered_gids, all_cells) - else: - all_cells = numpy.empty((0, mesh.element.entities[mesh.dim]), dtype=numpy.int64) - dense_cells = numpy.empty_like(all_cells) - - with h5py.File(mesh_h5_path, "a") as h5: - if "viz" in h5: - del h5["viz"] - viz = h5.create_group("viz") - geom = viz.create_group("geometry") - topo = viz.create_group("topology") - geom.create_dataset("vertices", data=ordered_vertices) - topo_cells = topo.create_dataset("cells", data=dense_cells) - topo_cells.attrs["cell_dim"] = mesh.dim - - uw.mpi.barrier() - - def _write_compat_groups(mesh, var, var_h5_path): """Write ``/vertex_fields/`` or ``/cell_fields/`` compatibility groups. diff --git a/tests/parallel/test_0005_xdmf_viz_topology_mpi.py b/tests/parallel/test_0005_xdmf_viz_topology_mpi.py new file mode 100644 index 00000000..98635db4 --- /dev/null +++ b/tests/parallel/test_0005_xdmf_viz_topology_mpi.py @@ -0,0 +1,80 @@ +"""MPI regression tests for PETSc-native XDMF visualization topology.""" + +import os +import shutil +import tempfile + +import h5py +import pytest + +import underworld3 as uw + + +pytestmark = [ + pytest.mark.level_1, + pytest.mark.tier_a, + pytest.mark.mpi(min_size=2), + pytest.mark.timeout(60), +] + + +def _shared_tmpdir(prefix): + if uw.mpi.rank == 0: + tmpdir = tempfile.mkdtemp(prefix=prefix) + else: + tmpdir = None + return uw.mpi.comm.bcast(tmpdir, root=0) + + +def _cleanup_shared_tmpdir(tmpdir): + uw.mpi.comm.barrier() + if uw.mpi.rank == 0: + shutil.rmtree(tmpdir, ignore_errors=True) + + +def _check_mesh_output(out_dir, name): + mesh_h5 = os.path.join(out_dir, f"{name}.mesh.00000.h5") + xdmf_file = os.path.join(out_dir, f"{name}.mesh.00000.xdmf") + + if uw.mpi.rank == 0: + assert os.path.exists(mesh_h5), mesh_h5 + assert os.path.exists(xdmf_file), xdmf_file + + with h5py.File(mesh_h5, "r") as h5f: + assert "labels" in h5f + assert "topology/cells" in h5f + assert "topology/cones" in h5f + assert "geometry/vertices" in h5f + assert "viz/topology/cells" in h5f + + cells = h5f["viz/topology/cells"] + vertices = h5f["geometry/vertices"] + assert len(cells.shape) == 2 + assert cells.shape[1] > 1 + assert cells[:].min() >= 0 + assert cells[:].max() < vertices.shape[0] + + with open(xdmf_file, "r") as f: + xdmf_text = f.read() + assert "&MeshData;:/viz/topology/cells" in xdmf_text + assert "&MeshData;:/geometry/vertices" in xdmf_text + assert "&MeshData;:/topology/cells" not in xdmf_text + + +@pytest.mark.mpi(min_size=2) +def test_xdmf_viz_topology_parallel_2d_and_3d(): + """PETSc writes valid viz topology for timestep output under MPI.""" + + out_dir = _shared_tmpdir("uw3_xdmf_viz_topology_mpi_") + try: + mesh_2d = uw.meshing.StructuredQuadBox(elementRes=(3, 3)) + mesh_2d.write_timestep("test_topo_2d", index=0, outputPath=out_dir) + uw.mpi.comm.barrier() + _check_mesh_output(out_dir, "test_topo_2d") + + mesh_3d = uw.meshing.StructuredQuadBox(elementRes=(2, 2, 2)) + mesh_3d.write_timestep("test_topo_3d", index=0, outputPath=out_dir) + uw.mpi.comm.barrier() + _check_mesh_output(out_dir, "test_topo_3d") + finally: + _cleanup_shared_tmpdir(out_dir) diff --git a/tests/test_0005_xdmf_compat.py b/tests/test_0005_xdmf_compat.py index 646e912b..c3bcc22c 100644 --- a/tests/test_0005_xdmf_compat.py +++ b/tests/test_0005_xdmf_compat.py @@ -365,7 +365,7 @@ def test_tensor_variable_repacking(tmp_path): def test_xdmf_viz_topology_written_correctly(tmp_path): - """Verify that write_timestep correctly writes /viz/topology/cells and XDMF points to it.""" + """write_timestep uses PETSc-native viz topology and valid geometry.""" mesh = uw.meshing.StructuredQuadBox(elementRes=(3, 3)) @@ -376,13 +376,13 @@ def test_xdmf_viz_topology_written_correctly(tmp_path): assert _check_h5_group_exists(mesh_h5, "viz/topology/cells"), ( "Mesh HDF5 must contain the /viz/topology/cells dataset" ) - assert _check_h5_group_exists(mesh_h5, "viz/geometry/vertices"), ( - "Mesh HDF5 must contain the /viz/geometry/vertices dataset" + assert _check_h5_group_exists(mesh_h5, "geometry/vertices"), ( + "Mesh HDF5 must contain the /geometry/vertices dataset" ) # Validate cell-to-vertex connectivity bounds cells = _read_h5_dataset(mesh_h5, "viz/topology/cells") - vertices = _read_h5_dataset(mesh_h5, "viz/geometry/vertices") + vertices = _read_h5_dataset(mesh_h5, "geometry/vertices") num_vertices = vertices.shape[0] assert cells.max() < num_vertices, ( @@ -400,15 +400,15 @@ def test_xdmf_viz_topology_written_correctly(tmp_path): assert "/viz/topology/cells" in xdmf_content, ( "XDMF file should explicitly point to /viz/topology/cells" ) - assert "/viz/geometry/vertices" in xdmf_content, ( - "XDMF file should explicitly point to /viz/geometry/vertices" + assert "/geometry/vertices" in xdmf_content, ( + "XDMF file should explicitly point to /geometry/vertices" ) del mesh def test_xdmf_viz_topology_3d_written_correctly(tmp_path): - """Verify that write_timestep correctly writes /viz/topology/cells for 3D meshes.""" + """write_timestep uses PETSc-native viz topology for 3D meshes.""" mesh = uw.meshing.StructuredQuadBox(elementRes=(2, 2, 2)) @@ -422,7 +422,7 @@ def test_xdmf_viz_topology_3d_written_correctly(tmp_path): # Validate connectivity cells = _read_h5_dataset(mesh_h5, "viz/topology/cells") - vertices = _read_h5_dataset(mesh_h5, "viz/geometry/vertices") + vertices = _read_h5_dataset(mesh_h5, "geometry/vertices") num_vertices = vertices.shape[0] assert cells.max() < num_vertices, "Invalid 3D topology bounds" @@ -430,6 +430,8 @@ def test_xdmf_viz_topology_3d_written_correctly(tmp_path): xdmf_file = os.path.join(str(tmp_path), "test_topo_3d.mesh.00000.xdmf") assert os.path.exists(xdmf_file) with open(xdmf_file, "r") as f: - assert "/viz/topology/cells" in f.read() + xdmf_content = f.read() + assert "/viz/topology/cells" in xdmf_content + assert "/geometry/vertices" in xdmf_content del mesh From 4bd05789c162a599af8533a986ff8bf09fd75047 Mon Sep 17 00:00:00 2001 From: Tyagi Date: Sat, 30 May 2026 05:14:32 +0800 Subject: [PATCH 4/7] Unify timestep and PETSc reload output Add a petsc_reload option to Mesh.write_timestep so timestep variable files can include PETSc DMPlex section/vector metadata alongside the existing XDMF, /fields, and vertex/cell compatibility payloads. This lets the same per-variable HDF5 files support both read_timestep remapping and read_checkpoint reloads. Refactor PETSc reload writing into shared helpers and add create_xdmf support to write_checkpoint by routing through the unified timestep writer. Invalid option combinations for the timestep layout now fail explicitly. Update checkpoint/reload developer documentation and add roundtrip tests covering unified write_timestep output and write_checkpoint(create_xdmf=True). --- .../checkpoint-output-and-reload-methods.md | 89 +++++++++-- .../discretisation/discretisation_mesh.py | 139 +++++++++++++----- tests/test_0003_save_load.py | 78 ++++++++++ 3 files changed, 259 insertions(+), 47 deletions(-) diff --git a/docs/developer/subsystems/checkpoint-output-and-reload-methods.md b/docs/developer/subsystems/checkpoint-output-and-reload-methods.md index 7b9e3db0..3fc4498f 100644 --- a/docs/developer/subsystems/checkpoint-output-and-reload-methods.md +++ b/docs/developer/subsystems/checkpoint-output-and-reload-methods.md @@ -1,7 +1,10 @@ # Checkpoint Output And Reload Methods -UW3 currently has two output/reload workflows for mesh and mesh-variable data. -They serve different purposes and should not be treated as interchangeable. +UW3 has two reload workflows for mesh and mesh-variable data. Timestep output +can now include both payloads in the same files when requested: + +- XDMF/remap payloads for ParaView and `read_timestep()`. +- PETSc DMPlex section/vector payloads for `read_checkpoint()`. ## Method A: `write_timestep()` / `read_timestep()` @@ -15,6 +18,8 @@ mesh.write_timestep( index=0, outputPath=str(output_dir), meshVars=[velocity, pressure], + create_xdmf=True, + petsc_reload=False, ) velocity.read_timestep("output", "Velocity", 0, outputPath=str(output_dir)) @@ -36,16 +41,41 @@ uses coordinate-based remapping. In practice this means the target variable is filled by comparing target coordinates to source coordinates, using a KDTree or similar nearest-neighbour/remap process. +Set `petsc_reload=True` to add PETSc DMPlex section/vector metadata to the same +per-variable timestep files: + +```python +mesh.write_timestep( + "output", + index=0, + outputPath=str(output_dir), + meshVars=[velocity, pressure], + create_xdmf=True, + petsc_reload=True, +) + +velocity.read_checkpoint( + output_dir / "output.mesh.Velocity.00000.h5", + data_name="Velocity", +) +``` + +With both `create_xdmf=True` and `petsc_reload=True`, the same variable file can +be used by `read_timestep()` for coordinate/KDTree remapping and by +`read_checkpoint()` for exact PETSc-native reload. + ### Advantages - Produces XDMF/HDF5 files suitable for visualisation workflows. - Can remap data onto a different mesh or a different node layout. - Useful for postprocessing where exact finite-element section identity is not required. +- Can also be made PETSc-reloadable with `petsc_reload=True`. ### Disadvantages -- Reload is not an exact PETSc FE-vector restart path. +- Reload is not an exact PETSc FE-vector restart path unless + `petsc_reload=True` is used and the field is loaded with `read_checkpoint()`. - The KDTree/remap step can be memory-heavy for large meshes. - At high MPI counts, remap memory can dominate postprocessing memory use. - Discontinuous fields and high-order fields rely on coordinate remap behavior @@ -63,6 +93,7 @@ mesh.write_checkpoint( index=0, outputPath=str(output_dir), meshVars=[velocity, pressure], + create_xdmf=False, ) ``` @@ -109,6 +140,26 @@ the section/vector metadata required to reconstruct finite-element vectors. Reloading uses PETSc DMPlex topology, section, vector, and `PetscSF` metadata. It does not use KDTree coordinate remapping. +Set `create_xdmf=True` to route through the unified timestep writer. This writes +XDMF/remap payloads and PETSc reload payloads together, using the timestep file +layout: + +```python +mesh.write_checkpoint( + "checkout", + index=0, + outputPath=str(output_dir), + meshVars=[velocity, pressure], + create_xdmf=True, +) +``` + +The variable files are then named +`checkout.mesh...h5` rather than +`checkout...h5`. Because this mode uses the timestep file +layout, it does not support `unique_id=True` or +`separate_variable_files=False`. + ### Advantages - Exact FE-vector reload path for restart and postprocessing. @@ -121,8 +172,9 @@ It does not use KDTree coordinate remapping. ### Disadvantages -- Does not write XDMF. -- Does not write `/vertex_fields/...` visualisation datasets. +- Does not write XDMF unless `create_xdmf=True`. +- Does not write `/vertex_fields/...` visualisation datasets unless + `create_xdmf=True`. - Assumes the checkpoint mesh and variable checkpoint files are used together. - Different-rank reload should be validated for each workflow before relying on it in production. @@ -133,18 +185,32 @@ It does not use KDTree coordinate remapping. | --- | --- | | ParaView/XDMF visualisation | `write_timestep()` | | Flexible remap onto another mesh | `write_timestep()` | -| Exact restart/postprocessing | `write_checkpoint()` | +| Exact restart/postprocessing | `write_timestep(..., petsc_reload=True)` or `write_checkpoint()` | | Large spherical benchmark metric evaluation | `write_checkpoint()` | | Avoid KDTree memory growth at high MPI counts | `write_checkpoint()` | -It is valid for production scripts to write both: +For unified output, write both payload families in one call: + +```python +mesh.write_timestep( + "output", + index=0, + outputPath=str(output_dir), + meshVars=[v, p], + create_xdmf=True, + petsc_reload=True, +) +``` + +It is also valid for production scripts to keep separate visualisation and +restart outputs: ```python mesh.write_timestep("output", index=0, outputPath=str(output_dir), meshVars=[v, p]) mesh.write_checkpoint("checkout", index=0, outputPath=str(output_dir), meshVars=[v, p]) ``` -The first output is for visualisation. The second output is for restart or +The first output is for visualisation/remap. The second output is for restart or metrics-from-checkpoint postprocessing. ## Spherical Benchmark Evidence @@ -200,8 +266,11 @@ reuse the old `read_timestep()` remap path. For production benchmark workflows: - run the solve stage first -- write `write_timestep()` output if visualisation files are needed -- write `write_checkpoint()` output for restart/postprocessing +- use `write_timestep(..., create_xdmf=True, petsc_reload=True)` if one unified + file family should support visualisation, remap, and PETSc-native reload +- alternatively, write `write_timestep()` output for visualisation and + `write_checkpoint()` output for restart/postprocessing as separate file + families - exit before metric evaluation - run a second metrics-from-checkpoint job - reload mesh from `.mesh..h5` diff --git a/src/underworld3/discretisation/discretisation_mesh.py b/src/underworld3/discretisation/discretisation_mesh.py index b1b79678..ed904ce1 100644 --- a/src/underworld3/discretisation/discretisation_mesh.py +++ b/src/underworld3/discretisation/discretisation_mesh.py @@ -2731,20 +2731,28 @@ def write_timestep( swarmVars: Optional[list] = [], meshUpdates: bool = False, create_xdmf: bool = True, + petsc_reload: bool = False, ): """ - Write mesh and selected variables for visualisation output. + Write mesh and selected variables for timestep output. This writes: - one mesh HDF5 file (shared/static or per-step, depending on ``meshUpdates``) - one HDF5 file per mesh variable - optional proxy files for swarm variables - optional XDMF file linking all output files + - optional PETSc DMPlex section/vector metadata for exact reload When ``create_xdmf=True`` (the default), variable files also include ParaView-compatible groups (``/vertex_fields`` or ``/cell_fields``), and an XDMF file is generated on rank 0. + When ``petsc_reload=True``, variable files also include PETSc DMPlex + section/vector metadata. Those files can then be loaded by + ``MeshVariable.read_checkpoint()`` for exact same-mesh reload, while + still being readable by ``MeshVariable.read_timestep()`` for + coordinate/KDTree remapping if ``create_xdmf`` output was also written. + """ options = PETSc.Options() options.setValue("viewer_hdf5_sp_output", True) @@ -2781,17 +2789,25 @@ def write_timestep( mesh_file = output_base_name + f".mesh.{index:05}.h5" self.write(mesh_file) + variables = [] if meshVars is not None: for var in meshVars: save_location = output_base_name + f".mesh.{var.clean_name}.{index:05}.h5" var.write(save_location) if create_xdmf: _write_compat_groups(self, var, save_location) + variables.append((var, save_location)) if swarmVars is not None: for svar in swarmVars: save_location = output_base_name + f".proxy.{svar.clean_name}.{index:05}.h5" svar.write_proxy(save_location) + if petsc_reload: + variables.append((svar._meshVar, save_location)) + + if petsc_reload: + for var, save_location in variables: + self._write_petsc_reload_file(save_location, [var], mode="a") if create_xdmf and uw.mpi.rank == 0: checkpoint_xdmf( @@ -2863,6 +2879,48 @@ def petsc_save_checkpoint( create_xdmf=True, ) + def _write_petsc_reload_variable(self, viewer, var): + """Write one variable's PETSc DMPlex reload metadata to ``viewer``.""" + + if var._lvec is None: + var._set_vec(available=True) + + iset, subdm = self.dm.createSubDM(var.field_id) + subdm.setName(var.clean_name) + old_lvec_name = var._lvec.getName() + + try: + var._lvec.setName(var.clean_name) + self.dm.sectionView(viewer, subdm) + self.dm.localVectorView(viewer, subdm, var._lvec) + finally: + var._lvec.setName(old_lvec_name) + iset.destroy() + subdm.destroy() + + def _write_petsc_reload_file(self, checkpoint_file, variables, mode="w"): + """Write PETSc DMPlex section/vector reload metadata.""" + + old_dm_name = self.dm.getName() + self.dm.setName("uw_mesh") + + viewer = PETSc.ViewerHDF5().create( + checkpoint_file, mode, comm=PETSc.COMM_WORLD + ) + viewer.pushFormat(PETSc.Viewer.Format.HDF5_PETSC) + try: + self.dm.sectionView(viewer, self.dm) + + for var in variables: + self._write_petsc_reload_variable(viewer, var) + + uw.mpi.barrier() + finally: + viewer.popFormat() + viewer.destroy() + if old_dm_name is not None: + self.dm.setName(old_dm_name) + @timing.routine_timer_decorator def write_checkpoint( self, @@ -2874,12 +2932,15 @@ def write_checkpoint( index: Optional[int] = 0, unique_id: Optional[bool] = False, separate_variable_files: bool = True, + create_xdmf: bool = False, ): """Write PETSc DMPlex checkpoint files for restart/postprocessing. Checkpoint output stores PETSc DMPlex section/vector metadata required - for exact parallel reload. Unlike ``write_timestep()``, this is restart - output and does not write XDMF or vertex-field visualisation datasets. + for exact parallel reload. By default this is restart output and does + not write XDMF or vertex-field visualisation datasets. Use + ``create_xdmf=True`` to write unified timestep-style output that also + contains PETSc reload metadata. Parameters ---------- @@ -2901,49 +2962,49 @@ def write_checkpoint( If ``True`` (default), write one file per variable: ``...h5``. If ``False``, write all variables into one file: ``.checkpoint..h5``. + create_xdmf + If ``True``, route through ``write_timestep()`` and write XDMF, + vertex/cell compatibility groups, coordinate/KDTree remap data, + and PETSc reload metadata. The output uses the timestep filename + convention ``.mesh...h5``. This mode does + not support ``unique_id=True`` or ``separate_variable_files=False``. """ if outputPath: filename = os.path.join(outputPath, filename) + if create_xdmf: + if unique_id: + raise RuntimeError( + "write_checkpoint(create_xdmf=True) uses write_timestep() " + "layout and does not support unique_id=True." + ) + if not separate_variable_files: + raise RuntimeError( + "write_checkpoint(create_xdmf=True) uses per-variable " + "timestep files and does not support " + "separate_variable_files=False." + ) + output_dir = os.path.dirname(filename) + output_name = os.path.basename(filename) + self.write_timestep( + output_name, + index=index, + outputPath=output_dir, + meshVars=meshVars, + swarmVars=swarmVars, + meshUpdates=meshUpdates, + create_xdmf=True, + petsc_reload=True, + ) + return + def _checkpoint_filename(var_name=None): variable_part = f".{var_name}" if var_name is not None else ".checkpoint" if unique_id: return filename + f"{uw.mpi.unique}{variable_part}.{index:05}.h5" return filename + f"{variable_part}.{index:05}.h5" - def _write_variable(viewer, var): - if var._lvec is None: - var._set_vec(available=True) - - iset, subdm = self.dm.createSubDM(var.field_id) - subdm.setName(var.clean_name) - old_lvec_name = var._lvec.getName() - - try: - var._lvec.setName(var.clean_name) - self.dm.sectionView(viewer, subdm) - self.dm.localVectorView(viewer, subdm, var._lvec) - finally: - var._lvec.setName(old_lvec_name) - iset.destroy() - subdm.destroy() - - def _write_checkpoint_file(checkpoint_file, variables): - viewer = PETSc.ViewerHDF5().create(checkpoint_file, "w", comm=PETSc.COMM_WORLD) - viewer.pushFormat(PETSc.Viewer.Format.HDF5_PETSC) - try: - # Store the parallel-mesh section information for restoring the checkpoint. - self.dm.sectionView(viewer, self.dm) - - for var in variables: - _write_variable(viewer, var) - - uw.mpi.barrier() # should not be required - finally: - viewer.popFormat() - viewer.destroy() - old_dm_name = self.dm.getName() self.dm.setName("uw_mesh") @@ -2970,9 +3031,13 @@ def _write_checkpoint_file(checkpoint_file, variables): if separate_variable_files: for var in variables: - _write_checkpoint_file(_checkpoint_filename(var.clean_name), [var]) + self._write_petsc_reload_file( + _checkpoint_filename(var.clean_name), [var], mode="w" + ) else: - _write_checkpoint_file(_checkpoint_filename(), variables) + self._write_petsc_reload_file( + _checkpoint_filename(), variables, mode="w" + ) finally: if old_dm_name is not None: self.dm.setName(old_dm_name) diff --git a/tests/test_0003_save_load.py b/tests/test_0003_save_load.py index 30d3fcbc..a3e453e0 100644 --- a/tests/test_0003_save_load.py +++ b/tests/test_0003_save_load.py @@ -2,6 +2,7 @@ # All tests in this module are quick core tests pytestmark = pytest.mark.level_1 +import h5py import numpy as np from pathlib import Path @@ -140,6 +141,83 @@ def assert_reloaded_fields(x_reloaded, u_reloaded, d_reloaded): assert_reloaded_fields(x_reloaded, u_reloaded, d_reloaded) +def test_timestep_with_petsc_reload_roundtrip(tmp_path): + import underworld3 as uw + from underworld3.meshing import UnstructuredSimplexBox + + tmp_path = _shared_tmp_path(tmp_path, uw) + + mesh = UnstructuredSimplexBox( + minCoords=(0.0, 0.0), + maxCoords=(1.0, 1.0), + cellSize=1.0 / 8.0, + ) + + x = uw.discretisation.MeshVariable("x", mesh, 1, degree=1) + x.data[:, 0] = x.coords[:, 0] + 2.0 * x.coords[:, 1] + + mesh.write_timestep( + "unified", + index=0, + outputPath=str(tmp_path), + meshVars=[x], + create_xdmf=True, + petsc_reload=True, + ) + + var_file = tmp_path / "unified.mesh.x.00000.h5" + if uw.mpi.rank == 0: + with h5py.File(var_file, "r") as h5f: + assert "fields/x" in h5f + assert "fields/coordinates" in h5f + assert "vertex_fields/x_x" in h5f + assert "topologies/uw_mesh/dms/x/section" in h5f + assert "topologies/uw_mesh/dms/x/vecs/x" in h5f + uw.mpi.barrier() + + mesh_reloaded = uw.discretisation.Mesh(f"{tmp_path}/unified.mesh.00000.h5") + x_checkpoint = uw.discretisation.MeshVariable("x", mesh_reloaded, 1, degree=1) + x_checkpoint.read_checkpoint(str(var_file), data_name="x") + np.testing.assert_allclose( + x_checkpoint.data[:, 0], + x_checkpoint.coords[:, 0] + 2.0 * x_checkpoint.coords[:, 1], + atol=1.0e-12, + ) + + x_timestep = uw.discretisation.MeshVariable("x_timestep", mesh, 1, degree=1) + x_timestep.read_timestep("unified", "x", 0, outputPath=str(tmp_path)) + np.testing.assert_allclose(x_timestep.data[:, 0], x.data[:, 0], atol=1.0e-12) + + mesh.write_checkpoint( + "unified_checkpoint", + index=0, + outputPath=str(tmp_path), + meshVars=[x], + create_xdmf=True, + ) + + checkpoint_var_file = tmp_path / "unified_checkpoint.mesh.x.00000.h5" + checkpoint_xdmf_file = tmp_path / "unified_checkpoint.mesh.00000.xdmf" + if uw.mpi.rank == 0: + assert checkpoint_xdmf_file.is_file() + with h5py.File(checkpoint_var_file, "r") as h5f: + assert "fields/x" in h5f + assert "vertex_fields/x_x" in h5f + assert "topologies/uw_mesh/dms/x/section" in h5f + uw.mpi.barrier() + + mesh_reloaded = uw.discretisation.Mesh( + f"{tmp_path}/unified_checkpoint.mesh.00000.h5" + ) + x_checkpoint = uw.discretisation.MeshVariable("x", mesh_reloaded, 1, degree=1) + x_checkpoint.read_checkpoint(str(checkpoint_var_file), data_name="x") + np.testing.assert_allclose( + x_checkpoint.data[:, 0], + x_checkpoint.coords[:, 0] + 2.0 * x_checkpoint.coords[:, 1], + atol=1.0e-12, + ) + + def test_swarm_save_and_load(tmp_path): import underworld3 as uw from underworld3.meshing import UnstructuredSimplexBox From 5ab5cb4081644319207db5bee26645a6b0eca608 Mon Sep 17 00:00:00 2001 From: Tyagi Date: Sat, 30 May 2026 12:17:47 +0530 Subject: [PATCH 5/7] Warn that write_checkpoint is compatibility API Emit a visible FutureWarning from Mesh.write_checkpoint() so new code is directed to the unified write_timestep(..., petsc_reload=True) API. Update checkpoint/reload documentation to present write_timestep() as the standard output method and write_checkpoint() as a legacy compatibility wrapper. Adjust tests to assert the warning on compatibility calls. --- .../checkpoint-output-and-reload-methods.md | 94 +++++++++++++------ .../discretisation/discretisation_mesh.py | 24 +++-- tests/test_0003_save_load.py | 47 +++++----- tests/test_0005_xdmf_compat.py | 15 +-- 4 files changed, 117 insertions(+), 63 deletions(-) diff --git a/docs/developer/subsystems/checkpoint-output-and-reload-methods.md b/docs/developer/subsystems/checkpoint-output-and-reload-methods.md index 3fc4498f..fa37072d 100644 --- a/docs/developer/subsystems/checkpoint-output-and-reload-methods.md +++ b/docs/developer/subsystems/checkpoint-output-and-reload-methods.md @@ -1,14 +1,17 @@ # Checkpoint Output And Reload Methods -UW3 has two reload workflows for mesh and mesh-variable data. Timestep output -can now include both payloads in the same files when requested: +`write_timestep()` is the standard mesh and mesh-variable output API. It can +write either or both output payloads: - XDMF/remap payloads for ParaView and `read_timestep()`. - PETSc DMPlex section/vector payloads for `read_checkpoint()`. -## Method A: `write_timestep()` / `read_timestep()` +`write_checkpoint()` is retained as a compatibility wrapper for older scripts, +but new code should use `write_timestep(..., petsc_reload=True)`. -This is the visualisation and flexible remap workflow. +## Standard API + +This is the visualisation and flexible remap workflow by default. Example: @@ -64,6 +67,19 @@ With both `create_xdmf=True` and `petsc_reload=True`, the same variable file can be used by `read_timestep()` for coordinate/KDTree remapping and by `read_checkpoint()` for exact PETSc-native reload. +For PETSc reload output without XDMF files, use: + +```python +mesh.write_timestep( + "restart", + index=0, + outputPath=str(output_dir), + meshVars=[velocity, pressure], + create_xdmf=False, + petsc_reload=True, +) +``` + ### Advantages - Produces XDMF/HDF5 files suitable for visualisation workflows. @@ -81,11 +97,12 @@ be used by `read_timestep()` for coordinate/KDTree remapping and by - Discontinuous fields and high-order fields rely on coordinate remap behavior rather than PETSc section metadata. -## Method B: `write_checkpoint()` / `read_checkpoint()` +## Legacy Compatibility -This is the restart and exact postprocessing workflow. +`write_checkpoint()` is deprecated and retained for existing callers. It emits a +`FutureWarning` directing users to `write_timestep(..., petsc_reload=True)`. -Example: +Legacy call: ```python mesh.write_checkpoint( @@ -97,6 +114,19 @@ mesh.write_checkpoint( ) ``` +Preferred replacement: + +```python +mesh.write_timestep( + "checkout", + index=0, + outputPath=str(output_dir), + meshVars=[velocity, pressure], + create_xdmf=False, + petsc_reload=True, +) +``` + Default files: ```text @@ -183,11 +213,11 @@ layout, it does not support `unique_id=True` or | Use case | Recommended method | | --- | --- | -| ParaView/XDMF visualisation | `write_timestep()` | -| Flexible remap onto another mesh | `write_timestep()` | -| Exact restart/postprocessing | `write_timestep(..., petsc_reload=True)` or `write_checkpoint()` | -| Large spherical benchmark metric evaluation | `write_checkpoint()` | -| Avoid KDTree memory growth at high MPI counts | `write_checkpoint()` | +| ParaView/XDMF visualisation | `write_timestep(..., create_xdmf=True)` | +| Flexible remap onto another mesh | `write_timestep(..., create_xdmf=True)` | +| Exact restart/postprocessing | `write_timestep(..., create_xdmf=False, petsc_reload=True)` | +| Unified visualisation/remap plus PETSc reload | `write_timestep(..., create_xdmf=True, petsc_reload=True)` | +| Avoid KDTree memory growth at high MPI counts | `write_timestep(..., petsc_reload=True)` with `read_checkpoint()` | For unified output, write both payload families in one call: @@ -202,12 +232,19 @@ mesh.write_timestep( ) ``` -It is also valid for production scripts to keep separate visualisation and -restart outputs: +If separate visualisation and restart-style file families are wanted, use two +`write_timestep()` calls with different base names: ```python mesh.write_timestep("output", index=0, outputPath=str(output_dir), meshVars=[v, p]) -mesh.write_checkpoint("checkout", index=0, outputPath=str(output_dir), meshVars=[v, p]) +mesh.write_timestep( + "restart", + index=0, + outputPath=str(output_dir), + meshVars=[v, p], + create_xdmf=False, + petsc_reload=True, +) ``` The first output is for visualisation/remap. The second output is for restart or @@ -215,19 +252,21 @@ metrics-from-checkpoint postprocessing. ## Spherical Benchmark Evidence -The spherical Thieulot benchmark exposed the practical difference between the -two methods. Boundary metric evaluation is run in a second step after the Stokes -solve. The old reload path used `write_timestep()` output and `read_timestep()`; -the new path uses `write_checkpoint()` output and `read_checkpoint()`. +The spherical Thieulot benchmark exposed the practical difference between +coordinate/KDTree remap and PETSc-native reload. Boundary metric evaluation is +run in a second step after the Stokes solve. The old reload path used +`read_timestep()`; the newer path uses PETSc DMPlex section/vector metadata and +`read_checkpoint()`. New output should be written through +`write_timestep(..., petsc_reload=True)`. ### Resource Usage | Resolution | Method | NCPUs | Walltime | CPU time | Memory used | Exit status | | --- | --- | ---: | ---: | ---: | ---: | --- | -| `1/64` | `write_timestep/read_timestep` | 144 | `00:03:43` | `07:04:27` | `211.27 GB` | `0` | -| `1/64` | `write_checkpoint/read_checkpoint` | 144 | `00:02:41` | `05:21:14` | `233.67 GB` | `0` | -| `1/128` | `write_timestep/read_timestep` | 1152 | `00:13:55` | `214:02:57` | `3.92 TB` | `0` | -| `1/128` | `write_checkpoint/read_checkpoint` | 1152 | `00:03:57` | `64:19:53` | `1.83 TB` | `0` | +| `1/64` | `read_timestep` remap | 144 | `00:03:43` | `07:04:27` | `211.27 GB` | `0` | +| `1/64` | PETSc `read_checkpoint` reload | 144 | `00:02:41` | `05:21:14` | `233.67 GB` | `0` | +| `1/128` | `read_timestep` remap | 1152 | `00:13:55` | `214:02:57` | `3.92 TB` | `0` | +| `1/128` | PETSc `read_checkpoint` reload | 1152 | `00:03:57` | `64:19:53` | `1.83 TB` | `0` | For the `1/128` case, checkpoint reload reduced memory by about `2.09 TB` and reduced walltime by about `3.5x`. @@ -236,7 +275,7 @@ reduced walltime by about `3.5x`. `1/128` spherical Thieulot benchmark: -| Metric | `write_timestep/read_timestep` | `write_checkpoint/read_checkpoint` | Difference | +| Metric | `read_timestep` remap | PETSc `read_checkpoint` reload | Difference | | --- | ---: | ---: | ---: | | `v_l2_norm` | `1.4319274480265082e-06` | `1.4319274480231255e-06` | `-3.38e-18` | | `p_l2_norm` | `5.985841567394967e-04` | `5.985841567395382e-04` | `4.15e-17` | @@ -254,7 +293,7 @@ reuse the old `read_timestep()` remap path. `1/64` spherical Thieulot benchmark: -| Metric | `write_timestep/read_timestep` | `write_checkpoint/read_checkpoint` | Difference | +| Metric | `read_timestep` remap | PETSc `read_checkpoint` reload | Difference | | --- | ---: | ---: | ---: | | `v_l2_norm` | `1.1662200663950889e-05` | `1.1662200663957042e-05` | `6.15e-18` | | `p_l2_norm` | `2.7573367818459473e-03` | `2.7573367818460497e-03` | `1.02e-16` | @@ -268,9 +307,8 @@ For production benchmark workflows: - run the solve stage first - use `write_timestep(..., create_xdmf=True, petsc_reload=True)` if one unified file family should support visualisation, remap, and PETSc-native reload -- alternatively, write `write_timestep()` output for visualisation and - `write_checkpoint()` output for restart/postprocessing as separate file - families +- alternatively, write separate `write_timestep()` outputs for visualisation and + PETSc reload by changing `create_xdmf` and `petsc_reload` - exit before metric evaluation - run a second metrics-from-checkpoint job - reload mesh from `.mesh..h5` diff --git a/src/underworld3/discretisation/discretisation_mesh.py b/src/underworld3/discretisation/discretisation_mesh.py index ed904ce1..a66a54b6 100644 --- a/src/underworld3/discretisation/discretisation_mesh.py +++ b/src/underworld3/discretisation/discretisation_mesh.py @@ -2934,13 +2934,15 @@ def write_checkpoint( separate_variable_files: bool = True, create_xdmf: bool = False, ): - """Write PETSc DMPlex checkpoint files for restart/postprocessing. + """Compatibility wrapper for PETSc DMPlex reload output. - Checkpoint output stores PETSc DMPlex section/vector metadata required - for exact parallel reload. By default this is restart output and does - not write XDMF or vertex-field visualisation datasets. Use - ``create_xdmf=True`` to write unified timestep-style output that also - contains PETSc reload metadata. + This method is retained for existing callers. New code should use + ``write_timestep(..., petsc_reload=True)`` so all mesh-variable output + goes through the standard timestep writer. By default this compatibility + method writes PETSc DMPlex section/vector metadata required for exact + parallel reload and does not write XDMF or vertex-field visualisation + datasets. Use ``create_xdmf=True`` to route through the unified + timestep-style output path. Parameters ---------- @@ -2969,6 +2971,16 @@ def write_checkpoint( convention ``.mesh...h5``. This mode does not support ``unique_id=True`` or ``separate_variable_files=False``. """ + import warnings + + warnings.warn( + "write_checkpoint() is deprecated and retained for compatibility. " + "Use write_timestep(..., petsc_reload=True) for PETSc reload output; " + "set create_xdmf=True when visualization/remap payloads are also " + "needed.", + FutureWarning, + stacklevel=2, + ) if outputPath: filename = os.path.join(outputPath, filename) diff --git a/tests/test_0003_save_load.py b/tests/test_0003_save_load.py index a3e453e0..55575dc9 100644 --- a/tests/test_0003_save_load.py +++ b/tests/test_0003_save_load.py @@ -100,14 +100,15 @@ def assert_reloaded_fields(x_reloaded, u_reloaded, d_reloaded): ) checkpoint_base = tmp_path / "restart" - mesh.write_checkpoint( - "restart", - outputPath=str(tmp_path), - meshUpdates=False, - meshVars=[x, u, d], - index=0, - separate_variable_files=False, - ) + with pytest.warns(FutureWarning, match="write_checkpoint\\(\\) is deprecated"): + mesh.write_checkpoint( + "restart", + outputPath=str(tmp_path), + meshUpdates=False, + meshVars=[x, u, d], + index=0, + separate_variable_files=False, + ) mesh_reloaded = uw.discretisation.Mesh(f"{checkpoint_base}.mesh.00000.h5") x_reloaded = uw.discretisation.MeshVariable("x", mesh_reloaded, 1, degree=1) @@ -121,13 +122,14 @@ def assert_reloaded_fields(x_reloaded, u_reloaded, d_reloaded): assert_reloaded_fields(x_reloaded, u_reloaded, d_reloaded) separate_base = tmp_path / "restart_separate" - mesh.write_checkpoint( - "restart_separate", - outputPath=str(tmp_path), - meshUpdates=False, - meshVars=[x, u, d], - index=0, - ) + with pytest.warns(FutureWarning, match="write_checkpoint\\(\\) is deprecated"): + mesh.write_checkpoint( + "restart_separate", + outputPath=str(tmp_path), + meshUpdates=False, + meshVars=[x, u, d], + index=0, + ) mesh_reloaded = uw.discretisation.Mesh(f"{separate_base}.mesh.00000.h5") x_reloaded = uw.discretisation.MeshVariable("x", mesh_reloaded, 1, degree=1) @@ -188,13 +190,14 @@ def test_timestep_with_petsc_reload_roundtrip(tmp_path): x_timestep.read_timestep("unified", "x", 0, outputPath=str(tmp_path)) np.testing.assert_allclose(x_timestep.data[:, 0], x.data[:, 0], atol=1.0e-12) - mesh.write_checkpoint( - "unified_checkpoint", - index=0, - outputPath=str(tmp_path), - meshVars=[x], - create_xdmf=True, - ) + with pytest.warns(FutureWarning, match="write_checkpoint\\(\\) is deprecated"): + mesh.write_checkpoint( + "unified_checkpoint", + index=0, + outputPath=str(tmp_path), + meshVars=[x], + create_xdmf=True, + ) checkpoint_var_file = tmp_path / "unified_checkpoint.mesh.x.00000.h5" checkpoint_xdmf_file = tmp_path / "unified_checkpoint.mesh.00000.xdmf" diff --git a/tests/test_0005_xdmf_compat.py b/tests/test_0005_xdmf_compat.py index c3bcc22c..3c7b34d5 100644 --- a/tests/test_0005_xdmf_compat.py +++ b/tests/test_0005_xdmf_compat.py @@ -171,13 +171,14 @@ def test_write_checkpoint_mesh_uses_petsc_topology(tmp_path): s_var = uw.discretisation.MeshVariable("s", mesh, 1, degree=1) s_var.data[:, 0] = mesh._coords[:, 0] - mesh.write_checkpoint( - "restart", - outputPath=str(tmp_path), - meshUpdates=True, - meshVars=[s_var], - index=0, - ) + with pytest.warns(FutureWarning, match="write_checkpoint\\(\\) is deprecated"): + mesh.write_checkpoint( + "restart", + outputPath=str(tmp_path), + meshUpdates=True, + meshVars=[s_var], + index=0, + ) mesh_h5 = os.path.join(str(tmp_path), "restart.mesh.00000.h5") From de791953c6e699a2b2825f454c85abf4c17fc439 Mon Sep 17 00:00:00 2001 From: Tyagi Date: Sat, 30 May 2026 13:00:38 +0530 Subject: [PATCH 6/7] Clarify unified timestep reload documentation --- docs/advanced/snapshot-restore.md | 12 +-- .../petsc-dmplex-checkpoint-reload-plan.md | 52 +++++++------ .../checkpoint-output-and-reload-methods.md | 75 ++++++++++++------- .../discretisation/discretisation_mesh.py | 65 ++++++++++++---- .../discretisation_mesh_variables.py | 12 ++- 5 files changed, 139 insertions(+), 77 deletions(-) diff --git a/docs/advanced/snapshot-restore.md b/docs/advanced/snapshot-restore.md index 997090e6..35fa0ae3 100644 --- a/docs/advanced/snapshot-restore.md +++ b/docs/advanced/snapshot-restore.md @@ -25,10 +25,10 @@ Typical uses: The same entry points serve two storage modes — in-memory for fast intra-run stash, and on-disk for persistent restart / postprocessing. -You pick by giving (or not giving) a ``file=``. The existing -``mesh.write_timestep()`` / ``read_timestep()`` and -``mesh.write_checkpoint()`` / ``MeshVariable.read_checkpoint()`` -paths are unchanged and continue to serve their existing roles (see +You pick by giving (or not giving) a ``file=``. Mesh-variable output uses +``mesh.write_timestep()`` with explicit payload flags; reload uses either +``MeshVariable.read_timestep()`` for coordinate/KDTree remap or +``MeshVariable.read_checkpoint()`` for PETSc-native reload (see "Choosing between paths" at the bottom). ## The API @@ -245,8 +245,8 @@ classes. For a Python-side summary use |---|---| | Backtrack a few steps inside a running script (RK staging, adaptive Δt, predictor–corrector probes) | ``save_state()`` → token | | Persist whole-model state across runs (crash recovery, bisection studies, full restart) | ``save_state(file=…)`` / ``load_state(file=…)`` | -| Restart from a previous run on a *different* rank count or remap onto a *different* resolution | ``mesh.write_timestep()`` / ``MeshVariable.read_timestep()`` (KDTree remap) | -| Efficient same-rank restart writing only specific variables for postprocessing | ``mesh.write_checkpoint()`` / ``MeshVariable.read_checkpoint()`` (PETSc DMPlex per-variable) | +| Restart from a previous run on a *different* rank count or remap onto a *different* resolution | ``mesh.write_timestep(create_xdmf=True)`` / ``MeshVariable.read_timestep()`` (KDTree remap) | +| Efficient same-rank restart writing only specific variables for postprocessing | ``mesh.write_timestep(petsc_reload=True)`` / ``MeshVariable.read_checkpoint()`` (PETSc DMPlex per-variable) | | Visualisation for ParaView (XDMF + per-step HDF5) | ``mesh.write_timestep(create_xdmf=True)`` | ## Related diff --git a/docs/developer/design/petsc-dmplex-checkpoint-reload-plan.md b/docs/developer/design/petsc-dmplex-checkpoint-reload-plan.md index 713f2b8d..40b89544 100644 --- a/docs/developer/design/petsc-dmplex-checkpoint-reload-plan.md +++ b/docs/developer/design/petsc-dmplex-checkpoint-reload-plan.md @@ -15,18 +15,21 @@ the failure mode is already known. ## Objective -Provide an exact PETSc DMPlex checkpoint reload path for UW3 mesh variables. -This path is intended for restart and large-scale postprocessing, not -visualisation. +Provide an exact PETSc DMPlex reload path for UW3 mesh variables. This path is +intended for restart and large-scale postprocessing. It is now exposed through +the standard `write_timestep(..., petsc_reload=True)` output method; legacy +`write_checkpoint()` calls remain supported as a compatibility wrapper. Target workflow: ```python -mesh.write_checkpoint( +mesh.write_timestep( "checkout", + index=0, outputPath=str(output_dir), meshVars=[v_soln, p_soln], - index=0, + create_xdmf=False, + petsc_reload=True, ) ``` @@ -34,8 +37,8 @@ Default output: ```text checkout.mesh.00000.h5 -checkout.Velocity.00000.h5 -checkout.Pressure.00000.h5 +checkout.mesh.Velocity.00000.h5 +checkout.mesh.Pressure.00000.h5 ``` Reload workflow: @@ -45,31 +48,34 @@ mesh = uw.discretisation.Mesh("checkout.mesh.00000.h5") v_soln = uw.discretisation.MeshVariable("Velocity", mesh, mesh.dim, degree=2) p_soln = uw.discretisation.MeshVariable("Pressure", mesh, 1, degree=1) -v_soln.read_checkpoint("checkout.Velocity.00000.h5", data_name="Velocity") -p_soln.read_checkpoint("checkout.Pressure.00000.h5", data_name="Pressure") +v_soln.read_checkpoint("checkout.mesh.Velocity.00000.h5", data_name="Velocity") +p_soln.read_checkpoint("checkout.mesh.Pressure.00000.h5", data_name="Pressure") ``` The reload path must not use `KDTree` remapping. It must restore FE data through PETSc DMPlex topology, section, vector, and `PetscSF` metadata. -## Existing Output Methods +## Output Payloads -UW3 has two related but different output paths. +`mesh.write_timestep(...)` is the standard output method. It can write either +or both output payloads. -| Method | Purpose | Reload method | Strength | Limitation | +| Payload | Writer option | Reload method | Strength | Limitation | | --- | --- | --- | --- | --- | -| `mesh.write_timestep(...)` | Visualisation and flexible field remap | `MeshVariable.read_timestep(...)` | Writes XDMF and vertex-field data; can map data onto a different mesh | Uses coordinate/KDTree remapping; memory-heavy for large meshes and high MPI counts | -| `mesh.write_checkpoint(...)` | Restart and exact postprocessing | `MeshVariable.read_checkpoint(...)` | Uses PETSc DMPlex section/vector metadata; avoids KDTree | Not a visualisation output; no XDMF or vertex-field datasets | +| XDMF/remap | `create_xdmf=True` | `MeshVariable.read_timestep(...)` | Writes XDMF and vertex-field data; can map data onto a different mesh | Uses coordinate/KDTree remapping; memory-heavy for large meshes and high MPI counts | +| PETSc reload | `petsc_reload=True` | `MeshVariable.read_checkpoint(...)` | Uses PETSc DMPlex section/vector metadata; avoids KDTree | Requires compatible PETSc mesh metadata | -The benchmark scripts should use `write_timestep()` when they need -visualisation files, and `write_checkpoint()` when they need restart-safe, -memory-efficient postprocessing. +For unified output, use `write_timestep(..., create_xdmf=True, +petsc_reload=True)`. For restart-safe, memory-efficient postprocessing without +XDMF, use `write_timestep(..., create_xdmf=False, petsc_reload=True)`. ## Implemented Design -### Checkpoint Writing +### PETSc Reload Writing -`Mesh.write_checkpoint(...)` writes PETSc DMPlex HDF5 storage version `3.0.0`. +`Mesh.write_timestep(..., petsc_reload=True)` writes PETSc DMPlex reload +metadata into per-variable HDF5 files. The legacy `Mesh.write_checkpoint(...)` +compatibility wrapper writes PETSc DMPlex HDF5 storage version `3.0.0`. The mesh file is named: @@ -236,11 +242,9 @@ projection after checkpoint reload. The checkpoint reload implementation is ready for review when: -- `write_checkpoint()` writes PETSc DMPlex HDF5 storage version `3.0.0`. -- `write_checkpoint()` supports `outputPath`. -- `write_checkpoint()` defaults to one checkpoint file per variable. -- `write_checkpoint(..., separate_variable_files=False)` still supports a - combined variable checkpoint file. +- `write_timestep(..., petsc_reload=True)` writes PETSc DMPlex reload metadata. +- `write_timestep(..., petsc_reload=True)` supports `outputPath`. +- legacy `write_checkpoint()` remains available as a compatibility wrapper. - `MeshVariable.read_checkpoint(...)` reloads through PETSc metadata, not coordinate/KDTree remapping. - scalar, vector, and discontinuous variables roundtrip in tests. diff --git a/docs/developer/subsystems/checkpoint-output-and-reload-methods.md b/docs/developer/subsystems/checkpoint-output-and-reload-methods.md index fa37072d..974b6bce 100644 --- a/docs/developer/subsystems/checkpoint-output-and-reload-methods.md +++ b/docs/developer/subsystems/checkpoint-output-and-reload-methods.md @@ -11,9 +11,19 @@ but new code should use `write_timestep(..., petsc_reload=True)`. ## Standard API -This is the visualisation and flexible remap workflow by default. +`write_timestep()` always writes the mesh file and one HDF5 file per mesh +variable. Mesh-variable files always contain raw coordinate/value datasets under +`/fields`, which are the source data used by `MeshVariable.read_timestep()` for +coordinate/KDTree remapping. -Example: +The two optional payloads are selected with explicit flags: + +| Flag | Output payload | Reader/use case | +| --- | --- | --- | +| `create_xdmf=True` | `/vertex_fields` or `/cell_fields` compatibility datasets plus a companion `.xdmf` file | ParaView/XDMF visualisation | +| `petsc_reload=True` | PETSc DMPlex section/vector metadata under `/topologies/uw_mesh/dms/...` | `MeshVariable.read_checkpoint()` PETSc-native reload | + +### Visualisation And Remap ```python mesh.write_timestep( @@ -44,8 +54,10 @@ uses coordinate-based remapping. In practice this means the target variable is filled by comparing target coordinates to source coordinates, using a KDTree or similar nearest-neighbour/remap process. -Set `petsc_reload=True` to add PETSc DMPlex section/vector metadata to the same -per-variable timestep files: +### Unified Visualisation And PETSc Reload + +Set both flags to write one file family that supports ParaView/XDMF, +coordinate/KDTree remap, and PETSc-native reload: ```python mesh.write_timestep( @@ -67,7 +79,9 @@ With both `create_xdmf=True` and `petsc_reload=True`, the same variable file can be used by `read_timestep()` for coordinate/KDTree remapping and by `read_checkpoint()` for exact PETSc-native reload. -For PETSc reload output without XDMF files, use: +### PETSc Reload Without XDMF + +For PETSc reload output without ParaView/XDMF payloads, use: ```python mesh.write_timestep( @@ -80,6 +94,20 @@ mesh.write_timestep( ) ``` +This still writes raw `/fields` datasets, but it does not write +`/vertex_fields`, `/cell_fields`, or a companion `.xdmf` file. + +Typical PETSc-reload-only files still use the timestep naming convention: + +```text +restart.mesh.00000.h5 +restart.mesh.Velocity.00000.h5 +restart.mesh.Pressure.00000.h5 +``` + +The variable files contain raw `/fields` datasets and PETSc reload metadata +under `/topologies/uw_mesh/dms//`. + ### Advantages - Produces XDMF/HDF5 files suitable for visualisation workflows. @@ -127,7 +155,15 @@ mesh.write_timestep( ) ``` -Default files: +The preferred replacement writes timestep-style files: + +```text +checkout.mesh.00000.h5 +checkout.mesh.Velocity.00000.h5 +checkout.mesh.Pressure.00000.h5 +``` + +The legacy call writes checkpoint-style variable filenames: ```text checkout.mesh.00000.h5 @@ -165,10 +201,10 @@ Combined variable file: checkout.checkpoint.00000.h5 ``` -The checkpoint files store PETSc DMPlex HDF5 storage version `3.0.0` data with -the section/vector metadata required to reconstruct finite-element vectors. -Reloading uses PETSc DMPlex topology, section, vector, and `PetscSF` metadata. -It does not use KDTree coordinate remapping. +These files store PETSc DMPlex HDF5 storage version `3.0.0` data with the +section/vector metadata required to reconstruct finite-element vectors. +Reloading uses PETSc DMPlex topology, section, vector, and `PetscSF` metadata; +it does not use KDTree coordinate remapping. Set `create_xdmf=True` to route through the unified timestep writer. This writes XDMF/remap payloads and PETSc reload payloads together, using the timestep file @@ -190,24 +226,7 @@ The variable files are then named layout, it does not support `unique_id=True` or `separate_variable_files=False`. -### Advantages - -- Exact FE-vector reload path for restart and postprocessing. -- Avoids KDTree memory spikes. -- Preserves continuous, vector, and discontinuous variable layouts through - PETSc section metadata. -- Per-variable files avoid forcing postprocessing to open one large combined - field checkpoint. -- Better suited to large MPI jobs where memory locality matters. - -### Disadvantages - -- Does not write XDMF unless `create_xdmf=True`. -- Does not write `/vertex_fields/...` visualisation datasets unless - `create_xdmf=True`. -- Assumes the checkpoint mesh and variable checkpoint files are used together. -- Different-rank reload should be validated for each workflow before relying on - it in production. +New code should prefer the equivalent `write_timestep()` calls above. ## Which Method To Use diff --git a/src/underworld3/discretisation/discretisation_mesh.py b/src/underworld3/discretisation/discretisation_mesh.py index a66a54b6..fab3c4cc 100644 --- a/src/underworld3/discretisation/discretisation_mesh.py +++ b/src/underworld3/discretisation/discretisation_mesh.py @@ -2736,22 +2736,57 @@ def write_timestep( """ Write mesh and selected variables for timestep output. - This writes: - - one mesh HDF5 file (shared/static or per-step, depending on ``meshUpdates``) + This is the standard mesh output method. It always writes: + + - one mesh HDF5 file, shared across timesteps unless ``meshUpdates=True`` - one HDF5 file per mesh variable - - optional proxy files for swarm variables - - optional XDMF file linking all output files - - optional PETSc DMPlex section/vector metadata for exact reload - - When ``create_xdmf=True`` (the default), variable files also include - ParaView-compatible groups (``/vertex_fields`` or ``/cell_fields``), - and an XDMF file is generated on rank 0. - - When ``petsc_reload=True``, variable files also include PETSc DMPlex - section/vector metadata. Those files can then be loaded by - ``MeshVariable.read_checkpoint()`` for exact same-mesh reload, while - still being readable by ``MeshVariable.read_timestep()`` for - coordinate/KDTree remapping if ``create_xdmf`` output was also written. + - raw coordinate/value datasets under ``/fields`` for coordinate-based + reload with ``MeshVariable.read_timestep()`` + + The optional payloads are controlled explicitly: + + - ``create_xdmf=True`` writes ParaView/XDMF output. Variable files also + receive ``/vertex_fields`` or ``/cell_fields`` compatibility groups, + and rank 0 writes the companion ``.xdmf`` file. + - ``petsc_reload=True`` writes PETSc DMPlex section/vector metadata into + the same per-variable HDF5 files. These files can then be loaded with + ``MeshVariable.read_checkpoint()`` for PETSc-native same-mesh reload. + + Common choices are: + + - visualisation/remap only: + ``create_xdmf=True, petsc_reload=False`` + - PETSc-native reload only: + ``create_xdmf=False, petsc_reload=True`` + - unified visualisation/remap and PETSc reload: + ``create_xdmf=True, petsc_reload=True`` + + With both flags enabled, the same variable HDF5 file can be used by + ``MeshVariable.read_timestep()`` for coordinate/KDTree remapping and by + ``MeshVariable.read_checkpoint()`` for exact PETSc-native reload. + + Parameters + ---------- + filename + Output filename base. Files are written as + ``.mesh..h5`` and + ``.mesh...h5``. + index + Timestep/output index used in generated filenames. + outputPath + Directory where output files are written. + meshVars + Mesh variables to write. + swarmVars + Swarm variables to write as proxy fields. + meshUpdates + If ``False``, reuse ``.mesh.00000.h5`` when it already + exists. If ``True``, write an indexed mesh file for this timestep. + create_xdmf + Write ParaView/XDMF-compatible datasets and companion XDMF file. + petsc_reload + Write PETSc DMPlex section/vector metadata for reload with + ``MeshVariable.read_checkpoint()``. """ options = PETSc.Options() diff --git a/src/underworld3/discretisation/discretisation_mesh_variables.py b/src/underworld3/discretisation/discretisation_mesh_variables.py index d27a7c8e..185c8a54 100644 --- a/src/underworld3/discretisation/discretisation_mesh_variables.py +++ b/src/underworld3/discretisation/discretisation_mesh_variables.py @@ -1124,8 +1124,9 @@ def write( Note: This is a low-level method intended to be called by wrapper functions such as ``mesh.write_timestep()`` which handle output paths, - XDMF generation, and multi-variable coordination. Prefer using - ``mesh.write_timestep()`` for normal checkpoint and visualisation output. + optional XDMF generation, optional PETSc reload metadata, and + multi-variable coordination. Prefer using ``mesh.write_timestep()`` for + mesh-variable output. Note: This is a COLLECTIVE operation - all MPI ranks must call it. @@ -1465,10 +1466,13 @@ def read_checkpoint( filename: str, data_name: Optional[str] = None, ): - """Load this mesh variable from ``Mesh.write_checkpoint()`` output. + """Load this mesh variable from PETSc reload output. This is an exact PETSc DMPlex section/vector reload path. It does not - use the coordinate/KDTree remapping used by ``read_timestep()``. + use the coordinate/KDTree remapping used by ``read_timestep()``. New + output should be written with ``Mesh.write_timestep(..., + petsc_reload=True)``; legacy ``Mesh.write_checkpoint()`` files are also + supported. """ if data_name is None: From 5570161f1c67ec5e409f50a419122b95c63b0bdf Mon Sep 17 00:00:00 2001 From: Tyagi Date: Sat, 30 May 2026 13:13:06 +0530 Subject: [PATCH 7/7] Clean up checkpoint and reload documentation --- docs/advanced/snapshot-restore.md | 2 +- .../design/in_memory_checkpoint_design.md | 61 ++-- .../petsc-dmplex-checkpoint-reload-plan.md | 37 +- .../checkpoint-output-and-reload-methods.md | 2 +- .../subsystems/checkpointing-system.md | 340 ++++++++---------- .../discretisation_mesh_variables.py | 19 +- src/underworld3/model.py | 2 +- 7 files changed, 216 insertions(+), 247 deletions(-) diff --git a/docs/advanced/snapshot-restore.md b/docs/advanced/snapshot-restore.md index 35fa0ae3..2895f4b9 100644 --- a/docs/advanced/snapshot-restore.md +++ b/docs/advanced/snapshot-restore.md @@ -245,7 +245,7 @@ classes. For a Python-side summary use |---|---| | Backtrack a few steps inside a running script (RK staging, adaptive Δt, predictor–corrector probes) | ``save_state()`` → token | | Persist whole-model state across runs (crash recovery, bisection studies, full restart) | ``save_state(file=…)`` / ``load_state(file=…)`` | -| Restart from a previous run on a *different* rank count or remap onto a *different* resolution | ``mesh.write_timestep(create_xdmf=True)`` / ``MeshVariable.read_timestep()`` (KDTree remap) | +| Restart from a previous run on a *different* rank count or remap onto a *different* resolution | ``mesh.write_timestep(...)`` / ``MeshVariable.read_timestep()`` (coordinate/KDTree remap) | | Efficient same-rank restart writing only specific variables for postprocessing | ``mesh.write_timestep(petsc_reload=True)`` / ``MeshVariable.read_checkpoint()`` (PETSc DMPlex per-variable) | | Visualisation for ParaView (XDMF + per-step HDF5) | ``mesh.write_timestep(create_xdmf=True)`` | diff --git a/docs/developer/design/in_memory_checkpoint_design.md b/docs/developer/design/in_memory_checkpoint_design.md index 3c4c5fd5..b705efd1 100644 --- a/docs/developer/design/in_memory_checkpoint_design.md +++ b/docs/developer/design/in_memory_checkpoint_design.md @@ -49,33 +49,33 @@ benefits: paths, which surfaces gaps and bugs that disk-only checkpointing exposes only at quarterly-test cadence. -## Two checkpoint paths today (audit-confirmed) - -The audit (read-only investigation, current `main`) confirmed two -distinct on-disk paths in `src/underworld3/`: - -**Path A — PETScSection-based (native checkpoint).** -- `Mesh.write_checkpoint(...)` at `discretisation/discretisation_mesh.py:1892–1953`. -- Uses `dm.sectionView(viewer, subdm)` and `dm.globalVectorView(viewer, subdm, var._gvec)` - through a `PETSc.ViewerHDF5()`. -- Captures mesh topology, deformed coordinates, MV DOF values, and - swarm-variable values via the `_meshVar` proxy. -- Lower-level; bound to the producing DM. - -**Path B — write_timestep (visualization-oriented).** -- `Mesh.write_timestep(...)` at `discretisation/discretisation_mesh.py:1750–1830` - and `Swarm.write_timestep(...)` at `swarm.py:3726–3810`. -- Per-variable HDF5 + XDMF for ParaView; mixes `PETSc.ViewerHDF5()` - with direct `h5py` writes (`swarm.py:1772–1850`). -- More flexible — can be re-loaded at different resolution / - decomposition; bulkier; the user-facing visualisation pipeline. - -**For in-memory snapshot, Path A is the conceptual model.** Restore -goes back to the same DM, so the resolution/decomposition flexibility -of Path B is unneeded. But the in-memory backend will not actually use -the HDF5 Viewer — it will copy section structure + global-vector data -directly into numpy arrays. Same conceptual capture, different -mechanism. +## Two mesh-variable output paths + +This design note predates the unified timestep writer. The current public API +keeps the same two reload semantics, but exposes new output through +`Mesh.write_timestep(...)`: + +**Coordinate-remap path.** +- Write with `Mesh.write_timestep(...)`. +- Read selected variables with `MeshVariable.read_timestep(...)`. +- Uses `/fields` coordinate/value datasets and coordinate/KDTree remapping. +- Can load data onto a different mesh or MPI decomposition. + +**PETSc-native reload path.** +- Write with `Mesh.write_timestep(..., petsc_reload=True)`. +- Read selected variables with `MeshVariable.read_checkpoint(...)`. +- Uses PETSc DMPlex topology, section, vector, and `PetscSF` metadata. +- Intended for exact same-mesh finite-element vector reload. + +`Mesh.write_checkpoint(...)` is retained as a compatibility wrapper for older +scripts. New code should use `Mesh.write_timestep(..., petsc_reload=True)` for +PETSc-native reload output. + +**For in-memory snapshot, the PETSc-native path is the conceptual model.** +Restore goes back to the same DM, so the resolution/decomposition flexibility +of coordinate remap is unneeded. But the in-memory backend does not use the +HDF5 Viewer — it copies section structure and vector data directly into numpy +arrays. Same conceptual capture, different mechanism. ## What state must be captured @@ -352,7 +352,7 @@ section. In rough dependency order: 1. **Backend abstraction layer.** Extract the "capture" logic from the - "store" logic in `Mesh.write_checkpoint()`. Introduce a + output-storage logic used for PETSc-native reload. Introduce a `CheckpointBackend` protocol (e.g., `save_section`, `save_vector`, `save_metadata`, `load_section`, `load_vector`, `load_metadata`). Shaped from day one to support both backends — the in-memory case @@ -365,9 +365,8 @@ In rough dependency order: - `InMemoryBackend` — dict of numpy arrays. Trivial once the abstraction exists. - `OnDiskFullStateBackend` — single monolithic HDF5 file. Shares - PETSc-state serialisation with the existing `write_checkpoint` - path (already HDF5); adds Python-state serialisation as HDF5 - attributes/groups. + PETSc-state serialisation with the existing PETSc-native reload path + (already HDF5); adds Python-state serialisation as HDF5 attributes/groups. 3. **Adopt the serialisation contract for new solver-internal code.** Decision: option (C) — state as first-class dataclass (see "General serialisation contract" section). Any new algorithm-helper class diff --git a/docs/developer/design/petsc-dmplex-checkpoint-reload-plan.md b/docs/developer/design/petsc-dmplex-checkpoint-reload-plan.md index 40b89544..2df11c64 100644 --- a/docs/developer/design/petsc-dmplex-checkpoint-reload-plan.md +++ b/docs/developer/design/petsc-dmplex-checkpoint-reload-plan.md @@ -74,8 +74,9 @@ XDMF, use `write_timestep(..., create_xdmf=False, petsc_reload=True)`. ### PETSc Reload Writing `Mesh.write_timestep(..., petsc_reload=True)` writes PETSc DMPlex reload -metadata into per-variable HDF5 files. The legacy `Mesh.write_checkpoint(...)` -compatibility wrapper writes PETSc DMPlex HDF5 storage version `3.0.0`. +metadata into the standard per-variable timestep HDF5 files. The legacy +`Mesh.write_checkpoint(...)` compatibility wrapper also writes PETSc DMPlex +reload metadata, but uses the older checkpoint filename layout. The mesh file is named: @@ -83,14 +84,22 @@ The mesh file is named: .mesh..h5 ``` -With the default `separate_variable_files=True`, each mesh variable is written -to its own checkpoint file: +With `write_timestep(..., petsc_reload=True)`, each mesh variable is written to +its own timestep-layout file: + +```text +.mesh...h5 +``` + +The legacy `write_checkpoint(...)` wrapper writes one file per variable by +default: ```text ...h5 ``` -With `separate_variable_files=False`, all variables are written into: +With the legacy `write_checkpoint(..., separate_variable_files=False)` mode, +all variables are written into: ```text .checkpoint..h5 @@ -180,11 +189,11 @@ mpirun -np 2 ./uw python -m pytest \ ## Spherical Benchmark Validation The motivating case is spherical benchmark postprocessing at high MPI counts. -The old `write_timestep()` / `read_timestep()` path can build large KDTree -mapping structures during reload. At `1/128` this used nearly the full 4.5 TB +The coordinate-remap `read_timestep()` path can build large KDTree mapping +structures during reload. At `1/128` this used nearly the full 4.5 TB allocation on Gadi. -The checkpoint method avoids KDTree reload and preserves velocity/pressure +The PETSc reload method avoids KDTree reload and preserves velocity/pressure metrics to roundoff. Boundary stress metrics require the benchmark to recover stress consistently after reload. In the spherical benchmark this is handled by projecting the six deviatoric-stress components and then forming `sigma_rr`. @@ -193,10 +202,10 @@ projecting the six deviatoric-stress components and then forming `sigma_rr`. | Resolution | Method | NCPUs | Walltime | Memory used | Status | | --- | --- | ---: | ---: | ---: | --- | -| `1/64` | `write_timestep/read_timestep` | 144 | `00:03:43` | `211.27 GB` | completed | -| `1/64` | `write_checkpoint/read_checkpoint` | 144 | `00:02:41` | `233.67 GB` | completed | -| `1/128` | `write_timestep/read_timestep` | 1152 | `00:13:55` | `3.92 TB` | completed near memory limit | -| `1/128` | `write_checkpoint/read_checkpoint` | 1152 | `00:03:57` | `1.83 TB` | completed | +| `1/64` | `write_timestep/read_timestep` remap | 144 | `00:03:43` | `211.27 GB` | completed | +| `1/64` | `write_timestep(petsc_reload=True)/read_checkpoint` | 144 | `00:02:41` | `233.67 GB` | completed | +| `1/128` | `write_timestep/read_timestep` remap | 1152 | `00:13:55` | `3.92 TB` | completed near memory limit | +| `1/128` | `write_timestep(petsc_reload=True)/read_checkpoint` | 1152 | `00:03:57` | `1.83 TB` | completed | The `1/128` checkpoint reload reduced memory by about `2.09 TB` and walltime by about `3.5x` for the postprocessing run. @@ -205,7 +214,7 @@ about `3.5x` for the postprocessing run. `1/128` spherical Thieulot benchmark: -| Metric | `write_timestep/read_timestep` | `write_checkpoint/read_checkpoint` | +| Metric | `write_timestep/read_timestep` remap | `write_timestep(petsc_reload=True)/read_checkpoint` | | --- | ---: | ---: | | `v_l2_norm` | `1.4319274480265082e-06` | `1.4319274480231255e-06` | | `p_l2_norm` | `5.985841567394967e-04` | `5.985841567395382e-04` | @@ -222,7 +231,7 @@ projection after checkpoint reload. `1/64` spherical Thieulot benchmark: -| Metric | `write_timestep/read_timestep` | `write_checkpoint/read_checkpoint` | +| Metric | `write_timestep/read_timestep` remap | `write_timestep(petsc_reload=True)/read_checkpoint` | | --- | ---: | ---: | | `v_l2_norm` | `1.1662200663950889e-05` | `1.1662200663957042e-05` | | `p_l2_norm` | `2.7573367818459473e-03` | `2.7573367818460497e-03` | diff --git a/docs/developer/subsystems/checkpoint-output-and-reload-methods.md b/docs/developer/subsystems/checkpoint-output-and-reload-methods.md index 974b6bce..92930107 100644 --- a/docs/developer/subsystems/checkpoint-output-and-reload-methods.md +++ b/docs/developer/subsystems/checkpoint-output-and-reload-methods.md @@ -233,7 +233,7 @@ New code should prefer the equivalent `write_timestep()` calls above. | Use case | Recommended method | | --- | --- | | ParaView/XDMF visualisation | `write_timestep(..., create_xdmf=True)` | -| Flexible remap onto another mesh | `write_timestep(..., create_xdmf=True)` | +| Flexible remap onto another mesh | `write_timestep(...)` with `MeshVariable.read_timestep(...)` | | Exact restart/postprocessing | `write_timestep(..., create_xdmf=False, petsc_reload=True)` | | Unified visualisation/remap plus PETSc reload | `write_timestep(..., create_xdmf=True, petsc_reload=True)` | | Avoid KDTree memory growth at high MPI counts | `write_timestep(..., petsc_reload=True)` with `read_checkpoint()` | diff --git a/docs/developer/subsystems/checkpointing-system.md b/docs/developer/subsystems/checkpointing-system.md index f34725f5..c0502b89 100644 --- a/docs/developer/subsystems/checkpointing-system.md +++ b/docs/developer/subsystems/checkpointing-system.md @@ -2,256 +2,214 @@ title: "Checkpointing and Restart System" --- -# Checkpointing and Restart Architecture +# Checkpointing and Restart System -Underworld3 provides a flexible checkpointing system for simulation data persistence, restart capabilities, and post-processing workflows. +Underworld3 has two related but distinct output/reload layers: -## System Overview +- **Mesh-variable output**, written with `Mesh.write_timestep()`. +- **Whole-model snapshots**, written with `Model.save_state()`. -The checkpointing system is built around **PETSc HDF5** files and uses **KDTree interpolation** for flexible restart scenarios. +Use `Mesh.write_timestep()` when you want selected mesh variables for +visualisation, postprocessing, remapping, or PETSc-native reload. Use +`Model.save_state()` when you want to capture the state of a full model, +including registered meshes, variables, swarms, and Python-side state bearers. -### Key Components +## Mesh-Variable Output -1. **`mesh.write_timestep()`** - Primary checkpointing interface -2. **KDTree mapping** - Enables flexible mesh remapping on restart -3. **PETSc HDF5 format** - Parallel-safe, standardized file format -4. **Swarm checkpointing** - Particle data persistence (limited down-sampling) +`Mesh.write_timestep()` is the standard mesh and mesh-variable output method. +It writes one mesh HDF5 file and one HDF5 file per requested mesh variable. +Each variable file always contains `/fields` coordinate/value datasets used by +`MeshVariable.read_timestep()`. -## Mesh Checkpointing +Optional payloads are controlled by explicit flags: -### Basic Usage +| Flag | Payload | Reader / use | +| --- | --- | --- | +| `create_xdmf=True` | XDMF-compatible visualisation datasets and a companion `.xdmf` file | ParaView and other XDMF tools | +| `petsc_reload=True` | PETSc DMPlex section/vector metadata | `MeshVariable.read_checkpoint()` | + +### Visualisation and Coordinate Remap ```python -# Save checkpoint -mesh.write_timestep("simulation_step_100.h5", - meshVars=[velocity, pressure, temperature], - time=100.0, step=100) - -# Restart from checkpoint -mesh.read_timestep("simulation_step_100.h5", - meshVars=[velocity, pressure, temperature]) +mesh.write_timestep( + "output", + index=100, + outputPath="output", + meshVars=[velocity, pressure, temperature], + time=100.0, + create_xdmf=True, +) ``` -### Advanced Features +Typical files: -#### Different Mesh Geometries -```python -# Original simulation: 32x32 elements -mesh_coarse = uw.meshing.StructuredQuadBox(elementRes=(32, 32)) +```text +output/output.mesh.00100.h5 +output/output.mesh.velocity.00100.h5 +output/output.mesh.pressure.00100.h5 +output/output.mesh.temperature.00100.h5 +output/output.mesh.00100.xdmf +``` -# Restart simulation: 64x64 elements (refined) -mesh_fine = uw.meshing.StructuredQuadBox(elementRes=(64, 64)) +Reload selected variables through the coordinate/KDTree remap path: -# KDTree automatically maps data from coarse to fine mesh -mesh_fine.read_timestep("coarse_simulation_step_100.h5", - meshVars=[velocity, pressure, temperature]) +```python +velocity.read_timestep("output", "velocity", 100, outputPath="output") +pressure.read_timestep("output", "pressure", 100, outputPath="output") ``` -#### Different Parallel Decompositions -```python -# Original run: 4 processes -# mpirun -np 4 python simulation.py +`read_timestep()` compares saved coordinates with the target variable's live +coordinates and maps values by the coordinate-remap path. This is useful when +the target mesh or MPI decomposition differs from the one used to write the +files. It is not an exact PETSc finite-element vector reload. + +### PETSc-Native Reload -# Restart run: 8 processes (different decomposition) -# mpirun -np 8 python restart_simulation.py +For exact finite-element vector reload, add `petsc_reload=True` and load with +`MeshVariable.read_checkpoint()`: -# Checkpointing system handles parallel decomposition changes automatically -mesh.read_timestep("4_process_checkpoint.h5", meshVars=[fields]) +```python +mesh.write_timestep( + "restart", + index=100, + outputPath="output", + meshVars=[velocity, pressure], + create_xdmf=False, + petsc_reload=True, +) + +velocity.read_checkpoint( + "output/restart.mesh.velocity.00100.h5", + data_name="velocity", +) +pressure.read_checkpoint( + "output/restart.mesh.pressure.00100.h5", + data_name="pressure", +) ``` -### Technical Implementation +Typical files: -#### KDTree Mapping Process -1. **Coordinate extraction**: Target mesh node coordinates identified -2. **KDTree search**: Find nearest source mesh nodes for each target -3. **Interpolation weights**: Compute interpolation coefficients -4. **Value mapping**: Interpolate field values to new mesh nodes -5. **Boundary handling**: Special treatment for domain boundaries +```text +output/restart.mesh.00100.h5 +output/restart.mesh.velocity.00100.h5 +output/restart.mesh.pressure.00100.h5 +``` -#### File Format Details -- **Format**: PETSc HDF5 with parallel I/O -- **Metadata**: Time, step number, mesh geometry info -- **Field data**: All MeshVariable data with proper chunking -- **Compression**: Optional HDF5 compression for large datasets +The variable files contain `/fields` datasets and PETSc reload metadata under +`/topologies/uw_mesh/dms//`. `read_checkpoint()` uses PETSc DMPlex +topology, section, vector, and `PetscSF` metadata. It does not use KDTree +remapping. -## Swarm Checkpointing +### Unified Visualisation and PETSc Reload -### Capabilities and Limitations +Set both flags when one output family should support ParaView, coordinate +remap, and PETSc-native reload: -#### What Works Well ```python -# Save swarm state -swarm.save("particles_step_100.h5") - -# Restore to same or different mesh -swarm.load("particles_step_100.h5") +mesh.write_timestep( + "output", + index=100, + outputPath="output", + meshVars=[velocity, pressure], + create_xdmf=True, + petsc_reload=True, +) ``` -#### Key Characteristics -- **Mesh-resolution independent**: Particles can be loaded to different mesh resolutions -- **No down-sampling strategy**: All particles are saved/restored (no intelligent reduction) -- **Spatial integrity**: Particle positions and data preserved exactly -- **Migration compatibility**: Works with mesh parallel decomposition changes +The same variable file can then be read by: -### Use Cases -- **Material tracking**: Persistent material property histories -- **Lagrangian tracers**: Pathline and geological history tracking -- **Passive markers**: Flow visualization and analysis -- **Material interfaces**: Sharp interface tracking +- `MeshVariable.read_timestep(...)` for coordinate/KDTree remap. +- `MeshVariable.read_checkpoint(...)` for PETSc-native reload. -## Production Workflows +## Compatibility API -### Checkpoint-Based Visualization - -Instead of attempting real-time parallel visualization: +`Mesh.write_checkpoint()` is retained for older scripts and emits a +`FutureWarning`. New code should use: ```python -# Simulation loop with regular checkpointing -for step in range(1000): - # ... solve physics ... - - if step % 10 == 0: # Every 10 steps - mesh.write_timestep(f"output/step_{step:04d}.h5", - meshVars=[velocity, pressure, temperature], - time=step * dt, step=step) - -# Separate visualization workflow (single process) -import glob -checkpoint_files = sorted(glob.glob("output/step_*.h5")) - -for checkpoint in checkpoint_files: - # Load data for visualization - mesh.read_timestep(checkpoint, meshVars=[velocity, pressure, temperature]) - - # Create high-quality visualization - create_full_field_visualization(mesh, velocity, pressure, temperature) +mesh.write_timestep(..., petsc_reload=True) ``` -### Multi-Resolution Analysis +instead of: ```python -# High-resolution simulation checkpoint -mesh_hr = uw.meshing.StructuredQuadBox(elementRes=(256, 256)) -# ... run simulation, save checkpoint ... - -# Load to lower resolution for analysis -mesh_lr = uw.meshing.StructuredQuadBox(elementRes=(64, 64)) -mesh_lr.read_timestep("hr_simulation_final.h5", meshVars=[fields]) - -# Fast analysis on reduced data -perform_statistical_analysis(fields) +mesh.write_checkpoint(...) ``` -## Performance Characteristics - -### Checkpoint File Sizes -- **Mesh variables**: Proportional to mesh resolution and field count -- **Swarm data**: Proportional to particle count (can be very large) -- **Compression**: HDF5 compression can reduce sizes significantly -- **Typical sizes**: 10MB - 10GB depending on problem scale - -### I/O Performance -- **Parallel writing**: Scales well with process count -- **Reading speed**: Fast restart due to parallel HDF5 -- **Storage requirements**: Plan for 5-20% of simulation time for I/O +The compatibility method writes PETSc-reloadable files, but it uses legacy +checkpoint-style variable filenames. The standard `write_timestep()` method +keeps one naming convention for visualisation, remap, and PETSc reload output. -### Memory Usage -- **KDTree construction**: Temporary memory spike during interpolation -- **Field mapping**: Additional memory for intermediate interpolation arrays -- **Swarm loading**: Full swarm data loaded into memory +## Whole-Model Snapshots -## Best Practices +`Model.save_state()` and `Model.load_state()` are the model-level snapshot API. -### Checkpoint Frequency ```python -# Balance between restart capability and I/O cost -checkpoint_interval = max(10, total_steps // 100) # At least every 10 steps - -if step % checkpoint_interval == 0: - mesh.write_timestep(f"restart_{step}.h5", ...) +token = model.save_state() +model.load_state(token) ``` -### File Management -```python -# Implement checkpoint rotation to manage disk space -def cleanup_old_checkpoints(keep_last=5): - checkpoints = sorted(glob.glob("restart_*.h5")) - for old_checkpoint in checkpoints[:-keep_last]: - os.remove(old_checkpoint) -``` +Without `file=`, `save_state()` returns an in-memory token for short-lived +backtracking inside a running process. -### Metadata Tracking ```python -# Include simulation parameters in checkpoint -checkpoint_metadata = { - "rayleigh_number": Ra, - "viscosity_contrast": eta_contrast, - "boundary_conditions": bc_info, - "mesh_resolution": mesh.data.shape[0] -} - -# Store as HDF5 attributes or separate JSON file +model.save_state(file="run.snap.h5") +model.load_state("run.snap.h5") ``` -## Integration with Model Orchestration +With `file=`, `save_state()` writes a persistent snapshot wrapper plus a sibling +bulk directory: -### Future Direction: Model-Centric Checkpointing +```text +run.snap.h5 +run.snap.bulk/ + {mesh}.mesh.00000.h5 + {mesh}.{variable}.00000.h5 + {swarm}.swarm.h5 +``` -The checkpointing system may migrate from `mesh.write_timestep()` to `model.save_checkpoint()`: +The wrapper file stores model metadata and references to the bulk files. The +mesh-variable bulk files are PETSc-reloadable and are read through +`MeshVariable.read_checkpoint()` during `Model.load_state(...)`. -```python -# Current approach -mesh.write_timestep("step_100.h5", meshVars=[v, p, T]) +Disk snapshots are for same-model restart. The model should already contain +compatible registered meshes, variables, swarms, and state bearers when +`load_state()` is called. For remapping onto a different mesh or MPI layout, +use `Mesh.write_timestep()` plus `MeshVariable.read_timestep()`. -# Future model-centric approach -model = uw.Model(mesh=mesh, velocity=v, pressure=p, temperature=T) -model.save_checkpoint("step_100.h5", include_parameters=True) -``` +## Choosing the Correct Path -**Benefits:** -- **Parameter persistence**: Automatically save constitutive parameters -- **Model validation**: Ensure checkpoint compatibility with model structure -- **Metadata integration**: Rich simulation metadata in checkpoint files -- **Version control**: Track model version and parameter changes +| Need | Write | Read | +| --- | --- | --- | +| ParaView/XDMF visualisation | `Mesh.write_timestep(create_xdmf=True)` | External visualisation tool | +| Remap selected variables onto another mesh or decomposition | `Mesh.write_timestep(...)` | `MeshVariable.read_timestep(...)` | +| PETSc-native reload of selected variables | `Mesh.write_timestep(petsc_reload=True)` | `MeshVariable.read_checkpoint(...)` | +| One file family for visualisation, remap, and PETSc reload | `Mesh.write_timestep(create_xdmf=True, petsc_reload=True)` | `read_timestep()` or `read_checkpoint()` depending on intent | +| In-process backtracking | `Model.save_state()` | `Model.load_state(token)` | +| Persistent whole-model restart | `Model.save_state(file=...)` | `Model.load_state(path)` | -## Error Handling and Robustness +## Performance Notes -### Common Issues and Solutions +`read_timestep()` is flexible because it is coordinate based, but the remap can +be memory-heavy for large meshes and high MPI counts. -#### Mesh Incompatibility -```python -try: - mesh.read_timestep("checkpoint.h5", meshVars=[fields]) -except uw.CheckpointError as e: - print(f"Mesh incompatibility: {e}") - print("Consider using interpolation mode or checking mesh geometry") -``` +`read_checkpoint()` is less flexible but better suited to exact restart and +large postprocessing jobs because it reloads PETSc finite-element data directly +from DMPlex metadata. -#### Missing Fields -```python -# Partial field loading when checkpoint has different fields -available_fields = mesh.get_checkpoint_fields("checkpoint.h5") -loadable_fields = [f for f in [v, p, T] if f.name in available_fields] -mesh.read_timestep("checkpoint.h5", meshVars=loadable_fields) -``` +For production jobs that need both visualisation and restart, prefer: -#### Parallel Inconsistency ```python -# Ensure all processes participate in checkpoint operations -uw.mpi.barrier() # Synchronize before checkpoint -mesh.write_timestep("checkpoint.h5", meshVars=[fields]) -uw.mpi.barrier() # Synchronize after checkpoint +mesh.write_timestep( + "output", + index=step, + outputPath="output", + meshVars=[velocity, pressure], + create_xdmf=True, + petsc_reload=True, +) ``` -## Summary - -The Underworld3 checkpointing system provides: - -- ✅ **Flexible restart**: Different meshes and parallel decompositions -- ✅ **Production-ready**: Robust parallel HDF5 I/O -- ✅ **KDTree interpolation**: Automatic field remapping -- ✅ **Swarm support**: Complete particle state preservation -- ⚠️ **No swarm down-sampling**: Memory constraints for large particle counts -- 🔄 **Future enhancement**: Model-centric approach with rich metadata - -The system enables robust simulation workflows, flexible post-processing, and production-scale computational geodynamics applications. \ No newline at end of file +This makes the output intent explicit and avoids maintaining separate user APIs +for XDMF output and PETSc reload output. diff --git a/src/underworld3/discretisation/discretisation_mesh_variables.py b/src/underworld3/discretisation/discretisation_mesh_variables.py index 185c8a54..dc8a883b 100644 --- a/src/underworld3/discretisation/discretisation_mesh_variables.py +++ b/src/underworld3/discretisation/discretisation_mesh_variables.py @@ -1226,11 +1226,14 @@ def read_timestep( verbose=False, ): """ - Read a mesh variable from an arbitrary vertex-based checkpoint file - and reconstruct/interpolate the data field accordingly. The saved - mesh and the live mesh may have different sizes/decompositions; the - values are matched by nearest-neighbour kd-tree interpolation to - the live mesh nodes. + Read a mesh variable from ``Mesh.write_timestep()`` output using the + coordinate-remap path. The saved mesh and the live mesh may have + different sizes or decompositions; values are matched to the live mesh + nodes by nearest-neighbour KDTree interpolation. + + This is the flexible remap reader. It is distinct from + ``read_checkpoint()``, which loads PETSc DMPlex section/vector metadata + for PETSc-native same-mesh reload. Parallel-safe and memory-bounded. Two transient swarms route the work without ever holding the full file on more than one rank: @@ -1252,9 +1255,9 @@ def read_timestep( """ # Format dispatch: ``data_filename`` may be either the - # legacy ``write_timestep`` prefix (in which case we - # reconstruct the per-variable file path the usual way) or a - # v1.1 snapshot wrapper path produced by + # ``write_timestep`` filename base (in which case we reconstruct the + # per-variable file path the usual way) or a v1.1 snapshot wrapper path + # produced by # ``model.save_state(file=…)``. The format-detection logic is # hidden from the user — same call, both formats. import h5py diff --git a/src/underworld3/model.py b/src/underworld3/model.py index f2b485f3..04c9bd6e 100644 --- a/src/underworld3/model.py +++ b/src/underworld3/model.py @@ -621,7 +621,7 @@ def save_state(self, *, file: Optional[str] = None): With ``file=``, writes a persistent on-disk snapshot at that path (plus a sibling ``.bulk/`` directory holding the bulk PETSc + swarm sidecars). Survives the process; suitable - for restart, postprocessing, transferring runs. + for same-model restart and postprocessing workflows. Either way the captured state is the full model: all registered meshes and mesh-variables, all swarms with