From 85bc2bf98da4d68cb13e7adc43278a25711904e2 Mon Sep 17 00:00:00 2001 From: Yousef Moazzam Date: Thu, 18 Jun 2026 15:27:48 +0100 Subject: [PATCH 1/2] Update output non-slicing dims shape in output chunk shape calculation --- httomo/runner/dataset_store_backing.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/httomo/runner/dataset_store_backing.py b/httomo/runner/dataset_store_backing.py index 200527e96..849d85ccc 100644 --- a/httomo/runner/dataset_store_backing.py +++ b/httomo/runner/dataset_store_backing.py @@ -52,7 +52,7 @@ def calculate_section_output_chunk_shape( for method in section.methods: if method.memory_gpu is None: continue - output_non_slice_dims = method.calculate_output_dims(input_non_slice_dims) + output_non_slice_dims = method.calculate_output_dims(output_non_slice_dims) output_chunk_shape = list(output_non_slice_dims) output_chunk_shape.insert(slicing_dim, chunk_shape[slicing_dim]) From 70523055244c2e3b311c58b3cdaf984272c0b85c Mon Sep 17 00:00:00 2001 From: Yousef Moazzam Date: Thu, 18 Jun 2026 15:36:29 +0100 Subject: [PATCH 2/2] Pass along output section data shape in full pipeline CPU memory estimation When performing CPU memory estimation of a section at runtime, any change to the data shape during the processing is naturally propagated due to the output of a section being passed as the input of the next section, so the data source's shape could be inspected to know if the data shape had changed. When performing CPU memory estimation of the entire pipeline prior to httomo runtime, there's no processed data to pass along, and thus the transformation of the data shape must be accounted for in some other way. This change passes along the estimated output chunk shape of a section to the input of the next section, to then achieve this propagation of data shape transformation. 
Note that it assumes that the global shape of the data will be the size of a chunk multiplied by the number of processes, which is roughly correct but may very slightly overestimate in some cases; this is due to the fact that, depending on the input data shape, the splitting of data across multiple processes could result in one or more of the processes getting slightly less data as a consequence of the data shape not being evenly divisible along the slicing dimension. --- httomo/cli.py | 19 ++++++++++--------- httomo/runner/dataset_store_backing.py | 20 +++++++++++++++++--- 2 files changed, 27 insertions(+), 12 deletions(-) diff --git a/httomo/cli.py b/httomo/cli.py index 4775c00a4..9cca815a4 100644 --- a/httomo/cli.py +++ b/httomo/cli.py @@ -562,17 +562,18 @@ def estimate_cpu_memory(in_data_file: Path, pipeline_file: Path, nprocs: int) -> section_memory_peak = 0 for idx in range(len(sections)): + mem, previewed_shape = estimate_section_memory( + nprocs, + 0, + None, + dtype, + previewed_shape, + sections, + idx, + ) section_memory_peak = max( section_memory_peak, - estimate_section_memory( - nprocs, - 0, - None, - dtype, - previewed_shape, - sections, - idx, - ), + mem, ) return section_memory_peak diff --git a/httomo/runner/dataset_store_backing.py b/httomo/runner/dataset_store_backing.py index 849d85ccc..e8ed2bdf6 100644 --- a/httomo/runner/dataset_store_backing.py +++ b/httomo/runner/dataset_store_backing.py @@ -74,7 +74,7 @@ def estimate_section_memory( sections: List[Section], section_idx: int, consider_pinned_memory_pool: bool = False, -) -> int: +) -> tuple[int, tuple[int, int, int]]: # Get chunk shape created by reader of section `n` (the current section) that will account # for padding. 
This chunk shape is based on the chunk shape written by the writer of # section `n - 1` (the previous section) @@ -150,12 +150,26 @@ def estimate_section_memory( # See https://github.com/cupy/cupy/issues/9813 cupy_transfer_overhead = total_mem + # Calculate the shape of the global data of the output of the section to pass back to the + # caller. + # + # NOTE: The caller will only use this if performing CPU memory estimation of the entire + # pipeline not at runtime of httomo (i.e., the `memory-check` CLI command). If performing + # CPU memory estimation of a section during runtime of httomo, the caller won't use this + # value. + current_section_slicing_dim = _get_slicing_dim(sections[section_idx].pattern) - 1 + output_global_shape = list(output_chunk_shape) + output_global_shape[current_section_slicing_dim] = ( + output_global_shape[current_section_slicing_dim] * nprocs + ) + return ( padded_input_chunk_bytes + output_chunk_bytes + reslice_bytes + cupy_pinned_cpu_pool_memory - + cupy_transfer_overhead + + cupy_transfer_overhead, + output_global_shape, ) @@ -167,7 +181,7 @@ def determine_store_backing( global_shape: Tuple[int, int, int], section_idx: int, ) -> DataSetStoreBacking: - section_memory = estimate_section_memory( + section_memory, _ = estimate_section_memory( comm.size, comm.rank, comm.allgather,