From 85bc2bf98da4d68cb13e7adc43278a25711904e2 Mon Sep 17 00:00:00 2001
From: Yousef Moazzam <yousef.moazzam@diamond.ac.uk>
Date: Thu, 18 Jun 2026 15:27:48 +0100
Subject: [PATCH 1/2] Update output non-slicing dims shape in output chunk
 shape calculation

---
 httomo/runner/dataset_store_backing.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/httomo/runner/dataset_store_backing.py b/httomo/runner/dataset_store_backing.py
index 200527e96..849d85ccc 100644
--- a/httomo/runner/dataset_store_backing.py
+++ b/httomo/runner/dataset_store_backing.py
@@ -52,7 +52,7 @@ def calculate_section_output_chunk_shape(
     for method in section.methods:
         if method.memory_gpu is None:
             continue
-        output_non_slice_dims = method.calculate_output_dims(input_non_slice_dims)
+        output_non_slice_dims = method.calculate_output_dims(output_non_slice_dims)
 
     output_chunk_shape = list(output_non_slice_dims)
     output_chunk_shape.insert(slicing_dim, chunk_shape[slicing_dim])

From 70523055244c2e3b311c58b3cdaf984272c0b85c Mon Sep 17 00:00:00 2001
From: Yousef Moazzam <yousef.moazzam@diamond.ac.uk>
Date: Thu, 18 Jun 2026 15:36:29 +0100
Subject: [PATCH 2/2] Pass along output section data shape in full pipeline CPU
 memory estimation

When performing CPU memory estimation of a section at runtime, any
change to the data shape during processing is naturally propagated,
because the output of a section is passed as the input of the next
section; the data source's shape can therefore be inspected to find
out whether the data shape has changed.

When performing CPU memory estimation of the entire pipeline prior to
httomo runtime, there's no processed data to pass along, and thus the
transformation of the data shape must be accounted for in some other
way.

This change passes the estimated output shape of a section along as the
input shape of the next section, so that any transformation of the data
shape is propagated. Note that the global shape of the output is assumed
to be the output chunk shape with its slicing dimension multiplied by
the number of processes. This is roughly correct but may very slightly
overestimate in some cases: depending on the input data shape, splitting
the data across multiple processes can leave one or more processes with
slightly less data, as a consequence of the data shape not being evenly
divisible along the slicing dimension.
---
 httomo/cli.py                          | 19 ++++++++++---------
 httomo/runner/dataset_store_backing.py | 20 +++++++++++++++++---
 2 files changed, 27 insertions(+), 12 deletions(-)

diff --git a/httomo/cli.py b/httomo/cli.py
index 4775c00a4..9cca815a4 100644
--- a/httomo/cli.py
+++ b/httomo/cli.py
@@ -562,17 +562,18 @@ def estimate_cpu_memory(in_data_file: Path, pipeline_file: Path, nprocs: int) ->
 
     section_memory_peak = 0
     for idx in range(len(sections)):
+        mem, previewed_shape = estimate_section_memory(
+            nprocs,
+            0,
+            None,
+            dtype,
+            previewed_shape,
+            sections,
+            idx,
+        )
         section_memory_peak = max(
             section_memory_peak,
-            estimate_section_memory(
-                nprocs,
-                0,
-                None,
-                dtype,
-                previewed_shape,
-                sections,
-                idx,
-            ),
+            mem,
         )
 
     return section_memory_peak
diff --git a/httomo/runner/dataset_store_backing.py b/httomo/runner/dataset_store_backing.py
index 849d85ccc..e8ed2bdf6 100644
--- a/httomo/runner/dataset_store_backing.py
+++ b/httomo/runner/dataset_store_backing.py
@@ -74,7 +74,7 @@ def estimate_section_memory(
     sections: List[Section],
     section_idx: int,
     consider_pinned_memory_pool: bool = False,
-) -> int:
+) -> tuple[int, tuple[int, int, int]]:
     # Get chunk shape created by reader of section `n` (the current section) that will account
     # for padding. This chunk shape is based on the chunk shape written by the writer of
     # section `n - 1` (the previous section)
@@ -150,12 +150,26 @@ def estimate_section_memory(
         # See https://github.com/cupy/cupy/issues/9813
         cupy_transfer_overhead = total_mem
 
+    # Calculate the shape of the global data of the output of the section to pass back to the
+    # caller.
+    #
+    # NOTE: The caller will only use this if performing CPU memory estimation of the entire
+    # pipeline not at runtime of httomo (i.e., the `memory-check` CLI command). If performing
+    # CPU memory estimation of a section during runtime of httomo, the caller won't use this
+    # value.
+    current_section_slicing_dim = _get_slicing_dim(sections[section_idx].pattern) - 1
+    output_global_shape = list(output_chunk_shape)
+    output_global_shape[current_section_slicing_dim] = (
+        output_global_shape[current_section_slicing_dim] * nprocs
+    )
+
     return (
         padded_input_chunk_bytes
         + output_chunk_bytes
         + reslice_bytes
         + cupy_pinned_cpu_pool_memory
-        + cupy_transfer_overhead
+        + cupy_transfer_overhead,
+        output_global_shape,
     )
 
 
@@ -167,7 +181,7 @@ def determine_store_backing(
     global_shape: Tuple[int, int, int],
     section_idx: int,
 ) -> DataSetStoreBacking:
-    section_memory = estimate_section_memory(
+    section_memory, _ = estimate_section_memory(
         comm.size,
         comm.rank,
         comm.allgather,