From df0d689d8172a3c097a8545f377599b96c5a2cfa Mon Sep 17 00:00:00 2001
From: Justin Hiemstra <jhiemstra@wisc.edu>
Date: Mon, 8 Jun 2026 16:38:43 -0500
Subject: [PATCH 1/3] Propagate enable_profiling through
 ProcessedContainerSettings

ProcessedContainerSettings.from_container_settings() rebuilds the processed
container settings from the raw ContainerSettings, but it never copied the
enable_profiling field, so the processed settings always used the dataclass
default of False. run_container_singularity() reads the flag off the processed
settings (config.enable_profiling), so the profiling branch never ran: no peer
cgroup was created, cgroup_wrapper.sh never executed, and no usage-profile.tsv
was written, regardless of the user's containers.enable_profiling config value.

This regressed in #390, which moved enable_profiling from a top-level config
field to the nested container settings and added the field to both
ContainerSettings and ProcessedContainerSettings, but missed wiring it through
the conversion in from_container_settings().

Add the missing assignment, plus a regression test in test_config.py asserting
that enable_profiling survives the full config-parsing path.
---
 spras/config/container_schema.py |  1 +
 test/test_config.py              | 16 ++++++++++++++++
 2 files changed, 17 insertions(+)

diff --git a/spras/config/container_schema.py b/spras/config/container_schema.py
index 70276f63..5112198b 100644
--- a/spras/config/container_schema.py
+++ b/spras/config/container_schema.py
@@ -95,6 +95,7 @@ def from_container_settings(settings: ContainerSettings, hash_length: int) -> "P
             unpack_singularity=unpack_singularity,
             base_url=container_base_url,
             prefix=container_prefix,
+            enable_profiling=settings.enable_profiling,
             hash_length=hash_length,
             images=dict(settings.images),
         )
diff --git a/test/test_config.py b/test/test_config.py
index cf4cdf0f..f20f0e64 100644
--- a/test/test_config.py
+++ b/test/test_config.py
@@ -251,6 +251,22 @@ def test_config_container_images_invalid_algorithm(self):
         with pytest.raises(ValueError, match="Unknown algorithm name 'typo_algo'"):
             config.init_global(test_config)
 
+    def test_config_container_enable_profiling(self):
+        # enable_profiling must survive the ContainerSettings -> ProcessedContainerSettings
+        # conversion in from_container_settings(). containers.py reads it off the processed
+        # settings at runtime, so if it isn't propagated the profiling code path never runs.
+        # Regression test for the field being silently dropped during that conversion.
+        test_config = get_test_config()
+
+        # Default: absent from config --> False
+        config.init_global(test_config)
+        assert config.config.container_settings.enable_profiling is False
+
+        # Explicitly enabled --> must propagate to the processed settings as True
+        test_config["containers"]["enable_profiling"] = True
+        config.init_global(test_config)
+        assert config.config.container_settings.enable_profiling is True
+
     def test_error_dataset_label(self):
         test_config = get_test_config()
         error_test_dicts = [{"label": "test$"}, {"label": "@test'"}, {"label": "[test]"}, {"label": "test-test"},

From 482442d498bcf3e0097fd5a7ee7b5968203649af Mon Sep 17 00:00:00 2001
From: Justin Hiemstra <jhiemstra@wisc.edu>
Date: Mon, 22 Jun 2026 13:27:27 -0500
Subject: [PATCH 2/3] Don't use capture_output and stderr in the same command

subprocess.run rejects capture_output=True together with an explicit
stderr argument (capture_output is shorthand for stdout=PIPE, stderr=PIPE).
In the profiling/cgroup branch of run_container_singularity this raised
"ValueError: stdout and stderr arguments may not be used with
capture_output." on every Apptainer run with enable_profiling=true.

Set stdout=PIPE and stderr=STDOUT explicitly so the container's stderr is
merged into the captured stdout, preserving the original intent of
capturing combined output via proc.stdout.
---
 spras/containers.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/spras/containers.py b/spras/containers.py
index c30697f3..4bc3df55 100644
--- a/spras/containers.py
+++ b/spras/containers.py
@@ -553,7 +553,7 @@ def run_container_singularity(resolved: ResolvedImage, command: List[str], volum
         # as `containers.py`.
         wrapper = os.path.join(os.path.dirname(__file__), "cgroup_wrapper.sh")
         cmd = [wrapper, my_cgroup] + singularity_cmd
-        proc = subprocess.run(cmd, capture_output=True, text=True, stderr=subprocess.STDOUT)
+        proc = subprocess.run(cmd, text=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
 
         print("Reading memory and CPU stats from cgroup")
         create_apptainer_container_stats(my_cgroup, out_dir)

From a1c52e79cb4654961d806a21449d529a1aebe547 Mon Sep 17 00:00:00 2001
From: Justin Hiemstra <jhiemstra@wisc.edu>
Date: Mon, 22 Jun 2026 13:53:44 -0500
Subject: [PATCH 3/3] Declare usage-profile.tsv as a reconstruct output

The profiling file is written next to raw-pathway.txt inside the reconstruct
rule, but it was never declared as a Snakemake output. With a shared
filesystem this is harmless, but under shared-fs-usage: none (e.g. HTCondor)
Snakemake only transfers declared outputs back from execute nodes, so the
profiling data was created on the EP and then discarded.

Declare usage-profile.tsv as an additional reconstruct output, gated on
enable_profiling and the singularity/apptainer framework so the output is
only required when it is actually produced (run_container_singularity),
matching the existing warn-only behavior for profiling under docker.
---
 Snakefile | 14 +++++++++++++-
 1 file changed, 13 insertions(+), 1 deletion(-)

diff --git a/Snakefile b/Snakefile
index 5ad7aa18..374014e0 100644
--- a/Snakefile
+++ b/Snakefile
@@ -273,6 +273,16 @@ def get_algorithm_image(wildcards):
     return None
 
 # Run the pathway reconstruction algorithm
+# When profiling is enabled, each reconstruct job writes a 'usage-profile.tsv' alongside its raw-pathway.txt.
+# Declare it as a rule output so that Snakemake tracks it and, critically, transfers it back from the execute
+# node in environments without a shared filesystem (e.g. HTCondor with shared-fs-usage: none). This is only
+# produced when profiling is actually performed, which requires the singularity/apptainer container framework
+# (see run_container_singularity), so we gate on the same condition to avoid declaring an output that is never
+# created (which would fail the rule).
+profiling_outputs = {}
+if container_settings.enable_profiling and container_settings.framework.is_singularity_family:
+    profiling_outputs['profile_file'] = SEP.join([out_dir, '{dataset}-{algorithm}-{params}', 'usage-profile.tsv'])
+
 rule reconstruct:
     input: collect_prepared_input
     # Each reconstruct call should be in a separate output subdirectory that is unique for the parameter combination so
@@ -280,7 +290,9 @@ rule reconstruct:
     # Overwriting files can happen because the pathway reconstruction algorithms often generate output files with the
     # same name regardless of the inputs or parameters, and these aren't renamed until after the container command
     # terminates
-    output: pathway_file = SEP.join([out_dir, '{dataset}-{algorithm}-{params}', 'raw-pathway.txt'])
+    output:
+        pathway_file = SEP.join([out_dir, '{dataset}-{algorithm}-{params}', 'raw-pathway.txt']),
+        **profiling_outputs
     resources:
         htcondor_transfer_input_files=get_algorithm_image
     run: