From df0d689d8172a3c097a8545f377599b96c5a2cfa Mon Sep 17 00:00:00 2001 From: Justin Hiemstra Date: Mon, 8 Jun 2026 16:38:43 -0500 Subject: [PATCH 1/3] Propagate enable_profiling through ProcessedContainerSettings ProcessedContainerSettings.from_container_settings() rebuilds the processed container settings from the raw ContainerSettings, but it never copied the enable_profiling field, so the processed settings always used the dataclass default of False. run_container_singularity() reads the flag off the processed settings (config.enable_profiling), so the profiling branch never ran: no peer cgroup was created, cgroup_wrapper.sh never executed, and no usage-profile.tsv was written, regardless of the user's containers.enable_profiling config value. This regressed in #390, which moved enable_profiling from a top-level config field to the nested container settings and added the field to both ContainerSettings and ProcessedContainerSettings, but missed wiring it through the conversion in from_container_settings(). Add the missing assignment, plus a regression test in test_config.py asserting that enable_profiling survives the full config-parsing path. --- spras/config/container_schema.py | 1 + test/test_config.py | 16 ++++++++++++++++ 2 files changed, 17 insertions(+) diff --git a/spras/config/container_schema.py b/spras/config/container_schema.py index 70276f63..5112198b 100644 --- a/spras/config/container_schema.py +++ b/spras/config/container_schema.py @@ -95,6 +95,7 @@ def from_container_settings(settings: ContainerSettings, hash_length: int) -> "P unpack_singularity=unpack_singularity, base_url=container_base_url, prefix=container_prefix, + enable_profiling=settings.enable_profiling, hash_length=hash_length, images=dict(settings.images), ) diff --git a/test/test_config.py b/test/test_config.py index cf4cdf0f..f20f0e64 100644 --- a/test/test_config.py +++ b/test/test_config.py @@ -251,6 +251,22 @@ def test_config_container_images_invalid_algorithm(self): with pytest.raises(ValueError, match="Unknown algorithm name 'typo_algo'"): config.init_global(test_config) + def test_config_container_enable_profiling(self): + # enable_profiling must survive the ContainerSettings -> ProcessedContainerSettings + # conversion in from_container_settings(). containers.py reads it off the processed + # settings at runtime, so if it isn't propagated the profiling code path never runs. + # Regression test for the field being silently dropped during that conversion. + test_config = get_test_config() + + # Default: absent from config --> False + config.init_global(test_config) + assert config.config.container_settings.enable_profiling is False + + # Explicitly enabled --> must propagate to the processed settings as True + test_config["containers"]["enable_profiling"] = True + config.init_global(test_config) + assert config.config.container_settings.enable_profiling is True + def test_error_dataset_label(self): test_config = get_test_config() error_test_dicts = [{"label": "test$"}, {"label": "@test'"}, {"label": "[test]"}, {"label": "test-test"}, From 482442d498bcf3e0097fd5a7ee7b5968203649af Mon Sep 17 00:00:00 2001 From: Justin Hiemstra Date: Mon, 22 Jun 2026 13:27:27 -0500 Subject: [PATCH 2/3] Don't use capture_output and stderr in the same command subprocess.run rejects capture_output=True together with an explicit stderr argument (capture_output is shorthand for stdout=PIPE, stderr=PIPE). In the profiling/cgroup branch of run_container_singularity this raised "ValueError: stdout and stderr arguments may not be used with capture_output." on every Apptainer run with enable_profiling=true. Set stdout=PIPE and stderr=STDOUT explicitly so the container's stderr is merged into the captured stdout, preserving the original intent of capturing combined output via proc.stdout. --- spras/containers.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/spras/containers.py b/spras/containers.py index c30697f3..4bc3df55 100644 --- a/spras/containers.py +++ b/spras/containers.py @@ -553,7 +553,7 @@ def run_container_singularity(resolved: ResolvedImage, command: List[str], volum # as `containers.py`. wrapper = os.path.join(os.path.dirname(__file__), "cgroup_wrapper.sh") cmd = [wrapper, my_cgroup] + singularity_cmd - proc = subprocess.run(cmd, capture_output=True, text=True, stderr=subprocess.STDOUT) + proc = subprocess.run(cmd, text=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT) print("Reading memory and CPU stats from cgroup") create_apptainer_container_stats(my_cgroup, out_dir) From a1c52e79cb4654961d806a21449d529a1aebe547 Mon Sep 17 00:00:00 2001 From: Justin Hiemstra Date: Mon, 22 Jun 2026 13:53:44 -0500 Subject: [PATCH 3/3] Declare usage-profile.tsv as a reconstruct output The profiling file is written next to raw-pathway.txt inside the reconstruct rule, but it was never declared as a Snakemake output. With a shared filesystem this is harmless, but under shared-fs-usage: none (e.g. HTCondor) Snakemake only transfers declared outputs back from execute nodes, so the profiling data was created on the EP and then discarded. Declare usage-profile.tsv as an additional reconstruct output, gated on enable_profiling and the singularity/apptainer framework so the output is only required when it is actually produced (run_container_singularity), matching the existing warn-only behavior for profiling under docker. --- Snakefile | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/Snakefile b/Snakefile index 5ad7aa18..374014e0 100644 --- a/Snakefile +++ b/Snakefile @@ -273,6 +273,16 @@ def get_algorithm_image(wildcards): return None # Run the pathway reconstruction algorithm +# When profiling is enabled, each reconstruct job writes a 'usage-profile.tsv' alongside its raw-pathway.txt. +# Declare it as a rule output so that Snakemake tracks it and, critically, transfers it back from the execute +# node in environments without a shared filesystem (e.g. HTCondor with shared-fs-usage: none). This is only +# produced when profiling is actually performed, which requires the singularity/apptainer container framework +# (see run_container_singularity), so we gate on the same condition to avoid declaring an output that is never +# created (which would fail the rule). +profiling_outputs = {} +if container_settings.enable_profiling and container_settings.framework.is_singularity_family: + profiling_outputs['profile_file'] = SEP.join([out_dir, '{dataset}-{algorithm}-{params}', 'usage-profile.tsv']) + rule reconstruct: input: collect_prepared_input # Each reconstruct call should be in a separate output subdirectory that is unique for the parameter combination so @@ -280,7 +290,9 @@ rule reconstruct: # Overwriting files can happen because the pathway reconstruction algorithms often generate output files with the # same name regardless of the inputs or parameters, and these aren't renamed until after the container command # terminates - output: pathway_file = SEP.join([out_dir, '{dataset}-{algorithm}-{params}', 'raw-pathway.txt']) + output: + pathway_file = SEP.join([out_dir, '{dataset}-{algorithm}-{params}', 'raw-pathway.txt']), + **profiling_outputs resources: htcondor_transfer_input_files=get_algorithm_image run: