diff --git a/Snakefile b/Snakefile index 5ad7aa18..374014e0 100644 --- a/Snakefile +++ b/Snakefile @@ -273,6 +273,16 @@ def get_algorithm_image(wildcards): return None # Run the pathway reconstruction algorithm +# When profiling is enabled, each reconstruct job writes a 'usage-profile.tsv' alongside its raw-pathway.txt. +# Declare it as a rule output so that Snakemake tracks it and, critically, transfers it back from the execute +# node in environments without a shared filesystem (e.g. HTCondor with shared-fs-usage: none). This is only +# produced when profiling is actually performed, which requires the singularity/apptainer container framework +# (see run_container_singularity), so we gate on the same condition to avoid declaring an output that is never +# created (which would fail the rule). +profiling_outputs = {} +if container_settings.enable_profiling and container_settings.framework.is_singularity_family: + profiling_outputs['profile_file'] = SEP.join([out_dir, '{dataset}-{algorithm}-{params}', 'usage-profile.tsv']) + rule reconstruct: input: collect_prepared_input # Each reconstruct call should be in a separate output subdirectory that is unique for the parameter combination so @@ -280,7 +290,9 @@ rule reconstruct: # Overwriting files can happen because the pathway reconstruction algorithms often generate output files with the # same name regardless of the inputs or parameters, and these aren't renamed until after the container command # terminates - output: pathway_file = SEP.join([out_dir, '{dataset}-{algorithm}-{params}', 'raw-pathway.txt']) + output: + pathway_file = SEP.join([out_dir, '{dataset}-{algorithm}-{params}', 'raw-pathway.txt']), + **profiling_outputs resources: htcondor_transfer_input_files=get_algorithm_image run: diff --git a/spras/config/container_schema.py b/spras/config/container_schema.py index 70276f63..5112198b 100644 --- a/spras/config/container_schema.py +++ b/spras/config/container_schema.py @@ -95,6 +95,7 @@ def from_container_settings(settings: ContainerSettings, hash_length: int) -> "P unpack_singularity=unpack_singularity, base_url=container_base_url, prefix=container_prefix, + enable_profiling=settings.enable_profiling, hash_length=hash_length, images=dict(settings.images), ) diff --git a/spras/containers.py b/spras/containers.py index c30697f3..4bc3df55 100644 --- a/spras/containers.py +++ b/spras/containers.py @@ -553,7 +553,7 @@ def run_container_singularity(resolved: ResolvedImage, command: List[str], volum # as `containers.py`. wrapper = os.path.join(os.path.dirname(__file__), "cgroup_wrapper.sh") cmd = [wrapper, my_cgroup] + singularity_cmd - proc = subprocess.run(cmd, capture_output=True, text=True, stderr=subprocess.STDOUT) + proc = subprocess.run(cmd, text=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT) print("Reading memory and CPU stats from cgroup") create_apptainer_container_stats(my_cgroup, out_dir) diff --git a/test/test_config.py b/test/test_config.py index cf4cdf0f..f20f0e64 100644 --- a/test/test_config.py +++ b/test/test_config.py @@ -251,6 +251,22 @@ def test_config_container_images_invalid_algorithm(self): with pytest.raises(ValueError, match="Unknown algorithm name 'typo_algo'"): config.init_global(test_config) + def test_config_container_enable_profiling(self): + # enable_profiling must survive the ContainerSettings -> ProcessedContainerSettings + # conversion in from_container_settings(). containers.py reads it off the processed + # settings at runtime, so if it isn't propagated the profiling code path never runs. + # Regression test for the field being silently dropped during that conversion. + test_config = get_test_config() + + # Default: absent from config --> False + config.init_global(test_config) + assert config.config.container_settings.enable_profiling is False + + # Explicitly enabled --> must propagate to the processed settings as True + test_config["containers"]["enable_profiling"] = True + config.init_global(test_config) + assert config.config.container_settings.enable_profiling is True + def test_error_dataset_label(self): test_config = get_test_config() error_test_dicts = [{"label": "test$"}, {"label": "@test'"}, {"label": "[test]"}, {"label": "test-test"},