From 3e2d734fe3f7ab8fa3262c80a5825f6443eca279 Mon Sep 17 00:00:00 2001 From: Lester Hedges Date: Tue, 25 Feb 2025 11:05:15 +0000 Subject: [PATCH 001/170] Add CLI entry-point for Boresch ghost atom modifications. --- README.md | 16 ++++++ pyproject.toml | 3 +- src/somd2/app/__init__.py | 5 +- src/somd2/app/{run.py => _cli.py} | 82 ++++++++++++++++++++++++++++--- 4 files changed, 95 insertions(+), 11 deletions(-) rename src/somd2/app/{run.py => _cli.py} (60%) diff --git a/README.md b/README.md index 72f55293..9e26c1ea 100644 --- a/README.md +++ b/README.md @@ -149,6 +149,22 @@ can be controlled via the `--null-energy` option. The number of neighbours shoul be chosen as a trade off between accuracy and computational cost. A value of around 20% of the number of replicas has been found to be a good starting point. +## Ghost atom modifications + +We support the modification of ghost atom bonded terms to avoid spurious coupling +to the physical system using the approach described in [this](https://pubs.acs.org/doi/10.1021/acs.jctc.0c01328) paper. +These are enabled by default, but can be disabled using the ``--no-ghost-modifications`` +option. Alternatively, we also provide the `ghostly` command-line tool that can +be used to apply the modifications to perturbable system without running a simulation, +e.g. for use elsewhere. This can be used via: + +```bash +ghostly perturbable_system.bss --output perturbable_system_ghosted.bss --log-level debug +``` + +(Here the log level is set to debug to provide more information on the modifications +that are applied.) 
+ ## Note for SOMD1 users For existing users of `somd1`, it's possible to generate input for `somd2` by passing diff --git a/pyproject.toml b/pyproject.toml index 8e147696..ae7b5c9a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -20,7 +20,8 @@ dynamic = ["version"] license-files = ["LICENSE"] [project.scripts] -somd2 = "somd2.app:cli" +somd2 = "somd2.app:somd2" +ghostly = "somd2.app:ghostly" [project.urls] repository = "https://github.com/OpenBioSim/somd2" diff --git a/src/somd2/app/__init__.py b/src/somd2/app/__init__.py index 45c2fbc2..5bbf9e5a 100644 --- a/src/somd2/app/__init__.py +++ b/src/somd2/app/__init__.py @@ -28,7 +28,8 @@ .. autosummary:: :toctree: generated/ - cli + somd2 + ghostly """ -from .run import * +from ._cli import * diff --git a/src/somd2/app/run.py b/src/somd2/app/_cli.py similarity index 60% rename from src/somd2/app/run.py rename to src/somd2/app/_cli.py index 2bb8b130..568b87f8 100644 --- a/src/somd2/app/run.py +++ b/src/somd2/app/_cli.py @@ -20,17 +20,11 @@ ##################################################################### """ -The somd2 command line program. - -Usage: - To get the help for this program and list all of the - arguments (with defaults) use: - - somd2 --help +SOMD2 command line interface. """ -def cli(): +def somd2(): """ SOMD2: Command line interface. """ @@ -91,3 +85,75 @@ def cli(): except Exception as e: _logger.error(f"An error occurred during the simulation: {e}") exit(1) + + +def ghostly(): + """ + SOMD2: Command line interface. 
+ """ + + import argparse + import sys + + import sire as sr + + from somd2 import _logger + from somd2._utils._ghosts import boresch + + parser = argparse.ArgumentParser( + description="Ghostly: ghost atom bonded term modifications" + ) + + parser.add_argument( + "system", + type=str, + help="Path to a stream file containing the perturbable system.", + ) + + parser.add_argument( + "--output", + type=str, + help="File prefix for the output file.", + default="ghostly", + required=False, + ) + + parser.add_argument( + "--log-level", + type=str, + help="Log level for the logger.", + default="info", + choices=["debug", "info", "warning", "error", "critical"], + required=False, + ) + + # Parse the arguments. + args = parser.parse_args() + + # Set the logger level. + _logger.remove() + _logger.add(sys.stderr, level=args.log_level.upper(), enqueue=True) + + # Try to load the system. + try: + system = sr.stream.load(args.system) + system = sr.morph.link_to_reference(system) + except Exception as e: + _logger.error(f"An error occurred while loading the system: {e}") + exit(1) + + # Try to apply the modifications. + try: + system = boresch(system) + except Exception as e: + _logger.error( + f"An error occurred while applying the ghost atom modifications: {e}" + ) + exit(1) + + # Try to save the system. + try: + sr.stream.save(system, f"{args.output}.bss") + except Exception as e: + _logger.error(f"An error occurred while saving the system: {e}") + exit(1) From 40a59582cf8f6d8eea191421e9bdc1104c3f79eb Mon Sep 17 00:00:00 2001 From: Lester Hedges Date: Tue, 25 Feb 2025 11:28:18 +0000 Subject: [PATCH 002/170] Fix formatting of output name. (Prefix only.) [ci skip] --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 9e26c1ea..a5660ebc 100644 --- a/README.md +++ b/README.md @@ -159,7 +159,7 @@ be used to apply the modifications to perturbable system without running a simul e.g. for use elsewhere. 
This can be used via: ```bash -ghostly perturbable_system.bss --output perturbable_system_ghosted.bss --log-level debug +ghostly perturbable_system.bss --output ghosted --log-level debug ``` (Here the log level is set to debug to provide more information on the modifications From 4827b560c715a7f6be4a3c236ae12131b948c814 Mon Sep 17 00:00:00 2001 From: Lester Hedges Date: Tue, 25 Feb 2025 14:43:28 +0000 Subject: [PATCH 003/170] Add __all__ to CLI module. [ci skip] --- src/somd2/app/_cli.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/somd2/app/_cli.py b/src/somd2/app/_cli.py index 568b87f8..7d1ca3ec 100644 --- a/src/somd2/app/_cli.py +++ b/src/somd2/app/_cli.py @@ -23,6 +23,8 @@ SOMD2 command line interface. """ +__all__ = ["somd2", "ghostly"] + def somd2(): """ From 4d9a9914b8b86b1379b87045cbdeaf7fd440ca3c Mon Sep 17 00:00:00 2001 From: Lester Hedges Date: Wed, 26 Feb 2025 09:35:38 +0000 Subject: [PATCH 004/170] Use input file name as base for output by default. --- src/somd2/app/_cli.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/src/somd2/app/_cli.py b/src/somd2/app/_cli.py index 7d1ca3ec..9ecbb89f 100644 --- a/src/somd2/app/_cli.py +++ b/src/somd2/app/_cli.py @@ -95,6 +95,7 @@ def ghostly(): """ import argparse + import os import sys import sire as sr @@ -107,7 +108,7 @@ def ghostly(): ) parser.add_argument( - "system", + "input", type=str, help="Path to a stream file containing the perturbable system.", ) @@ -116,7 +117,6 @@ def ghostly(): "--output", type=str, help="File prefix for the output file.", - default="ghostly", required=False, ) @@ -138,7 +138,7 @@ def ghostly(): # Try to load the system. try: - system = sr.stream.load(args.system) + system = sr.stream.load(args.input) system = sr.morph.link_to_reference(system) except Exception as e: _logger.error(f"An error occurred while loading the system: {e}") @@ -155,7 +155,9 @@ def ghostly(): # Try to save the system. 
try: - sr.stream.save(system, f"{args.output}.bss") + input = os.path.splitext(args.input)[0] + output = args.output if args.output else input + "_ghostly" + sr.stream.save(system, f"{output}.bss") except Exception as e: _logger.error(f"An error occurred while saving the system: {e}") exit(1) From 56fc0d9189340aaef469d03c582265bb9bbcd002 Mon Sep 17 00:00:00 2001 From: Lester Hedges Date: Wed, 14 May 2025 09:59:35 +0100 Subject: [PATCH 005/170] Update environment for GCMC. --- environment.yaml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/environment.yaml b/environment.yaml index d7f280a9..0ad0294c 100644 --- a/environment.yaml +++ b/environment.yaml @@ -9,3 +9,6 @@ dependencies: - git - loguru - numba + - pycuda + - pip: + - git+https://github.com/openbiosim/loch From b67efda8aac687dcbf79658ea03627bce694aeb3 Mon Sep 17 00:00:00 2001 From: Lester Hedges Date: Fri, 16 May 2025 15:50:46 +0100 Subject: [PATCH 006/170] Add GCMC configuration options and validation. --- src/somd2/config/_config.py | 174 ++++++++++++++++++++++++++++++++++++ 1 file changed, 174 insertions(+) diff --git a/src/somd2/config/_config.py b/src/somd2/config/_config.py index 0978ee82..15e1e5b0 100644 --- a/src/somd2/config/_config.py +++ b/src/somd2/config/_config.py @@ -126,6 +126,13 @@ def __init__( max_gpus=None, oversubscription_factor=1, replica_exchange=False, + gcmc=False, + gcmc_selection=None, + gcmc_excess_chemical_potential="-6.09 kcal/mol", + gcmc_standard_volume="30.543 A^3", + gcmc_num_ghosts=10, + gcmc_sphere_radius="4 A", + gcmc_bulk_sampling_probability=0.1, rest2_scale=1.0, rest2_selection=None, output_directory="output", @@ -266,6 +273,9 @@ def __init__( energy_frequency: str Frequency at which to output energy data. If running using 'replica_exchange', then this will also be the frequency at which replica swaps are attempted. 
+ When performing Grand Canonical Monte Carlo (GCMC) water insertions/deletions + via 'gcmc=True', this will also be the frequency at which GCMC moves are + attempted. save_trajectories: bool Whether to save trajectory files @@ -299,6 +309,36 @@ def __init__( Whether to run replica exchange simulation. Currently this can only be used when GPU resources are available. + gcmc: bool + Whether to perform Grand Canonical Monte Carlo (GCMC) water insertions/deletions. + + gcmc_selection: str + A sire sslection string specifying the atoms that define the centre of geometry + of the GCMC sphere. If None, then GCMC moves will be attempted within the entire + simulation volume. + + gcmc_excess_chemical_potential: str + The excess chemical potential of water in kcal/mol. The default value is calibrated + for the TIP3P water model. This can be calculated from the free energy of decoupling + a single water molecule from bulk. + + gcmc_standard_volume: str + The standard volume of a water molecule in A^3. The default value is calibrated + from NPT simulation of TIP3P water. + + gcmc_num_ghosts: int + The initial number of ghost water molecules to insert into the system. These + are used as placeholders for GCMC insertion moves. + + gcmc_sphere_radius: str + The radius of the GCMC sphere. + + gcmc_bulk_sampling_probability: float + The probability of performing bulk GCMC moves, i.e. within the entire simulation + box rather than the GCMC sphere. These can be used to maintain a constant bulk + density, i.e. acting as a barostat. (This option has no affect when + 'gcmc_selection=None'.) + rest2_scale: float, list(float) The scaling factor for Replica Exchange with Solute Tempering (REST) simulations. 
This is the factor by which the temperature of the solute is scaled with respect to @@ -410,6 +450,13 @@ def __init__( self.max_gpus = max_gpus self.oversubscription_factor = oversubscription_factor self.replica_exchange = replica_exchange + self.gcmc = gcmc + self.gcmc_selection = gcmc_selection + self.gcmc_excess_chemical_potential = gcmc_excess_chemical_potential + self.gcmc_standard_volume = gcmc_standard_volume + self.gcmc_num_ghosts = gcmc_num_ghosts + self.gcmc_sphere_radius = gcmc_sphere_radius + self.gcmc_bulk_sampling_probability = gcmc_bulk_sampling_probability self.rest2_scale = rest2_scale self.rest2_selection = rest2_selection self.restart = restart @@ -1336,6 +1383,133 @@ def replica_exchange(self, replica_exchange): raise ValueError("'replica_exchange' must be of type 'bool'") self._replica_exchange = replica_exchange + @property + def gcmc(self): + return self._gcmc + + @gcmc.setter + def gcmc(self, gcmc): + if not isinstance(gcmc, bool): + raise ValueError("'gcmc' must be of type 'bool'") + self._gcmc = gcmc + + @property + def gcmc_selection(self): + return self._gcmc_selection + + @gcmc_selection.setter + def gcmc_selection(self, gcmc_selection): + if gcmc_selection is not None: + if not isinstance(gcmc_selection, str): + raise TypeError("'gcmc_selection' must be of type 'str'") + self._gcmc_selection = gcmc_selection + + @property + def gcmc_excess_chemical_potential(self): + return self._gcmc_excess_chemical_potential + + @gcmc_excess_chemical_potential.setter + def gcmc_excess_chemical_potential(self, gcmc_excess_chemical_potential): + if not isinstance(gcmc_excess_chemical_potential, str): + raise TypeError("'gcmc_excess_chemical_potential' must be of type 'str'") + + from sire.units import kcal_per_mol + + try: + gcmc_e = _sr.u(gcmc_excess_chemical_potential) + except: + raise ValueError( + "Unable to parse 'gcmc_excess_chemical_potential' " + f"as a Sire GeneralUnit: {gcmc_excess_chemical_potential}" + ) + + if not 
gcmc_e.has_same_units(kcal_per_mol): + raise ValueError("'gcmc_excess_chemical_potential' units are invalid.") + + self._gcmc_excess_chemical_potential = gcmc_e + + @property + def gcmc_standard_volume(self): + return self._gcmc_standard_volume + + @gcmc_standard_volume.setter + def gcmc_standard_volume(self, gcmc_standard_volume): + if not isinstance(gcmc_standard_volume, str): + raise TypeError("'gcmc_standard_volume' must be of type 'str'") + + from sire.units import angstrom3 + + try: + gcmc_v = _sr.u(gcmc_standard_volume) + except: + raise ValueError( + "Unable to parse 'gcmc_standard_volume' " + f"as a Sire GeneralUnit: {gcmc_standard_volume}" + ) + + if not gcmc_v.has_same_units(angstrom3): + raise ValueError("'gcmc_standard_volume' units are invalid.") + + self._gcmc_standard_volume = gcmc_v + + @property + def gcmc_num_ghosts(self): + return self._gcmc_num_ghosts + + @gcmc_num_ghosts.setter + def gcmc_num_ghosts(self, gcmc_num_ghosts): + if gcmc_num_ghosts is not None: + if not isinstance(gcmc_num_ghosts, int): + try: + gcmc_num_ghosts = int(gcmc_num_ghosts) + except: + raise ValueError("'gcmc_num_ghosts' must be an integer") + + if gcmc_num_ghosts < 0: + raise ValueError("'gcmc_num_ghosts' must be greater than or equal to 0") + self._gcmc_num_ghosts = gcmc_num_ghosts + + @property + def gcmc_sphere_radius(self): + return self._gcmc_sphere_radius + + @gcmc_sphere_radius.setter + def gcmc_sphere_radius(self, gcmc_sphere_radius): + if not isinstance(gcmc_sphere_radius, str): + raise TypeError("'gcmc_sphere_radius' must be of type 'str'") + + from sire.units import angstrom + + try: + gcmc_r = _sr.u(gcmc_sphere_radius) + except: + raise ValueError( + "Unable to parse 'gcmc_sphere_radius' " + f"as a Sire GeneralUnit: {gcmc_sphere_radius}" + ) + + if not gcmc_r.has_same_units(angstrom): + raise ValueError("'gcmc_sphere_radius' units are invalid.") + + self._gcmc_sphere_radius = gcmc_r + + @property + def gcmc_bulk_sampling_probability(self): + return 
self._gcmc_bulk_sampling_probability + + @gcmc_bulk_sampling_probability.setter + def gcmc_bulk_sampling_probability(self, gcmc_bulk_sampling_probability): + if not isinstance(gcmc_bulk_sampling_probability, float): + try: + gcmc_bulk_sampling_probability = float(gcmc_bulk_sampling_probability) + except Exception: + raise ValueError("'gcmc_bulk_sampling_probability' must be a float") + if gcmc_bulk_sampling_probability < 0.0 or gcmc_bulk_sampling_probability > 1.0: + raise ValueError( + "'gcmc_bulk_sampling_probability' must be between 0.0 and 1.0" + ) + self._gcmc_bulk_sampling_probability = gcmc_bulk_sampling_probability + @property def rest2_scale(self): return self._rest2_scale From bed7677fa7a2af7bf4e4a93a4e8be2b6e8ae2d36 Mon Sep 17 00:00:00 2001 From: Lester Hedges Date: Fri, 16 May 2025 15:51:30 +0100 Subject: [PATCH 007/170] GCMC configuration handling and self-consistency checks. --- src/somd2/runner/_base.py | 85 +++++++++++++++++++++++++++++++++++++-- 1 file changed, 81 insertions(+), 4 deletions(-) diff --git a/src/somd2/runner/_base.py b/src/somd2/runner/_base.py index 911587a7..47e23741 100644 --- a/src/somd2/runner/_base.py +++ b/src/somd2/runner/_base.py @@ -381,10 +381,33 @@ def __init__(self, system, config): mols = self._system[0] else: mols = self._system - mols0 = _sr.morph.link_to_reference(mols) - mols1 = _sr.morph.link_to_perturbed(mols) - _sr.save(mols0, self._filenames["topology0"]) - _sr.save(mols1, self._filenames["topology1"]) + # Add ghost waters to the system. + if self._gcmc and self._has_space: + from loch import GCMCSampler + from numpy.random import default_rng + + # Create a random number generator. + rng = default_rng() + + # Get a water template. + try: + water = mols["water"].molecules()[0] + except: + msg = "No water molecules in the system." + _logger.error(msg) + raise ValueError(msg) + + # Create the GCMC system. 
+ mols = GCMCSampler._prepare_system( + mols, water, rng, self._config.gcmc_num_ghosts + ) + + # For GCMC, we need to save the system with the ghost waters. + if not self._config.gcmc: + mols0 = _sr.morph.link_to_reference(mols) + mols1 = _sr.morph.link_to_perturbed(mols) + _sr.save(mols0, self._filenames["topology0"]) + _sr.save(mols1, self._filenames["topology1"]) # Append only this number of lines from the end of the dataframe during checkpointing. self._energy_per_block = int( @@ -394,6 +417,60 @@ def __init__(self, system, config): # Zero the energy sample. self._nrg_sample = 0 + # GCMC specific validation. + if self._config.gcmc: + if not self._is_gpu: + msg = "GCMC simulations require a GPU platform." + _logger.error(msg) + raise ValueError(msg) + + if not self._has_space: + msg = "GCMC simulations require a periodic space." + _logger.error(msg) + raise ValueError(msg) + + if self._config.pressure != None: + msg = "GCMC simulations must be run in the NVT ensemble." + _logger.error(msg) + raise ValueError(msg) + + # Make sure the frame frequency is a multiple of the energy frequency. + + # Get the scale factor. + scale = ( + self._config.frame_frequency / self._config.energy_frequency + ).value() + + # Make sure it's an integer. + if isclose(scale, round(scale), abs_tol=1e-4): + msg = "'frame_frequency' must be a multiple of 'energy_frequency'." + _logger.error(msg) + raise ValueError(msg) + + # Make sure the checkpoint frequency is a multiple of the frame frequency. + + # Get the scale factor. + scale = ( + self._config.checkpoint_frequency / self._config.frame_frequency + ).value() + + # Make sure it's an integer. + if isclose(scale, round(scale), abs_tol=1e-4): + msg = "'checkpoint_frequency' must be a multiple of 'frame_frequency'." + _logger.error(msg) + raise ValueError(msg) + + # Make sure the selection is valid. 
+ if self._gcmc_selection is not None: + try: + atoms = _sr.mol.selection_to_atoms( + self._system, self._config.gcmc_selection + ) + except: + msg = "Invalid 'gcmc_selection' value." + _logger.error(msg) + raise ValueError(msg) + # Create the default dynamics kwargs dictionary. These can be overloaded # as needed. self._dynamics_kwargs = { From 9096415371308626b0d1141ccbfb8030a1f9f1b5 Mon Sep 17 00:00:00 2001 From: Lester Hedges Date: Fri, 16 May 2025 16:00:24 +0100 Subject: [PATCH 008/170] Fix post equilibration minimisation. --- src/somd2/runner/_runner.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/src/somd2/runner/_runner.py b/src/somd2/runner/_runner.py index 902e5a8d..397240fc 100644 --- a/src/somd2/runner/_runner.py +++ b/src/somd2/runner/_runner.py @@ -431,7 +431,7 @@ def generate_lam_vals(lambda_base, increment=0.001): not self._config.equilibration_constraints and self._config.perturbable_constraint != "none" ): - self._minimisation( + system = self._minimisation( system, lambda_value=lambda_value, rest2_scale=rest2_scale, @@ -696,6 +696,12 @@ def _minimisation( perturbable_constraint: str The constraint for perturbable molecules. + + Returns + ------- + + system: :class: `System ` + The minimised system. """ _logger.info(f"Minimising at {_lam_sym} = {lambda_value:.5f}") From cdcd68d256db7fb7b5fc9103297b7f69f7025e7a Mon Sep 17 00:00:00 2001 From: Lester Hedges Date: Fri, 16 May 2025 16:47:15 +0100 Subject: [PATCH 009/170] Implemented per-lambda GCMC for base runner. 
--- src/somd2/config/_config.py | 27 ++++--- src/somd2/runner/_base.py | 43 ++++++++++- src/somd2/runner/_runner.py | 141 ++++++++++++++++++++++++++++++------ 3 files changed, 170 insertions(+), 41 deletions(-) diff --git a/src/somd2/config/_config.py b/src/somd2/config/_config.py index 15e1e5b0..82a839e6 100644 --- a/src/somd2/config/_config.py +++ b/src/somd2/config/_config.py @@ -131,7 +131,7 @@ def __init__( gcmc_excess_chemical_potential="-6.09 kcal/mol", gcmc_standard_volume="30.543 A^3", gcmc_num_ghosts=10, - gcmc_sphere_radius="4 A", + gcmc_radius="4 A", gcmc_bulk_sampling_probability=0.1, rest2_scale=1.0, rest2_selection=None, @@ -330,7 +330,7 @@ def __init__( The initial number of ghost water molecules to insert into the system. These are used as placeholders for GCMC insertion moves. - gcmc_sphere_radius: str + gcmc_radius: str The radius of the GCMC sphere. gcmc_bulk_sampling_probability: float @@ -455,7 +455,7 @@ def __init__( self.gcmc_excess_chemical_potential = gcmc_excess_chemical_potential self.gcmc_standard_volume = gcmc_standard_volume self.gcmc_num_ghosts = gcmc_num_ghosts - self.gcmc_sphere_radius = gcmc_sphere_radius + self.gcmc_radius = gcmc_radius self.gcmc_bulk_sampling_probability = gcmc_bulk_sampling_probability self.rest2_scale = rest2_scale self.rest2_selection = rest2_selection @@ -1470,28 +1470,27 @@ def gcmc_num_ghosts(self, gcmc_num_ghosts): self._gcmc_num_ghosts = gcmc_num_ghosts @property - def gcmc_sphere_radius(self): - return self._gcmc_sphere_radius + def gcmc_radius(self): + return self._gcmc_radius - @gcmc_sphere_radius.setter - def gcmc_sphere_radius(self, gcmc_sphere_radius): - if not isinstance(gcmc_sphere_radius, str): - raise TypeError("'gcmc_sphere_radius' must be of type 'str'") + @gcmc_radius.setter + def gcmc_radius(self, gcmc_radius): + if not isinstance(gcmc_radius, str): + raise TypeError("'gcmc_radius' must be of type 'str'") from sire.units import angstrom try: - gcmc_r = _sr.u(gcmc_sphere_radius) + gcmc_r = 
_sr.u(gcmc_radius) except: raise ValueError( - "Unable to parse 'gcmc_sphere_radius' " - f"as a Sire GeneralUnit: {gcmc_sphere_radius}" + "Unable to parse 'gcmc_radius' " f"as a Sire GeneralUnit: {gcmc_radius}" ) if not gcmc_r.has_same_units(angstrom): - raise ValueError("'gcmc_sphere_radius' units are invalid.") + raise ValueError("'gcmc_radius' units are invalid.") - self._gcmc_sphere_radius = gcmc_r + self._gcmc_radius = gcmc_r @property def gcmc_bulk_sampling_probability(self): diff --git a/src/somd2/runner/_base.py b/src/somd2/runner/_base.py index 47e23741..fa723a53 100644 --- a/src/somd2/runner/_base.py +++ b/src/somd2/runner/_base.py @@ -382,7 +382,7 @@ def __init__(self, system, config): else: mols = self._system # Add ghost waters to the system. - if self._gcmc and self._has_space: + if self._config.gcmc and self._has_space: from loch import GCMCSampler from numpy.random import default_rng @@ -442,7 +442,7 @@ def __init__(self, system, config): ).value() # Make sure it's an integer. - if isclose(scale, round(scale), abs_tol=1e-4): + if not isclose(scale, round(scale), abs_tol=1e-4): msg = "'frame_frequency' must be a multiple of 'energy_frequency'." _logger.error(msg) raise ValueError(msg) @@ -455,13 +455,24 @@ def __init__(self, system, config): ).value() # Make sure it's an integer. - if isclose(scale, round(scale), abs_tol=1e-4): + if not isclose(scale, round(scale), abs_tol=1e-4): msg = "'checkpoint_frequency' must be a multiple of 'frame_frequency'." _logger.error(msg) raise ValueError(msg) + # Make sure the runtime is a multiple of the frame frequency. + + # Get the scale factor. + scale = (self._config.runtime / self._config.frame_frequency).value() + + # Make sure it's an integer. + if not isclose(scale, round(scale), abs_tol=1e-4): + msg = "'runtime' must be a multiple of 'frame_frequency'." + _logger.error(msg) + raise ValueError(msg) + # Make sure the selection is valid. 
- if self._gcmc_selection is not None: + if self._config.gcmc_selection is not None: try: atoms = _sr.mol.selection_to_atoms( self._system, self._config.gcmc_selection @@ -498,6 +509,28 @@ def __init__(self, system, config): "map": config._extra_args, } + # Create the GCMC specific kwargs dictionary. + if self._config.gcmc: + self._gcmc_kwargs = { + "reference": self._config.gcmc_selection, + "excess_chemical_potential": str( + self._config.gcmc_excess_chemical_potential + ), + "standard_volume": str(self._config.gcmc_standard_volume), + "radius": str(self._config.gcmc_radius), + "max_gcmc_waters": self._config.gcmc_num_ghosts, + "bulk_sampling_probability": self._config.gcmc_bulk_sampling_probability, + "cutoff_type": self._config.cutoff_type, + "cutoff": str(self._config.cutoff), + "temperature": str(self._config.temperature), + "lambda_schedule": self._config.lambda_schedule, + "coulomb_power": self._config.coulomb_power, + "shift_coulomb": str(self._config.shift_coulomb), + "shift_delta": str(self._config.shift_delta), + "log_level": self._config.log_level, + "overwrite": self._config.overwrite, + } + def _check_space(self): """ Check if the system has a periodic space. @@ -785,6 +818,8 @@ def increment_filename(base_filename, suffix): filenames["energy_components"] = str( output_directory / f"energy_components_{lam}.txt" ) + filenames["gcmc_log"] = str(output_directory / f"gcmc_log_{lam}.txt") + filenames["gcmc_ghosts"] = str(output_directory / f"gcmc_ghosts_{lam}.txt") if restart: filenames["config"] = str( output_directory / increment_filename("config", "yaml") diff --git a/src/somd2/runner/_runner.py b/src/somd2/runner/_runner.py index 397240fc..5f89645b 100644 --- a/src/somd2/runner/_runner.py +++ b/src/somd2/runner/_runner.py @@ -472,6 +472,31 @@ def generate_lam_vals(lambda_base, increment=0.001): # Now sort the scaling factors. rest2_scale_factors = [rest2_scale_factors[i] for i in sorted_indices] + # Prepare the GCMC sampler. 
+ if self._config.gcmc: + _logger.info(f"Preparing GCMC sampler for {_lam_sym} = {lambda_value:.5f}") + + from loch import GCMCSampler + + gcmc_sampler = GCMCSampler( + system, + device=int(device), + lambda_value=lambda_value, + log_file=self._filenames[0]["gcmc_log"], + ghost_file=self._filenames[0]["gcmc_ghosts"], + **self._gcmc_kwargs, + ) + + # Get the GCMC system. + system = gcmc_sampler.system() + + # Write the end states so that we can later visualise trajectories. + if index == 0: + mols0 = _sr.morph.link_to_reference(system) + mols1 = _sr.morph.link_to_perturbed(system) + _sr.save(mols0, self._filenames["topology0"]) + _sr.save(mols1, self._filenames["topology1"]) + _logger.info(f"Running dynamics at {_lam_sym} = {lambda_value:.5f}") # Copy the dynamics kwargs. @@ -518,17 +543,54 @@ def generate_lam_vals(lambda_base, increment=0.001): # Run the dynamics. try: - dynamics.run( - self._config.checkpoint_frequency, - energy_frequency=self._config.energy_frequency, - frame_frequency=self._config.frame_frequency, - lambda_windows=lambda_array, - rest2_scale_factors=rest2_scale_factors, - save_velocities=self._config.save_velocities, - auto_fix_minimise=True, - num_energy_neighbours=num_energy_neighbours, - null_energy=self._config.null_energy, - ) + # GCMC specific handling. Note that the frame and checkpoint + # frequencies are multiples of the energy frequency so we can + # run in energy frequency blocks with no remainder. + if self._config.gcmc: + # Initialise the run time and time at which the next frame is saved. + runtime = _sr.u("0ps") + save_frames = self._config.frame_frequency > 0 + next_frame = self._config.frame_frequency + + # Loop until we reach the runtime. + while runtime <= self._config.checkpoint_frequency: + # Run the dynamics in blocks of the energy frequency. 
+ dynamics.run( + self._config.energy_frequency, + energy_frequency=self._config.energy_frequency, + frame_frequency=self._config.frame_frequency, + lambda_windows=lambda_array, + rest2_scale_factors=rest2_scale_factors, + save_velocities=self._config.save_velocities, + auto_fix_minimise=True, + num_energy_neighbours=num_energy_neighbours, + null_energy=self._config.null_energy, + ) + + # Perform a GCMC move. + gcmc_sampler.move(dynamics.context()) + + # Update the runtime. + runtime += self._config.energy_frequency + + # If a frame is saved, then we need to save current indices + # of the ghost water residues. + if save_frames and runtime >= next_frame: + gcmc_sampler.write_ghost_residues() + next_frame += self._config.frame_frequency + + else: + dynamics.run( + self._config.checkpoint_frequency, + energy_frequency=self._config.energy_frequency, + frame_frequency=self._config.frame_frequency, + lambda_windows=lambda_array, + rest2_scale_factors=rest2_scale_factors, + save_velocities=self._config.save_velocities, + auto_fix_minimise=True, + num_energy_neighbours=num_energy_neighbours, + null_energy=self._config.null_energy, + ) except Exception as e: raise RuntimeError( f"Dynamics block {block+1} for {_lam_sym} = {lambda_value:.5f} failed: {e}" @@ -580,7 +642,7 @@ def generate_lam_vals(lambda_base, increment=0.001): f"Checkpoint failed for {_lam_sym} = {lambda_value:.5f}: {e}" ) - # Handle the remainder time. + # Handle the remainder time. (There will be no remainer when GCMC sampling.) 
if rem > 0: block += 1 try: @@ -635,17 +697,50 @@ def generate_lam_vals(lambda_base, increment=0.001): ) else: try: - dynamics.run( - time, - energy_frequency=self._config.energy_frequency, - frame_frequency=self._config.frame_frequency, - lambda_windows=lambda_array, - rest2_scale_factors=rest2_scale_factors, - save_velocities=self._config.save_velocities, - auto_fix_minimise=True, - num_energy_neighbours=num_energy_neighbours, - null_energy=self._config.null_energy, - ) + if self._config.gcmc: + # Initialise the run time and time at which the next frame is saved. + runtime = _sr.u("0ps") + save_frames = self._config.frame_frequency > 0 + next_frame = self._config.frame_frequency + + # Loop until we reach the runtime. + while runtime <= time: + # Run the dynamics in blocks of the energy frequency. + dynamics.run( + self._config.energy_frequency, + energy_frequency=self._config.energy_frequency, + frame_frequency=self._config.frame_frequency, + lambda_windows=lambda_array, + rest2_scale_factors=rest2_scale_factors, + save_velocities=self._config.save_velocities, + auto_fix_minimise=True, + num_energy_neighbours=num_energy_neighbours, + null_energy=self._config.null_energy, + ) + + # Perform a GCMC move. + gcmc_sampler.move(dynamics.context()) + + # Update the runtime. + runtime += self._config.energy_frequency + + # If a frame is saved, then we need to save current indices + # of the ghost water residues. 
+ if save_frames and runtime >= next_frame: + gcmc_sampler.write_ghost_residues() + next_frame += self._config.frame_frequency + else: + dynamics.run( + time, + energy_frequency=self._config.energy_frequency, + frame_frequency=self._config.frame_frequency, + lambda_windows=lambda_array, + rest2_scale_factors=rest2_scale_factors, + save_velocities=self._config.save_velocities, + auto_fix_minimise=True, + num_energy_neighbours=num_energy_neighbours, + null_energy=self._config.null_energy, + ) except Exception as e: raise RuntimeError( f"Dynamics for {_lam_sym} = {lambda_value:.5f} failed: {e}" From ca0e73ba150f05c1a311c5da856c17ea9a2dc8d4 Mon Sep 17 00:00:00 2001 From: Lester Hedges Date: Fri, 16 May 2025 16:53:32 +0100 Subject: [PATCH 010/170] Separate the SOMD2 and GCMC log levels. --- src/somd2/config/_config.py | 21 +++++++++++++++++++++ src/somd2/runner/_base.py | 2 +- 2 files changed, 22 insertions(+), 1 deletion(-) diff --git a/src/somd2/config/_config.py b/src/somd2/config/_config.py index 82a839e6..1f7651f8 100644 --- a/src/somd2/config/_config.py +++ b/src/somd2/config/_config.py @@ -70,6 +70,7 @@ class Config: "charge_scaled_morph", ], "log_level": [level.lower() for level in _logger._core.levels], + "gcmc_log_level": [level.lower() for level in _logger._core.levels], } # A dictionary of nargs for the various options. @@ -133,6 +134,7 @@ def __init__( gcmc_num_ghosts=10, gcmc_radius="4 A", gcmc_bulk_sampling_probability=0.1, + gcmc_log_level="error", rest2_scale=1.0, rest2_selection=None, output_directory="output", @@ -339,6 +341,9 @@ def __init__( density, i.e. acting as a barostat. (This option has no affect when 'gcmc_selection=None'.) + gcmc_log_level: str + Log level to use for GCMC sampling. + rest2_scale: float, list(float) The scaling factor for Replica Exchange with Solute Tempering (REST) simulations. 
This is the factor by which the temperature of the solute is scaled with respect to @@ -457,6 +462,7 @@ def __init__( self.gcmc_num_ghosts = gcmc_num_ghosts self.gcmc_radius = gcmc_radius self.gcmc_bulk_sampling_probability = gcmc_bulk_sampling_probability + self.gcmc_log_level = gcmc_log_level self.rest2_scale = rest2_scale self.rest2_selection = rest2_selection self.restart = restart @@ -1509,6 +1515,21 @@ def gcmc_bulk_sampling_probability(self, gcmc_bulk_sampling_probability): ) self._gcmc_bulk_sampling_probability = gcmc_bulk_sampling_probability + @property + def gcmc_log_level(self): + return self._gcmc_log_level + + @gcmc_log_level.setter + def gcmc_log_level(self, log_level): + if not isinstance(log_level, str): + raise TypeError("'log_level' must be of type 'str'") + log_level = log_level.lower().replace(" ", "") + if log_level not in self._choices["log_level"]: + raise ValueError( + f"Log level not recognised. Valid log levels are: {', '.join(self._choices['log_level'])}" + ) + self._gcmc_log_level = log_level + @property def rest2_scale(self): return self._rest2_scale diff --git a/src/somd2/runner/_base.py b/src/somd2/runner/_base.py index fa723a53..5ae03b43 100644 --- a/src/somd2/runner/_base.py +++ b/src/somd2/runner/_base.py @@ -527,7 +527,7 @@ def __init__(self, system, config): "coulomb_power": self._config.coulomb_power, "shift_coulomb": str(self._config.shift_coulomb), "shift_delta": str(self._config.shift_delta), - "log_level": self._config.log_level, + "log_level": self._config.gcmc_log_level, "overwrite": self._config.overwrite, } From cba820e4015ebe57a5e42b805fbfcd9c6425dd56 Mon Sep 17 00:00:00 2001 From: Lester Hedges Date: Mon, 19 May 2025 13:43:42 +0100 Subject: [PATCH 011/170] Set gcmc_kwargs to None if not performing GCMC. 
--- src/somd2/runner/_base.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/somd2/runner/_base.py b/src/somd2/runner/_base.py index 5ae03b43..60385ab9 100644 --- a/src/somd2/runner/_base.py +++ b/src/somd2/runner/_base.py @@ -530,6 +530,8 @@ def __init__(self, system, config): "log_level": self._config.gcmc_log_level, "overwrite": self._config.overwrite, } + else: + self._gcmc_kwargs = None def _check_space(self): """ From d170705698547e5ee4f5b2ccbfa45df1fa3cfeab Mon Sep 17 00:00:00 2001 From: Lester Hedges Date: Mon, 19 May 2025 13:44:11 +0100 Subject: [PATCH 012/170] Need unique GCMC log and ghost file for each lambda. --- src/somd2/runner/_runner.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/somd2/runner/_runner.py b/src/somd2/runner/_runner.py index 5f89645b..6d94829b 100644 --- a/src/somd2/runner/_runner.py +++ b/src/somd2/runner/_runner.py @@ -482,8 +482,8 @@ def generate_lam_vals(lambda_base, increment=0.001): system, device=int(device), lambda_value=lambda_value, - log_file=self._filenames[0]["gcmc_log"], - ghost_file=self._filenames[0]["gcmc_ghosts"], + log_file=self._filenames[index]["gcmc_log"], + ghost_file=self._filenames[index]["gcmc_ghosts"], **self._gcmc_kwargs, ) From 62dce3bd2203fe1a5e756574c691a066c76b4797 Mon Sep 17 00:00:00 2001 From: Lester Hedges Date: Mon, 19 May 2025 14:07:23 +0100 Subject: [PATCH 013/170] Added GCMC implementation for replica exchange. 
--- src/somd2/runner/_repex.py | 112 +++++++++++++++++++++++++++++++++---- 1 file changed, 101 insertions(+), 11 deletions(-) diff --git a/src/somd2/runner/_repex.py b/src/somd2/runner/_repex.py index ec5ed10c..e021c399 100644 --- a/src/somd2/runner/_repex.py +++ b/src/somd2/runner/_repex.py @@ -114,7 +114,14 @@ def __getstate__(self): return d def _create_dynamics( - self, system, lambdas, rest2_scale_factors, num_gpus, dynamics_kwargs + self, + system, + lambdas, + rest2_scale_factors, + num_gpus, + dynamics_kwargs, + gcmc_kwargs=None, + output_directory=None, ): """ Create the dynamics objects. @@ -136,14 +143,27 @@ def _create_dynamics( dynamics_kwargs: dict A dictionary of default dynamics keyword arguments. + + gcmc_kwargs: dict + GCMC specific keyword arguments. If None, then GCMC is not used. + + output_directory: pathlib.Path + The directory for simulation output. """ # Copy the dynamics keyword arguments. dynamics_kwargs = dynamics_kwargs.copy() + # Copy the GCMC keyword arguments. + if gcmc_kwargs is not None: + gcmc_kwargs = gcmc_kwargs.copy() + # Initialise the dynamics object list. self._dynamics = [] + # Initialise the GCMC object list. + self._gcmc = [] + # Create the dynamics objects in serial. for i, (lam, scale) in enumerate(zip(lambdas, rest2_scale_factors)): # Work out the device index. @@ -156,6 +176,27 @@ def _create_dynamics( else: mols = system + if gcmc_kwargs is not None: + from local import GCMCSampler + + log_file = str(output_directory / f"gcmc_{lam:.5f}.log") + ghost_file = str(output_directory / f"gcmc_{lam:.5f}.ghost") + + # Create the GCMC sampler. + self._gcmc.append( + GCMCSampler( + mols, + device=int(device), + lambda_value=lam, + log_file=log_file, + ghost_file=ghost_file, + **gcmc_kwargs, + ) + ) + + # Get the modified GCMC system. + mols = sampler.system() + # Overload the device and lambda value. 
dynamics_kwargs["device"] = device dynamics_kwargs["lambda_value"] = lam @@ -178,7 +219,7 @@ def _create_dynamics( def get(self, index): """ - Get the dynamics object for a given index. + Get the dynamics object (and GCMC sampler) for a given index. Parameters ---------- @@ -190,9 +231,14 @@ def get(self, index): ------- tuple - The dynamics object for the replica. + The dynamics object for the replica and its GCMC sampler. """ - return self._dynamics[index] + try: + gcmc_sampler = self._gcmc[index] + except: + gcmc_sampler = None + + return self._dynamics[index], gcmc_sampler def set(self, index, dynamics): """ @@ -437,6 +483,8 @@ def __init__(self, system, config): self._rest2_scale_factors, self._num_gpus, self._dynamics_kwargs, + gcmc_kwargs=self._config.gcmc_kwargs, + output_directory=self._config.output_directory, ) # Conversion factor for reduced potential. @@ -574,6 +622,14 @@ def run(self): # Record the start time for the production block. prod_start = time() + # Store the number of blocks per-frame. For GCMC, we need to write the + # indices of the current ghost water residues each time a frame is saved. + # For GCMC simulations, the frame frequency is guaranteed to be a multiple + # of the energy frequency. + cycles_per_frame = int( + self._config.frame_frequency / self._config.energy_frequency + ) + # Perform the replica exchange simulation. for i in range(cycles): _logger.info(f"Running dynamics for cycle {i+1} of {cycles}") @@ -593,6 +649,9 @@ def run(self): # Whether to checkpoint. is_checkpoint = i > 0 and i % cycles_per_checkpoint == 0 + # Whether a frame was saved after the previous block. + write_gcmc_ghosts = i > 0 and (i - 1) % cycles_per_frame == 0 + # Run a dynamics block for each replica, making sure only each GPU is only # oversubscribed by a factor of self._config.oversubscription_factor. 
for j in range(num_batches): @@ -608,6 +667,7 @@ def run(self): repeat(i == cycles - 1), repeat(block), repeat(num_blocks + int(rem > 0)), + repeat(write_gcmc_ghosts), ): if not result: _logger.error( @@ -688,6 +748,7 @@ def _run_block( is_final_block, block, num_blocks, + write_gcmc_ghosts=False, ): """ Run a dynamics block for a given replica. @@ -719,6 +780,10 @@ def _run_block( num_blocks: int The total number of blocks. + write_gcmc_ghosts: bool + Whether to write the indices of GCMC ghost residues to + file. + Returns ------- @@ -737,8 +802,8 @@ def _run_block( lam = lambdas[index] try: - # Get the dynamics object. - dynamics = self._dynamics_cache.get(index) + # Get the dynamics object (and GCMC sampler). + dynamics, gcmc_sampler = self._dynamics_cache.get(index) # Minimise the system if this is a restart simulation and this is # the first block. @@ -752,6 +817,19 @@ def _run_block( # Draw new velocities from the Maxwell-Boltzmann distribution. dynamics.randomise_velocities() + # Perform a GCMC move. For repex this needs to be done before the + # dynamics block so that the final energies, which are used in the + # repex acceptance criteria, are correct. + if gcmc_sampler is not None: + # The frame frequency was hit after the previous block, so we + # need to write the current indices of the GCMC ghost residues + # to file. + if write_gcmc_ghosts: + gcmc_sampler.write_ghost_residues() + + # Perform the GCMC move. + gcmc_sampler.move(dynamics.context()) + # Run the dynamics. dynamics.run( self._config.energy_frequency, @@ -837,8 +915,15 @@ def _minimise(self, index): _logger.info(f"Minimising at {_lam_sym} = {self._lambda_values[index]:.5f}") try: - # Get the dynamics object. - dynamics = self._dynamics_cache.get(index) + # Get the dynamics object (and GCMC sampler). 
+ dynamics, gcmc_sampler = self._dynamics_cache.get(index) + + if gcmc_sampler is not None: + _logger.info( + f"Pre-equilibrating with GCMC moves at {_lam_sym} = {lam:.5f}" + ) + for i in range(100): + gcmc_sampler.move(dynamics.context()) # Minimise. dynamics.minimise(timeout=self._config.timeout) @@ -873,8 +958,13 @@ def _equilibrate(self, index): _logger.info(f"Equilibrating at {_lam_sym} = {self._lambda_values[index]:.5f}") try: - # Get the dynamics object. - dynamics = self._dynamics_cache.get(index) + # Get the dynamics object (and GCMC sampler). + dynamics, gcmc_sampler = self._dynamics_cache.get(index) + + if gcmc_sampler is not None: + _logger.info(f"Equilibrating with GCMC moves at {_lam_sym} = {lam:.5f}") + for i in range(100): + gcmc_sampler.move(dynamics.context()) # Equilibrate. dynamics.run( @@ -961,7 +1051,7 @@ def _compute_energies(self, index): ) # Get the dynamics object. - dynamics = self._dynamics_cache.get(index) + dynamics, _ = self._dynamics_cache.get(index) # Create an array to hold the energies. energies = _np.zeros(self._config.num_lambda) From 99d951a38775228d42818134640739c7db89c928 Mon Sep 17 00:00:00 2001 From: Lester Hedges Date: Mon, 19 May 2025 15:18:01 +0100 Subject: [PATCH 014/170] Unable to decouple SOMD2 and GCMC loggers. --- src/somd2/config/_config.py | 21 --------------------- src/somd2/runner/_base.py | 3 +-- src/somd2/runner/_repex.py | 2 -- src/somd2/runner/_runner.py | 1 - 4 files changed, 1 insertion(+), 26 deletions(-) diff --git a/src/somd2/config/_config.py b/src/somd2/config/_config.py index 1f7651f8..82a839e6 100644 --- a/src/somd2/config/_config.py +++ b/src/somd2/config/_config.py @@ -70,7 +70,6 @@ class Config: "charge_scaled_morph", ], "log_level": [level.lower() for level in _logger._core.levels], - "gcmc_log_level": [level.lower() for level in _logger._core.levels], } # A dictionary of nargs for the various options. 
@@ -134,7 +133,6 @@ def __init__( gcmc_num_ghosts=10, gcmc_radius="4 A", gcmc_bulk_sampling_probability=0.1, - gcmc_log_level="error", rest2_scale=1.0, rest2_selection=None, output_directory="output", @@ -341,9 +339,6 @@ def __init__( density, i.e. acting as a barostat. (This option has no affect when 'gcmc_selection=None'.) - gcmc_log_level: str - Log level to use for GCMC sampling. - rest2_scale: float, list(float) The scaling factor for Replica Exchange with Solute Tempering (REST) simulations. This is the factor by which the temperature of the solute is scaled with respect to @@ -462,7 +457,6 @@ def __init__( self.gcmc_num_ghosts = gcmc_num_ghosts self.gcmc_radius = gcmc_radius self.gcmc_bulk_sampling_probability = gcmc_bulk_sampling_probability - self.gcmc_log_level = gcmc_log_level self.rest2_scale = rest2_scale self.rest2_selection = rest2_selection self.restart = restart @@ -1515,21 +1509,6 @@ def gcmc_bulk_sampling_probability(self, gcmc_bulk_sampling_probability): ) self._gcmc_bulk_sampling_probability = gcmc_bulk_sampling_probability - @property - def gcmc_log_level(self): - return self._gcmc_log_level - - @gcmc_log_level.setter - def gcmc_log_level(self, log_level): - if not isinstance(log_level, str): - raise TypeError("'log_level' must be of type 'str'") - log_level = log_level.lower().replace(" ", "") - if log_level not in self._choices["log_level"]: - raise ValueError( - f"Log level not recognised. 
Valid log levels are: {', '.join(self._choices['log_level'])}" - ) - self._gcmc_log_level = log_level - @property def rest2_scale(self): return self._rest2_scale diff --git a/src/somd2/runner/_base.py b/src/somd2/runner/_base.py index 60385ab9..ae0ef768 100644 --- a/src/somd2/runner/_base.py +++ b/src/somd2/runner/_base.py @@ -527,8 +527,8 @@ def __init__(self, system, config): "coulomb_power": self._config.coulomb_power, "shift_coulomb": str(self._config.shift_coulomb), "shift_delta": str(self._config.shift_delta), - "log_level": self._config.gcmc_log_level, "overwrite": self._config.overwrite, + "no_logger": True, } else: self._gcmc_kwargs = None @@ -820,7 +820,6 @@ def increment_filename(base_filename, suffix): filenames["energy_components"] = str( output_directory / f"energy_components_{lam}.txt" ) - filenames["gcmc_log"] = str(output_directory / f"gcmc_log_{lam}.txt") filenames["gcmc_ghosts"] = str(output_directory / f"gcmc_ghosts_{lam}.txt") if restart: filenames["config"] = str( diff --git a/src/somd2/runner/_repex.py b/src/somd2/runner/_repex.py index e021c399..b02f473f 100644 --- a/src/somd2/runner/_repex.py +++ b/src/somd2/runner/_repex.py @@ -179,7 +179,6 @@ def _create_dynamics( if gcmc_kwargs is not None: from local import GCMCSampler - log_file = str(output_directory / f"gcmc_{lam:.5f}.log") ghost_file = str(output_directory / f"gcmc_{lam:.5f}.ghost") # Create the GCMC sampler. 
@@ -188,7 +187,6 @@ def _create_dynamics( mols, device=int(device), lambda_value=lam, - log_file=log_file, ghost_file=ghost_file, **gcmc_kwargs, ) diff --git a/src/somd2/runner/_runner.py b/src/somd2/runner/_runner.py index 6d94829b..794333ac 100644 --- a/src/somd2/runner/_runner.py +++ b/src/somd2/runner/_runner.py @@ -482,7 +482,6 @@ def generate_lam_vals(lambda_base, increment=0.001): system, device=int(device), lambda_value=lambda_value, - log_file=self._filenames[index]["gcmc_log"], ghost_file=self._filenames[index]["gcmc_ghosts"], **self._gcmc_kwargs, ) From a9bac0b022ca702667bf5a1eac8cef5e918a1bcc Mon Sep 17 00:00:00 2001 From: Lester Hedges Date: Mon, 19 May 2025 15:22:47 +0100 Subject: [PATCH 015/170] Log GCMC moves. --- src/somd2/runner/_repex.py | 1 + src/somd2/runner/_runner.py | 6 ++++++ 2 files changed, 7 insertions(+) diff --git a/src/somd2/runner/_repex.py b/src/somd2/runner/_repex.py index b02f473f..60140e07 100644 --- a/src/somd2/runner/_repex.py +++ b/src/somd2/runner/_repex.py @@ -826,6 +826,7 @@ def _run_block( gcmc_sampler.write_ghost_residues() # Perform the GCMC move. + _logger.info(f"Performing GCMC move at {_lam_sym} = {lam:.5f}") gcmc_sampler.move(dynamics.context()) # Run the dynamics. diff --git a/src/somd2/runner/_runner.py b/src/somd2/runner/_runner.py index 794333ac..ebc05376 100644 --- a/src/somd2/runner/_runner.py +++ b/src/somd2/runner/_runner.py @@ -567,6 +567,9 @@ def generate_lam_vals(lambda_base, increment=0.001): ) # Perform a GCMC move. + _logger.info( + f"Performing GCMC move at {_lam_sym} = {lambda_value:.5f}" + ) gcmc_sampler.move(dynamics.context()) # Update the runtime. @@ -718,6 +721,9 @@ def generate_lam_vals(lambda_base, increment=0.001): ) # Perform a GCMC move. + _logger.info( + f"Performing GCMC move at {_lam_sym} = {lambda_value:.5f}" + ) gcmc_sampler.move(dynamics.context()) # Update the runtime. 
From 1dfcdfc4933817761de7ea78390f8009af33a919 Mon Sep 17 00:00:00 2001 From: Lester Hedges Date: Mon, 19 May 2025 15:48:25 +0100 Subject: [PATCH 016/170] Write final ghost residue indices. [ci skip] --- src/somd2/runner/_repex.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/somd2/runner/_repex.py b/src/somd2/runner/_repex.py index 60140e07..b1fdabcc 100644 --- a/src/somd2/runner/_repex.py +++ b/src/somd2/runner/_repex.py @@ -720,6 +720,11 @@ def run(self): with open(self._repex_state, "wb") as f: _pickle.dump(self._dynamics_cache, f) + # Save the final GCMC ghost indices. + if self._config.gcmc and i % cycles_per_frame == 0: + for gcmc in self._dynamics_cache._gcmc: + gcmc.write_ghost_residues() + # Record the end time. end = time() From b5bbc4213055fec2163a12b316fc68f1537963c5 Mon Sep 17 00:00:00 2001 From: Lester Hedges Date: Mon, 19 May 2025 16:07:39 +0100 Subject: [PATCH 017/170] Manually compute timings so they include GCMC sampling time. --- src/somd2/runner/_runner.py | 42 +++++++++++++++++++++++++++++++------ 1 file changed, 36 insertions(+), 6 deletions(-) diff --git a/src/somd2/runner/_runner.py b/src/somd2/runner/_runner.py index ebc05376..d53ce1c4 100644 --- a/src/somd2/runner/_runner.py +++ b/src/somd2/runner/_runner.py @@ -521,6 +521,11 @@ def generate_lam_vals(lambda_base, increment=0.001): else: num_energy_neighbours = None + from time import time + + # Store the checkpoint time in nanoseconds. + checkpoint_interval = self._config.checkpoint_frequency.to("ns") + # Run the simulation, checkpointing in blocks. if self._config.checkpoint_frequency.value() > 0.0: @@ -540,6 +545,9 @@ def generate_lam_vals(lambda_base, increment=0.001): # Add the start block number. block += self._start_block + # Record the start time. + start = time() + # Run the dynamics. try: # GCMC specific handling. Note that the frame and checkpoint @@ -607,8 +615,14 @@ def generate_lam_vals(lambda_base, increment=0.001): # Commit the current system. 
system = dynamics.commit() - # Get the simulation speed. - speed = dynamics.time_speed() + # Record the end time. + end = time() + + # Work how many fractional days the block took. + block_time = (end - start) / 86400 + + # Calculate the speed in nanoseconds per day. + speed = checkpoint_interval / block_time # Check if this is the final block. is_final_block = ( @@ -647,6 +661,7 @@ def generate_lam_vals(lambda_base, increment=0.001): # Handle the remainder time. (There will be no remainer when GCMC sampling.) if rem > 0: block += 1 + start = time() try: dynamics.run( rem, @@ -667,8 +682,14 @@ def generate_lam_vals(lambda_base, increment=0.001): # Commit the current system. system = dynamics.commit() - # Get the simulation speed. - speed = dynamics.time_speed() + # Record the end time. + end = time() + + # Work how many fractional days the block took. + block_time = (end - start) / 86400 + + # Calculate the speed in nanoseconds per day. + speed = checkpoint_interval / block_time # Checkpoint. self._checkpoint( @@ -698,6 +719,9 @@ def generate_lam_vals(lambda_base, increment=0.001): f"Final dynamics block for {lam_sym} = {lambda_value:.5f} failed: {e}" ) else: + # Record the start time. + start = time() + try: if self._config.gcmc: # Initialise the run time and time at which the next frame is saved. @@ -754,8 +778,14 @@ def generate_lam_vals(lambda_base, increment=0.001): # Commit the current system. system = dynamics.commit() - # Get the simulation speed. - speed = dynamics.time_speed() + # Record the end time. + end = time() + + # Work how many fractional days the simulation took. + prod_time = (end - start) / 86400 + + # Calculate the speed in nanoseconds per day. + speed = time.to("ns") / prod_time # Checkpoint. self._checkpoint(system, index, 0, speed, is_final_block=True) From fef2286827da9bc41789c757f590455f8045b08b Mon Sep 17 00:00:00 2001 From: Lester Hedges Date: Mon, 19 May 2025 16:14:17 +0100 Subject: [PATCH 018/170] Logging tweaks. 
[ci skip] --- src/somd2/runner/_repex.py | 4 ++++ src/somd2/runner/_runner.py | 2 +- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/src/somd2/runner/_repex.py b/src/somd2/runner/_repex.py index b1fdabcc..c86f9cb4 100644 --- a/src/somd2/runner/_repex.py +++ b/src/somd2/runner/_repex.py @@ -195,6 +195,10 @@ def _create_dynamics( # Get the modified GCMC system. mols = sampler.system() + _logger.info( + f"Created GCMC sampler for lambda {lam:.5f} on device {device}" + ) + # Overload the device and lambda value. dynamics_kwargs["device"] = device dynamics_kwargs["lambda_value"] = lam diff --git a/src/somd2/runner/_runner.py b/src/somd2/runner/_runner.py index d53ce1c4..56bb8d15 100644 --- a/src/somd2/runner/_runner.py +++ b/src/somd2/runner/_runner.py @@ -474,7 +474,7 @@ def generate_lam_vals(lambda_base, increment=0.001): # Prepare the GCMC sampler. if self._config.gcmc: - _logger.info(f"Preparing GCMC sampler for {_lam_sym} = {lambda_value:.5f}") + _logger.info(f"Preparing GCMC sampler at {_lam_sym} = {lambda_value:.5f}") from loch import GCMCSampler From 00e7f95b575992fde74a11c8efbbcb73e97191e8 Mon Sep 17 00:00:00 2001 From: Lester Hedges Date: Mon, 19 May 2025 21:07:28 +0100 Subject: [PATCH 019/170] Add GCMC move to equilibration stage. --- src/somd2/runner/_runner.py | 57 +++++++++++++++++++++---------------- 1 file changed, 33 insertions(+), 24 deletions(-) diff --git a/src/somd2/runner/_runner.py b/src/somd2/runner/_runner.py index 56bb8d15..58a76561 100644 --- a/src/somd2/runner/_runner.py +++ b/src/somd2/runner/_runner.py @@ -355,6 +355,30 @@ def generate_lam_vals(lambda_base, increment=0.001): lam_vals = [lambda_base - increment, lambda_base + increment] return lam_vals + # Prepare the GCMC sampler. 
+ if self._config.gcmc: + _logger.info(f"Preparing GCMC sampler at {_lam_sym} = {lambda_value:.5f}") + + from loch import GCMCSampler + + gcmc_sampler = GCMCSampler( + system, + device=int(device), + lambda_value=lambda_value, + ghost_file=self._filenames[index]["gcmc_ghosts"], + **self._gcmc_kwargs, + ) + + # Get the GCMC system. + system = gcmc_sampler.system() + + # Write the end states so that we can later visualise trajectories. + if index == 0: + mols0 = _sr.morph.link_to_reference(system) + mols1 = _sr.morph.link_to_perturbed(system) + _sr.save(mols0, self._filenames["topology0"]) + _sr.save(mols1, self._filenames["topology1"]) + # Minimisation. if self._config.minimise: # Minimise with no constraints if we need to equilibrate first. @@ -410,6 +434,15 @@ def generate_lam_vals(lambda_base, increment=0.001): # Create the dynamics object. dynamics = system.dynamics(**dynamics_kwargs) + # Equilibrate with GCMC moves. + if self._config.gcmc: + _logger.info( + f"Euilibraing with GCMC moves at {_lam_sym} = {lambda_value:.5f}" + ) + + for i in range(100): + gcmc_sampler.move(dynamics.context()) + # Run without saving energies or frames. dynamics.run( self._config.equilibration_time, @@ -472,30 +505,6 @@ def generate_lam_vals(lambda_base, increment=0.001): # Now sort the scaling factors. rest2_scale_factors = [rest2_scale_factors[i] for i in sorted_indices] - # Prepare the GCMC sampler. - if self._config.gcmc: - _logger.info(f"Preparing GCMC sampler at {_lam_sym} = {lambda_value:.5f}") - - from loch import GCMCSampler - - gcmc_sampler = GCMCSampler( - system, - device=int(device), - lambda_value=lambda_value, - ghost_file=self._filenames[index]["gcmc_ghosts"], - **self._gcmc_kwargs, - ) - - # Get the GCMC system. - system = gcmc_sampler.system() - - # Write the end states so that we can later visualise trajectories. 
- if index == 0: - mols0 = _sr.morph.link_to_reference(system) - mols1 = _sr.morph.link_to_perturbed(system) - _sr.save(mols0, self._filenames["topology0"]) - _sr.save(mols1, self._filenames["topology1"]) - _logger.info(f"Running dynamics at {_lam_sym} = {lambda_value:.5f}") # Copy the dynamics kwargs. From 04c99f6a92bd042cb5a825b2a72bff1ef960ee53 Mon Sep 17 00:00:00 2001 From: Lester Hedges Date: Tue, 20 May 2025 13:53:24 +0100 Subject: [PATCH 020/170] Use PDB topology format for GCMC simulations. --- src/somd2/runner/_base.py | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/src/somd2/runner/_base.py b/src/somd2/runner/_base.py index ae0ef768..0531869f 100644 --- a/src/somd2/runner/_base.py +++ b/src/somd2/runner/_base.py @@ -869,8 +869,19 @@ def _prepare_output(self): for file in list(set(deleted)): file.unlink() - filenames["topology0"] = str(self._config.output_directory / "system0.prm7") - filenames["topology1"] = str(self._config.output_directory / "system1.prm7") + # Use PDB format for GCMC simulations to allow trajectory post-processing + # and analysis with grand. + if self._config.gcmc: + top_ext = "pdb" + else: + top_ext = "prm7" + + filenames["topology0"] = str( + self._config.output_directory / f"system0.{top_ext}" + ) + filenames["topology1"] = str( + self._config.output_directory / f"system1.{top_ext}" + ) return filenames From cae9ebac79b0464b8379cafec48086daa85293b7 Mon Sep 17 00:00:00 2001 From: Lester Hedges Date: Tue, 20 May 2025 14:10:02 +0100 Subject: [PATCH 021/170] Use 'ratio' rather than 'scale' for clarity. --- src/somd2/runner/_base.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/src/somd2/runner/_base.py b/src/somd2/runner/_base.py index 0531869f..fab7e400 100644 --- a/src/somd2/runner/_base.py +++ b/src/somd2/runner/_base.py @@ -436,37 +436,37 @@ def __init__(self, system, config): # Make sure the frame frequency is a multiple of the energy frequency. 
- # Get the scale factor. - scale = ( + # Get the ratio. + ratio = ( self._config.frame_frequency / self._config.energy_frequency ).value() # Make sure it's an integer. - if not isclose(scale, round(scale), abs_tol=1e-4): + if not isclose(ratio, round(ratio), abs_tol=1e-4): msg = "'frame_frequency' must be a multiple of 'energy_frequency'." _logger.error(msg) raise ValueError(msg) # Make sure the checkpoint frequency is a multiple of the frame frequency. - # Get the scale factor. - scale = ( + # Get the ratio. + ratio = ( self._config.checkpoint_frequency / self._config.frame_frequency ).value() # Make sure it's an integer. - if not isclose(scale, round(scale), abs_tol=1e-4): + if not isclose(ratio, round(ratio), abs_tol=1e-4): msg = "'checkpoint_frequency' must be a multiple of 'frame_frequency'." _logger.error(msg) raise ValueError(msg) # Make sure the runtime is a multiple of the frame frequency. - # Get the scale factor. - scale = (self._config.runtime / self._config.frame_frequency).value() + # Get the ratio. + ratio = (self._config.runtime / self._config.frame_frequency).value() # Make sure it's an integer. - if not isclose(scale, round(scale), abs_tol=1e-4): + if not isclose(ratio, round(ratio), abs_tol=1e-4): msg = "'runtime' must be a multiple of 'frame_frequency'." _logger.error(msg) raise ValueError(msg) From 793017d0ecf44626c43895f00cb78bf6f89e354e Mon Sep 17 00:00:00 2001 From: Lester Hedges Date: Tue, 20 May 2025 14:28:21 +0100 Subject: [PATCH 022/170] Enable replica exchange to work with NVT. --- src/somd2/runner/_repex.py | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/src/somd2/runner/_repex.py b/src/somd2/runner/_repex.py index c86f9cb4..fbcdae2a 100644 --- a/src/somd2/runner/_repex.py +++ b/src/somd2/runner/_repex.py @@ -494,8 +494,11 @@ def __init__(self, system, config): self._beta = 1.0 / kT # Store the pressure times Avaogadro's number. 
- NA = 6.02214076e23 / _sr.units.mole - self._pressure = (self._config.pressure * NA).value() + if self._config.pressure is not None: + NA = 6.02214076e23 / _sr.units.mole + self._pressure = (self._config.pressure * NA).value() + else: + self._pressure = None # If restarting, subtract the time already run from the total runtime if self._config.restart: @@ -1094,9 +1097,13 @@ def _assemble_results(self, results): # Fill the matrix. for i, energies in results: for j, energy in enumerate(energies): - matrix[i, j] = self._beta * ( - energy + self._pressure * self._dynamics_cache._openmm_volumes[i] - ) + matrix[i, j] = self._beta * energy + if self._pressure is not None: + matrix[i, j] += ( + self._beta + * self._config.pressure + * self._dynamics_cache._openmm_volumes[j] + ) return matrix From 6c746c381d78ef14134c0cbb2bf2c5ad994ce5a4 Mon Sep 17 00:00:00 2001 From: Lester Hedges Date: Tue, 20 May 2025 14:30:42 +0100 Subject: [PATCH 023/170] Pass GCMC specific kwargs to DynamicsCache constructor. --- src/somd2/runner/_repex.py | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/src/somd2/runner/_repex.py b/src/somd2/runner/_repex.py index fbcdae2a..a7756573 100644 --- a/src/somd2/runner/_repex.py +++ b/src/somd2/runner/_repex.py @@ -40,7 +40,15 @@ class DynamicsCache: A class for caching dynamics objects. """ - def __init__(self, system, lambdas, rest2_scale_factors, num_gpus, dynamics_kwargs): + def __init__( + self, + system, + lambdas, + rest2_scale_factors, + num_gpus, + dynamics_kwargs, + gcmc_kwargs=None, + ): """ Constructor. @@ -61,6 +69,9 @@ def __init__(self, system, lambdas, rest2_scale_factors, num_gpus, dynamics_kwar dynamics_kwargs: dict A dictionary of default dynamics keyword arguments. + + gcmc_kwargs: dict + GCMC specific keyword arguments. If None, then GCMC is not used. """ # Warn if the number of replicas is not a multiple of the number of GPUs. 
@@ -454,6 +465,7 @@ def __init__(self, system, config): self._rest2_scale_factors, self._num_gpus, dynamics_kwargs, + self._gcmc_kwargs, ) else: # Check to see if the simulation is already complete. From 5890c5b2a312f94b62cff8ecac7fcde27b64c2c1 Mon Sep 17 00:00:00 2001 From: Lester Hedges Date: Tue, 20 May 2025 14:32:51 +0100 Subject: [PATCH 024/170] Pass GCMC kwargs through to _create_dynamics method. [ci skip] --- src/somd2/runner/_repex.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/somd2/runner/_repex.py b/src/somd2/runner/_repex.py index a7756573..7a561935 100644 --- a/src/somd2/runner/_repex.py +++ b/src/somd2/runner/_repex.py @@ -94,7 +94,12 @@ def __init__( # Create the dynamics objects. self._create_dynamics( - system, lambdas, rest2_scale_factors, num_gpus, dynamics_kwargs + system, + lambdas, + rest2_scale_factors, + num_gpus, + dynamics_kwargs, + gcmc_kwargs=gcmc_kwargs, ) def __setstate__(self, state): From 2f1a45ca35590657e8e18d54a33344ba2f8c4e06 Mon Sep 17 00:00:00 2001 From: Lester Hedges Date: Tue, 20 May 2025 14:34:09 +0100 Subject: [PATCH 025/170] Fix module name. [ci skip] --- src/somd2/runner/_repex.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/somd2/runner/_repex.py b/src/somd2/runner/_repex.py index 7a561935..15240034 100644 --- a/src/somd2/runner/_repex.py +++ b/src/somd2/runner/_repex.py @@ -193,7 +193,7 @@ def _create_dynamics( mols = system if gcmc_kwargs is not None: - from local import GCMCSampler + from loch import GCMCSampler ghost_file = str(output_directory / f"gcmc_{lam:.5f}.ghost") From 3d188ab308676fe4ae53f8f005a92d11ed31183a Mon Sep 17 00:00:00 2001 From: Lester Hedges Date: Tue, 20 May 2025 14:35:42 +0100 Subject: [PATCH 026/170] Pass output directory to DynamicsCache constructor. 
--- src/somd2/runner/_repex.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/somd2/runner/_repex.py b/src/somd2/runner/_repex.py index 15240034..e6740e32 100644 --- a/src/somd2/runner/_repex.py +++ b/src/somd2/runner/_repex.py @@ -48,6 +48,7 @@ def __init__( num_gpus, dynamics_kwargs, gcmc_kwargs=None, + output_directory=None, ): """ Constructor. @@ -72,6 +73,9 @@ def __init__( gcmc_kwargs: dict GCMC specific keyword arguments. If None, then GCMC is not used. + + output_directory: pathlib.Path + The directory for simulation output. """ # Warn if the number of replicas is not a multiple of the number of GPUs. @@ -100,6 +104,7 @@ def __init__( num_gpus, dynamics_kwargs, gcmc_kwargs=gcmc_kwargs, + output_directory=output_directory, ) def __setstate__(self, state): @@ -471,6 +476,7 @@ def __init__(self, system, config): self._num_gpus, dynamics_kwargs, self._gcmc_kwargs, + output_directory=self._config.output_directory, ) else: # Check to see if the simulation is already complete. From d977c10d6c9462c9ce18b54fe2da5d1815cf04c1 Mon Sep 17 00:00:00 2001 From: Lester Hedges Date: Tue, 20 May 2025 14:38:30 +0100 Subject: [PATCH 027/170] Create instance of GCMCSampler, then append to list. --- src/somd2/runner/_repex.py | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/src/somd2/runner/_repex.py b/src/somd2/runner/_repex.py index e6740e32..aa76e54a 100644 --- a/src/somd2/runner/_repex.py +++ b/src/somd2/runner/_repex.py @@ -203,18 +203,19 @@ def _create_dynamics( ghost_file = str(output_directory / f"gcmc_{lam:.5f}.ghost") # Create the GCMC sampler. - self._gcmc.append( - GCMCSampler( - mols, - device=int(device), - lambda_value=lam, - ghost_file=ghost_file, - **gcmc_kwargs, - ) + gcmc_sampler = GCMCSampler( + mols, + device=int(device), + lambda_value=lam, + ghost_file=ghost_file, + **gcmc_kwargs, ) # Get the modified GCMC system. - mols = sampler.system() + mols = gcmc_sampler.system() + + # Store the GCMC sampler. 
+ self._gcmc.append(gcmc_sampler) _logger.info( f"Created GCMC sampler for lambda {lam:.5f} on device {device}" From 811d10af5a17a1325213486aa5b5857564af68e4 Mon Sep 17 00:00:00 2001 From: Lester Hedges Date: Tue, 20 May 2025 14:46:25 +0100 Subject: [PATCH 028/170] Fix log messages. [ci skip] --- src/somd2/runner/_repex.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/somd2/runner/_repex.py b/src/somd2/runner/_repex.py index aa76e54a..805cedf6 100644 --- a/src/somd2/runner/_repex.py +++ b/src/somd2/runner/_repex.py @@ -955,7 +955,7 @@ def _minimise(self, index): if gcmc_sampler is not None: _logger.info( - f"Pre-equilibrating with GCMC moves at {_lam_sym} = {lam:.5f}" + f"Pre-equilibrating with GCMC moves at {_lam_sym} = {self._lambda_values[index]:.5f}" ) for i in range(100): gcmc_sampler.move(dynamics.context()) @@ -997,7 +997,9 @@ def _equilibrate(self, index): dynamics, gcmc_sampler = self._dynamics_cache.get(index) if gcmc_sampler is not None: - _logger.info(f"Equilibrating with GCMC moves at {_lam_sym} = {lam:.5f}") + _logger.info( + f"Equilibrating with GCMC moves at {_lam_sym} = {self._lambda_values[index]:.5f}" + ) for i in range(100): gcmc_sampler.move(dynamics.context()) From 32e936f5ff61a3c12c11453b0978aba13901e3e6 Mon Sep 17 00:00:00 2001 From: Lester Hedges Date: Tue, 20 May 2025 16:09:45 +0100 Subject: [PATCH 029/170] Fix name clashes caused by manual timer. --- src/somd2/runner/_runner.py | 22 ++++++++++------------ 1 file changed, 10 insertions(+), 12 deletions(-) diff --git a/src/somd2/runner/_runner.py b/src/somd2/runner/_runner.py index 58a76561..c14cbc3d 100644 --- a/src/somd2/runner/_runner.py +++ b/src/somd2/runner/_runner.py @@ -21,6 +21,8 @@ __all__ = ["Runner"] +from time import time as _timer + import sire as _sr from somd2 import _logger @@ -144,10 +146,8 @@ def run(self): Use concurrent.futures to run lambda windows in parallel """ - from time import time - # Record the start time. 
- start = time() + start = _timer() # Create shared resources. self._create_shared_resources() @@ -199,7 +199,7 @@ def run(self): executor._processes[pid].terminate() # Record the end time. - end = time() + end = _timer() # Log the run time in minutes. _logger.success( @@ -530,8 +530,6 @@ def generate_lam_vals(lambda_base, increment=0.001): else: num_energy_neighbours = None - from time import time - # Store the checkpoint time in nanoseconds. checkpoint_interval = self._config.checkpoint_frequency.to("ns") @@ -555,7 +553,7 @@ def generate_lam_vals(lambda_base, increment=0.001): block += self._start_block # Record the start time. - start = time() + start = _timer() # Run the dynamics. try: @@ -625,7 +623,7 @@ def generate_lam_vals(lambda_base, increment=0.001): system = dynamics.commit() # Record the end time. - end = time() + end = _timer() # Work how many fractional days the block took. block_time = (end - start) / 86400 @@ -670,7 +668,7 @@ def generate_lam_vals(lambda_base, increment=0.001): # Handle the remainder time. (There will be no remainer when GCMC sampling.) if rem > 0: block += 1 - start = time() + start = _timer() try: dynamics.run( rem, @@ -692,7 +690,7 @@ def generate_lam_vals(lambda_base, increment=0.001): system = dynamics.commit() # Record the end time. - end = time() + end = _timer() # Work how many fractional days the block took. block_time = (end - start) / 86400 @@ -729,7 +727,7 @@ def generate_lam_vals(lambda_base, increment=0.001): ) else: # Record the start time. - start = time() + start = _timer() try: if self._config.gcmc: @@ -788,7 +786,7 @@ def generate_lam_vals(lambda_base, increment=0.001): system = dynamics.commit() # Record the end time. - end = time() + end = _timer() # Work how many fractional days the simulation took. prod_time = (end - start) / 86400 From 95077e2113c354f678724f63e82b4f1baf6f39e5 Mon Sep 17 00:00:00 2001 From: Lester Hedges Date: Tue, 20 May 2025 16:21:12 +0100 Subject: [PATCH 030/170] No need to convert to int. 
[ci skip] --- src/somd2/runner/_repex.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/somd2/runner/_repex.py b/src/somd2/runner/_repex.py index 805cedf6..8a18beb3 100644 --- a/src/somd2/runner/_repex.py +++ b/src/somd2/runner/_repex.py @@ -205,7 +205,7 @@ def _create_dynamics( # Create the GCMC sampler. gcmc_sampler = GCMCSampler( mols, - device=int(device), + device=device, lambda_value=lam, ghost_file=ghost_file, **gcmc_kwargs, From da3cadbf6be678a339c8e5ee31ed602f356c2091 Mon Sep 17 00:00:00 2001 From: Lester Hedges Date: Tue, 20 May 2025 17:08:40 +0100 Subject: [PATCH 031/170] Requires manual context management via push and pop. --- src/somd2/runner/_repex.py | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/src/somd2/runner/_repex.py b/src/somd2/runner/_repex.py index 8a18beb3..88058d46 100644 --- a/src/somd2/runner/_repex.py +++ b/src/somd2/runner/_repex.py @@ -855,6 +855,9 @@ def _run_block( # dynamics block so that the final energies, which are used in the # repex acceptance criteria, are correct. if gcmc_sampler is not None: + # Push the PyCUDA context to the top of the stack. + gcmc_sampler._pycuda_context.push() + # The frame frequency was hit after the previous block, so we # need to write the current indices of the GCMC ghost residues # to file. @@ -865,6 +868,9 @@ def _run_block( _logger.info(f"Performing GCMC move at {_lam_sym} = {lam:.5f}") gcmc_sampler.move(dynamics.context()) + # Pop the PyCUDA context from the stack. + gcmc_sampler._pycuda_context.pop() + # Run the dynamics. dynamics.run( self._config.energy_frequency, @@ -954,12 +960,18 @@ def _minimise(self, index): dynamics, gcmc_sampler = self._dynamics_cache.get(index) if gcmc_sampler is not None: + # Push the PyCUDA context to the top of the stack. 
+ gcmc_sampler._pycuda_context.push() + _logger.info( f"Pre-equilibrating with GCMC moves at {_lam_sym} = {self._lambda_values[index]:.5f}" ) for i in range(100): gcmc_sampler.move(dynamics.context()) + # Pop the PyCUDA context from the stack. + gcmc_sampler._pycuda_context.pop() + # Minimise. dynamics.minimise(timeout=self._config.timeout) @@ -997,12 +1009,18 @@ def _equilibrate(self, index): dynamics, gcmc_sampler = self._dynamics_cache.get(index) if gcmc_sampler is not None: + # Push the PyCUDA context to the top of the stack. + gcmc_sampler._pycuda_context.push() + _logger.info( f"Equilibrating with GCMC moves at {_lam_sym} = {self._lambda_values[index]:.5f}" ) for i in range(100): gcmc_sampler.move(dynamics.context()) + # Pop the PyCUDA context from the stack. + gcmc_sampler._pycuda_context.pop() + # Equilibrate. dynamics.run( self._config.equilibration_time, From d1043e0cd1a8ddc5d4fbccb8a9407c8856f71bab Mon Sep 17 00:00:00 2001 From: Lester Hedges Date: Wed, 21 May 2025 09:31:40 +0100 Subject: [PATCH 032/170] PyCUDA context management now handled by GCMCSampler. --- src/somd2/runner/_repex.py | 18 ------------------ 1 file changed, 18 deletions(-) diff --git a/src/somd2/runner/_repex.py b/src/somd2/runner/_repex.py index 88058d46..8a18beb3 100644 --- a/src/somd2/runner/_repex.py +++ b/src/somd2/runner/_repex.py @@ -855,9 +855,6 @@ def _run_block( # dynamics block so that the final energies, which are used in the # repex acceptance criteria, are correct. if gcmc_sampler is not None: - # Push the PyCUDA context to the top of the stack. - gcmc_sampler._pycuda_context.push() - # The frame frequency was hit after the previous block, so we # need to write the current indices of the GCMC ghost residues # to file. @@ -868,9 +865,6 @@ def _run_block( _logger.info(f"Performing GCMC move at {_lam_sym} = {lam:.5f}") gcmc_sampler.move(dynamics.context()) - # Pop the PyCUDA context from the stack. - gcmc_sampler._pycuda_context.pop() - # Run the dynamics. 
dynamics.run( self._config.energy_frequency, @@ -960,18 +954,12 @@ def _minimise(self, index): dynamics, gcmc_sampler = self._dynamics_cache.get(index) if gcmc_sampler is not None: - # Push the PyCUDA context to the top of the stack. - gcmc_sampler._pycuda_context.push() - _logger.info( f"Pre-equilibrating with GCMC moves at {_lam_sym} = {self._lambda_values[index]:.5f}" ) for i in range(100): gcmc_sampler.move(dynamics.context()) - # Pop the PyCUDA context from the stack. - gcmc_sampler._pycuda_context.pop() - # Minimise. dynamics.minimise(timeout=self._config.timeout) @@ -1009,18 +997,12 @@ def _equilibrate(self, index): dynamics, gcmc_sampler = self._dynamics_cache.get(index) if gcmc_sampler is not None: - # Push the PyCUDA context to the top of the stack. - gcmc_sampler._pycuda_context.push() - _logger.info( f"Equilibrating with GCMC moves at {_lam_sym} = {self._lambda_values[index]:.5f}" ) for i in range(100): gcmc_sampler.move(dynamics.context()) - # Pop the PyCUDA context from the stack. - gcmc_sampler._pycuda_context.pop() - # Equilibrate. dynamics.run( self._config.equilibration_time, From ac2487a03593e9b59f963c203324cf2e432ab95d Mon Sep 17 00:00:00 2001 From: Lester Hedges Date: Wed, 21 May 2025 09:39:47 +0100 Subject: [PATCH 033/170] Consolidate saving of end-state topology to single place. --- src/somd2/runner/_base.py | 14 +++++++------- src/somd2/runner/_runner.py | 7 ------- 2 files changed, 7 insertions(+), 14 deletions(-) diff --git a/src/somd2/runner/_base.py b/src/somd2/runner/_base.py index fab7e400..cefedee5 100644 --- a/src/somd2/runner/_base.py +++ b/src/somd2/runner/_base.py @@ -402,13 +402,6 @@ def __init__(self, system, config): mols, water, rng, self._config.gcmc_num_ghosts ) - # For GCMC, we need to save the system with the ghost waters. 
- if not self._config.gcmc: - mols0 = _sr.morph.link_to_reference(mols) - mols1 = _sr.morph.link_to_perturbed(mols) - _sr.save(mols0, self._filenames["topology0"]) - _sr.save(mols1, self._filenames["topology1"]) - # Append only this number of lines from the end of the dataframe during checkpointing. self._energy_per_block = int( self._config.checkpoint_frequency / self._config.energy_frequency @@ -1272,6 +1265,13 @@ def _checkpoint( from somd2 import __version__, _sire_version, _sire_revisionid + # Save the end-state topologies for trajectory analysis and visualisation. + if block == 0 and index == 0: + mols0 = _sr.morph.link_to_reference(system) + mols1 = _sr.morph.link_to_perturbed(system) + _sr.save(mols0, self._filenames["topology0"]) + _sr.save(mols1, self._filenames["topology1"]) + # Get the lambda value. lam = self._lambda_values[index] diff --git a/src/somd2/runner/_runner.py b/src/somd2/runner/_runner.py index c14cbc3d..2f6d8bbb 100644 --- a/src/somd2/runner/_runner.py +++ b/src/somd2/runner/_runner.py @@ -372,13 +372,6 @@ def generate_lam_vals(lambda_base, increment=0.001): # Get the GCMC system. system = gcmc_sampler.system() - # Write the end states so that we can later visualise trajectories. - if index == 0: - mols0 = _sr.morph.link_to_reference(system) - mols1 = _sr.morph.link_to_perturbed(system) - _sr.save(mols0, self._filenames["topology0"]) - _sr.save(mols1, self._filenames["topology1"]) - # Minimisation. if self._config.minimise: # Minimise with no constraints if we need to equilibrate first. From de3bd9cd8adedaa832031ee8c3886d8925e4c24c Mon Sep 17 00:00:00 2001 From: Lester Hedges Date: Wed, 21 May 2025 10:07:06 +0100 Subject: [PATCH 034/170] PyCUDA context needs to be pushed and popped by the thread. 
--- src/somd2/runner/_repex.py | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/src/somd2/runner/_repex.py b/src/somd2/runner/_repex.py index 8a18beb3..eb84c038 100644 --- a/src/somd2/runner/_repex.py +++ b/src/somd2/runner/_repex.py @@ -855,6 +855,9 @@ def _run_block( # dynamics block so that the final energies, which are used in the # repex acceptance criteria, are correct. if gcmc_sampler is not None: + # Push the PyCUDA context on top of the stack. + gcmc_sampler.push() + # The frame frequency was hit after the previous block, so we # need to write the current indices of the GCMC ghost residues # to file. @@ -865,6 +868,9 @@ def _run_block( _logger.info(f"Performing GCMC move at {_lam_sym} = {lam:.5f}") gcmc_sampler.move(dynamics.context()) + # Remove the PyCUDA context from the stack. + gcmc_sampler.pop() + # Run the dynamics. dynamics.run( self._config.energy_frequency, @@ -954,12 +960,18 @@ def _minimise(self, index): dynamics, gcmc_sampler = self._dynamics_cache.get(index) if gcmc_sampler is not None: + # Push the PyCUDA context on top of the stack. + gcmc_sampler.push() + _logger.info( f"Pre-equilibrating with GCMC moves at {_lam_sym} = {self._lambda_values[index]:.5f}" ) for i in range(100): gcmc_sampler.move(dynamics.context()) + # Remove the PyCUDA context from the stack. + gcmc_sampler.pop() + # Minimise. dynamics.minimise(timeout=self._config.timeout) @@ -997,12 +1009,18 @@ def _equilibrate(self, index): dynamics, gcmc_sampler = self._dynamics_cache.get(index) if gcmc_sampler is not None: + # Push the PyCUDA context on top of the stack. + gcmc_sampler.push() + _logger.info( f"Equilibrating with GCMC moves at {_lam_sym} = {self._lambda_values[index]:.5f}" ) for i in range(100): gcmc_sampler.move(dynamics.context()) + # Remove the PyCUDA context from the stack. + gcmc_sampler.pop() + # Equilibrate. 
dynamics.run( self._config.equilibration_time, From 85921dd25e4a5a7f9eceac8da64fa7addd0ba364 Mon Sep 17 00:00:00 2001 From: Lester Hedges Date: Wed, 21 May 2025 12:02:55 +0100 Subject: [PATCH 035/170] Use d.context() for clarity. --- src/somd2/runner/_repex.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/src/somd2/runner/_repex.py b/src/somd2/runner/_repex.py index eb84c038..de313f2d 100644 --- a/src/somd2/runner/_repex.py +++ b/src/somd2/runner/_repex.py @@ -304,8 +304,10 @@ def save_openmm_state(self, index): from openmm.unit import angstrom # Get the current OpenMM state. - state = self._dynamics[index]._d._omm_mols.getState( - getPositions=True, getVelocities=True + state = ( + self._dynamics[index] + ._d.context() + .getState(getPositions=True, getVelocities=True) ) # Store the state. @@ -1114,14 +1116,14 @@ def _compute_energies(self, index): # Loop over the states. for i in range(self._config.num_lambda): # Set the state. - dynamics._d._omm_mols.setState(self._dynamics_cache._openmm_states[i]) + dynamics._d.context().setState(self._dynamics_cache._openmm_states[i]) dynamics._d._clear_state() # Compute and store the energy for this state. energies[i] = dynamics.current_potential_energy().value() # Reset the state. - dynamics._d._omm_mols.setState(self._dynamics_cache._openmm_states[index]) + dynamics._d.context().setState(self._dynamics_cache._openmm_states[index]) return index, energies From 29c78f39edcb7de19528d249fcd52b9dd95ad0d6 Mon Sep 17 00:00:00 2001 From: Lester Hedges Date: Wed, 21 May 2025 12:04:35 +0100 Subject: [PATCH 036/170] Swap GCMC water state when mixing replicas. 
--- src/somd2/runner/_repex.py | 39 +++++++++++++++++++++++++++++++++++++- 1 file changed, 38 insertions(+), 1 deletion(-) diff --git a/src/somd2/runner/_repex.py b/src/somd2/runner/_repex.py index de313f2d..24cc95e1 100644 --- a/src/somd2/runner/_repex.py +++ b/src/somd2/runner/_repex.py @@ -92,6 +92,7 @@ def __init__( self._old_states = _np.array(range(len(lambdas))) self._openmm_states = [None] * len(lambdas) self._openmm_volumes = [None] * len(lambdas) + self._gcmc_states = [None] * len(lambdas) self._num_proposed = _np.matrix(_np.zeros((len(lambdas), len(lambdas)))) self._num_accepted = _np.matrix(_np.zeros((len(lambdas), len(lambdas)))) self._num_swaps = _np.matrix(_np.zeros((len(lambdas), len(lambdas)))) @@ -318,6 +319,22 @@ def save_openmm_state(self, index): angstrom**3 ) + def save_gcmc_state(self, index): + """ + Save the current GCMC water state for the replica. + + Parameters + ---------- + + index: int + The index of the replica. + """ + # Get the GCMC sampler. + gcmc_sampler = self._gcmc[index] + + # Store the state. + self._gcmc_states[index] = gcmc_sampler.water_state() + def get_states(self): """ Get the states of the dynamics objects. @@ -351,7 +368,23 @@ def mix_states(self): # The state has changed. if i != state: _logger.debug(f"Replica {i} seeded from state {state}") - self._dynamics[i]._d._omm_mols.setState(self._openmm_states[state]) + self._dynamics[i]._d.context().setState(self._openmm_states[state]) + + # Swap the water state in the GCMCSamplers. + if self._gcmc[i] is not None: + for j, (state0, state1) in enumerate( + zip(self._gcmc_states[i], self._gcmc_states[state]) + ): + # The states are different and one of them is a ghost. + if state0 != state1 and (state0 == 0 or state1 == 0): + _logger.debug( + f"Swapping GCMC water state {state0} with {state1} for replica {i}" + ) + self._gcmc[i].push() + self._gcmc[i]._set_water_state( + j, state1, self._dynamics[i].context() + ) + self._gcmc[i].pop() # Update the swap matrix. 
old_state = self._old_states[i] @@ -889,6 +922,10 @@ def _run_block( # Set the state. self._dynamics_cache.save_openmm_state(index) + # Save the GCMC state. + if gcmc_sampler is not None: + self._dynamics_cache.save_gcmc_state(index) + # Get the energies at each lambda value. energies = ( dynamics._d.energy_trajectory() From 59746e13e346ce245a2e71a32909f3c7798a0a44 Mon Sep 17 00:00:00 2001 From: Lester Hedges Date: Wed, 21 May 2025 12:35:54 +0100 Subject: [PATCH 037/170] Call .context() on Dynamics not DynamicsData object. --- src/somd2/runner/_repex.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/somd2/runner/_repex.py b/src/somd2/runner/_repex.py index 24cc95e1..9e2a2a39 100644 --- a/src/somd2/runner/_repex.py +++ b/src/somd2/runner/_repex.py @@ -307,7 +307,7 @@ def save_openmm_state(self, index): # Get the current OpenMM state. state = ( self._dynamics[index] - ._d.context() + .context() .getState(getPositions=True, getVelocities=True) ) @@ -368,7 +368,7 @@ def mix_states(self): # The state has changed. if i != state: _logger.debug(f"Replica {i} seeded from state {state}") - self._dynamics[i]._d.context().setState(self._openmm_states[state]) + self._dynamics[i].context().setState(self._openmm_states[state]) # Swap the water state in the GCMCSamplers. if self._gcmc[i] is not None: @@ -1153,14 +1153,14 @@ def _compute_energies(self, index): # Loop over the states. for i in range(self._config.num_lambda): # Set the state. - dynamics._d.context().setState(self._dynamics_cache._openmm_states[i]) + dynamics.context().setState(self._dynamics_cache._openmm_states[i]) dynamics._d._clear_state() # Compute and store the energy for this state. energies[i] = dynamics.current_potential_energy().value() # Reset the state. 
- dynamics._d.context().setState(self._dynamics_cache._openmm_states[index]) + dynamics.context().setState(self._dynamics_cache._openmm_states[index]) return index, energies From 3596fc445070b32852cea01cdc16f4ae450f0371 Mon Sep 17 00:00:00 2001 From: Lester Hedges Date: Thu, 22 May 2025 09:30:16 +0100 Subject: [PATCH 038/170] Switch position of ghost residue writing. --- src/somd2/runner/_repex.py | 20 +++++++------------- 1 file changed, 7 insertions(+), 13 deletions(-) diff --git a/src/somd2/runner/_repex.py b/src/somd2/runner/_repex.py index 9e2a2a39..520ff6e3 100644 --- a/src/somd2/runner/_repex.py +++ b/src/somd2/runner/_repex.py @@ -713,8 +713,8 @@ def run(self): # Whether to checkpoint. is_checkpoint = i > 0 and i % cycles_per_checkpoint == 0 - # Whether a frame was saved after the previous block. - write_gcmc_ghosts = i > 0 and (i - 1) % cycles_per_frame == 0 + # Whether a frame is saved at the end of the cycle. + write_gcmc_ghosts = i > 0 and i % cycles_per_frame == 0 # Run a dynamics block for each replica, making sure only each GPU is only # oversubscribed by a factor of self._config.oversubscription_factor. @@ -786,11 +786,6 @@ def run(self): with open(self._repex_state, "wb") as f: _pickle.dump(self._dynamics_cache, f) - # Save the final GCMC ghost indices. - if self._config.gcmc and i % cycles_per_frame == 0: - for gcmc in self._dynamics_cache._gcmc: - gcmc.write_ghost_residues() - # Record the end time. end = time() @@ -893,12 +888,6 @@ def _run_block( # Push the PyCUDA context on top of the stack. gcmc_sampler.push() - # The frame frequency was hit after the previous block, so we - # need to write the current indices of the GCMC ghost residues - # to file. - if write_gcmc_ghosts: - gcmc_sampler.write_ghost_residues() - # Perform the GCMC move. 
_logger.info(f"Performing GCMC move at {_lam_sym} = {lam:.5f}") gcmc_sampler.move(dynamics.context()) @@ -919,6 +908,11 @@ def _run_block( null_energy=self._config.null_energy, ) + # The frame frequency was hit, so writ the indices of the current + # ghost water residues to file. + if gcmc_sampler is not None and write_gcmc_ghosts: + gcmc_sampler.write_ghost_residues() + # Set the state. self._dynamics_cache.save_openmm_state(index) From 0da38fd0898d2dda7b3c47f0daf71b91b007eb52 Mon Sep 17 00:00:00 2001 From: Lester Hedges Date: Thu, 22 May 2025 11:57:17 +0100 Subject: [PATCH 039/170] Use NumPy to find water indices where state differs between replicas. --- src/somd2/runner/_repex.py | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/src/somd2/runner/_repex.py b/src/somd2/runner/_repex.py index 520ff6e3..150ee4c9 100644 --- a/src/somd2/runner/_repex.py +++ b/src/somd2/runner/_repex.py @@ -372,17 +372,22 @@ def mix_states(self): # Swap the water state in the GCMCSamplers. if self._gcmc[i] is not None: - for j, (state0, state1) in enumerate( - zip(self._gcmc_states[i], self._gcmc_states[state]) - ): - # The states are different and one of them is a ghost. - if state0 != state1 and (state0 == 0 or state1 == 0): + # Find the indices of the water states that differ. + water_idxs = _np.where( + self._gcmc_states[i] != self._gcmc_states[state] + )[0] + # Loop over the water indices and swap the states. + for idx in water_idxs: + state0 = self._gcmc_states[i][idx] + state1 = self._gcmc_states[state][idx] + # Only update if one of the states is a ghost water. 
+ if state0 == 0 or state1 == 0: _logger.debug( f"Swapping GCMC water state {state0} with {state1} for replica {i}" ) self._gcmc[i].push() self._gcmc[i]._set_water_state( - j, state1, self._dynamics[i].context() + idx, state1, self._dynamics[i].context() ) self._gcmc[i].pop() From a02eafea19c4d5f632da9801e638f6f09e2f148d Mon Sep 17 00:00:00 2001 From: Lester Hedges Date: Thu, 22 May 2025 13:50:04 +0100 Subject: [PATCH 040/170] Make sure we store a copy of the water state array. --- src/somd2/runner/_repex.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/somd2/runner/_repex.py b/src/somd2/runner/_repex.py index 150ee4c9..d38c2244 100644 --- a/src/somd2/runner/_repex.py +++ b/src/somd2/runner/_repex.py @@ -333,7 +333,7 @@ def save_gcmc_state(self, index): gcmc_sampler = self._gcmc[index] # Store the state. - self._gcmc_states[index] = gcmc_sampler.water_state() + self._gcmc_states[index] = gcmc_sampler.water_state().copy() def get_states(self): """ From 7302e48f3fb3a2dfc666cb5929259b13ec0fd779 Mon Sep 17 00:00:00 2001 From: Lester Hedges Date: Thu, 22 May 2025 14:43:28 +0100 Subject: [PATCH 041/170] Improve logging of water state swaps. [ci skip] --- src/somd2/runner/_repex.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/somd2/runner/_repex.py b/src/somd2/runner/_repex.py index d38c2244..d9cb486d 100644 --- a/src/somd2/runner/_repex.py +++ b/src/somd2/runner/_repex.py @@ -383,7 +383,8 @@ def mix_states(self): # Only update if one of the states is a ghost water. 
if state0 == 0 or state1 == 0: _logger.debug( - f"Swapping GCMC water state {state0} with {state1} for replica {i}" + f"Swapping state from {state0} to {state1} for " + f"water index {idx} in replica {i}" ) self._gcmc[i].push() self._gcmc[i]._set_water_state( From 1bf1fdf11f9a3787c840fdfae584c9455a5b026a Mon Sep 17 00:00:00 2001 From: Lester Hedges Date: Thu, 22 May 2025 20:55:25 +0100 Subject: [PATCH 042/170] Rename attribute: self._gcmc --> self._gcmc_samplers. --- src/somd2/runner/_repex.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/src/somd2/runner/_repex.py b/src/somd2/runner/_repex.py index d9cb486d..26f40152 100644 --- a/src/somd2/runner/_repex.py +++ b/src/somd2/runner/_repex.py @@ -184,7 +184,7 @@ def _create_dynamics( self._dynamics = [] # Initialise the GCMC object list. - self._gcmc = [] + self._gcmc_samplers = [] # Create the dynamics objects in serial. for i, (lam, scale) in enumerate(zip(lambdas, rest2_scale_factors)): @@ -216,7 +216,7 @@ def _create_dynamics( mols = gcmc_sampler.system() # Store the GCMC sampler. - self._gcmc.append(gcmc_sampler) + self._gcmc_samplers.append(gcmc_sampler) _logger.info( f"Created GCMC sampler for lambda {lam:.5f} on device {device}" @@ -259,7 +259,7 @@ def get(self, index): The dynamics object for the replica and its GCMC sampler. """ try: - gcmc_sampler = self._gcmc[index] + gcmc_sampler = self._gcmc_samplers[index] except: gcmc_sampler = None @@ -330,7 +330,7 @@ def save_gcmc_state(self, index): The index of the replica. """ # Get the GCMC sampler. - gcmc_sampler = self._gcmc[index] + gcmc_sampler = self._gcmc_samplers[index] # Store the state. self._gcmc_states[index] = gcmc_sampler.water_state().copy() @@ -371,7 +371,7 @@ def mix_states(self): self._dynamics[i].context().setState(self._openmm_states[state]) # Swap the water state in the GCMCSamplers. - if self._gcmc[i] is not None: + if self._gcmc_samplers[i] is not None: # Find the indices of the water states that differ. 
water_idxs = _np.where( self._gcmc_states[i] != self._gcmc_states[state] @@ -386,11 +386,11 @@ def mix_states(self): f"Swapping state from {state0} to {state1} for " f"water index {idx} in replica {i}" ) - self._gcmc[i].push() - self._gcmc[i]._set_water_state( + self._gcmc_samplers[i].push() + self._gcmc_samplers[i]._set_water_state( idx, state1, self._dynamics[i].context() ) - self._gcmc[i].pop() + self._gcmc_samplers[i].pop() # Update the swap matrix. old_state = self._old_states[i] From cc6210d336f53568ecb90f2d6c29f6d2f31c537e Mon Sep 17 00:00:00 2001 From: Lester Hedges Date: Thu, 22 May 2025 21:08:12 +0100 Subject: [PATCH 043/170] Grammar tweak. [ci skip] --- src/somd2/runner/_repex.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/somd2/runner/_repex.py b/src/somd2/runner/_repex.py index 26f40152..4a08e4da 100644 --- a/src/somd2/runner/_repex.py +++ b/src/somd2/runner/_repex.py @@ -431,7 +431,7 @@ def __init__(self, system, config): ---------- system: str, :class: `System ` - The perturbable system to be simulated. This can be either a path + The perturbable system to be simulated. This can either be a path to a stream file, or a Sire system object. config: :class: `Config ` From 09de1b531370e03a98324eb873eead656a6f0aad Mon Sep 17 00:00:00 2001 From: Lester Hedges Date: Fri, 23 May 2025 19:20:08 +0100 Subject: [PATCH 044/170] GCMC and real waters are no longer differentiated. [ci skip] --- src/somd2/runner/_repex.py | 24 +++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) diff --git a/src/somd2/runner/_repex.py b/src/somd2/runner/_repex.py index 4a08e4da..c8bf7d87 100644 --- a/src/somd2/runner/_repex.py +++ b/src/somd2/runner/_repex.py @@ -378,19 +378,21 @@ def mix_states(self): )[0] # Loop over the water indices and swap the states. for idx in water_idxs: + # Get the water state in the two replicas. 
state0 = self._gcmc_states[i][idx] state1 = self._gcmc_states[state][idx] - # Only update if one of the states is a ghost water. - if state0 == 0 or state1 == 0: - _logger.debug( - f"Swapping state from {state0} to {state1} for " - f"water index {idx} in replica {i}" - ) - self._gcmc_samplers[i].push() - self._gcmc_samplers[i]._set_water_state( - idx, state1, self._dynamics[i].context() - ) - self._gcmc_samplers[i].pop() + + _logger.debug( + f"Swapping state from {state0} to {state1} for " + f"water index {idx} in replica {i}" + ) + + # Update the water state in the GCMCSampler. + self._gcmc_samplers[i].push() + self._gcmc_samplers[i]._set_water_state( + idx, state1, self._dynamics[i].context() + ) + self._gcmc_samplers[i].pop() # Update the swap matrix. old_state = self._old_states[i] From 4a985736972b2c525441ffd2d80ccb8d438bf102 Mon Sep 17 00:00:00 2001 From: Lester Hedges Date: Fri, 23 May 2025 22:02:36 +0100 Subject: [PATCH 045/170] Change parameter name. [ci skip] --- src/somd2/config/_config.py | 32 ++++++++++++++++---------------- src/somd2/runner/_base.py | 4 ++-- 2 files changed, 18 insertions(+), 18 deletions(-) diff --git a/src/somd2/config/_config.py b/src/somd2/config/_config.py index 82a839e6..05f6024d 100644 --- a/src/somd2/config/_config.py +++ b/src/somd2/config/_config.py @@ -130,7 +130,7 @@ def __init__( gcmc_selection=None, gcmc_excess_chemical_potential="-6.09 kcal/mol", gcmc_standard_volume="30.543 A^3", - gcmc_num_ghosts=10, + gcmc_num_waters=20, gcmc_radius="4 A", gcmc_bulk_sampling_probability=0.1, rest2_scale=1.0, @@ -326,9 +326,9 @@ def __init__( The standard volume of a water molecule in A^3. The default value is calibrated from NPT simulation of TIP3P water. - gcmc_num_ghosts: int - The initial number of ghost water molecules to insert into the system. These - are used as placeholders for GCMC insertion moves. + gcmc_num_waters: int + The additional number of ghost water molecules to add to the system. 
These are + used as placeholders for GCMC insertion moves. gcmc_radius: str The radius of the GCMC sphere. @@ -454,7 +454,7 @@ def __init__( self.gcmc_selection = gcmc_selection self.gcmc_excess_chemical_potential = gcmc_excess_chemical_potential self.gcmc_standard_volume = gcmc_standard_volume - self.gcmc_num_ghosts = gcmc_num_ghosts + self.gcmc_num_waters = gcmc_num_waters self.gcmc_radius = gcmc_radius self.gcmc_bulk_sampling_probability = gcmc_bulk_sampling_probability self.rest2_scale = rest2_scale @@ -1453,21 +1453,21 @@ def gcmc_standard_volume(self, gcmc_standard_volume): self._gcmc_standard_volume = gcmc_v @property - def gcmc_num_ghosts(self): - return self._gcmc_num_ghosts + def gcmc_num_waters(self): + return self._gcmc_num_waters - @gcmc_num_ghosts.setter - def gcmc_num_ghosts(self, gcmc_num_ghosts): - if gcmc_num_ghosts is not None: - if not isinstance(gcmc_num_ghosts, int): + @gcmc_num_waters.setter + def gcmc_num_waters(self, gcmc_num_waters): + if gcmc_num_waters is not None: + if not isinstance(gcmc_num_waters, int): try: - gcmc_num_ghosts = int(gcmc_num_ghosts) + gcmc_num_waters = int(gcmc_num_waters) except: - raise ValueError("'gcmc_num_ghosts' must be an integer") + raise ValueError("'gcmc_num_waters' must be an integer") - if gcmc_num_ghosts < 0: - raise ValueError("'gcmc_num_ghosts' must be greater than or equal to 0") - self._gcmc_num_ghosts = gcmc_num_ghosts + if gcmc_num_waters < 0: + raise ValueError("'gcmc_num_waters' must be greater than or equal to 0") + self._gcmc_num_waters = gcmc_num_waters @property def gcmc_radius(self): diff --git a/src/somd2/runner/_base.py b/src/somd2/runner/_base.py index cefedee5..b7ecba0b 100644 --- a/src/somd2/runner/_base.py +++ b/src/somd2/runner/_base.py @@ -399,7 +399,7 @@ def __init__(self, system, config): # Create the GCMC system. 
mols = GCMCSampler._prepare_system( - mols, water, rng, self._config.gcmc_num_ghosts + mols, water, rng, self._config.gcmc_num_waters ) # Append only this number of lines from the end of the dataframe during checkpointing. @@ -511,7 +511,7 @@ def __init__(self, system, config): ), "standard_volume": str(self._config.gcmc_standard_volume), "radius": str(self._config.gcmc_radius), - "max_gcmc_waters": self._config.gcmc_num_ghosts, + "max_gcmc_waters": self._config.gcmc_num_waters, "bulk_sampling_probability": self._config.gcmc_bulk_sampling_probability, "cutoff_type": self._config.cutoff_type, "cutoff": str(self._config.cutoff), From 5773604fe77aadb962d04fc24e74c7ebd3d372f6 Mon Sep 17 00:00:00 2001 From: Lester Hedges Date: Fri, 23 May 2025 23:44:56 +0100 Subject: [PATCH 046/170] Flag ghost waters when checkpointing. --- src/somd2/runner/_repex.py | 4 ++++ src/somd2/runner/_runner.py | 4 ++++ 2 files changed, 8 insertions(+) diff --git a/src/somd2/runner/_repex.py b/src/somd2/runner/_repex.py index c8bf7d87..d15b72cc 100644 --- a/src/somd2/runner/_repex.py +++ b/src/somd2/runner/_repex.py @@ -941,6 +941,10 @@ def _run_block( # Commit the current system. system = dynamics.commit() + # If performing GCMC, then we need to flag the ghost waters. + if self._config.gcmc: + system = gcmc_sampler._flag_ghost_waters(system) + # Get the simulation speed. speed = dynamics.time_speed() diff --git a/src/somd2/runner/_runner.py b/src/somd2/runner/_runner.py index 2f6d8bbb..13f45cb1 100644 --- a/src/somd2/runner/_runner.py +++ b/src/somd2/runner/_runner.py @@ -615,6 +615,10 @@ def generate_lam_vals(lambda_base, increment=0.001): # Commit the current system. system = dynamics.commit() + # If performing GCMC, then we need to flag the ghost waters. + if self._config.gcmc: + system = gcmc_sampler._flag_ghost_waters(system) + # Record the end time. 
end = _timer() From ff105cecafcfb17e566b8033d22ffde18e712527 Mon Sep 17 00:00:00 2001 From: Lester Hedges Date: Fri, 23 May 2025 23:53:34 +0100 Subject: [PATCH 047/170] Remove ghost waters from GCMC checkpoint systems. --- src/somd2/runner/_base.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/src/somd2/runner/_base.py b/src/somd2/runner/_base.py index b7ecba0b..f5f1e54e 100644 --- a/src/somd2/runner/_base.py +++ b/src/somd2/runner/_base.py @@ -940,6 +940,20 @@ def _check_restart(self): # Append the system to the list. systems.append(_sr.morph.link_to_reference(system)) + # If this is a GCMC simulation, then remove all ghost waters from each of the systems. + if self._config.gcmc: + _logger.info("Removing existing ghost waters from GCMC checkpoint systems") + for i, system in enumerate(systems): + # Remove the ghost waters from the system. + try: + for mol in system["property is_ghost_water"].molecules(): + _logger.debug( + f"Removing ghost water molecule {mol.number()} for {_lam_sym}={self._lambda_values[i]:.5f}" + ) + system.remove(mol) + except: + pass + return True, systems @staticmethod From e7c14693c71f9d562f2192e5ac8902d14b9a60e6 Mon Sep 17 00:00:00 2001 From: Lester Hedges Date: Sat, 24 May 2025 09:51:53 +0100 Subject: [PATCH 048/170] Initialise gcmc_sampler array with NoneType entries. 
--- src/somd2/runner/_repex.py | 13 +++---------- 1 file changed, 3 insertions(+), 10 deletions(-) diff --git a/src/somd2/runner/_repex.py b/src/somd2/runner/_repex.py index d15b72cc..2ceddad2 100644 --- a/src/somd2/runner/_repex.py +++ b/src/somd2/runner/_repex.py @@ -92,6 +92,7 @@ def __init__( self._old_states = _np.array(range(len(lambdas))) self._openmm_states = [None] * len(lambdas) self._openmm_volumes = [None] * len(lambdas) + self._gcmc_samplers = [None] * len(lambdas) self._gcmc_states = [None] * len(lambdas) self._num_proposed = _np.matrix(_np.zeros((len(lambdas), len(lambdas)))) self._num_accepted = _np.matrix(_np.zeros((len(lambdas), len(lambdas)))) @@ -183,9 +184,6 @@ def _create_dynamics( # Initialise the dynamics object list. self._dynamics = [] - # Initialise the GCMC object list. - self._gcmc_samplers = [] - # Create the dynamics objects in serial. for i, (lam, scale) in enumerate(zip(lambdas, rest2_scale_factors)): # Work out the device index. @@ -216,7 +214,7 @@ def _create_dynamics( mols = gcmc_sampler.system() # Store the GCMC sampler. - self._gcmc_samplers.append(gcmc_sampler) + self._gcmc_samplers[i] = gcmc_sampler _logger.info( f"Created GCMC sampler for lambda {lam:.5f} on device {device}" @@ -258,12 +256,7 @@ def get(self, index): tuple The dynamics object for the replica and its GCMC sampler. """ - try: - gcmc_sampler = self._gcmc_samplers[index] - except: - gcmc_sampler = None - - return self._dynamics[index], gcmc_sampler + return self._dynamics[index], self._gcmc_samplers[index] def set(self, index, dynamics): """ From 7ca5216ce8b0a1430957221aafef92ce0d19fb02 Mon Sep 17 00:00:00 2001 From: Lester Hedges Date: Sat, 24 May 2025 10:04:56 +0100 Subject: [PATCH 049/170] Consolidate water state updates. 
--- src/somd2/runner/_repex.py | 26 +++++++++----------------- 1 file changed, 9 insertions(+), 17 deletions(-) diff --git a/src/somd2/runner/_repex.py b/src/somd2/runner/_repex.py index 2ceddad2..c40d7707 100644 --- a/src/somd2/runner/_repex.py +++ b/src/somd2/runner/_repex.py @@ -369,23 +369,15 @@ def mix_states(self): water_idxs = _np.where( self._gcmc_states[i] != self._gcmc_states[state] )[0] - # Loop over the water indices and swap the states. - for idx in water_idxs: - # Get the water state in the two replicas. - state0 = self._gcmc_states[i][idx] - state1 = self._gcmc_states[state][idx] - - _logger.debug( - f"Swapping state from {state0} to {state1} for " - f"water index {idx} in replica {i}" - ) - - # Update the water state in the GCMCSampler. - self._gcmc_samplers[i].push() - self._gcmc_samplers[i]._set_water_state( - idx, state1, self._dynamics[i].context() - ) - self._gcmc_samplers[i].pop() + + # Update the water state in the GCMCSampler. + self._gcmc_samplers[i].push() + self._gcmc_samplers[i]._set_water_state( + water_idxs, + self._gcmc_states[state][water_idxs], + self._dynamics[i].context(), + ) + self._gcmc_samplers[i].pop() # Update the swap matrix. old_state = self._old_states[i] From e5841f94cbe15512b24adbbb0f589555bbe6ccdf Mon Sep 17 00:00:00 2001 From: Lester Hedges Date: Sat, 24 May 2025 10:44:54 +0100 Subject: [PATCH 050/170] Separate per-block and average timings. --- src/somd2/runner/_runner.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/src/somd2/runner/_runner.py b/src/somd2/runner/_runner.py index 13f45cb1..b598832e 100644 --- a/src/somd2/runner/_runner.py +++ b/src/somd2/runner/_runner.py @@ -540,13 +540,16 @@ def generate_lam_vals(lambda_base, increment=0.001): num_blocks = int(frac) rem = round(frac - num_blocks, 12) + # Store the star time. + start = _timer() + # Run the dynamics in blocks. for block in range(int(num_blocks)): # Add the start block number. 
block += self._start_block # Record the start time. - start = _timer() + block_start = _timer() # Run the dynamics. try: @@ -620,10 +623,10 @@ def generate_lam_vals(lambda_base, increment=0.001): system = gcmc_sampler._flag_ghost_waters(system) # Record the end time. - end = _timer() + block_end = _timer() # Work how many fractional days the block took. - block_time = (end - start) / 86400 + block_time = (block_end - block_start) / 86400 # Calculate the speed in nanoseconds per day. speed = checkpoint_interval / block_time @@ -665,7 +668,7 @@ def generate_lam_vals(lambda_base, increment=0.001): # Handle the remainder time. (There will be no remainer when GCMC sampling.) if rem > 0: block += 1 - start = _timer() + block_start = _timer() try: dynamics.run( rem, @@ -687,10 +690,10 @@ def generate_lam_vals(lambda_base, increment=0.001): system = dynamics.commit() # Record the end time. - end = _timer() + block_end = _timer() # Work how many fractional days the block took. - block_time = (end - start) / 86400 + block_time = (block_end - block_start) / 86400 # Calculate the speed in nanoseconds per day. speed = checkpoint_interval / block_time @@ -723,9 +726,6 @@ def generate_lam_vals(lambda_base, increment=0.001): f"Final dynamics block for {lam_sym} = {lambda_value:.5f} failed: {e}" ) else: - # Record the start time. - start = _timer() - try: if self._config.gcmc: # Initialise the run time and time at which the next frame is saved. From 7579bed9885092be833732ab50e7f9e5d671f996 Mon Sep 17 00:00:00 2001 From: Lester Hedges Date: Sat, 24 May 2025 16:45:20 +0100 Subject: [PATCH 051/170] Report average speed correctly. 
--- src/somd2/runner/_runner.py | 32 +++++++++++++++++++++++--------- 1 file changed, 23 insertions(+), 9 deletions(-) diff --git a/src/somd2/runner/_runner.py b/src/somd2/runner/_runner.py index b598832e..f288b7f3 100644 --- a/src/somd2/runner/_runner.py +++ b/src/somd2/runner/_runner.py @@ -185,9 +185,9 @@ def run(self): for job in _futures.as_completed(jobs): lambda_value = jobs[job] try: - result = job.result() + success, time = job.result() except Exception as e: - result = False + success = False _logger.error( f"Exception raised for {_lam_sym} = {lambda_value}: {e}" ) @@ -201,6 +201,15 @@ def run(self): # Record the end time. end = _timer() + # Work how many fractional days the simulation took. + prod_time = (end - start) / 86400 + + # Calculate the speed in nanoseconds per day. + speed = time.to("ns") / prod_time + + # Log the speed. + _logger.info(f"Average speed: {prod_speed:.2f} ns day-1") + # Log the run time in minutes. _logger.success( f"Simulation finished. Run time: {(end - start) / 60:.2f} minutes" @@ -221,6 +230,9 @@ def run_window(self, index): success: bool Whether the simulation was successful. + + time: sire.units.GeneralUnit + The duration of the simulation. """ # Get the lambda value. @@ -255,9 +267,9 @@ def run_window(self, index): f"Running {_lam_sym} = {lambda_value} on GPU {gpu_num}" ) - # Run the smullation. + # Run the simulation. try: - self._run( + time = self._run( system, index, device=gpu_num, @@ -277,11 +289,11 @@ def run_window(self, index): # Run the simulation. try: - self._run(system, index, is_restart=self._is_restart) + time = self._run(system, index, is_restart=self._is_restart) except Exception as e: _logger.error(f"Error running {_lam_sym} = {lambda_value}: {e}") - return True + return True, time def _run( self, system, index, device=None, lambda_minimisation=None, is_restart=False @@ -310,8 +322,8 @@ def _run( Returns ------- - df : pandas dataframe - Dataframe containing the sire energy trajectory. 
+ time: sire.units.GeneralUnit + The duration of the simulation. """ # Get the lambda value. @@ -330,7 +342,7 @@ def _run( _logger.success( f"{_lam_sym} = {lambda_value} already complete. Skipping." ) - return + return _sr.u("0ps") # Work out the current block number. self._start_block = int( @@ -798,6 +810,8 @@ def generate_lam_vals(lambda_base, increment=0.001): f"{_lam_sym} = {lambda_value:.5f} complete, speed = {speed:.2f} ns day-1" ) + return time + def _minimisation( self, system, From 701d55ea74f4b22ffc1faed91c5978e9f3ffb31c Mon Sep 17 00:00:00 2001 From: Lester Hedges Date: Sat, 24 May 2025 17:49:25 +0100 Subject: [PATCH 052/170] Fix indentation of return statement. [ci skip] --- src/somd2/runner/_runner.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/somd2/runner/_runner.py b/src/somd2/runner/_runner.py index f288b7f3..b702e9a4 100644 --- a/src/somd2/runner/_runner.py +++ b/src/somd2/runner/_runner.py @@ -810,7 +810,7 @@ def generate_lam_vals(lambda_base, increment=0.001): f"{_lam_sym} = {lambda_value:.5f} complete, speed = {speed:.2f} ns day-1" ) - return time + return time def _minimisation( self, From 5abd7e7e3680df097fcc5163957cbdaea6553e4e Mon Sep 17 00:00:00 2001 From: Lester Hedges Date: Sat, 24 May 2025 18:29:53 +0100 Subject: [PATCH 053/170] Fix variable names. [ci skip] --- src/somd2/runner/_runner.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/somd2/runner/_runner.py b/src/somd2/runner/_runner.py index b702e9a4..20fcd695 100644 --- a/src/somd2/runner/_runner.py +++ b/src/somd2/runner/_runner.py @@ -202,13 +202,13 @@ def run(self): end = _timer() # Work how many fractional days the simulation took. - prod_time = (end - start) / 86400 + days = (end - start) / 86400 # Calculate the speed in nanoseconds per day. - speed = time.to("ns") / prod_time + speed = time.to("ns") / days # Log the speed. 
- _logger.info(f"Average speed: {prod_speed:.2f} ns day-1") + _logger.info(f"Average speed: {speed:.2f} ns day-1") # Log the run time in minutes. _logger.success( From 3a98b8ce3cb0bc15461d615d84c8893a0181aa2d Mon Sep 17 00:00:00 2001 From: Lester Hedges Date: Sat, 24 May 2025 18:34:18 +0100 Subject: [PATCH 054/170] Remove unused variable. [ci skip] --- src/somd2/runner/_runner.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/somd2/runner/_runner.py b/src/somd2/runner/_runner.py index 20fcd695..8896f00e 100644 --- a/src/somd2/runner/_runner.py +++ b/src/somd2/runner/_runner.py @@ -187,7 +187,6 @@ def run(self): try: success, time = job.result() except Exception as e: - success = False _logger.error( f"Exception raised for {_lam_sym} = {lambda_value}: {e}" ) From 8bb1b0bd1d6f7e8fb985df8b4ea765fd3e7130aa Mon Sep 17 00:00:00 2001 From: Lester Hedges Date: Sat, 24 May 2025 19:10:26 +0100 Subject: [PATCH 055/170] Standardise variable name. [ci skip] --- src/somd2/runner/_runner.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/somd2/runner/_runner.py b/src/somd2/runner/_runner.py index 8896f00e..309ebb1c 100644 --- a/src/somd2/runner/_runner.py +++ b/src/somd2/runner/_runner.py @@ -797,10 +797,10 @@ def generate_lam_vals(lambda_base, increment=0.001): end = _timer() # Work how many fractional days the simulation took. - prod_time = (end - start) / 86400 + days = (end - start) / 86400 # Calculate the speed in nanoseconds per day. - speed = time.to("ns") / prod_time + speed = time.to("ns") / days # Checkpoint. self._checkpoint(system, index, 0, speed, is_final_block=True) From e1ca4e4dcbd10e9c65867b525a49d30a6aec93bd Mon Sep 17 00:00:00 2001 From: Lester Hedges Date: Tue, 27 May 2025 09:14:55 +0100 Subject: [PATCH 056/170] Clarify performance logging. 
[ci skip] --- src/somd2/runner/_repex.py | 2 +- src/somd2/runner/_runner.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/somd2/runner/_repex.py b/src/somd2/runner/_repex.py index c40d7707..09e4019a 100644 --- a/src/somd2/runner/_repex.py +++ b/src/somd2/runner/_repex.py @@ -789,7 +789,7 @@ def run(self): prod_speed = self._config.runtime.to("ns") / prod_time # Record the average production speed. - _logger.info(f"Average replica speed: {prod_speed:.2f} ns day-1") + _logger.info(f"Overall performance: {prod_speed:.2f} ns day-1") # Log the run time in minutes. _logger.success( diff --git a/src/somd2/runner/_runner.py b/src/somd2/runner/_runner.py index 309ebb1c..898179cf 100644 --- a/src/somd2/runner/_runner.py +++ b/src/somd2/runner/_runner.py @@ -207,7 +207,7 @@ def run(self): speed = time.to("ns") / days # Log the speed. - _logger.info(f"Average speed: {speed:.2f} ns day-1") + _logger.info(f"Overall performance: {speed:.2f} ns day-1") # Log the run time in minutes. _logger.success( From 00b367ed80a0f74c645149073964edc9e8054ff7 Mon Sep 17 00:00:00 2001 From: Lester Hedges Date: Tue, 27 May 2025 12:52:53 +0100 Subject: [PATCH 057/170] Add GCMC term to energy. --- src/somd2/runner/_repex.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/src/somd2/runner/_repex.py b/src/somd2/runner/_repex.py index 09e4019a..afd94b80 100644 --- a/src/somd2/runner/_repex.py +++ b/src/somd2/runner/_repex.py @@ -1172,12 +1172,20 @@ def _assemble_results(self, results): for i, energies in results: for j, energy in enumerate(energies): matrix[i, j] = self._beta * energy + # Add the pressure term if applicable. if self._pressure is not None: matrix[i, j] += ( self._beta * self._config.pressure * self._dynamics_cache._openmm_volumes[j] ) + # Add the GCMC term if applicable. 
+ if self._config.gcmc: + matrix[ + i, j + ] += self._dynamics_cache._gcmc_samplers._B_bulk * _np.sum( + self._dynamics_cache._gcmc_states[i] + ) return matrix From b501770af778308099e337d33b9f8a3e927c3a9f Mon Sep 17 00:00:00 2001 From: Lester Hedges Date: Tue, 27 May 2025 13:33:55 +0100 Subject: [PATCH 058/170] Pass through GCMC specific options to dynamics.run(). --- src/somd2/runner/_repex.py | 7 +++++++ src/somd2/runner/_runner.py | 11 +++++++++++ 2 files changed, 18 insertions(+) diff --git a/src/somd2/runner/_repex.py b/src/somd2/runner/_repex.py index afd94b80..fe4d267b 100644 --- a/src/somd2/runner/_repex.py +++ b/src/somd2/runner/_repex.py @@ -899,6 +899,13 @@ def _run_block( auto_fix_minimise=True, num_energy_neighbours=self._config.num_energy_neighbours, null_energy=self._config.null_energy, + # GCMC specific options. + adams_value=gcmc_sampler._B_bulk if gcmc_sampler is not None else None, + num_waters=( + _np.sum(gcmc_sampler.water_state()) + if gcmc_sampler is not None + else None + ), ) # The frame frequency was hit, so writ the indices of the current diff --git a/src/somd2/runner/_runner.py b/src/somd2/runner/_runner.py index 898179cf..f60dacb4 100644 --- a/src/somd2/runner/_runner.py +++ b/src/somd2/runner/_runner.py @@ -586,6 +586,17 @@ def generate_lam_vals(lambda_base, increment=0.001): auto_fix_minimise=True, num_energy_neighbours=num_energy_neighbours, null_energy=self._config.null_energy, + # GCMC specific options. + adams_value=( + gcmc_sampler._B_bulk + if gcmc_sampler is not None + else None + ), + num_waters=( + _np.sum(gcmc_sampler.water_state()) + if gcmc_sampler is not None + else None + ), ) # Perform a GCMC move. From 2f4359fc361a5189b3689816fb3f940e844116d9 Mon Sep 17 00:00:00 2001 From: Lester Hedges Date: Tue, 27 May 2025 14:56:48 +0100 Subject: [PATCH 059/170] Access list, not object directly. 
[ci skip] --- src/somd2/runner/_repex.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/somd2/runner/_repex.py b/src/somd2/runner/_repex.py index fe4d267b..748aa87a 100644 --- a/src/somd2/runner/_repex.py +++ b/src/somd2/runner/_repex.py @@ -1190,7 +1190,7 @@ def _assemble_results(self, results): if self._config.gcmc: matrix[ i, j - ] += self._dynamics_cache._gcmc_samplers._B_bulk * _np.sum( + ] += self._dynamics_cache._gcmc_samplers[i]._B_bulk * _np.sum( self._dynamics_cache._gcmc_states[i] ) From a7f758197a83a214cd071fef07d43a0dce1a4362 Mon Sep 17 00:00:00 2001 From: Lester Hedges Date: Tue, 27 May 2025 15:40:51 +0100 Subject: [PATCH 060/170] Pass excess chemical potential, not Adams value. [ci skip] --- src/somd2/runner/_base.py | 3 +++ src/somd2/runner/_repex.py | 8 ++++---- src/somd2/runner/_runner.py | 6 ++---- 3 files changed, 9 insertions(+), 8 deletions(-) diff --git a/src/somd2/runner/_base.py b/src/somd2/runner/_base.py index f5f1e54e..0fc2a211 100644 --- a/src/somd2/runner/_base.py +++ b/src/somd2/runner/_base.py @@ -402,6 +402,9 @@ def __init__(self, system, config): mols, water, rng, self._config.gcmc_num_waters ) + # Store the excess chemical potential. + self._mu_ex = self._config.gcmc_excess_chemical_potential.value() + # Append only this number of lines from the end of the dataframe during checkpointing. self._energy_per_block = int( self._config.checkpoint_frequency / self._config.energy_frequency diff --git a/src/somd2/runner/_repex.py b/src/somd2/runner/_repex.py index 748aa87a..daa463f6 100644 --- a/src/somd2/runner/_repex.py +++ b/src/somd2/runner/_repex.py @@ -900,7 +900,9 @@ def _run_block( num_energy_neighbours=self._config.num_energy_neighbours, null_energy=self._config.null_energy, # GCMC specific options. 
- adams_value=gcmc_sampler._B_bulk if gcmc_sampler is not None else None, + excess_chemical_potential=( + self._mu_ex if gcmc_sampler is not None else None + ), num_waters=( _np.sum(gcmc_sampler.water_state()) if gcmc_sampler is not None @@ -1188,9 +1190,7 @@ def _assemble_results(self, results): ) # Add the GCMC term if applicable. if self._config.gcmc: - matrix[ - i, j - ] += self._dynamics_cache._gcmc_samplers[i]._B_bulk * _np.sum( + matrix[i, j] += self._mu_ex * _np.sum( self._dynamics_cache._gcmc_states[i] ) diff --git a/src/somd2/runner/_runner.py b/src/somd2/runner/_runner.py index f60dacb4..f5748ea8 100644 --- a/src/somd2/runner/_runner.py +++ b/src/somd2/runner/_runner.py @@ -587,10 +587,8 @@ def generate_lam_vals(lambda_base, increment=0.001): num_energy_neighbours=num_energy_neighbours, null_energy=self._config.null_energy, # GCMC specific options. - adams_value=( - gcmc_sampler._B_bulk - if gcmc_sampler is not None - else None + excess_chemical_potential=( + self._mu_ex if gcmc_sampler is not None else None ), num_waters=( _np.sum(gcmc_sampler.water_state()) From d82606c4b11b441e8effabac839f7bbc9ca68545 Mon Sep 17 00:00:00 2001 From: Lester Hedges Date: Tue, 27 May 2025 16:50:28 +0100 Subject: [PATCH 061/170] Missing beta factor for GCMC term. [ci skip] --- src/somd2/runner/_repex.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/somd2/runner/_repex.py b/src/somd2/runner/_repex.py index daa463f6..792f1547 100644 --- a/src/somd2/runner/_repex.py +++ b/src/somd2/runner/_repex.py @@ -1190,8 +1190,10 @@ def _assemble_results(self, results): ) # Add the GCMC term if applicable. 
if self._config.gcmc: - matrix[i, j] += self._mu_ex * _np.sum( - self._dynamics_cache._gcmc_states[i] + matrix[i, j] += ( + self._beta + * self._mu_ex + * _np.sum(self._dynamics_cache._gcmc_states[i]) ) return matrix From f7e104352f12e49d23684435b40ba906e5e218b0 Mon Sep 17 00:00:00 2001 From: Lester Hedges Date: Tue, 27 May 2025 17:24:08 +0100 Subject: [PATCH 062/170] Energy now already includes pressure and GC term. --- src/somd2/runner/_repex.py | 19 +++---------------- 1 file changed, 3 insertions(+), 16 deletions(-) diff --git a/src/somd2/runner/_repex.py b/src/somd2/runner/_repex.py index 792f1547..cf654e76 100644 --- a/src/somd2/runner/_repex.py +++ b/src/somd2/runner/_repex.py @@ -922,7 +922,7 @@ def _run_block( if gcmc_sampler is not None: self._dynamics_cache.save_gcmc_state(index) - # Get the energies at each lambda value. + # Get the energy at each lambda value. energies = ( dynamics._d.energy_trajectory() .to_pandas(to_alchemlyb=True, energy_unit="kcal/mol") @@ -1177,24 +1177,11 @@ def _assemble_results(self, results): # Create the matrix. matrix = _np.zeros((len(results), len(results))) - # Fill the matrix. + # Fill the matrix. The energy returned by the dynamics block already + # includes the pressure and grand canonical contributions. for i, energies in results: for j, energy in enumerate(energies): matrix[i, j] = self._beta * energy - # Add the pressure term if applicable. - if self._pressure is not None: - matrix[i, j] += ( - self._beta - * self._config.pressure - * self._dynamics_cache._openmm_volumes[j] - ) - # Add the GCMC term if applicable. - if self._config.gcmc: - matrix[i, j] += ( - self._beta - * self._mu_ex - * _np.sum(self._dynamics_cache._gcmc_states[i]) - ) return matrix From 609a1a244d6d2c403c5a287e7f839f193a4ff211 Mon Sep 17 00:00:00 2001 From: Lester Hedges Date: Tue, 27 May 2025 17:35:38 +0100 Subject: [PATCH 063/170] No need to store volume. 
[ci skip] --- src/somd2/runner/_repex.py | 7 ------- 1 file changed, 7 deletions(-) diff --git a/src/somd2/runner/_repex.py b/src/somd2/runner/_repex.py index cf654e76..5563862b 100644 --- a/src/somd2/runner/_repex.py +++ b/src/somd2/runner/_repex.py @@ -91,7 +91,6 @@ def __init__( self._states = _np.array(range(len(lambdas))) self._old_states = _np.array(range(len(lambdas))) self._openmm_states = [None] * len(lambdas) - self._openmm_volumes = [None] * len(lambdas) self._gcmc_samplers = [None] * len(lambdas) self._gcmc_states = [None] * len(lambdas) self._num_proposed = _np.matrix(_np.zeros((len(lambdas), len(lambdas)))) @@ -128,7 +127,6 @@ def __getstate__(self): "_states": self._states, "_old_states": self._old_states, "_openmm_states": self._openmm_states, - "_openmm_volumes": self._openmm_volumes, "_num_proposed": self._num_proposed, "_num_accepted": self._num_accepted, "_num_swaps": self._num_swaps, @@ -307,11 +305,6 @@ def save_openmm_state(self, index): # Store the state. self._openmm_states[index] = state - # Store the volume. - self._openmm_volumes[index] = state.getPeriodicBoxVolume().value_in_unit( - angstrom**3 - ) - def save_gcmc_state(self, index): """ Save the current GCMC water state for the replica. From 6c6976890b8a546d32487a43001b98d6db00b52a Mon Sep 17 00:00:00 2001 From: Lester Hedges Date: Tue, 27 May 2025 18:14:43 +0100 Subject: [PATCH 064/170] Add missing NumPy import. [ci skip] --- src/somd2/runner/_runner.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/somd2/runner/_runner.py b/src/somd2/runner/_runner.py index f5748ea8..62ba6680 100644 --- a/src/somd2/runner/_runner.py +++ b/src/somd2/runner/_runner.py @@ -23,6 +23,8 @@ from time import time as _timer +import numpy as _np + import sire as _sr from somd2 import _logger From 8dc0d5c96f2030186aacc7f538cc61dbbdb5014e Mon Sep 17 00:00:00 2001 From: Lester Hedges Date: Wed, 28 May 2025 09:27:33 +0100 Subject: [PATCH 065/170] Enforce NVT for GCMC. 
--- src/somd2/runner/_base.py | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/src/somd2/runner/_base.py b/src/somd2/runner/_base.py index 0fc2a211..37d651ee 100644 --- a/src/somd2/runner/_base.py +++ b/src/somd2/runner/_base.py @@ -383,17 +383,25 @@ def __init__(self, system, config): mols = self._system # Add ghost waters to the system. if self._config.gcmc and self._has_space: + # Make sure that a pressure has not been set. + if self._config.pressure is not None: + msg = "GCMC simulations must be run in the NVT ensemble." + _logger.error(msg) + raise ValueError(msg) + from loch import GCMCSampler from numpy.random import default_rng # Create a random number generator. rng = default_rng() - # Get a water template. + # Check that the system is solvated with water molecules. This + # is required for GCMC simulations since the existing waters + # provide a template for the ghost waters. try: water = mols["water"].molecules()[0] except: - msg = "No water molecules in the system." + msg = "No water molecules in the system. Cannot perform GCMC." _logger.error(msg) raise ValueError(msg) From 0805d921a758359e47d33b0cd2a2c72da4d93f9d Mon Sep 17 00:00:00 2001 From: Lester Hedges Date: Wed, 28 May 2025 11:55:07 +0100 Subject: [PATCH 066/170] GCMC currently only supported on CUDA platform. [ci skip] --- src/somd2/runner/_base.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/somd2/runner/_base.py b/src/somd2/runner/_base.py index 37d651ee..b316f6c7 100644 --- a/src/somd2/runner/_base.py +++ b/src/somd2/runner/_base.py @@ -423,8 +423,8 @@ def __init__(self, system, config): # GCMC specific validation. if self._config.gcmc: - if not self._is_gpu: - msg = "GCMC simulations require a GPU platform." + if self._config.platform != "cuda": + msg = "GCMC simulations require the CUDA platform." 
_logger.error(msg) raise ValueError(msg) From c2c26946e540ab22c7285dc0ed18ee16c45d41b3 Mon Sep 17 00:00:00 2001 From: Lester Hedges Date: Wed, 28 May 2025 13:48:20 +0100 Subject: [PATCH 067/170] Standardise logger output. [ci skip] --- src/somd2/runner/_repex.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/somd2/runner/_repex.py b/src/somd2/runner/_repex.py index 5563862b..6cde0ffc 100644 --- a/src/somd2/runner/_repex.py +++ b/src/somd2/runner/_repex.py @@ -862,7 +862,7 @@ def _run_block( _logger.info(f"Minimising restart at {_lam_sym} = {lam:.5f}") dynamics.minimise(timeout=self._config.timeout) - _logger.info(f"Running dynamics for {_lam_sym} = {lam:.5f}") + _logger.info(f"Running dynamics at {_lam_sym} = {lam:.5f}") # Draw new velocities from the Maxwell-Boltzmann distribution. dynamics.randomise_velocities() From b94706440bc915357f6fedebb2d18399f1a495ad Mon Sep 17 00:00:00 2001 From: Lester Hedges Date: Wed, 28 May 2025 15:43:53 +0100 Subject: [PATCH 068/170] Fix ghost file name for repex simulation. [ci skip] --- src/somd2/runner/_repex.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/somd2/runner/_repex.py b/src/somd2/runner/_repex.py index 6cde0ffc..438653b1 100644 --- a/src/somd2/runner/_repex.py +++ b/src/somd2/runner/_repex.py @@ -197,7 +197,7 @@ def _create_dynamics( if gcmc_kwargs is not None: from loch import GCMCSampler - ghost_file = str(output_directory / f"gcmc_{lam:.5f}.ghost") + ghost_file = str(output_directory / f"gcmc_ghosts_{lam:.5f}.txt") # Create the GCMC sampler. gcmc_sampler = GCMCSampler( From d86fcfb1fce37cdb9155ac9643845175b298a70e Mon Sep 17 00:00:00 2001 From: Lester Hedges Date: Mon, 2 Jun 2025 12:05:48 +0100 Subject: [PATCH 069/170] Only reconstruct SOMD1 system when ghost atom mods are enabled. 
--- src/somd2/runner/_base.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/src/somd2/runner/_base.py b/src/somd2/runner/_base.py index b316f6c7..efbd55ae 100644 --- a/src/somd2/runner/_base.py +++ b/src/somd2/runner/_base.py @@ -100,8 +100,12 @@ def __init__(self, system, config): raise IOError(msg) # If we're not using SOMD1 compatibility, then reconstruct the original - # perturbable system. - if not self._config.somd1_compatibility: + # perturbable system. We only need to do this if applying modifications + # to ghost atom bonded terms. + if ( + not self._config.somd1_compatibility + and self._config.ghost_modifications + ): from .._utils._somd1 import reconstruct_system self._system = reconstruct_system(self._system) From a0145f5bdb2b1183a093a0d46f73d269cc71d916 Mon Sep 17 00:00:00 2001 From: Lester Hedges Date: Mon, 2 Jun 2025 15:31:52 +0100 Subject: [PATCH 070/170] Trajectory reconstruction fails with PDB, so use PRM7 too. --- src/somd2/runner/_base.py | 29 ++++++++++++++++------------- 1 file changed, 16 insertions(+), 13 deletions(-) diff --git a/src/somd2/runner/_base.py b/src/somd2/runner/_base.py index efbd55ae..8e003e82 100644 --- a/src/somd2/runner/_base.py +++ b/src/somd2/runner/_base.py @@ -877,19 +877,10 @@ def _prepare_output(self): for file in list(set(deleted)): file.unlink() - # Use PDB format for GCMC simulations to allow trajectory post-processing - # and analysis with grand. - if self._config.gcmc: - top_ext = "pdb" - else: - top_ext = "prm7" - - filenames["topology0"] = str( - self._config.output_directory / f"system0.{top_ext}" - ) - filenames["topology1"] = str( - self._config.output_directory / f"system1.{top_ext}" - ) + # File names for end-state topologies. This can be used for trajectory + # visulation and analysis. 
+ filenames["topology0"] = str(self._config.output_directory / "system0.prm7") + filenames["topology1"] = str(self._config.output_directory / "system1.prm7") return filenames @@ -1301,6 +1292,18 @@ def _checkpoint( _sr.save(mols0, self._filenames["topology0"]) _sr.save(mols1, self._filenames["topology1"]) + # If this is a GCMC simulation, then save the end state + # topologies to PDB format to allow analysis with grand. + if self._config.gcmc: + _sr.save( + mols0, + self._filenames["topology0"].replace(".prm7", ".pdb"), + ) + _sr.save( + mols1, + self._filenames["topology1"].replace(".prm7", ".pdb"), + ) + # Get the lambda value. lam = self._lambda_values[index] From 22cf751900a07611736719b230f7297ccd93cae2 Mon Sep 17 00:00:00 2001 From: Lester Hedges Date: Tue, 3 Jun 2025 09:22:27 +0100 Subject: [PATCH 071/170] Log the number of waters in the GCMC volume when checkpointing. --- src/somd2/runner/_repex.py | 7 +++++++ src/somd2/runner/_runner.py | 16 +++++++++++++--- 2 files changed, 20 insertions(+), 3 deletions(-) diff --git a/src/somd2/runner/_repex.py b/src/somd2/runner/_repex.py index 438653b1..86e8a32c 100644 --- a/src/somd2/runner/_repex.py +++ b/src/somd2/runner/_repex.py @@ -950,6 +950,13 @@ def _run_block( f"for {_lam_sym} = {lam:.5f}" ) + # Log the number of waters within the GCMC sampling volume. + if gcmc_sampler is not None: + _logger.info( + f"Number of waters in GCMC volume for {_lam_sym} = {lam:.5f}: " + f"{gcmc_sampler.num_waters_in_volume()}" + ) + if is_final_block: _logger.success(f"{_lam_sym} = {lam:.5f} complete") diff --git a/src/somd2/runner/_runner.py b/src/somd2/runner/_runner.py index 62ba6680..2c813962 100644 --- a/src/somd2/runner/_runner.py +++ b/src/somd2/runner/_runner.py @@ -385,6 +385,9 @@ def generate_lam_vals(lambda_base, increment=0.001): # Get the GCMC system. system = gcmc_sampler.system() + else: + gcmc_sampler = None + # Minimisation. if self._config.minimise: # Minimise with no constraints if we need to equilibrate first. 
@@ -441,7 +444,7 @@ def generate_lam_vals(lambda_base, increment=0.001): dynamics = system.dynamics(**dynamics_kwargs) # Equilibrate with GCMC moves. - if self._config.gcmc: + if gcmc_sampler is not None: _logger.info( f"Euilibraing with GCMC moves at {_lam_sym} = {lambda_value:.5f}" ) @@ -641,7 +644,7 @@ def generate_lam_vals(lambda_base, increment=0.001): system = dynamics.commit() # If performing GCMC, then we need to flag the ghost waters. - if self._config.gcmc: + if gcmc_sampler is not None: system = gcmc_sampler._flag_ghost_waters(system) # Record the end time. @@ -678,6 +681,13 @@ def generate_lam_vals(lambda_base, increment=0.001): f"for {_lam_sym} = {lambda_value:.5f}" ) + # Log the number of waters within the GCMC sampling volume. + if gcmc_sampler is not None: + _logger.info( + f"Number of waters in GCMC volume for {_lam_sym} = {lambda_value:.5f}: " + f"{gcmc_sampler.num_waters_in_volume()}" + ) + if is_final_block: _logger.success( f"{_lam_sym} = {lambda_value:.5f} complete, speed = {speed:.2f} ns day-1" @@ -749,7 +759,7 @@ def generate_lam_vals(lambda_base, increment=0.001): ) else: try: - if self._config.gcmc: + if gcmc_sampler is not None: # Initialise the run time and time at which the next frame is saved. 
runtime = _sr.u("0ps") save_frames = self._config.frame_frequency > 0 From edfd3d80dc3825e9a670f0491955328460649e1b Mon Sep 17 00:00:00 2001 From: Lester Hedges Date: Tue, 3 Jun 2025 16:24:21 +0100 Subject: [PATCH 072/170] Fix method name [ci skip] --- src/somd2/runner/_repex.py | 2 +- src/somd2/runner/_runner.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/somd2/runner/_repex.py b/src/somd2/runner/_repex.py index 86e8a32c..19bbd144 100644 --- a/src/somd2/runner/_repex.py +++ b/src/somd2/runner/_repex.py @@ -954,7 +954,7 @@ def _run_block( if gcmc_sampler is not None: _logger.info( f"Number of waters in GCMC volume for {_lam_sym} = {lam:.5f}: " - f"{gcmc_sampler.num_waters_in_volume()}" + f"{gcmc_sampler.num_waters()}" ) if is_final_block: diff --git a/src/somd2/runner/_runner.py b/src/somd2/runner/_runner.py index 2c813962..991e253b 100644 --- a/src/somd2/runner/_runner.py +++ b/src/somd2/runner/_runner.py @@ -685,7 +685,7 @@ def generate_lam_vals(lambda_base, increment=0.001): if gcmc_sampler is not None: _logger.info( f"Number of waters in GCMC volume for {_lam_sym} = {lambda_value:.5f}: " - f"{gcmc_sampler.num_waters_in_volume()}" + f"{gcmc_sampler.num_waters()}" ) if is_final_block: From e451ef20c5f767ac659a60f50687ad226b4f9eea Mon Sep 17 00:00:00 2001 From: Lester Hedges Date: Wed, 4 Jun 2025 08:36:42 +0100 Subject: [PATCH 073/170] Log output formatting tweak. [ci skip] --- src/somd2/runner/_repex.py | 4 ++-- src/somd2/runner/_runner.py | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/somd2/runner/_repex.py b/src/somd2/runner/_repex.py index 19bbd144..9b2a89d0 100644 --- a/src/somd2/runner/_repex.py +++ b/src/somd2/runner/_repex.py @@ -953,8 +953,8 @@ def _run_block( # Log the number of waters within the GCMC sampling volume. 
if gcmc_sampler is not None: _logger.info( - f"Number of waters in GCMC volume for {_lam_sym} = {lam:.5f}: " - f"{gcmc_sampler.num_waters()}" + f"Current number of waters in GCMC volume at {_lam_sym} = {lam:.5f} " + f"is {gcmc_sampler.num_waters()}" ) if is_final_block: diff --git a/src/somd2/runner/_runner.py b/src/somd2/runner/_runner.py index 991e253b..0998a115 100644 --- a/src/somd2/runner/_runner.py +++ b/src/somd2/runner/_runner.py @@ -684,8 +684,8 @@ def generate_lam_vals(lambda_base, increment=0.001): # Log the number of waters within the GCMC sampling volume. if gcmc_sampler is not None: _logger.info( - f"Number of waters in GCMC volume for {_lam_sym} = {lambda_value:.5f}: " - f"{gcmc_sampler.num_waters()}" + f"Current number of waters in GCMC volume at {_lam_sym} = {lambda_value:.5f} " + f"is {gcmc_sampler.num_waters()}" ) if is_final_block: From af4f7046968a8f9ef137cdf5dfe0005ad637a12c Mon Sep 17 00:00:00 2001 From: Lester Hedges Date: Wed, 4 Jun 2025 09:15:29 +0100 Subject: [PATCH 074/170] Need to push/pop the PyCUDA context when logging number of waters. --- src/somd2/runner/_repex.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/somd2/runner/_repex.py b/src/somd2/runner/_repex.py index 9b2a89d0..9603b58b 100644 --- a/src/somd2/runner/_repex.py +++ b/src/somd2/runner/_repex.py @@ -952,11 +952,17 @@ def _run_block( # Log the number of waters within the GCMC sampling volume. if gcmc_sampler is not None: + # Push the PyCUDA context on top of the stack. + gcmc_sampler.push() + _logger.info( f"Current number of waters in GCMC volume at {_lam_sym} = {lam:.5f} " f"is {gcmc_sampler.num_waters()}" ) + # Remove the PyCUDA context from the stack. + gcmc_sampler.pop() + if is_final_block: _logger.success(f"{_lam_sym} = {lam:.5f} complete") From 5b6a2f7ebffe9bbfdbb11f390c9367c81eef1661 Mon Sep 17 00:00:00 2001 From: Lester Hedges Date: Thu, 5 Jun 2025 16:49:17 +0100 Subject: [PATCH 075/170] Allow GPU oversubscription for regular runner. 
--- src/somd2/config/_config.py | 2 +- src/somd2/runner/_runner.py | 63 ++++++++++++++++++++++++------------- 2 files changed, 42 insertions(+), 23 deletions(-) diff --git a/src/somd2/config/_config.py b/src/somd2/config/_config.py index 05f6024d..e71c41a9 100644 --- a/src/somd2/config/_config.py +++ b/src/somd2/config/_config.py @@ -303,7 +303,7 @@ def __init__( Does nothing if platform is set to CPU. oversubscription_factor: int - Factor by which to oversubscribe jobs on GPUs during replica exchange simulations. + The number of OpenMM contexts that can be run on a single GPU at the same time. replica_exchange: bool Whether to run replica exchange simulation. Currently this can only be used when diff --git a/src/somd2/runner/_runner.py b/src/somd2/runner/_runner.py index 0998a115..c4f9dbcd 100644 --- a/src/somd2/runner/_runner.py +++ b/src/somd2/runner/_runner.py @@ -92,22 +92,27 @@ def _create_shared_resources(self): Also intialises the list with all available GPUs. """ if self._is_gpu: - if self._config.max_gpus is None: - self._gpu_pool = self._manager.list( - self._zero_gpu_devices(self._get_gpu_devices(self._config.platform)) - ) - else: - self._gpu_pool = self._manager.list( - self._zero_gpu_devices( - self._get_gpu_devices(self._config.platform)[ - : self._config.max_gpus - ] + devices = self._get_gpu_devices(self._config.platform) + if self._config.max_gpus is not None: + if self._config.max_gpus > len(devices): + _logger.warning( + f"Requested {self._config.max_gpus} GPUs, but only {len(devices)} are available." ) + num_devices = min(len(devices), self._config.max_gpus) + else: + num_devices = len(devices) + + # Create the GPU pool from the available devices. + self._gpu_pool = self._manager.list( + self._initialise_gpu_devices( + num_devices, + self._config.oversubscription_factor, ) + ) def _update_gpu_pool(self, gpu_num): """ - Updates the GPU pool to remove the GPU that has been assigned to a worker. 
+ Updates the GPU pool to add the GPU assigned to a worker that has finished. Parameters ---------- @@ -119,7 +124,7 @@ def _update_gpu_pool(self, gpu_num): def _remove_gpu_from_pool(self, gpu_num): """ - Removes a GPU from the GPU pool. + Removes a GPU from the GPU pool when it is assigned to a worker. Parameters ---------- @@ -130,18 +135,31 @@ def _remove_gpu_from_pool(self, gpu_num): self._gpu_pool.remove(gpu_num) @staticmethod - def _zero_gpu_devices(devices): + def _initialise_gpu_devices(num_devices, oversubscription_factor=1): """ - Set all device numbers relative to the lowest (the device number becomes - equal to its index in the list). + Create the list of avaiable GPU devices. + + Parameters + ---------- + + num_devices: int + The number of GPU devices to use. + + oversubscription_factor: int + The oversubscription factor for the GPUs. This is the number of + workers that can use a single GPU at the same time. Returns ------- - devices : [int] - List of zeroed available device numbers. + devices : [(str, int)] + List of available device numbers with oversubscription factor. """ - return [str(devices.index(value)) for value in devices] + devices = [] + for i in range(oversubscription_factor): + for j in range(num_devices): + devices.append((str(j), i)) + return devices def run(self): """ @@ -261,8 +279,9 @@ def run_window(self, index): if self._is_gpu: # Get a GPU from the pool. with self._lock: - gpu_num = self._gpu_pool[0] - self._remove_gpu_from_pool(gpu_num) + gpu = self._gpu_pool[0] + gpu_num = gpu[0] + self._remove_gpu_from_pool(gpu) if lambda_value is not None: _logger.info( f"Running {_lam_sym} = {lambda_value} on GPU {gpu_num}" @@ -278,10 +297,10 @@ def run_window(self, index): ) with self._lock: - self._update_gpu_pool(gpu_num) + self._update_gpu_pool(gpu) except Exception as e: with self._lock: - self._update_gpu_pool(gpu_num) + self._update_gpu_pool(gpu) _logger.error(f"Error running {_lam_sym} = {lambda_value}: {e}") # All other platforms. 
From 648fe3669e2fd4e5b6e2e6f8f6e6fb33c526783c Mon Sep 17 00:00:00 2001 From: Lester Hedges Date: Thu, 5 Jun 2025 17:02:19 +0100 Subject: [PATCH 076/170] Log the oversubscription factor. [ci skip] --- src/somd2/runner/_base.py | 6 +++++- src/somd2/runner/_repex.py | 4 +++- src/somd2/runner/_runner.py | 4 +++- 3 files changed, 11 insertions(+), 3 deletions(-) diff --git a/src/somd2/runner/_base.py b/src/somd2/runner/_base.py index 8e003e82..2e4f325b 100644 --- a/src/somd2/runner/_base.py +++ b/src/somd2/runner/_base.py @@ -1110,7 +1110,7 @@ def _systems_are_same(system0, system1): return True, None @staticmethod - def _get_gpu_devices(platform): + def _get_gpu_devices(platform, oversubscription_factor=1): """ Get list of available GPUs from CUDA_VISIBLE_DEVICES, OPENCL_VISIBLE_DEVICES, or HIP_VISIBLE_DEVICES. @@ -1121,6 +1121,9 @@ def _get_gpu_devices(platform): platform: str The GPU platform to be used for simulations. + oversubscription_factor: int + The number of concurrent workers per GPU. Default is 1. + Returns -------- @@ -1159,6 +1162,7 @@ def _get_gpu_devices(platform): num_gpus = len(available_devices) _logger.info(f"Number of GPUs available: {num_gpus}") + _logger.info(f"Number of concurrent workers per GPU: {oversubscription_factor}") return available_devices diff --git a/src/somd2/runner/_repex.py b/src/somd2/runner/_repex.py index 9603b58b..f50a8c43 100644 --- a/src/somd2/runner/_repex.py +++ b/src/somd2/runner/_repex.py @@ -443,7 +443,9 @@ def __init__(self, system, config): # Get the number of available GPUs. 
try: - gpu_devices = self._get_gpu_devices("cuda") + gpu_devices = self._get_gpu_devices( + "cuda", self._config.oversubscription_factor + ) except Exception as e: _logger.error(f"Could not determine available GPU devices: {e}") raise e diff --git a/src/somd2/runner/_runner.py b/src/somd2/runner/_runner.py index c4f9dbcd..0e9bb4bc 100644 --- a/src/somd2/runner/_runner.py +++ b/src/somd2/runner/_runner.py @@ -92,7 +92,9 @@ def _create_shared_resources(self): Also intialises the list with all available GPUs. """ if self._is_gpu: - devices = self._get_gpu_devices(self._config.platform) + devices = self._get_gpu_devices( + self._config.platform, self._config.oversubscription_factor + ) if self._config.max_gpus is not None: if self._config.max_gpus > len(devices): _logger.warning( From 0d1acd499c30921a6e4d6a627e1fcde1d935ac2c Mon Sep 17 00:00:00 2001 From: Lester Hedges Date: Fri, 6 Jun 2025 09:25:18 +0100 Subject: [PATCH 077/170] Add section on GPU oversubscription using NVIDIA MPS. [ci skip] --- README.md | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/README.md b/README.md index 72f55293..32e6f84f 100644 --- a/README.md +++ b/README.md @@ -188,3 +188,20 @@ somd2 somd1.bss --pert-file somd1.pert --somd1-compatibility If you want to load an existing system from a perturbation file and use the new `somd2` ghost atom bonded-term modifications, then simply omit the `--somd1-compatibility` option. + +## GPU oversubscription + +If you have an NVIDIA GPU that supports the multi-process service (MPS), you can +oversubscibe the GPU to run multiple OpenMM contexts on the same GPU at once, +increasing the throughput of your simulation. To do this, you will need to first +enable MPS by running the following command: + +``` +nvidia-cuda-mps-control -d +``` + +The number of contexts that can be run in parallel is then controlled by the +`--oversubscription-factor` option, which defaults to 1. 
+ +More details on MPS, including tuning options, can be found i nthe following +[techical blog](https://developer.nvidia.com/blog/maximizing-openmm-molecular-dynamics-throughput-with-nvidia-multi-process-service/). From 8b428d37070ac227b0ae1d8d2a961d0dfcda234f Mon Sep 17 00:00:00 2001 From: Lester Hedges Date: Fri, 6 Jun 2025 14:02:38 +0100 Subject: [PATCH 078/170] Write energy components when a crash occurs. --- src/somd2/runner/_repex.py | 14 ++++++++++++++ src/somd2/runner/_runner.py | 18 +++++++++++++++++- 2 files changed, 31 insertions(+), 1 deletion(-) diff --git a/src/somd2/runner/_repex.py b/src/somd2/runner/_repex.py index f50a8c43..ea672f5c 100644 --- a/src/somd2/runner/_repex.py +++ b/src/somd2/runner/_repex.py @@ -660,6 +660,13 @@ def run(self): replica_list[i * num_workers : (i + 1) * num_workers], ): if not success: + try: + context = self._dynamics_cache.get(index)[ + 0 + ].context() + self._save_energy_components(index, context) + except: + pass _logger.error( f"Equilibration failed for {_lam_sym} = {self._lambda_values[index]:.5f}: {e}" ) @@ -722,6 +729,13 @@ def run(self): repeat(write_gcmc_ghosts), ): if not result: + try: + context = self._dynamics_cache.get(index)[ + 0 + ].context() + self._save_energy_components(index, context) + except: + pass _logger.error( f"Dynamics failed for {_lam_sym} = {self._lambda_values[index]:.5f}: {energies}" ) diff --git a/src/somd2/runner/_runner.py b/src/somd2/runner/_runner.py index 0e9bb4bc..6af7f991 100644 --- a/src/somd2/runner/_runner.py +++ b/src/somd2/runner/_runner.py @@ -467,7 +467,7 @@ def generate_lam_vals(lambda_base, increment=0.001): # Equilibrate with GCMC moves. 
if gcmc_sampler is not None: _logger.info( - f"Euilibraing with GCMC moves at {_lam_sym} = {lambda_value:.5f}" + f"Euilibrating with GCMC moves at {_lam_sym} = {lambda_value:.5f}" ) for i in range(100): @@ -503,6 +503,10 @@ def generate_lam_vals(lambda_base, increment=0.001): perturbable_constraint=self._config.perturbable_constraint, ) except Exception as e: + try: + self._save_energy_components(index, dynamics.context()) + except: + pass raise RuntimeError(f"Equilibration failed: {e}") # Work out the lambda values for finite-difference gradient analysis. @@ -651,6 +655,10 @@ def generate_lam_vals(lambda_base, increment=0.001): null_energy=self._config.null_energy, ) except Exception as e: + try: + self._save_energy_components(index, dynamics.context()) + except: + pass raise RuntimeError( f"Dynamics block {block+1} for {_lam_sym} = {lambda_value:.5f} failed: {e}" ) @@ -775,6 +783,10 @@ def generate_lam_vals(lambda_base, increment=0.001): f"{_lam_sym} = {lambda_value:.5f} complete, speed = {speed:.2f} ns day-1" ) except Exception as e: + try: + self._save_energy_components(index, dynamics.context()) + except: + pass raise RuntimeError( f"Final dynamics block for {lam_sym} = {lambda_value:.5f} failed: {e}" ) @@ -828,6 +840,10 @@ def generate_lam_vals(lambda_base, increment=0.001): null_energy=self._config.null_energy, ) except Exception as e: + try: + self._save_energy_components(index, dynamics.context()) + except: + pass raise RuntimeError( f"Dynamics for {_lam_sym} = {lambda_value:.5f} failed: {e}" ) From 1f53615e57a89f41c35d68d67959042745459583 Mon Sep 17 00:00:00 2001 From: Lester Hedges Date: Fri, 6 Jun 2025 14:30:43 +0100 Subject: [PATCH 079/170] Simply fractional time calculation. 
[ci skip] --- src/somd2/runner/_repex.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/somd2/runner/_repex.py b/src/somd2/runner/_repex.py index ea672f5c..e72fd3b1 100644 --- a/src/somd2/runner/_repex.py +++ b/src/somd2/runner/_repex.py @@ -591,9 +591,7 @@ def run(self): if self._config.checkpoint_frequency.value() > 0.0: # Calculate the number of blocks and the remainder time. - frac = ( - self._config.runtime.value() / self._config.checkpoint_frequency.value() - ) + frac = (self._config.runtime / self._config.checkpoint_frequency).value() # Handle the case where the runtime is less than the checkpoint frequency. if frac < 1.0: From 3f45b7aaaf3c17289df9b33fc2f73f19811e9f95 Mon Sep 17 00:00:00 2001 From: Lester Hedges Date: Fri, 6 Jun 2025 15:17:24 +0100 Subject: [PATCH 080/170] Move location of energy component saving. [ci skip] --- src/somd2/runner/_repex.py | 24 ++++++++++-------------- 1 file changed, 10 insertions(+), 14 deletions(-) diff --git a/src/somd2/runner/_repex.py b/src/somd2/runner/_repex.py index e72fd3b1..a8e97585 100644 --- a/src/somd2/runner/_repex.py +++ b/src/somd2/runner/_repex.py @@ -658,13 +658,6 @@ def run(self): replica_list[i * num_workers : (i + 1) * num_workers], ): if not success: - try: - context = self._dynamics_cache.get(index)[ - 0 - ].context() - self._save_energy_components(index, context) - except: - pass _logger.error( f"Equilibration failed for {_lam_sym} = {self._lambda_values[index]:.5f}: {e}" ) @@ -727,13 +720,6 @@ def run(self): repeat(write_gcmc_ghosts), ): if not result: - try: - context = self._dynamics_cache.get(index)[ - 0 - ].context() - self._save_energy_components(index, context) - except: - pass _logger.error( f"Dynamics failed for {_lam_sym} = {self._lambda_values[index]:.5f}: {energies}" ) @@ -981,6 +967,11 @@ def _run_block( _logger.success(f"{_lam_sym} = {lam:.5f} complete") except Exception as e: + try: + # Save the energy components for debugging purposes. 
+ self._save_energy_components(index, dynamics.context()) + except: + pass return False, index, e # Return the index and the energies. @@ -1131,6 +1122,11 @@ def _equilibrate(self, index): ) except Exception as e: + try: + # Save the energy components for debugging purposes. + self._save_energy_components(index, dynamics.context()) + except: + pass return False, index, e return True, index, None From aa374f12d0d7b01b15abb751c2a4e08c311be975 Mon Sep 17 00:00:00 2001 From: Lester Hedges Date: Fri, 6 Jun 2025 17:22:52 +0100 Subject: [PATCH 081/170] Reset the GCMCSampler following equilibration. --- src/somd2/runner/_repex.py | 6 ++++++ src/somd2/runner/_runner.py | 7 +++++++ 2 files changed, 13 insertions(+) diff --git a/src/somd2/runner/_repex.py b/src/somd2/runner/_repex.py index a8e97585..e0f46ce4 100644 --- a/src/somd2/runner/_repex.py +++ b/src/somd2/runner/_repex.py @@ -1071,6 +1071,12 @@ def _equilibrate(self, index): # Remove the PyCUDA context from the stack. gcmc_sampler.pop() + # Reset the GCMC sampler. This resets the sampling statistics and + # clears the associated OpenMM forces. This is required if a new + # context is created following equilibration, e.g. if the constraints + # are different for the production phase. + gcmc_sampler.reset() + # Equilibrate. dynamics.run( self._config.equilibration_time, diff --git a/src/somd2/runner/_runner.py b/src/somd2/runner/_runner.py index 6af7f991..826b0214 100644 --- a/src/somd2/runner/_runner.py +++ b/src/somd2/runner/_runner.py @@ -556,6 +556,13 @@ def generate_lam_vals(lambda_base, increment=0.001): # Create the dynamics object. dynamics = system.dynamics(**dynamics_kwargs) + # Reset the GCMC sampler. This resets the sampling statistics and clears + # the associated OpenMM forces. This is required if a new context is + # created following equilibration, e.g. if the constraints are different + # for the production phase. 
+ if gcmc_sampler is not None: + gcmc_sampler.reset() + # Set the number of neighbours used for the energy calculation. # If not None, then we add one to account for the extra windows # used for finite-difference gradient analysis. From ecc145252928f0e5da87309f156f1af624cf2528 Mon Sep 17 00:00:00 2001 From: Lester Hedges Date: Mon, 9 Jun 2025 09:14:40 +0100 Subject: [PATCH 082/170] Change GCMC parameter name. [ci skip] --- src/somd2/runner/_base.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/somd2/runner/_base.py b/src/somd2/runner/_base.py index 2e4f325b..ccbf4765 100644 --- a/src/somd2/runner/_base.py +++ b/src/somd2/runner/_base.py @@ -526,7 +526,7 @@ def __init__(self, system, config): ), "standard_volume": str(self._config.gcmc_standard_volume), "radius": str(self._config.gcmc_radius), - "max_gcmc_waters": self._config.gcmc_num_waters, + "num_ghost_waters": self._config.gcmc_num_waters, "bulk_sampling_probability": self._config.gcmc_bulk_sampling_probability, "cutoff_type": self._config.cutoff_type, "cutoff": str(self._config.cutoff), From 0133994e4a5b406b1682d9e24a4d364d50d690dd Mon Sep 17 00:00:00 2001 From: Lester Hedges Date: Mon, 9 Jun 2025 09:23:04 +0100 Subject: [PATCH 083/170] Don't evaluate timing statistics on error. [ci skip] --- src/somd2/runner/_runner.py | 27 +++++++++++++++------------ 1 file changed, 15 insertions(+), 12 deletions(-) diff --git a/src/somd2/runner/_runner.py b/src/somd2/runner/_runner.py index 826b0214..183ff392 100644 --- a/src/somd2/runner/_runner.py +++ b/src/somd2/runner/_runner.py @@ -199,6 +199,7 @@ def run(self): import concurrent.futures as _futures + success = True with _futures.ProcessPoolExecutor(max_workers=self.max_workers) as executor: jobs = {} for index, lambda_value in enumerate(self._lambda_values): @@ -212,6 +213,7 @@ def run(self): _logger.error( f"Exception raised for {_lam_sym} = {lambda_value}: {e}" ) + success = False # Kill all current and future jobs if keyboard interrupt. 
except KeyboardInterrupt: @@ -219,22 +221,23 @@ def run(self): for pid in executor._processes: executor._processes[pid].terminate() - # Record the end time. - end = _timer() + if success: + # Record the end time. + end = _timer() - # Work how many fractional days the simulation took. - days = (end - start) / 86400 + # Work how many fractional days the simulation took. + days = (end - start) / 86400 - # Calculate the speed in nanoseconds per day. - speed = time.to("ns") / days + # Calculate the speed in nanoseconds per day. + speed = time.to("ns") / days - # Log the speed. - _logger.info(f"Overall performance: {speed:.2f} ns day-1") + # Log the speed. + _logger.info(f"Overall performance: {speed:.2f} ns day-1") - # Log the run time in minutes. - _logger.success( - f"Simulation finished. Run time: {(end - start) / 60:.2f} minutes" - ) + # Log the run time in minutes. + _logger.success( + f"Simulation finished. Run time: {(end - start) / 60:.2f} minutes" + ) def run_window(self, index): """ From f98df3e303b9ec236c5bcb16f59f641f63a91438 Mon Sep 17 00:00:00 2001 From: Lester Hedges Date: Mon, 9 Jun 2025 09:25:29 +0100 Subject: [PATCH 084/170] Comment tweak. [ci skip] --- src/somd2/runner/_runner.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/somd2/runner/_runner.py b/src/somd2/runner/_runner.py index 183ff392..06a609ae 100644 --- a/src/somd2/runner/_runner.py +++ b/src/somd2/runner/_runner.py @@ -560,9 +560,7 @@ def generate_lam_vals(lambda_base, increment=0.001): dynamics = system.dynamics(**dynamics_kwargs) # Reset the GCMC sampler. This resets the sampling statistics and clears - # the associated OpenMM forces. This is required if a new context is - # created following equilibration, e.g. if the constraints are different - # for the production phase. + # the associated OpenMM forces. 
if gcmc_sampler is not None: gcmc_sampler.reset() From f0d4a40a13ff5b9119fdc9692bd84bfbdc240c13 Mon Sep 17 00:00:00 2001 From: Lester Hedges Date: Mon, 9 Jun 2025 10:00:12 +0100 Subject: [PATCH 085/170] Set success to False if keyboard interrupt. [ci skip] --- src/somd2/runner/_runner.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/somd2/runner/_runner.py b/src/somd2/runner/_runner.py index 06a609ae..4486f3c8 100644 --- a/src/somd2/runner/_runner.py +++ b/src/somd2/runner/_runner.py @@ -220,6 +220,7 @@ def run(self): _logger.error("Cancelling job...") for pid in executor._processes: executor._processes[pid].terminate() + success = False if success: # Record the end time. From df2b98e72de3374a3db13ad8a0ab83bdc5d7ce69 Mon Sep 17 00:00:00 2001 From: Lester Hedges Date: Mon, 9 Jun 2025 10:17:58 +0100 Subject: [PATCH 086/170] Return when exception is caught. [ci skip] --- src/somd2/runner/_runner.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/somd2/runner/_runner.py b/src/somd2/runner/_runner.py index 4486f3c8..8cdd461b 100644 --- a/src/somd2/runner/_runner.py +++ b/src/somd2/runner/_runner.py @@ -272,7 +272,7 @@ def run_window(self, index): _logger.success( f"{_lam_sym} = {lambda_value} already complete. Skipping." ) - return True + return True, time else: _logger.info( f"Restarting {_lam_sym} = {lambda_value} at time {time}, " @@ -308,6 +308,7 @@ def run_window(self, index): with self._lock: self._update_gpu_pool(gpu) _logger.error(f"Error running {_lam_sym} = {lambda_value}: {e}") + return False, _sr.u("0ps") # All other platforms. 
else: @@ -318,6 +319,7 @@ def run_window(self, index): time = self._run(system, index, is_restart=self._is_restart) except Exception as e: _logger.error(f"Error running {_lam_sym} = {lambda_value}: {e}") + return False, _sr.u("0ps") return True, time From 06f9efa2fe17be9048c8a2011c8008cb9f29b255 Mon Sep 17 00:00:00 2001 From: Lester Hedges Date: Mon, 9 Jun 2025 11:56:26 +0100 Subject: [PATCH 087/170] Typo. [ci skip] --- src/somd2/runner/_runner.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/somd2/runner/_runner.py b/src/somd2/runner/_runner.py index 8cdd461b..71a0e134 100644 --- a/src/somd2/runner/_runner.py +++ b/src/somd2/runner/_runner.py @@ -473,7 +473,7 @@ def generate_lam_vals(lambda_base, increment=0.001): # Equilibrate with GCMC moves. if gcmc_sampler is not None: _logger.info( - f"Euilibrating with GCMC moves at {_lam_sym} = {lambda_value:.5f}" + f"Equilibrating with GCMC moves at {_lam_sym} = {lambda_value:.5f}" ) for i in range(100): From 1341dccfa1ff3483237f139a153386323416b45b Mon Sep 17 00:00:00 2001 From: Lester Hedges Date: Mon, 9 Jun 2025 12:20:25 +0100 Subject: [PATCH 088/170] Fix formatting of energy components output. [ci skip] --- src/somd2/runner/_base.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/somd2/runner/_base.py b/src/somd2/runner/_base.py index ccbf4765..7b2373ee 100644 --- a/src/somd2/runner/_base.py +++ b/src/somd2/runner/_base.py @@ -1466,8 +1466,10 @@ def _save_energy_components(self, index, context): # Process the records. for i, f in enumerate(system.getForces()): state = new_context.getState(getEnergy=True, groups={i}) - header += f"{f.getName():>25}" - record += f"{state.getPotentialEnergy().value_in_unit(openmm.unit.kilocalories_per_mole):>25.2f}" + name = f.getName() + name_len = len(name) + header += f"{f.getName():>{name_len+2}}" + record += f"{state.getPotentialEnergy().value_in_unit(openmm.unit.kilocalories_per_mole):>{name_len+2}.2f}" # Write to file. 
if self._nrg_sample == 0: From b1dc9214c5555937d683a2b443fd8cde597ff527 Mon Sep 17 00:00:00 2001 From: Lester Hedges Date: Fri, 13 Jun 2025 13:54:36 +0100 Subject: [PATCH 089/170] No need to store lambda array in parquet metadata. [ci skip] --- src/somd2/runner/_base.py | 1 - tests/runner/test_lambda_values.py | 20 ++++++++++++++------ 2 files changed, 14 insertions(+), 7 deletions(-) diff --git a/src/somd2/runner/_base.py b/src/somd2/runner/_base.py index 7b2373ee..144877a1 100644 --- a/src/somd2/runner/_base.py +++ b/src/somd2/runner/_base.py @@ -1324,7 +1324,6 @@ def _checkpoint( "somd2 version": __version__, "sire version": f"{_sire_version}+{_sire_revisionid}", "lambda": str(lam), - "lambda_array": lambda_energy, "speed": speed, "temperature": str(self._config.temperature.value()), } diff --git a/tests/runner/test_lambda_values.py b/tests/runner/test_lambda_values.py index c19f5b28..ec6267f1 100644 --- a/tests/runner/test_lambda_values.py +++ b/tests/runner/test_lambda_values.py @@ -41,9 +41,13 @@ def test_lambda_values(ethane_methanol): Path(tmpdir) / "energy_traj_0.00000.parquet" ) - # Make sure the lambda_array in the metadata is correct. This is the - # lambda_values list in the config. - assert meta["lambda_array"] == [0.0, 0.5, 1.0] + # Make sure the energy trajectory has the expected columns. + cols = energy_traj.columns + found = 0 + for col in cols: + if col in config["lambda_values"]: + found += 1 + assert found == len(config["lambda_values"]) # Make sure the second dimension of the energy trajectory is the correct # size. This is one for the current lambda value, one for its gradient, @@ -84,9 +88,13 @@ def test_lambda_energy(ethane_methanol): Path(tmpdir) / "energy_traj_0.00000.parquet" ) - # Make sure the lambda_array in the metadata is correct. This is the - # sampled lambda_values plus the lambda_energy values in the config. - assert meta["lambda_array"] == [0.0, 0.5, 1.0] + # Make sure the energy trajectory has the expected columns. 
+ cols = energy_traj.columns + found = 0 + for col in cols: + if col in config["lambda_values"] or col in config["lambda_energy"]: + found += 1 + assert found == len(config["lambda_values"]) + len(config["lambda_energy"]) # Make sure the second dimension of the energy trajectory is the correct. # This is the sampled lambda values, i.e. unique entries from lambda_values From fe607f21c95820df9bcc2232791acc0ee9c8908f Mon Sep 17 00:00:00 2001 From: Lester Hedges Date: Tue, 17 Jun 2025 11:58:25 +0100 Subject: [PATCH 090/170] Raise exception if dynamics cache context creation fails. [ci skip] --- src/somd2/runner/_repex.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/somd2/runner/_repex.py b/src/somd2/runner/_repex.py index e0f46ce4..eaf51630 100644 --- a/src/somd2/runner/_repex.py +++ b/src/somd2/runner/_repex.py @@ -227,9 +227,9 @@ def _create_dynamics( try: dynamics = mols.dynamics(**dynamics_kwargs) except Exception as e: - _logger.error( - f"Could not create dynamics object for lambda {lam:.5f}: {e}" - ) + msg = f"Could not create dynamics object for lambda {lam:.5f} on device {device}: {e}" + _logger.error(msg) + raise RuntimeError(msg) from e # Append the dynamics object. self._dynamics.append(dynamics) From 62f2218594c288198b810f33b29e2622ea48c03a Mon Sep 17 00:00:00 2001 From: Lester Hedges Date: Tue, 17 Jun 2025 12:09:43 +0100 Subject: [PATCH 091/170] Use sys.exit() rather than exit(). 
[ci skip] --- src/somd2/app/run.py | 1 + src/somd2/runner/_base.py | 3 ++- src/somd2/runner/_repex.py | 9 +++++---- 3 files changed, 8 insertions(+), 5 deletions(-) diff --git a/src/somd2/app/run.py b/src/somd2/app/run.py index 2bb8b130..be63c782 100644 --- a/src/somd2/app/run.py +++ b/src/somd2/app/run.py @@ -36,6 +36,7 @@ def cli(): """ from argparse import Namespace + from sys import exit from somd2 import _logger from somd2.config import Config diff --git a/src/somd2/runner/_base.py b/src/somd2/runner/_base.py index 144877a1..4d40d852 100644 --- a/src/somd2/runner/_base.py +++ b/src/somd2/runner/_base.py @@ -851,6 +851,7 @@ def _prepare_output(self): Dictionary of file names for each lambda value. """ from pathlib import Path as _Path + from sys import exit as _exit filenames = {} deleted = [] @@ -872,7 +873,7 @@ def _prepare_output(self): _logger.error( f"The following files already exist, use --overwrite to overwrite them: {list(set((deleted_str)))} \n" ) - exit(1) + _exit(1) # Loop over files to be deleted, ignoring duplicates. for file in list(set(deleted)): file.unlink() diff --git a/src/somd2/runner/_repex.py b/src/somd2/runner/_repex.py index eaf51630..0101c067 100644 --- a/src/somd2/runner/_repex.py +++ b/src/somd2/runner/_repex.py @@ -25,6 +25,7 @@ import numpy as _np import pickle as _pickle +import sys as _sys import sire as _sr @@ -507,7 +508,7 @@ def __init__(self, system, config): time = self._system[0].time() if time > self._config.runtime - self._config.timestep: _logger.success(f"Simulation already complete. Exiting.") - exit(0) + _sys.exit(0) try: with open(self._repex_state, "rb") as f: @@ -646,7 +647,7 @@ def run(self): raise e except KeyboardInterrupt: _logger.error("Minimisation cancelled. Exiting.") - exit(1) + _sys.exit(1) # Equilibrate the system. if self._is_equilibration: @@ -664,7 +665,7 @@ def run(self): raise e except KeyboardInterrupt: _logger.error("Equilibration cancelled. 
Exiting.") - exit(1) + _sys.exit(1) # Current block number. block = self._start_block @@ -727,7 +728,7 @@ def run(self): results.append((index, energies)) except KeyboardInterrupt: _logger.error("Dynamics cancelled. Exiting.") - exit(1) + _sys.exit(1) if i < cycles: # Assemble and energy matrix from the results. From 7febee47cdc3374b0f654558626f8c4a269216ae Mon Sep 17 00:00:00 2001 From: Lester Hedges Date: Wed, 18 Jun 2025 09:30:31 +0100 Subject: [PATCH 092/170] Use a lock file when checkpointing. [ci skip] --- environment.yaml | 1 + src/somd2/config/_config.py | 2 +- src/somd2/runner/_base.py | 247 +++++++++++++++++++----------------- 3 files changed, 133 insertions(+), 117 deletions(-) diff --git a/environment.yaml b/environment.yaml index 0ad0294c..12846aaf 100644 --- a/environment.yaml +++ b/environment.yaml @@ -7,6 +7,7 @@ channels: dependencies: - biosimspace - git + - filelock - loguru - numba - pycuda diff --git a/src/somd2/config/_config.py b/src/somd2/config/_config.py index e71c41a9..d2fcf3f7 100644 --- a/src/somd2/config/_config.py +++ b/src/somd2/config/_config.py @@ -389,7 +389,7 @@ def __init__( This is useful when debugging crashes. timeout: str - Timeout for the minimiser. + Timeout for the minimiser and file lock. num_energy_neighbours: int The number of neighbouring windows to use when computing the energy diff --git a/src/somd2/runner/_base.py b/src/somd2/runner/_base.py index 4d40d852..11818955 100644 --- a/src/somd2/runner/_base.py +++ b/src/somd2/runner/_base.py @@ -490,6 +490,9 @@ def __init__(self, system, config): _logger.error(msg) raise ValueError(msg) + # Create the lock file name. + self._lock_file = str(self._config.output_directory / "somd2.lock") + # Create the default dynamics kwargs dictionary. These can be overloaded # as needed. self._dynamics_kwargs = { @@ -1288,149 +1291,161 @@ def _checkpoint( Whether this is the final block of the simulation. 
""" + from filelock import FileLock as _FileLock from somd2 import __version__, _sire_version, _sire_revisionid - # Save the end-state topologies for trajectory analysis and visualisation. - if block == 0 and index == 0: - mols0 = _sr.morph.link_to_reference(system) - mols1 = _sr.morph.link_to_perturbed(system) - _sr.save(mols0, self._filenames["topology0"]) - _sr.save(mols1, self._filenames["topology1"]) - - # If this is a GCMC simulation, then save the end state - # topologies to PDB format to allow analysis with grand. - if self._config.gcmc: - _sr.save( - mols0, - self._filenames["topology0"].replace(".prm7", ".pdb"), - ) - _sr.save( - mols1, - self._filenames["topology1"].replace(".prm7", ".pdb"), - ) - - # Get the lambda value. - lam = self._lambda_values[index] + # Create the lock. + lock = _FileLock(self._lock_file) - # Get the energy trajectory. - df = system.energy_trajectory(to_alchemlyb=True, energy_unit="kT") + # Acquire the file lock to ensure that the checkpoint files are in a consistent + # state if read by another process. + with lock.acquire(timeout=self._config.timeout): - # Set the lambda values at which energies were sampled. - if lambda_energy is None: - lambda_energy = self._lambda_values + # Save the end-state topologies for trajectory analysis and visualisation. + if block == 0 and index == 0: + mols0 = _sr.morph.link_to_reference(system) + mols1 = _sr.morph.link_to_perturbed(system) + _sr.save(mols0, self._filenames["topology0"]) + _sr.save(mols1, self._filenames["topology1"]) - # Create the metadata. - metadata = { - "attrs": df.attrs, - "somd2 version": __version__, - "sire version": f"{_sire_version}+{_sire_revisionid}", - "lambda": str(lam), - "speed": speed, - "temperature": str(self._config.temperature.value()), - } - - # Add the lambda gradient if available. - if lambda_grad is not None: - metadata["lambda_grad"] = lambda_grad - - if is_final_block: - # Assemble and save the final trajectory. 
- if self._config.save_trajectories: - # Save the final trajectory chunk to file. - if system.num_frames() > 0: - traj_filename = ( - self._filenames[index]["trajectory_chunk"] + f"{block}.dcd" + # If this is a GCMC simulation, then save the end state + # topologies to PDB format to allow analysis with grand. + if self._config.gcmc: + _sr.save( + mols0, + self._filenames["topology0"].replace(".prm7", ".pdb"), ) _sr.save( - system.trajectory(), - traj_filename, - format=["DCD"], + mols1, + self._filenames["topology1"].replace(".prm7", ".pdb"), ) - # Create the final topology file name. - topology0 = self._filenames["topology0"] + # Get the lambda value. + lam = self._lambda_values[index] - # Create the final trajectory file name. - traj_filename = self._filenames[index]["trajectory"] + # Get the energy trajectory. + df = system.energy_trajectory(to_alchemlyb=True, energy_unit="kT") - # Glob for the trajectory chunks. - from glob import glob + # Set the lambda values at which energies were sampled. + if lambda_energy is None: + lambda_energy = self._lambda_values - traj_chunks = sorted( - glob(f"{self._filenames[index]['trajectory_chunk']}*") - ) + # Create the metadata. + metadata = { + "attrs": df.attrs, + "somd2 version": __version__, + "sire version": f"{_sire_version}+{_sire_revisionid}", + "lambda": str(lam), + "speed": speed, + "temperature": str(self._config.temperature.value()), + } - # If this is a restart, then we need to check for an existing - # trajectory file with the same name. If it exists and is non-empty, - # then copy it to a backup file and prepend it to the list of chunks. - if self._config.restart: - path = _Path(traj_filename) - if path.exists() and path.stat().st_size > 0: - from shutil import copyfile + # Add the lambda gradient if available. + if lambda_grad is not None: + metadata["lambda_grad"] = lambda_grad + + if is_final_block: + # Assemble and save the final trajectory. 
+ if self._config.save_trajectories: + # Save the final trajectory chunk to file. + if system.num_frames() > 0: + traj_filename = ( + self._filenames[index]["trajectory_chunk"] + f"{block}.dcd" + ) + _sr.save( + system.trajectory(), + traj_filename, + format=["DCD"], + ) - copyfile(traj_filename, f"{traj_filename}.bak") - traj_chunks = [f"{traj_filename}.bak"] + traj_chunks + # Create the final topology file name. + topology0 = self._filenames["topology0"] - # Load the topology and chunked trajectory files. - mols = _sr.load([topology0] + traj_chunks) + # Create the final trajectory file name. + traj_filename = self._filenames[index]["trajectory"] - # Save the final trajectory to a single file. - _sr.save(mols.trajectory(), traj_filename, format=["DCD"]) + # Glob for the trajectory chunks. + from glob import glob - # Now remove the chunked trajectory files. - for chunk in traj_chunks: - _Path(chunk).unlink() + traj_chunks = sorted( + glob(f"{self._filenames[index]['trajectory_chunk']}*") + ) - # Add config and lambda value to the system properties. - system.set_property("config", self._config.as_dict(sire_compatible=True)) - system.set_property("lambda", lam) + # If this is a restart, then we need to check for an existing + # trajectory file with the same name. If it exists and is non-empty, + # then copy it to a backup file and prepend it to the list of chunks. + if self._config.restart: + path = _Path(traj_filename) + if path.exists() and path.stat().st_size > 0: + from shutil import copyfile - # Stream the final system to file. - _sr.stream.save(system, self._filenames[index]["checkpoint"]) + copyfile(traj_filename, f"{traj_filename}.bak") + traj_chunks = [f"{traj_filename}.bak"] + traj_chunks - # Create the final parquet file. - _dataframe_to_parquet( - df, - metadata=metadata, - filename=self._filenames[index]["energy_traj"], - ) + # Load the topology and chunked trajectory files. 
+ mols = _sr.load([topology0] + traj_chunks) - else: - # Update the starting block if necessary. - if block == 0: - block = self._start_block - - # Save the current trajectory chunk to file. - if self._config.save_trajectories: - if system.num_frames() > 0: - traj_filename = ( - self._filenames[index]["trajectory_chunk"] + f"{block}.dcd" - ) - _sr.save( - system.trajectory(), - traj_filename, - format=["DCD"], - ) + # Save the final trajectory to a single file. + _sr.save(mols.trajectory(), traj_filename, format=["DCD"]) - # Encode the configuration and lambda value as system properties. - system.set_property("config", self._config.as_dict(sire_compatible=True)) - system.set_property("lambda", lam) + # Now remove the chunked trajectory files. + for chunk in traj_chunks: + _Path(chunk).unlink() - # Stream the checkpoint to file. - _sr.stream.save(system, self._filenames[index]["checkpoint"]) + # Add config and lambda value to the system properties. + system.set_property( + "config", self._config.as_dict(sire_compatible=True) + ) + system.set_property("lambda", lam) - # Create the parquet file name. - filename = self._filenames[index]["energy_traj"] + # Stream the final system to file. + _sr.stream.save(system, self._filenames[index]["checkpoint"]) + + # Create the final parquet file. + _dataframe_to_parquet( + df, + metadata=metadata, + filename=self._filenames[index]["energy_traj"], + ) - # Create the parquet file. - if block == self._start_block: - _dataframe_to_parquet(df, metadata=metadata, filename=filename) - # Append to the parquet file. else: - _parquet_append( - filename, - df.iloc[-self._energy_per_block :], + # Update the starting block if necessary. + if block == 0: + block = self._start_block + + # Save the current trajectory chunk to file. 
+ if self._config.save_trajectories: + if system.num_frames() > 0: + traj_filename = ( + self._filenames[index]["trajectory_chunk"] + f"{block}.dcd" + ) + _sr.save( + system.trajectory(), + traj_filename, + format=["DCD"], + ) + + # Encode the configuration and lambda value as system properties. + system.set_property( + "config", self._config.as_dict(sire_compatible=True) ) + system.set_property("lambda", lam) + + # Stream the checkpoint to file. + _sr.stream.save(system, self._filenames[index]["checkpoint"]) + + # Create the parquet file name. + filename = self._filenames[index]["energy_traj"] + + # Create the parquet file. + if block == self._start_block: + _dataframe_to_parquet(df, metadata=metadata, filename=filename) + # Append to the parquet file. + else: + _parquet_append( + filename, + df.iloc[-self._energy_per_block :], + ) def _save_energy_components(self, index, context): """ From 34d7acc1238034c4c9cc4d798de8f3baa1de27ea Mon Sep 17 00:00:00 2001 From: Lester Hedges Date: Wed, 18 Jun 2025 12:13:30 +0100 Subject: [PATCH 093/170] Fix parameter name. [ci skip] --- src/somd2/runner/_repex.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/somd2/runner/_repex.py b/src/somd2/runner/_repex.py index 0101c067..9c1dc660 100644 --- a/src/somd2/runner/_repex.py +++ b/src/somd2/runner/_repex.py @@ -533,7 +533,7 @@ def __init__(self, system, config): self._rest2_scale_factors, self._num_gpus, self._dynamics_kwargs, - gcmc_kwargs=self._config.gcmc_kwargs, + gcmc_kwargs=self._gcmc_kwargs, output_directory=self._config.output_directory, ) From 2f208f9acb35a028d87038ac58532942a06437c5 Mon Sep 17 00:00:00 2001 From: Lester Hedges Date: Wed, 18 Jun 2025 12:24:37 +0100 Subject: [PATCH 094/170] Use file lock when writing repex stats and state. 
[ci skip] --- src/somd2/runner/_repex.py | 36 ++++++++++++++++++++++-------------- 1 file changed, 22 insertions(+), 14 deletions(-) diff --git a/src/somd2/runner/_repex.py b/src/somd2/runner/_repex.py index 9c1dc660..6f052231 100644 --- a/src/somd2/runner/_repex.py +++ b/src/somd2/runner/_repex.py @@ -21,6 +21,7 @@ __all__ = ["RepexRunner"] +from filelock import FileLock as _FileLock from numba import njit as _njit import numpy as _np @@ -128,6 +129,8 @@ def __getstate__(self): "_states": self._states, "_old_states": self._old_states, "_openmm_states": self._openmm_states, + "_gcmc_samplers": self._gcmc_samplers, + "_gcmc_states": self._gcmc_states, "_num_proposed": self._num_proposed, "_num_accepted": self._num_accepted, "_num_swaps": self._num_swaps, @@ -752,26 +755,31 @@ def run(self): # Update the block number. block += 1 - # Save the transition matrix. - _logger.info("Saving replica exchange transition matrix") - self._save_transition_matrix() + # Guard the repex state and transition matrix saving with a file lock. + lock = _FileLock(self._lock_file) + with lock.acquire(timeout=self._config.timeout): + # Save the transition matrix. + _logger.info("Saving replica exchange transition matrix") + self._save_transition_matrix() - # Pickle the dynamics cache. - _logger.info("Saving replica exchange state") - with open(self._repex_state, "wb") as f: - _pickle.dump(self._dynamics_cache, f) + # Pickle the dynamics cache. + _logger.info("Saving replica exchange state") + with open(self._repex_state, "wb") as f: + _pickle.dump(self._dynamics_cache, f) # Record the end time for the production block. prod_end = time() - # Save the final transition matrix. - _logger.info("Saving final replica exchange transition matrix") - self._save_transition_matrix() + lock = _FileLock(self._lock_file) + with lock.acquire(timeout=self._config.timeout): + # Save the final transition matrix. 
+ _logger.info("Saving final replica exchange transition matrix") + self._save_transition_matrix() - # Pickle final state of the dynamics cache. - _logger.info("Saving final replica exchange state") - with open(self._repex_state, "wb") as f: - _pickle.dump(self._dynamics_cache, f) + # Pickle final state of the dynamics cache. + _logger.info("Saving final replica exchange state") + with open(self._repex_state, "wb") as f: + _pickle.dump(self._dynamics_cache, f) # Record the end time. end = time() From 01f41f3237b7d49cdac53a1b660a70b755336771 Mon Sep 17 00:00:00 2001 From: Lester Hedges Date: Wed, 18 Jun 2025 12:28:08 +0100 Subject: [PATCH 095/170] Don't pickle GCMC samplers. [ci skip] --- src/somd2/runner/_repex.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/somd2/runner/_repex.py b/src/somd2/runner/_repex.py index 6f052231..e043efe8 100644 --- a/src/somd2/runner/_repex.py +++ b/src/somd2/runner/_repex.py @@ -129,7 +129,8 @@ def __getstate__(self): "_states": self._states, "_old_states": self._old_states, "_openmm_states": self._openmm_states, - "_gcmc_samplers": self._gcmc_samplers, + # Don't pickle the GCMC samplers since they need to be recreated. + "_gcmc_samplers": None * len(self._gcmc_samplers), "_gcmc_states": self._gcmc_states, "_num_proposed": self._num_proposed, "_num_accepted": self._num_accepted, From 8d533770a861642fb5c88b90122982a72e8b3dda Mon Sep 17 00:00:00 2001 From: Lester Hedges Date: Thu, 19 Jun 2025 20:17:42 +0100 Subject: [PATCH 096/170] Formatting tweak. [ci skip] --- src/somd2/runner/_base.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/somd2/runner/_base.py b/src/somd2/runner/_base.py index 11818955..b0769017 100644 --- a/src/somd2/runner/_base.py +++ b/src/somd2/runner/_base.py @@ -1016,12 +1016,14 @@ def _compare_configs(config1, config2): if isinstance(v2, _GeneralUnit): v2 = str(v2) - # If one is from sire and the other is not, will raise error even though they are the same. 
+ # If one is from sire and the other is not, will raise error + # even though they are the same. if (v1 == None and v2 == False) or (v2 == None and v1 == False): continue elif v1 != v2: raise ValueError( - f"{key} has changed since the last run. This is not allowed when using the restart option." + f"{key} has changed since the last run. This is not " + "allowed when using the restart option." ) def _verify_restart_config(self): From 43a775aa74e104ba60ffaf22d36c0836a3a23658 Mon Sep 17 00:00:00 2001 From: Lester Hedges Date: Fri, 20 Jun 2025 12:34:53 +0100 Subject: [PATCH 097/170] Convert Sire containers to a list when loading config files. [ci skip] --- src/somd2/io/_io.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/src/somd2/io/_io.py b/src/somd2/io/_io.py index 0b4518db..1615102d 100644 --- a/src/somd2/io/_io.py +++ b/src/somd2/io/_io.py @@ -142,6 +142,13 @@ def yaml_to_dict(path): except Exception as e: raise ValueError(f"Could not load YAML file: {e}") + # Convert Sire containers to lists. + for key, value in d.items(): + try: + d[key] = value.to_list() + except: + pass + return d From 71d788cc59bc84fb67b6a18c3793adf78229da03 Mon Sep 17 00:00:00 2001 From: Lester Hedges Date: Fri, 20 Jun 2025 17:31:39 +0100 Subject: [PATCH 098/170] Convert Sire containers to lists prior to comparison. [ci skip] --- src/somd2/io/_io.py | 7 ------- src/somd2/runner/_base.py | 10 ++++++++++ 2 files changed, 10 insertions(+), 7 deletions(-) diff --git a/src/somd2/io/_io.py b/src/somd2/io/_io.py index 1615102d..0b4518db 100644 --- a/src/somd2/io/_io.py +++ b/src/somd2/io/_io.py @@ -142,13 +142,6 @@ def yaml_to_dict(path): except Exception as e: raise ValueError(f"Could not load YAML file: {e}") - # Convert Sire containers to lists. 
- for key, value in d.items(): - try: - d[key] = value.to_list() - except: - pass - return d diff --git a/src/somd2/runner/_base.py b/src/somd2/runner/_base.py index b0769017..21dfd513 100644 --- a/src/somd2/runner/_base.py +++ b/src/somd2/runner/_base.py @@ -1016,6 +1016,16 @@ def _compare_configs(config1, config2): if isinstance(v2, _GeneralUnit): v2 = str(v2) + # Convert Sire containers to lists for comparison. + try: + v1 = v1.to_list() + except: + pass + try: + v2 = v2.to_list() + except: + pass + # If one is from sire and the other is not, will raise error # even though they are the same. if (v1 == None and v2 == False) or (v2 == None and v1 == False): From bd86e57f8017740741295dffdcb7ef48079b656b Mon Sep 17 00:00:00 2001 From: Lester Hedges Date: Sat, 21 Jun 2025 11:22:32 +0100 Subject: [PATCH 099/170] Reset water state when context is recreated. [ci skip] --- src/somd2/runner/_repex.py | 31 +++++++++++++++++++++++++------ 1 file changed, 25 insertions(+), 6 deletions(-) diff --git a/src/somd2/runner/_repex.py b/src/somd2/runner/_repex.py index e043efe8..74674d1b 100644 --- a/src/somd2/runner/_repex.py +++ b/src/somd2/runner/_repex.py @@ -324,7 +324,7 @@ def save_gcmc_state(self, index): gcmc_sampler = self._gcmc_samplers[index] # Store the state. - self._gcmc_states[index] = gcmc_sampler.water_state().copy() + self._gcmc_states[index] = gcmc_sampler.water_state() def get_states(self): """ @@ -1081,11 +1081,8 @@ def _equilibrate(self, index): # Remove the PyCUDA context from the stack. gcmc_sampler.pop() - # Reset the GCMC sampler. This resets the sampling statistics and - # clears the associated OpenMM forces. This is required if a new - # context is created following equilibration, e.g. if the constraints - # are different for the production phase. - gcmc_sampler.reset() + # Store the current water state. + water_state = gcmc_sampler.water_state() # Equilibrate. 
dynamics.run( @@ -1119,6 +1116,28 @@ def _equilibrate(self, index): # Create the production dynamics object. dynamics = system.dynamics(**dynamics_kwargs) + # Reset the GCMC water state. + if gcmc_sampler is not None: + # Reset the GCMC sampler. This resets the sampling statistics and + # clears the associated OpenMM forces. This is required since a new + # context is created following equilibration, e.g. because constraints + # or the timestep are different for the production phase. + gcmc_sampler.reset() + + # Push the PyCUDA context on top of the stack. + gcmc_sampler.push() + + # Set the water state. + gcmc_sampler._set_water_state( + _np.arange(len(water_state)), + water_state, + dynamics.context(), + force=True, + ) + + # Remove the PyCUDA context from the stack. + gcmc_sampler.pop() + # Perform minimisation at the end of equilibration only if the # timestep is increasing, or the constraint is changing. if (self._config.timestep > self._config.equilibration_timestep) or ( From d0b5a6b5448405cbe1dddd4308db8769d179a836 Mon Sep 17 00:00:00 2001 From: Lester Hedges Date: Sat, 21 Jun 2025 12:42:08 +0100 Subject: [PATCH 100/170] Clarify reason for resetting the water state. [ci skip] --- src/somd2/runner/_repex.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/somd2/runner/_repex.py b/src/somd2/runner/_repex.py index 74674d1b..90a43110 100644 --- a/src/somd2/runner/_repex.py +++ b/src/somd2/runner/_repex.py @@ -1116,7 +1116,9 @@ def _equilibrate(self, index): # Create the production dynamics object. dynamics = system.dynamics(**dynamics_kwargs) - # Reset the GCMC water state. + # Reset the GCMC water state. The dynamics object is created from + # the original Sire system, so the water state in the context does + # not match the current GCMC water state. if gcmc_sampler is not None: # Reset the GCMC sampler. This resets the sampling statistics and # clears the associated OpenMM forces. 
This is required since a new From d756925e2c970c69808d0fcbc25f5ccc2a11791d Mon Sep 17 00:00:00 2001 From: Lester Hedges Date: Sat, 21 Jun 2025 16:33:34 +0100 Subject: [PATCH 101/170] Formatting tweaks. [ci skip] --- README.md | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index cecf0c3e..c201d22b 100644 --- a/README.md +++ b/README.md @@ -151,8 +151,9 @@ be chosen as a trade off between accuracy and computational cost. A value of aro ## Ghost atom modifications -We support the modification of ghost atom bonded terms to avoid spurious coupling -to the physical system using the approach described in [this](https://pubs.acs.org/doi/10.1021/acs.jctc.0c01328) paper. +We support modification of ghost atom bonded terms to avoid spurious coupling +to the physical system using the approach described in +[this](https://pubs.acs.org/doi/10.1021/acs.jctc.0c01328) paper. These are enabled by default, but can be disabled using the ``--no-ghost-modifications`` option. Alternatively, we also provide the `ghostly` command-line tool that can be used to apply the modifications to perturbable system without running a simulation, @@ -219,5 +220,5 @@ nvidia-cuda-mps-control -d The number of contexts that can be run in parallel is then controlled by the `--oversubscription-factor` option, which defaults to 1. -More details on MPS, including tuning options, can be found i nthe following +More details on MPS, including tuning options, can be found in the following [techical blog](https://developer.nvidia.com/blog/maximizing-openmm-molecular-dynamics-throughput-with-nvidia-multi-process-service/). From f083672428c0bcc240fb6e28dff5ea0fa710efba Mon Sep 17 00:00:00 2001 From: Lester Hedges Date: Sat, 21 Jun 2025 16:44:43 +0100 Subject: [PATCH 102/170] Add GCMC section to README. 
[ci skip] --- README.md | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index c201d22b..96e41096 100644 --- a/README.md +++ b/README.md @@ -112,6 +112,20 @@ the value of `--rest2-scale`. By passing multiple values for `--rest2-scale`, th user can fully control the schedule. When doing so, the number of values must match the number of lambda windows. +## GCMC + +SOMD2 also supports grand canonical Monte Carlo (GCMC) water sampling using +the [loch](https://github.com/OpenBioSim/loch) package. This can be enabled +using the `--gcmc` option. To define a GCMC region, use the `--gcmc-selection` +option, which should be a `Sire` selection string that specifies the atoms +defining the centre of geometry for the GCMC region. The radius of the GCMC +sphere can be controlled using the `--gcmc-radius` option. To see all GCMC +related options, run: + +``` +somd2 --help | grep -A2 ' --gcmc' +``` + ## Analysis Simulation output will be written to the directory specified using the @@ -152,7 +166,7 @@ be chosen as a trade off between accuracy and computational cost. A value of aro ## Ghost atom modifications We support modification of ghost atom bonded terms to avoid spurious coupling -to the physical system using the approach described in +to the physical system using the approach described in [this](https://pubs.acs.org/doi/10.1021/acs.jctc.0c01328) paper. These are enabled by default, but can be disabled using the ``--no-ghost-modifications`` option. Alternatively, we also provide the `ghostly` command-line tool that can From d0b3ea910c31208350d00c83ec76b44365fc05a9 Mon Sep 17 00:00:00 2001 From: Lester Hedges Date: Sat, 21 Jun 2025 16:47:32 +0100 Subject: [PATCH 103/170] Add link to description of Parquet format. 
[ci skip] --- README.md | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 96e41096..55fb6e84 100644 --- a/README.md +++ b/README.md @@ -130,8 +130,9 @@ somd2 --help | grep -A2 ' --gcmc' Simulation output will be written to the directory specified using the `--output-directory` parameter. This will contain a number of files, including -Parquet files for the energy trajectories of each λ window. These can be -processed using [BioSimSpace](https://github.com/OpenBioSim/biosimspace) as follows: +[Parquet files](https://en.wikipedia.org/wiki/Apache_Parquet) for the energy +trajectories of each λ window. These can be processed using +[BioSimSpace](https://github.com/OpenBioSim/biosimspace) as follows: ```python import BioSimSpace as BSS From 708de44263d73f3c06112fa4bb958a7b6916c7fa Mon Sep 17 00:00:00 2001 From: Lester Hedges Date: Sat, 21 Jun 2025 16:52:03 +0100 Subject: [PATCH 104/170] Use sys.exit() rather than exit().
try: @@ -161,4 +161,4 @@ def ghostly(): sr.stream.save(system, f"{output}.bss") except Exception as e: _logger.error(f"An error occurred while saving the system: {e}") - exit(1) + sys.exit(1) From a24ea0edf9a5481be18aa86c68385c673b8126f2 Mon Sep 17 00:00:00 2001 From: Lester Hedges Date: Sat, 21 Jun 2025 16:58:20 +0100 Subject: [PATCH 105/170] Add note that GCMC is currently limited to the CUDA platform. [ci skip] --- README.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/README.md b/README.md index 55fb6e84..7ab812e1 100644 --- a/README.md +++ b/README.md @@ -126,6 +126,9 @@ related options, run: somd2 --help | grep -A2 ' --gcmc' ``` +> [!NOTE] +> GCMC is currently only supported when using the CUDA platform. + ## Analysis Simulation output will be written to the directory specified using the From 9ac6e9142fd17e4d7cd9600a76d2ba2f53631eec Mon Sep 17 00:00:00 2001 From: Lester Hedges Date: Mon, 23 Jun 2025 12:10:40 +0100 Subject: [PATCH 106/170] Fix bug in attempt to not pickle GCMCSampler. [ci skip] --- src/somd2/runner/_repex.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/somd2/runner/_repex.py b/src/somd2/runner/_repex.py index 90a43110..017cef47 100644 --- a/src/somd2/runner/_repex.py +++ b/src/somd2/runner/_repex.py @@ -130,7 +130,7 @@ def __getstate__(self): "_old_states": self._old_states, "_openmm_states": self._openmm_states, # Don't pickle the GCMC samplers since they need to be recreated. - "_gcmc_samplers": None * len(self._gcmc_samplers), + "_gcmc_samplers": len(self._gcmc_samplers) * [None], "_gcmc_states": self._gcmc_states, "_num_proposed": self._num_proposed, "_num_accepted": self._num_accepted, From b4c24aa199d87f9f3e9615d88ec241ed9c170b08 Mon Sep 17 00:00:00 2001 From: Lester Hedges Date: Mon, 23 Jun 2025 12:11:24 +0100 Subject: [PATCH 107/170] Checkpoint after dynamics block to avoid crash sync issues. 
[ci skip] --- src/somd2/runner/_repex.py | 164 ++++++++++++++++++++++--------------- 1 file changed, 100 insertions(+), 64 deletions(-) diff --git a/src/somd2/runner/_repex.py b/src/somd2/runner/_repex.py index 017cef47..b87c285b 100644 --- a/src/somd2/runner/_repex.py +++ b/src/somd2/runner/_repex.py @@ -717,11 +717,7 @@ def run(self): self._run_block, replicas, repeat(self._lambda_values), - repeat(is_checkpoint), repeat(i == 0), - repeat(i == cycles - 1), - repeat(block), - repeat(num_blocks + int(rem > 0)), repeat(write_gcmc_ghosts), ): if not result: @@ -734,6 +730,30 @@ def run(self): _logger.error("Dynamics cancelled. Exiting.") _sys.exit(1) + # Checkpoint. + if is_checkpoint or i == cycles - 1: + for j in range(num_batches): + # Get the indices of the replicas in this batch. + replicas = replica_list[j * num_workers : (j + 1) * num_workers] + with ThreadPoolExecutor(max_workers=num_workers) as executor: + try: + for result, error in executor.map( + self._checkpoint, + replicas, + repeat(self._lambda_values), + repeat(block), + repeat(num_blocks + int(rem > 0)), + repeat(i == cycles - 1), + ): + if not result: + _logger.error( + f"Checkpoint failed for {_lam_sym} = {self._lambda_values[index]:.5f}: {error}" + ) + raise error + except KeyboardInterrupt: + _logger.error("Checkpoint cancelled. Exiting.") + _sys.exit(1) + if i < cycles: # Assemble and energy matrix from the results. _logger.info("Assembling energy matrix") @@ -803,11 +823,7 @@ def _run_block( self, index, lambdas, - is_checkpoint, is_first_block, - is_final_block, - block, - num_blocks, write_gcmc_ghosts=False, ): """ @@ -822,21 +838,9 @@ def _run_block( lambdas: np.ndarray The lambda values for each replica. - rest2_scale: np.ndarray - The REST2 scaling factor for each replica. - - is_checkpoint: bool - Whether to checkpoint. - is_first_block: bool Whether this is the first block. - is_final_block: bool - Whether this is the final block. - - block: int - The block number. 
- num_blocks: int The total number of blocks. @@ -913,7 +917,7 @@ def _run_block( ), ) - # The frame frequency was hit, so writ the indices of the current + # The frame frequency was hit, so write the indices of the current # ghost water residues to file. if gcmc_sampler is not None and write_gcmc_ghosts: gcmc_sampler.write_ghost_residues() @@ -933,49 +937,6 @@ def _run_block( .to_numpy() ) - # Checkpoint. - if is_checkpoint or is_final_block: - # Commit the current system. - system = dynamics.commit() - - # If performing GCMC, then we need to flag the ghost waters. - if self._config.gcmc: - system = gcmc_sampler._flag_ghost_waters(system) - - # Get the simulation speed. - speed = dynamics.time_speed() - - # Checkpoint. - with self._lock: - self._checkpoint( - system, index, block, speed, is_final_block=is_final_block - ) - - # Delete all trajectory frames from the Sire system within the - # dynamics object. - dynamics._d._sire_mols.delete_all_frames() - - _logger.info( - f"Finished block {block+1} of {self._start_block + num_blocks} " - f"for {_lam_sym} = {lam:.5f}" - ) - - # Log the number of waters within the GCMC sampling volume. - if gcmc_sampler is not None: - # Push the PyCUDA context on top of the stack. - gcmc_sampler.push() - - _logger.info( - f"Current number of waters in GCMC volume at {_lam_sym} = {lam:.5f} " - f"is {gcmc_sampler.num_waters()}" - ) - - # Remove the PyCUDA context from the stack. - gcmc_sampler.pop() - - if is_final_block: - _logger.success(f"{_lam_sym} = {lam:.5f} complete") - except Exception as e: try: # Save the energy components for debugging purposes. @@ -1238,6 +1199,81 @@ def _assemble_results(self, results): return matrix + def _checkpoint(self, index, lambdas, block, num_blocks, is_final_block=False): + """ + Checkpoint the simulation. + + Parameters + ---------- + + index: int + The index of the replica. + + lambdas: np.ndarray + The lambda values for each replica. + + block: int + The current block number. 
+ + num_blocks: int + The total number of blocks in the simulation. + + is_final_block: bool + Whether this is the final block. + """ + try: + # Get the lambda value. + lam = lambdas[index] + + # Get the dynamics object (and GCMC sampler). + dynamics, gcmc_sampler = self._dynamics_cache.get(index) + + # Commit the current system. + system = dynamics.commit() + + # If performing GCMC, then we need to flag the ghost waters. + if gcmc_sampler is not None: + system = gcmc_sampler._flag_ghost_waters(system) + + # Get the simulation speed. + speed = dynamics.time_speed() + + # Call the base class checkpoint method to save the system state. + with self._lock: + super()._checkpoint( + system, index, block, speed, is_final_block=is_final_block + ) + + # Delete all trajectory frames from the Sire system within the + # dynamics object. + dynamics._d._sire_mols.delete_all_frames() + + _logger.info( + f"Finished block {block+1} of {self._start_block + num_blocks} " + f"for {_lam_sym} = {lam:.5f}" + ) + + # Log the number of waters within the GCMC sampling volume. + if gcmc_sampler is not None: + # Push the PyCUDA context on top of the stack. + gcmc_sampler.push() + + _logger.info( + f"Current number of waters in GCMC volume at {_lam_sym} = {lam:.5f} " + f"is {gcmc_sampler.num_waters()}" + ) + + # Remove the PyCUDA context from the stack. + gcmc_sampler.pop() + + if is_final_block: + _logger.success(f"{_lam_sym} = {lam:.5f} complete") + + return True, None + + except Exception as e: + return False, e + @staticmethod @_njit def _mix_replicas(num_replicas, energy_matrix, proposed, accepted): From 160f8f806fe0cc9bf052cfa2d149ea09e4c8a2c1 Mon Sep 17 00:00:00 2001 From: Lester Hedges Date: Wed, 25 Jun 2025 09:56:12 +0100 Subject: [PATCH 108/170] Don't reset the time to zero. 
[ref OpenBioSim/sire#320] [ci skip] --- src/somd2/runner/_base.py | 6 ++++++ src/somd2/runner/_repex.py | 4 ++-- src/somd2/runner/_runner.py | 4 ++-- 3 files changed, 10 insertions(+), 4 deletions(-) diff --git a/src/somd2/runner/_base.py b/src/somd2/runner/_base.py index 44ba4d1a..61a514a5 100644 --- a/src/somd2/runner/_base.py +++ b/src/somd2/runner/_base.py @@ -490,6 +490,12 @@ def __init__(self, system, config): _logger.error(msg) raise ValueError(msg) + # Store the initial system time. + if isinstance(self._system, list): + self._initial_time = self._system[0].time() + else: + self._initial_time = system.time() + # Create the lock file name. self._lock_file = str(self._config.output_directory / "somd2.lock") diff --git a/src/somd2/runner/_repex.py b/src/somd2/runner/_repex.py index b87c285b..1e48e034 100644 --- a/src/somd2/runner/_repex.py +++ b/src/somd2/runner/_repex.py @@ -1057,8 +1057,8 @@ def _equilibrate(self, index): # Commit the system. system = dynamics.commit() - # Reset the timer to zero. - system.set_time(_sr.u("0ps")) + # Reset the timer. + system.set_time(self._system.time()) # Delete the dynamics object. self._dynamics_cache.delete(index) diff --git a/src/somd2/runner/_runner.py b/src/somd2/runner/_runner.py index 71a0e134..19980aba 100644 --- a/src/somd2/runner/_runner.py +++ b/src/somd2/runner/_runner.py @@ -491,8 +491,8 @@ def generate_lam_vals(lambda_base, increment=0.001): # Commit the system. system = dynamics.commit() - # Reset the timer to zero. - system.set_time(_sr.u("0ps")) + # Reset the timer. + system.set_time(self._system.time()) # Perform minimisation at the end of equilibration only if the # timestep is increasing, or the constraint is changing. From 393c8d3f8721d25bf19b36b0f3a1bc3da8680689 Mon Sep 17 00:00:00 2001 From: Lester Hedges Date: Thu, 26 Jun 2025 09:41:58 +0100 Subject: [PATCH 109/170] Bind GCMC sampler to dynamics object to allow crash recovery. 
[ci skip] --- src/somd2/runner/_repex.py | 22 +++++++++++----------- src/somd2/runner/_runner.py | 6 ++++++ 2 files changed, 17 insertions(+), 11 deletions(-) diff --git a/src/somd2/runner/_repex.py b/src/somd2/runner/_repex.py index 1e48e034..be654b01 100644 --- a/src/somd2/runner/_repex.py +++ b/src/somd2/runner/_repex.py @@ -236,6 +236,12 @@ def _create_dynamics( _logger.error(msg) raise RuntimeError(msg) from e + # Bind the GCMC sampler to the dynamics object. This allows the + # dynamics object to reset the water state in its internal OpenMM + # context following a crash recovery. + if gcmc_kwargs is not None: + dynamics._d._gcmc_sampler = gcmc_sampler + # Append the dynamics object. self._dynamics.append(dynamics) @@ -370,11 +376,7 @@ def mix_states(self): # Update the water state in the GCMCSampler. self._gcmc_samplers[i].push() - self._gcmc_samplers[i]._set_water_state( - water_idxs, - self._gcmc_states[state][water_idxs], - self._dynamics[i].context(), - ) + self._gcmc_samplers[i]._set_water_state(self._dynamics[i].context()) self._gcmc_samplers[i].pop() # Update the swap matrix. @@ -1091,16 +1093,14 @@ def _equilibrate(self, index): gcmc_sampler.push() # Set the water state. - gcmc_sampler._set_water_state( - _np.arange(len(water_state)), - water_state, - dynamics.context(), - force=True, - ) + gcmc_sampler._set_water_state(dynamics.context()) # Remove the PyCUDA context from the stack. gcmc_sampler.pop() + # Re-bind the GCMC sampler to the dynamics object. + dynamics._d._gcmc_sampler = gcmc_sampler + # Perform minimisation at the end of equilibration only if the # timestep is increasing, or the constraint is changing. 
if (self._config.timestep > self._config.equilibration_timestep) or ( diff --git a/src/somd2/runner/_runner.py b/src/somd2/runner/_runner.py index 19980aba..55f61426 100644 --- a/src/somd2/runner/_runner.py +++ b/src/somd2/runner/_runner.py @@ -472,6 +472,9 @@ def generate_lam_vals(lambda_base, increment=0.001): # Equilibrate with GCMC moves. if gcmc_sampler is not None: + # Bind the GCMC sampler to the dynamics object. + dynamics._d._gcmc_sampler = gcmc_sampler + _logger.info( f"Equilibrating with GCMC moves at {_lam_sym} = {lambda_value:.5f}" ) @@ -567,6 +570,9 @@ def generate_lam_vals(lambda_base, increment=0.001): if gcmc_sampler is not None: gcmc_sampler.reset() + # Bind the GCMC sampler to the dynamics object. + dynamics._d._gcmc_sampler = gcmc_sampler + # Set the number of neighbours used for the energy calculation. # If not None, then we add one to account for the extra windows # used for finite-difference gradient analysis. From 5e2f29a60463c0116e3cfacbbc58164cb45abdc4 Mon Sep 17 00:00:00 2001 From: Lester Hedges Date: Thu, 26 Jun 2025 09:54:41 +0100 Subject: [PATCH 110/170] Fix storing of initial time. --- src/somd2/runner/_base.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/somd2/runner/_base.py b/src/somd2/runner/_base.py index 61a514a5..6279abec 100644 --- a/src/somd2/runner/_base.py +++ b/src/somd2/runner/_base.py @@ -494,7 +494,7 @@ def __init__(self, system, config): if isinstance(self._system, list): self._initial_time = self._system[0].time() else: - self._initial_time = system.time() + self._initial_time = self._system.time() # Create the lock file name. self._lock_file = str(self._config.output_directory / "somd2.lock") From 92336d796088cebc451f7204803f0e6c153d3777 Mon Sep 17 00:00:00 2001 From: Lester Hedges Date: Thu, 26 Jun 2025 10:02:04 +0100 Subject: [PATCH 111/170] Use a single GCMC based conditional block. 
[ci skip] --- src/somd2/runner/_repex.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/src/somd2/runner/_repex.py b/src/somd2/runner/_repex.py index be654b01..5ccc4219 100644 --- a/src/somd2/runner/_repex.py +++ b/src/somd2/runner/_repex.py @@ -919,17 +919,16 @@ def _run_block( ), ) - # The frame frequency was hit, so write the indices of the current - # ghost water residues to file. - if gcmc_sampler is not None and write_gcmc_ghosts: - gcmc_sampler.write_ghost_residues() - # Set the state. self._dynamics_cache.save_openmm_state(index) # Save the GCMC state. if gcmc_sampler is not None: self._dynamics_cache.save_gcmc_state(index) + # The frame frequency was hit, so write the indices of the + # current ghost water residues to file. + if write_gcmc_ghosts: + gcmc_sampler.write_ghost_residues() # Get the energy at each lambda value. energies = ( From da79206df3dae4afb84a8c9357da2f50e17f1191 Mon Sep 17 00:00:00 2001 From: Lester Hedges Date: Thu, 26 Jun 2025 12:14:55 +0100 Subject: [PATCH 112/170] Fix post-equilibration time reset. [ci skip] --- src/somd2/runner/_repex.py | 3 ++- src/somd2/runner/_runner.py | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/src/somd2/runner/_repex.py b/src/somd2/runner/_repex.py index 5ccc4219..8e221778 100644 --- a/src/somd2/runner/_repex.py +++ b/src/somd2/runner/_repex.py @@ -1059,7 +1059,8 @@ def _equilibrate(self, index): system = dynamics.commit() # Reset the timer. - system.set_time(self._system.time()) + if self._initial_time.value() != 0: + system.set_time(self._initial_time) # Delete the dynamics object. self._dynamics_cache.delete(index) diff --git a/src/somd2/runner/_runner.py b/src/somd2/runner/_runner.py index 55f61426..8f4676ad 100644 --- a/src/somd2/runner/_runner.py +++ b/src/somd2/runner/_runner.py @@ -495,7 +495,8 @@ def generate_lam_vals(lambda_base, increment=0.001): system = dynamics.commit() # Reset the timer. 
- system.set_time(self._system.time()) + if self._initial_time.value() != 0: + system.set_time(self._initial_time) # Perform minimisation at the end of equilibration only if the # timestep is increasing, or the constraint is changing. From e31a7a54cbaae4049c245a69936688f8aad930c9 Mon Sep 17 00:00:00 2001 From: Lester Hedges Date: Thu, 26 Jun 2025 13:30:43 +0100 Subject: [PATCH 113/170] Typo. [ci skip] --- src/somd2/config/_config.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/somd2/config/_config.py b/src/somd2/config/_config.py index d2fcf3f7..9bfb76e2 100644 --- a/src/somd2/config/_config.py +++ b/src/somd2/config/_config.py @@ -205,7 +205,7 @@ def __init__( swap_end_states: bool Whether to perform the perturbation in the reverse direction. - couloumb_power : float + coulomb_power : float Power to use for the soft-core Coulomb interaction. This is used to soften the electrostatic interaction. From 7dc652d742677cec560d195fbba81ba3307d9958 Mon Sep 17 00:00:00 2001 From: Lester Hedges Date: Thu, 26 Jun 2025 13:57:30 +0100 Subject: [PATCH 114/170] Fix accidental change to water state update during replica mixing. [ci skip] --- src/somd2/runner/_repex.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/somd2/runner/_repex.py b/src/somd2/runner/_repex.py index 8e221778..3fccb2da 100644 --- a/src/somd2/runner/_repex.py +++ b/src/somd2/runner/_repex.py @@ -376,7 +376,11 @@ def mix_states(self): # Update the water state in the GCMCSampler. self._gcmc_samplers[i].push() - self._gcmc_samplers[i]._set_water_state(self._dynamics[i].context()) + self._gcmc_samplers[i]._set_water_state( + self._dynamics[i].context(), + indices=water_idxs, + states=self._gcmc_states[state][water_idxs], + ) self._gcmc_samplers[i].pop() # Update the swap matrix. 
From 258e1b6f6c3ca80b446623072d4f23157ec592bd Mon Sep 17 00:00:00 2001 From: Lester Hedges Date: Mon, 30 Jun 2025 13:03:37 +0100 Subject: [PATCH 115/170] Decouple ghostly and add as an external dependency. [ci skip] --- README.md | 14 +- environment.yaml | 1 + pyproject.toml | 1 - src/somd2/_utils/_ghosts.py | 1095 ----------------------------------- src/somd2/app/_cli.py | 76 +-- src/somd2/runner/_base.py | 4 +- tests/_utils/test_ghosts.py | 324 ----------- 7 files changed, 7 insertions(+), 1508 deletions(-) delete mode 100644 src/somd2/_utils/_ghosts.py delete mode 100644 tests/_utils/test_ghosts.py diff --git a/README.md b/README.md index 7ab812e1..43f1454a 100644 --- a/README.md +++ b/README.md @@ -126,7 +126,7 @@ related options, run: somd2 --help | grep -A2 ' --gcmc' ``` -> [!NOTE] +> [!NOTE] > GCMC is currently only supported when using the CUDA platform. ## Analysis @@ -173,16 +173,8 @@ We support modification of ghost atom bonded terms to avoid spurious coupling to the physical system using the approach described in [this](https://pubs.acs.org/doi/10.1021/acs.jctc.0c01328) paper. These are enabled by default, but can be disabled using the ``--no-ghost-modifications`` -option. Alternatively, we also provide the `ghostly` command-line tool that can -be used to apply the modifications to perturbable system without running a simulation, -e.g. for use elsewhere. This can be used via: - -```bash -ghostly perturbable_system.bss --output ghosted --log-level debug -``` - -(Here the log level is set to debug to provide more information on the modifications -that are applied.) +option. Modifications are implemented using the [ghostly](https://github.com/OpenBioSim/ghostly) +package. 
## Note for SOMD1 users diff --git a/environment.yaml b/environment.yaml index 12846aaf..be77b6cd 100644 --- a/environment.yaml +++ b/environment.yaml @@ -12,4 +12,5 @@ dependencies: - numba - pycuda - pip: + - git+https://github.com/openbiosim/ghostly - git+https://github.com/openbiosim/loch diff --git a/pyproject.toml b/pyproject.toml index ae7b5c9a..44249a66 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -21,7 +21,6 @@ license-files = ["LICENSE"] [project.scripts] somd2 = "somd2.app:somd2" -ghostly = "somd2.app:ghostly" [project.urls] repository = "https://github.com/OpenBioSim/somd2" diff --git a/src/somd2/_utils/_ghosts.py b/src/somd2/_utils/_ghosts.py deleted file mode 100644 index e8524805..00000000 --- a/src/somd2/_utils/_ghosts.py +++ /dev/null @@ -1,1095 +0,0 @@ -###################################################################### -# SOMD2: GPU accelerated alchemical free-energy engine. -# -# Copyright: 2023-2025 -# -# Authors: The OpenBioSim Team -# -# SOMD2 is free software: you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation, either version 3 of the License, or -# (at your option) any later version. -# -# SOMD2 is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with SOMD2. If not, see . -##################################################################### - -__all__ = ["boresch"] - -from sire.system import System as _System -from sire.legacy.System import System as _LegacySystem - -import sire.legacy.MM as _SireMM -import sire.legacy.Mol as _SireMol -import sire.morph as _morph - -from somd2 import _logger - -from . import _is_ghost -from . 
import _lam_sym - - -def boresch(system, k_hard=100, k_soft=5, optimise_angles=True): - """ - Apply Boresch modifications to ghost atom bonded terms to avoid non-physical - coupling between the ghost atoms and the physical region. - - Parameters - ---------- - - system : sire.system.System, sire.legacy.System.System - The system containing the molecules to be perturbed. - - k_hard : float, optional - The force constant to use to when setting angle terms involving ghost - atoms to 90 degrees to avoid flapping. (In kcal/mol/rad^2) - - k_soft : float, optional - The force constant to use when setting angle terms involving ghost atoms - for non-planar triple junctions. (In kcal/mol/rad^2) - - optimise_angles : bool, optional - Whether to optimise the equilibrium value of the angle terms involving - ghost atoms for non-planar triple junctions. - - Returns - ------- - - system : sire.system.System - The updated system. - - Notes - ----- - - For technical details, please refer to the original publication: - https://pubs.acs.org/doi/10.1021/acs.jctc.0c01328 - """ - - _logger.info(f"Applying Boresch modifications to ghost atom bonded terms") - - # Check the system is a Sire system. - if not isinstance(system, (_System, _LegacySystem)): - raise TypeError( - "'system' must of type 'sire.system.System' or 'sire.legacy.System.System'" - ) - - # Extract the legacy system. - if isinstance(system, _LegacySystem): - system = _System(system) - - # Clone the system. - system = system.clone() - - # Search for perturbable molecules. - try: - pert_mols = system.molecules("property is_perturbable") - except KeyError: - raise KeyError("No perturbable molecules in the system") - - for mol in pert_mols: - # Store the molecule info. - info = mol.info() - - # Generate the end state connectivity objects. 
- connectivity0 = _create_connectivity(_morph.link_to_reference(mol)) - connectivity1 = _create_connectivity(_morph.link_to_perturbed(mol)) - - # Find the indices of the ghost atoms at each end state. - ghosts0 = [ - _SireMol.AtomIdx(i) - for i, x in enumerate( - _is_ghost(mol, [_SireMol.AtomIdx(i) for i in range(mol.num_atoms())]) - ) - if x - ] - ghosts1 = [ - _SireMol.AtomIdx(i) - for i, x in enumerate( - _is_ghost( - mol, - [_SireMol.AtomIdx(i) for i in range(mol.num_atoms())], - is_lambda1=True, - ) - ) - if x - ] - - # Work out the physical bridge atoms at lambda = 0. These are the atoms - # that connect ghost atoms to the physical region. - bridges0 = {} - for ghost in ghosts0: - for c in connectivity0.connections_to(ghost): - if not _is_ghost(mol, [c])[0]: - if c not in bridges0: - bridges0[c] = [ghost] - else: - bridges0[c].append(ghost) - # Work out the indices of the other physical atoms that are connected to - # the bridge atoms, sorted by the atom index. - physical0 = {} - for b in bridges0: - physical0[b] = [] - for c in connectivity0.connections_to(b): - if not _is_ghost(mol, [c])[0]: - physical0[b].append(c) - for b in physical0: - physical0[b].sort(key=lambda x: x.value()) - - # Repeat the above for lambda = 1. - bridges1 = {} - for ghost in ghosts1: - for c in connectivity1.connections_to(ghost): - if not _is_ghost(mol, [c], is_lambda1=True)[0]: - if c not in bridges1: - bridges1[c] = [ghost] - else: - bridges1[c].append(ghost) - physical1 = {} - for b in bridges1: - physical1[b] = [] - for c in connectivity1.connections_to(b): - if not _is_ghost(mol, [c], is_lambda1=True)[0]: - physical1[b].append(c) - for b in physical1: - physical1[b].sort(key=lambda x: x.value()) - - # Log the results for each end state. 
- - if len(bridges0) > 0: - _logger.debug("Ghost atom bridges at lambda = 0") - for i, b in enumerate(bridges0): - _logger.debug(f" Bridge {i}: {b.value()}") - _logger.debug( - f" ghosts: [{','.join([str(x.value()) for x in bridges0[b]])}]" - ) - _logger.debug( - f" physical: [{','.join([str(x.value()) for x in physical0[b]])}]" - ) - _logger.debug(f" type: {len(physical0[b])}") - - if len(bridges1) > 0: - _logger.debug("Ghost atom bridges at lambda = 1") - for i, b in enumerate(bridges1): - _logger.debug(f" Bridge {i}: {b.value()}") - _logger.debug( - f" ghosts: [{','.join([str(x.value()) for x in bridges1[b]])}]" - ) - _logger.debug( - f" physical: [{','.join([str(x.value()) for x in physical1[b]])}]" - ) - _logger.debug(f" type: {len(physical1[b])}") - - # Now process the bridges. - - # First lambda = 0. - for b in bridges0: - # Determine the type of junction. - junction = len(physical0[b]) - - # Terminal junction. - if junction == 1: - mol = _terminal(mol, b, bridges0[b], physical0[b]) - - # Dual junction. - elif junction == 2: - mol = _dual(mol, b, bridges0[b], physical0[b], k_hard=k_hard) - - # Triple junction. - elif junction == 3: - mol = _triple( - mol, - b, - bridges0[b], - physical0[b], - k_hard=k_hard, - k_soft=k_soft, - optimise_angles=optimise_angles, - ) - - # Higher order junction. - else: - mol = _higher( - mol, - b, - bridges0[b], - physical0[b], - k_hard=k_hard, - k_soft=k_soft, - optimise_angles=optimise_angles, - ) - - # Now lambda = 1. - for b in bridges1: - junction = len(physical1[b]) - - if junction == 1: - mol = _terminal(mol, b, bridges1[b], physical1[b], is_lambda1=True) - - elif junction == 2: - mol = _dual( - mol, b, bridges1[b], physical1[b], k_hard=k_hard, is_lambda1=True - ) - - elif junction == 3: - mol = _triple( - mol, - b, - bridges1[b], - physical1[b], - k_hard=k_hard, - k_soft=k_soft, - optimise_angles=optimise_angles, - is_lambda1=True, - ) - - # Higher order junction. 
- else: - mol = _higher( - mol, - b, - bridges1[b], - physical1[b], - k_hard=k_hard, - k_soft=k_soft, - optimise_angles=optimise_angles, - is_lambda1=True, - ) - - # Update the molecule in the system. - system.update(mol) - - # Return the updated system. - return system - - -def _terminal(mol, bridge, ghosts, physical, is_lambda1=False): - r""" - Apply Boresch modifications to a terminal junction. - - An example terminal junction with three ghost branches. Here X is the - physical bridge atom. - - DR1 - / - / - R---X---DR2 - \ - \ - DR3 - - Parameters - ---------- - - mol : sire.mol.Molecule - The perturbable molecule. - - bridge : sire.legacy.Mol.AtomIdx - The physical bridge atom. - - ghosts : List[sire.legacy.Mol.AtomIdx] - The list of ghost atoms connected to the bridge atom. - - physical : List[sire.legacy.Mol.AtomIdx] - The list of physical atoms connected to the bridge atom. - - is_lambda1 : bool, optional - Whether the junction is at lambda = 1. - - Returns - ------- - - mol : sire.mol.Molecule - The updated molecule. - """ - - _logger.debug( - f"Applying Boresch modifications to terminal ghost junction at " - f"{_lam_sym} = {int(is_lambda1)}:" - ) - - # Store the molecular info. - info = mol.info() - - # Get the end state connectivity property. - if is_lambda1: - connectivity = _create_connectivity(_morph.link_to_perturbed(mol)) - else: - connectivity = _create_connectivity(_morph.link_to_reference(mol)) - - # First, we need to work out the physical atoms two atoms away from the - # bridge atom. - physical2 = [] - # Loop over the physical atoms connected to the bridge atom. - for p in physical: - # Loop over the atoms connected to the physical atom. - for c in connectivity.connections_to(p): - # If the atom is not a ghost atom or the bridge atom itself, we have - # found a physical atom two atoms away from the bridge atom. 
- if not _is_ghost(mol, [c], is_lambda1)[0] and c != bridge: - if c not in physical2: - physical2.append(c) - - # Sort based on the atom indices. - physical2.sort(key=lambda x: x.value()) - - # Get the end state dihedral functions. - prop = "dihedral0" if not is_lambda1 else "dihedral1" - dihedrals = mol.property(prop) - - # Initialise a container to store the updated dihedrals. - new_dihedrals = _SireMM.FourAtomFunctions(mol.info()) - - # Remove all dihedral terms for all but one of the physical atoms two atoms - # from the physical bridge atom. - physical2.pop(0) - for p in dihedrals.potentials(): - idx0 = info.atom_idx(p.atom0()) - idx1 = info.atom_idx(p.atom1()) - idx2 = info.atom_idx(p.atom2()) - idx3 = info.atom_idx(p.atom3()) - if (idx0 in physical2 and idx3 in ghosts) or ( - idx3 in physical2 and idx0 in ghosts - ): - _logger.debug( - f" Removing dihedral: [{idx0.value()}-{idx1.value()}-{idx2.value()}-{idx3.value()}], {p.function()}" - ) - else: - new_dihedrals.set(idx0, idx1, idx2, idx3, p.function()) - - # Set the updated dihedrals. - mol = mol.edit().set_property(prop, new_dihedrals).molecule().commit() - - # Return the updated molecule. - return mol - - -def _dual(mol, bridge, ghosts, physical, k_hard=100, is_lambda1=False): - r""" - Apply Boresch modifications to a dual junction. - - An example dual junction with two ghost branches. Here X is the physical - bridge atom. - - R1 DR1 - \ / - \ / - X - / \ - / \ - R2 DR2 - - Parameters - ---------- - - mol : sire.mol.Molecule - The perturbable molecule. - - bridge : sire.legacy.Mol.AtomIdx - The physical bridge atom. - - ghosts : List[sire.legacy.Mol.AtomIdx] - The list of ghost atoms connected to the bridge atom. - - physical : List[sire.legacy.Mol.AtomIdx] - The list of physical atoms connected to the bridge atom. - - k_hard : float, optional - The force constant to use when setting angle terms involving ghost - atoms to 90 degrees to avoid flapping. 
(In kcal/mol/rad^2) - - is_lambda1 : bool, optional - Whether the junction is at lambda = 1. - - Returns - ------- - - mol : sire.mol.Molecule - The updated molecule. - """ - - _logger.debug( - f"Applying Boresch modifications to dual ghost junction at " - f"{_lam_sym} = {int(is_lambda1)}:" - ) - - # Store the molecular info. - info = mol.info() - - # Property suffix based on the end state. - suffix = "0" if not is_lambda1 else "1" - - # Get the end state connectivity property. - try: - connectivity = mol.property("connectivity" + suffix) - except: - connectivity = mol.property("connectivity") - - # Single branch. - if len(ghosts) == 1: - _logger.debug(" Single branch:") - - # First remove all dihedrals starting from the ghost atom and ending in - # physical system. - - # Get the end state bond functions. - angles = mol.property("angle" + suffix) - dihedrals = mol.property("dihedral" + suffix) - - # Initialise a container to store the updated bonded terms. - new_dihedrals = _SireMM.FourAtomFunctions(mol.info()) - - # Dihedrals. - for p in dihedrals.potentials(): - idx0 = info.atom_idx(p.atom0()) - idx1 = info.atom_idx(p.atom1()) - idx2 = info.atom_idx(p.atom2()) - idx3 = info.atom_idx(p.atom3()) - - # Dihedral terminates at the ghost bridge. - if ( - not _is_ghost(mol, [idx0], is_lambda1)[0] - and idx3 in ghosts - or not _is_ghost(mol, [idx3], is_lambda1)[0] - and idx0 in ghosts - ): - _logger.debug( - f" Removing dihedral: [{idx0.value()}-{idx1.value()}-{idx2.value()}-{idx3.value()}], {p.function()}" - ) - # Dihedral terminates at the second physical atom. 
- elif (_is_ghost(mol, [idx0], is_lambda1)[0] and idx3 == physical[1]) or ( - _is_ghost(mol, [idx3], is_lambda1)[0] and idx0 == physical[1] - ): - _logger.debug( - f" Removing dihedral: [{idx0.value()}-{idx1.value()}-{idx2.value()}-{idx3.value()}], {p.function()}" - ) - else: - new_dihedrals.set(idx0, idx1, idx2, idx3, p.function()) - - # Next we modify the angle terms between the physical and - # ghost atom so that the equilibrium angle is 90 degrees. - new_angles = _SireMM.ThreeAtomFunctions(mol.info()) - for p in angles.potentials(): - idx0 = info.atom_idx(p.atom0()) - idx1 = info.atom_idx(p.atom1()) - idx2 = info.atom_idx(p.atom2()) - - if ( - idx0 in ghosts - and idx2 in physical - or idx0 in physical - and idx2 in ghosts - ): - from math import pi - from sire.legacy.CAS import Symbol - - theta0 = pi / 2.0 - - # Create the new angle function. - amber_angle = _SireMM.AmberAngle(k_hard, theta0) - - # Generate the new angle expression. - expression = amber_angle.to_expression(Symbol("theta")) - - # Set the equilibrium angle to 90 degrees. - new_angles.set(idx0, idx1, idx2, expression) - - _logger.debug( - f" Stiffening angle: [{idx0.value()}-{idx1.value()}-{idx2.value()}], " - f"{p.function()} --> {expression}" - ) - - else: - new_angles.set(idx0, idx1, idx2, p.function()) - - # Update the molecule. - mol = mol.edit().set_property("angle" + suffix, new_angles).molecule().commit() - mol = ( - mol.edit() - .set_property("dihedral" + suffix, new_dihedrals) - .molecule() - .commit() - ) - - # Dual branch. - else: - _logger.debug(" Dual branch:") - - # First, delete all bonded terms between atoms in two ghost branches. - - # Get the end state bond functions. - angles = mol.property("angle" + suffix) - dihedrals = mol.property("dihedral" + suffix) - - # Initialise containers to store the updated bonded terms. - new_angles = _SireMM.ThreeAtomFunctions(mol.info()) - new_dihedrals = _SireMM.FourAtomFunctions(mol.info()) - - # Angles. 
- for p in angles.potentials(): - idx0 = info.atom_idx(p.atom0()) - idx1 = info.atom_idx(p.atom1()) - idx2 = info.atom_idx(p.atom2()) - - if idx0 in ghosts and idx2 in ghosts: - _logger.debug( - f" Removing angle: [{idx0.value()}-{idx1.value()}-{idx2.value()}], {p.function()}" - ) - - else: - new_angles.set(idx0, idx1, idx2, p.function()) - - # Dihedrals. - for p in dihedrals.potentials(): - idx0 = info.atom_idx(p.atom0()) - idx1 = info.atom_idx(p.atom1()) - idx2 = info.atom_idx(p.atom2()) - idx3 = info.atom_idx(p.atom3()) - - # Work out the number of ghosts in the dihedral. - num_ghosts = len([idx for idx in [idx0, idx1, idx2, idx3] if idx in ghosts]) - - # If there is more than one ghost, then this dihedral must bridge the - # ghost branches. - if num_ghosts > 1: - _logger.debug( - f" Removing dihedral: [{idx0.value()}-{idx1.value()}-{idx2.value()}-{idx3.value()}], {p.function()}" - ) - else: - new_dihedrals.set(idx0, idx1, idx2, idx3, p.function()) - - # Set the updated bonded terms. - mol = mol.edit().set_property("angle" + suffix, new_angles).molecule().commit() - mol = ( - mol.edit() - .set_property("dihedral" + suffix, new_dihedrals) - .molecule() - .commit() - ) - - # Now treat the ghost branches individually. - for d in ghosts: - mol = _dual( - mol, bridge, [d], physical, k_hard=k_hard, is_lambda1=is_lambda1 - ) - - # Return the updated molecule. - return mol - - -def _triple( - mol, - bridge, - ghosts, - physical, - k_hard=100, - k_soft=5, - optimise_angles=True, - is_lambda1=False, -): - r""" - Apply Boresch modifications to a triple junction. - - An example triple junction. Here X is the physical bridge atom. - - R1 - \ - \ - R2---X---DR - / - / - R3 - - Parameters - ---------- - - mol : sire.mol.Molecule - The perturbable molecule. - - bridge : sire.legacy.Mol.AtomIdx - The physical bridge atom. - - ghosts : List[sire.legacy.Mol.AtomIdx] - The list of ghost atoms connected to the bridge atom. 
- - physical : List[sire.legacy.Mol.AtomIdx] - The list of physical atoms connected to the bridge atom. - - k_hard : float, optional - The force constant to use when setting angle terms involving ghost - atoms to 90 degrees to avoid flapping. (In kcal/mol/rad^2) - - k_soft : float, optional - The force constant to use when setting angle terms involving ghost - atoms for non-planar triple junctions. (In kcal/mol/rad^2) - - optimise_angles : bool, optional - Whether to optimise the equilibrium value of the angle terms involving - ghost atoms for non-planar triple junctions. - - is_lambda1 : bool, optional - Whether the junction is at lambda = 1. - - Returns - ------- - - mol : sire.mol.Molecule - The updated molecule. - """ - - _logger.debug( - f"Applying Boresch modifications to triple ghost junction at " - f"{_lam_sym} = {int(is_lambda1)}:" - ) - - # Store the molecular info. - info = mol.info() - - # Property suffix based on the end state. - suffix = "0" if not is_lambda1 else "1" - - # Get the end state connectivity property. - try: - connectivity = mol.property("connectivity" + suffix) - except: - connectivity = mol.property("connectivity") - - # Store the element of the bridge atom. - element = mol.atom(bridge).property("element" + suffix) - - # Planar junction. - if element == _SireMol.Element("C"): - _logger.debug(" Planar junction.") - - # First remove all bonded terms between one of the physical atoms - # and the ghost group. - - # Store the index of the first physical atom. - idx = physical[0] - - # Get the end state bond functions. - angles = mol.property("angle" + suffix) - dihedrals = mol.property("dihedral" + suffix) - - # Initialise a container to store the updated bonded terms. - new_angles = _SireMM.ThreeAtomFunctions(mol.info()) - new_dihedrals = _SireMM.FourAtomFunctions(mol.info()) - - # Angles. 
- for p in angles.potentials(): - idx0 = info.atom_idx(p.atom0()) - idx1 = info.atom_idx(p.atom1()) - idx2 = info.atom_idx(p.atom2()) - idxs = [idx0, idx1, idx2] - - if idx in idxs and any([x in ghosts for x in idxs]): - _logger.debug( - f" Removing angle: [{idx0.value()}-{idx1.value()}-{idx2.value()}], {p.function()}" - ) - - else: - new_angles.set(idx0, idx1, idx2, p.function()) - - # Dihedrals. - for p in dihedrals.potentials(): - idx0 = info.atom_idx(p.atom0()) - idx1 = info.atom_idx(p.atom1()) - idx2 = info.atom_idx(p.atom2()) - idx3 = info.atom_idx(p.atom3()) - idxs = [idx0, idx1, idx2, idx3] - - if idx in idxs and any([x in ghosts for x in idxs]): - _logger.debug( - f" Removing dihedral: [{idx0.value()}-{idx1.value()}-{idx2.value()}-{idx3.value()}], {p.function()}" - ) - - else: - new_dihedrals.set(idx0, idx1, idx2, idx3, p.function()) - - # Next we modify the angle terms between the remaining physical and - # ghost atoms so that the equilibrium angle is 90 degrees. - new_new_angles = _SireMM.ThreeAtomFunctions(mol.info()) - for p in new_angles.potentials(): - idx0 = info.atom_idx(p.atom0()) - idx1 = info.atom_idx(p.atom1()) - idx2 = info.atom_idx(p.atom2()) - - if ( - idx0 in ghosts - and idx2 in physical[1:] - or idx0 in physical[1:] - and idx2 in ghosts - ): - from math import pi - from sire.legacy.CAS import Symbol - - theta0 = pi / 2.0 - - # Create the new angle function. - amber_angle = _SireMM.AmberAngle(k_hard, theta0) - - # Generate the new angle expression. - expression = amber_angle.to_expression(Symbol("theta")) - - # Set the equilibrium angle to 90 degrees. - new_new_angles.set(idx0, idx1, idx2, expression) - - _logger.debug( - f" Stiffening angle: [{idx0.value()}-{idx1.value()}-{idx2.value()}], " - f"{p.function()} --> {expression}" - ) - - else: - new_new_angles.set(idx0, idx1, idx2, p.function()) - - # Update the molecule. 
- mol = ( - mol.edit() - .set_property("angle" + suffix, new_new_angles) - .molecule() - .commit() - ) - mol = ( - mol.edit() - .set_property("dihedral" + suffix, new_dihedrals) - .molecule() - .commit() - ) - - # Non-planar junction. - elif element == _SireMol.Element("N"): - _logger.debug(" Non-planar junction.") - - # First, modify the force constants of the angle terms between the ghost - # atoms and the physical system to be very low. - - # Get the end state angle functions. - angles = mol.property("angle" + suffix) - - # Initialise a container to store the updated angle functions. - new_angles = _SireMM.ThreeAtomFunctions(mol.info()) - - # Indices for the softened angle terms. - angle_idxs = [] - - for p in angles.potentials(): - idx0 = info.atom_idx(p.atom0()) - idx1 = info.atom_idx(p.atom1()) - idx2 = info.atom_idx(p.atom2()) - - if ( - idx0 in ghosts - and idx2 in physical - or idx2 in ghosts - and idx0 in physical - ): - from sire.legacy.CAS import Symbol - - theta = Symbol("theta") - - # Cast the angle to an Amber angle to get the expression. - amber_angle = _SireMM.AmberAngle(p.function(), theta) - - # Create a new Amber angle with a modified force constant. - - # We'll optimise the equilibrium angle for the softened angle term. - if optimise_angles: - amber_angle = _SireMM.AmberAngle(0.05, amber_angle.theta0()) - angle_idxs.append((idx0, idx1, idx2)) - # Use the existing equilibrium angle. - else: - amber_angle = _SireMM.AmberAngle(k_soft, amber_angle.theta0()) - - # Generate the new angle expression. - expression = amber_angle.to_expression(theta) - - # Set the force constant to a very low value. - new_angles.set(idx0, idx1, idx2, expression) - - _logger.debug( - f" Softening angle: [{idx0.value()}-{idx1.value()}-{idx2.value()}], " - f"{p.function()} --> {expression}" - ) - - else: - new_angles.set(idx0, idx1, idx2, p.function()) - - # Next, remove all dihedral starting from the ghost atoms and ending in - # the physical system. 
Also, only preserve dihedrals terminating at one - # of the physical atoms. - - # Get the end state dihedral functions. - dihedrals = mol.property("dihedral" + suffix) - - # Initialise containers to store the updated dihedral functions. - new_dihedrals = _SireMM.FourAtomFunctions(mol.info()) - - for p in dihedrals.potentials(): - idx0 = info.atom_idx(p.atom0()) - idx1 = info.atom_idx(p.atom1()) - idx2 = info.atom_idx(p.atom2()) - idx3 = info.atom_idx(p.atom3()) - idxs = [idx0, idx1, idx2, idx3] - - # If there is one ghost atom, then this dihedral must begin or terminate - # at the ghost atom. - num_ghosts = len([x for x in idxs if x in ghosts]) - if num_ghosts == 1: - _logger.debug( - f" Removing dihedral: [{idx0.value()}-{idx1.value()}-{idx2.value()}-{idx3.value()}], {p.function()}" - ) - # Remove the dihedral if includes a ghost and doesn't terminate at the first - # physical atom. - elif (_is_ghost(mol, [idx0], is_lambda1)[0] and idx3 in physical[1:]) or ( - _is_ghost(mol, [idx3], is_lambda1)[0] and idx0 in physical[1:] - ): - _logger.debug( - f" Removing dihedral: [{idx0.value()}-{idx1.value()}-{idx2.value()}-{idx3.value()}], {p.function()}" - ) - else: - new_dihedrals.set(idx0, idx1, idx2, idx3, p.function()) - - # Update the molecule. - mol = mol.edit().set_property("angle" + suffix, new_angles).molecule().commit() - mol = ( - mol.edit() - .set_property("dihedral" + suffix, new_dihedrals) - .molecule() - .commit() - ) - - # Optimise the equilibrium value of theta0 for the softened angle term. - if optimise_angles: - _logger.debug(" Optimising equilibrium values for softened angles.") - - import sire.morph as _morph - from sire.units import radian as _radian - - # Link properties to the appropriate end state. - if is_lambda1: - min_mol = _morph.link_to_perturbed(mol) - else: - min_mol = _morph.link_to_reference(mol) - - # Minimise the molecule. - minimiser = min_mol.minimisation(constraint="none", platform="cpu") - minimiser.run() - - # Commit the changes. 
- min_mol = minimiser.commit() - - # Get the equilibrium angle values. - theta0s = {} - for idx in angle_idxs: - try: - theta0s[idx] = min_mol.angles(*idx).sizes()[0].to(_radian) - except: - raise ValueError(f"Could not find optimised angle term: {idx}") - - # Get the existing angles. - angles = mol.property("angle" + suffix) - - # Initialise a container to store the updated angle functions. - new_angles = _SireMM.ThreeAtomFunctions(mol.info()) - - # Update the angle potentials. - for p in angles.potentials(): - idx0 = info.atom_idx(p.atom0()) - idx1 = info.atom_idx(p.atom1()) - idx2 = info.atom_idx(p.atom2()) - idx = (idx0, idx1, idx2) - - # This is the softened angle term. - if idx in angle_idxs: - # Get the optimised equilibrium angle. - theta0 = theta0s[idx] - - # Create the new angle function. - amber_angle = _SireMM.AmberAngle(k_soft, theta0) - - # Generate the new angle expression. - expression = amber_angle.to_expression(Symbol("theta")) - - # Set the equilibrium angle to 90 degrees. - new_angles.set(idx0, idx1, idx2, expression) - - _logger.debug( - f" Optimising angle: [{idx0.value()}-{idx1.value()}-{idx2.value()}], " - f"{p.function()} --> {expression}" - ) - - else: - new_angles.set(idx0, idx1, idx2, p.function()) - - # Update the molecule. - mol = ( - mol.edit() - .set_property("angle" + suffix, new_angles) - .molecule() - .commit() - ) - - # Return the updated molecule. - return mol - - -def _higher( - mol, - bridge, - ghosts, - physical, - k_hard=100, - k_soft=5, - optimise_angles=True, - is_lambda1=False, -): - r""" - Apply Boresch modifications to higher order junctions. - - Parameters - ---------- - - mol : sire.mol.Molecule - The perturbable molecule. - - bridge : sire.legacy.Mol.AtomIdx - The physical bridge atom. - - ghosts : List[sire.legacy.Mol.AtomIdx] - The list of ghost atoms connected to the bridge atom. - - physical : List[sire.legacy.Mol.AtomIdx] - The list of physical atoms connected to the bridge atom. 
- - k_hard : float, optional - The force constant to use when setting angle terms involving ghost - atoms to 90 degrees to avoid flapping. (In kcal/mol/rad^2) - - k_soft : float, optional - The force constant to use when setting angle terms involving ghost - atoms for non-planar triple junctions. (In kcal/mol/rad^2) - - optimise_angles : bool, optional - Whether to optimise the equilibrium value of the angle terms involving - ghost atoms for non-planar triple junctions. - - is_lambda1 : bool, optional - Whether the junction is at lambda = 1. - - Returns - ------- - - mol : sire.mol.Molecule - The updated molecule. - """ - - _logger.debug( - f"Applying Boresch modifications to higher order junction at " - f"{_lam_sym} = {int(is_lambda1)}:" - ) - - # Store the molecular info. - info = mol.info() - - # Property suffix based on the end state. - suffix = "0" if not is_lambda1 else "1" - - # Get the end state connectivity property. - try: - connectivity = mol.property("connectivity" + suffix) - except: - connectivity = mol.property("connectivity") - - # Now remove all bonded interactions between the ghost atoms and one of the - # physical atoms connected to the bridge atom, hence reducing the problem to - # that of a triple junction. - while len(physical) > 3: - # Pop the first physical atom index from the list. - idx = physical.pop(0) - - # Get the end state bond functions. - angles = mol.property("angle" + suffix) - dihedrals = mol.property("dihedral" + suffix) - - # Initialise containers to store the updated bonded terms. - new_angles = _SireMM.ThreeAtomFunctions(mol.info()) - new_dihedrals = _SireMM.FourAtomFunctions(mol.info()) - - # Angles. 
- for p in angles.potentials(): - idx0 = info.atom_idx(p.atom0()) - idx1 = info.atom_idx(p.atom1()) - idx2 = info.atom_idx(p.atom2()) - - if idx == idx0 and idx2 in ghosts or idx == idx2 and idx0 in ghosts: - _logger.debug( - f" Removing angle: [{idx0.value()}-{idx1.value()}-{idx2.value()}], {p.function()}" - ) - else: - new_angles.set(idx0, idx1, idx2, p.function()) - - # Dihedrals. - for p in dihedrals.potentials(): - idx0 = info.atom_idx(p.atom0()) - idx1 = info.atom_idx(p.atom1()) - idx2 = info.atom_idx(p.atom2()) - idx3 = info.atom_idx(p.atom3()) - idxs = [idx0, idx1, idx2, idx3] - - if idx in idxs and any([x in ghosts for x in idxs]): - _logger.debug( - f" Removing dihedral: [{idx0.value()}-{idx1.value()}-{idx2.value()}-{idx3.value()}], {p.function()}" - ) - else: - new_dihedrals.set(idx0, idx1, idx2, idx3, p.function()) - - # Update the molecule. - mol = mol.edit().set_property("angle" + suffix, new_angles).molecule().commit() - mol = ( - mol.edit() - .set_property("dihedral" + suffix, new_dihedrals) - .molecule() - .commit() - ) - - # Now treat the triple junction. - return _triple( - mol, - bridge, - ghosts, - physical, - k_hard=k_hard, - k_soft=k_soft, - optimise_angles=optimise_angles, - ) - - -def _create_connectivity(mol): - """ - Create a connectivity object for an end state molecule. - - Parameters - ---------- - - mol : sire.mol.Molecule - The molecule at the end state. - - Returns - - connectivity : sire.legacy.Mol.Connectivity - The connectivity object. - """ - - # Create an editable connectivity object. - connectivity = _SireMol.Connectivity(mol.info()).edit() - - # Loop over the bonds in the molecule and connect the atoms. - for bond in mol.bonds(): - connectivity.connect(bond.atom0().index(), bond.atom1().index()) - - # Commit the changes and return the connectivity object. 
- return connectivity.commit() diff --git a/src/somd2/app/_cli.py b/src/somd2/app/_cli.py index 457b0449..5f74c846 100644 --- a/src/somd2/app/_cli.py +++ b/src/somd2/app/_cli.py @@ -23,7 +23,7 @@ SOMD2 command line interface. """ -__all__ = ["somd2", "ghostly"] +__all__ = ["somd2"] def somd2(): @@ -88,77 +88,3 @@ def somd2(): except Exception as e: _logger.error(f"An error occurred during the simulation: {e}") exit(1) - - -def ghostly(): - """ - SOMD2: Command line interface. - """ - - import argparse - import os - import sys - - import sire as sr - - from somd2 import _logger - from somd2._utils._ghosts import boresch - - parser = argparse.ArgumentParser( - description="Ghostly: ghost atom bonded term modifications" - ) - - parser.add_argument( - "input", - type=str, - help="Path to a stream file containing the perturbable system.", - ) - - parser.add_argument( - "--output", - type=str, - help="File prefix for the output file.", - required=False, - ) - - parser.add_argument( - "--log-level", - type=str, - help="Log level for the logger.", - default="info", - choices=["debug", "info", "warning", "error", "critical"], - required=False, - ) - - # Parse the arguments. - args = parser.parse_args() - - # Set the logger level. - _logger.remove() - _logger.add(sys.stderr, level=args.log_level.upper(), enqueue=True) - - # Try to load the system. - try: - system = sr.stream.load(args.input) - system = sr.morph.link_to_reference(system) - except Exception as e: - _logger.error(f"An error occurred while loading the system: {e}") - sys.exit(1) - - # Try to apply the modifications. - try: - system = boresch(system) - except Exception as e: - _logger.error( - f"An error occurred while applying the ghost atom modifications: {e}" - ) - sys.exit(1) - - # Try to save the system. 
- try: - input = os.path.splitext(args.input)[0] - output = args.output if args.output else input + "_ghostly" - sr.stream.save(system, f"{output}.bss") - except Exception as e: - _logger.error(f"An error occurred while saving the system: {e}") - sys.exit(1) diff --git a/src/somd2/runner/_base.py b/src/somd2/runner/_base.py index 6279abec..45b83ed1 100644 --- a/src/somd2/runner/_base.py +++ b/src/somd2/runner/_base.py @@ -184,9 +184,9 @@ def __init__(self, system, config): # Apply Boresch modifications to bonded terms involving ghost atoms to # avoid spurious couplings to the physical system at the end states. elif self._config.ghost_modifications: - from .._utils._ghosts import boresch + from ghostly import modify - self._system = boresch(self._system) + self._system = modify(self._system) # Check for a periodic space. self._has_space = self._check_space() diff --git a/tests/_utils/test_ghosts.py b/tests/_utils/test_ghosts.py deleted file mode 100644 index 8eabfbbe..00000000 --- a/tests/_utils/test_ghosts.py +++ /dev/null @@ -1,324 +0,0 @@ -import sire as sr - -from somd2._utils._ghosts import boresch - - -def test_hexane_to_propane(): - """ - Test ghost atom modifications for hexane to propane. This has a terminal - junction at lambda = 1. - """ - - # Load the system. - mols = sr.load_test_files("hex2prp.s3") - - # Store the orginal angles and dihedrals at lambda = 1. - angles = mols[0].property("angle1") - dihedrals = mols[0].property("dihedral1") - - # Apply the ghost atom modifications. - new_mols = boresch(mols) - - # Get the new angles and dihedrals. - new_angles = new_mols[0].property("angle1") - new_dihedrals = new_mols[0].property("dihedral1") - - # No angles should be removed. - assert angles.num_functions() == new_angles.num_functions() - - # Six dihedrals should be removed. - assert dihedrals.num_functions() - 6 == new_dihedrals.num_functions() - - # Create dihedral IDs for the missing dihedrals. 
- - from sire.legacy.Mol import AtomIdx - - missing_dihedrals = [ - (AtomIdx(4), AtomIdx(3), AtomIdx(2), AtomIdx(11)), - (AtomIdx(4), AtomIdx(3), AtomIdx(2), AtomIdx(12)), - (AtomIdx(11), AtomIdx(2), AtomIdx(3), AtomIdx(13)), - (AtomIdx(11), AtomIdx(2), AtomIdx(3), AtomIdx(14)), - (AtomIdx(12), AtomIdx(2), AtomIdx(3), AtomIdx(14)), - (AtomIdx(12), AtomIdx(2), AtomIdx(3), AtomIdx(13)), - ] - - # Store the molecular info. - info = mols[0].info() - - # Check that the missing dihedrals are in the original dihedrals. - assert ( - all( - check_dihedral(info, dihedrals.potentials(), *dihedral) - for dihedral in missing_dihedrals - ) - == True - ) - - # Check that the missing dihedrals are not the new dihedrals. - assert ( - all( - check_dihedral(info, new_dihedrals.potentials(), *dihedral) - for dihedral in missing_dihedrals - ) - == False - ) - - -def test_toluene_to_pyridine(): - """ - Test ghost atom modifications for toluene to pyridine. This has a dual - junction with a single branch at lambda = 1. - """ - - # Load the system. - mols = sr.load_test_files("tol2pyr.s3") - - # Store the orginal angles and dihedrals at lambda = 1. - angles = mols[0].property("angle1") - dihedrals = mols[0].property("dihedral1") - - # Apply the ghost atom modifications. - new_mols = boresch(mols) - - # Get the new angles and dihedrals. - new_angles = new_mols[0].property("angle1") - new_dihedrals = new_mols[0].property("dihedral1") - - # The number of angles should remain the same. - assert angles.num_functions() == new_angles.num_functions() - - # There should be seven fewer dihedrals. - assert dihedrals.num_functions() - 7 == new_dihedrals.num_functions() - - # Create dihedral IDs for the missing dihedrals. 
- - from sire.legacy.Mol import AtomIdx - - missing_dihedrals = [ - (AtomIdx(0), AtomIdx(1), AtomIdx(2), AtomIdx(3)), - (AtomIdx(0), AtomIdx(1), AtomIdx(2), AtomIdx(10)), - (AtomIdx(0), AtomIdx(1), AtomIdx(6), AtomIdx(5)), - (AtomIdx(0), AtomIdx(1), AtomIdx(6), AtomIdx(14)), - (AtomIdx(6), AtomIdx(1), AtomIdx(0), AtomIdx(7)), - (AtomIdx(6), AtomIdx(1), AtomIdx(0), AtomIdx(8)), - (AtomIdx(6), AtomIdx(1), AtomIdx(0), AtomIdx(9)), - ] - - # Store the molecular info. - info = mols[0].info() - - # Check that the missing dihedrals are in the original dihedrals. - assert ( - all( - check_dihedral(info, dihedrals.potentials(), *dihedral) - for dihedral in missing_dihedrals - ) - == True - ) - - # Check that the missing dihedrals are not in the new dihedrals. - assert ( - all( - check_dihedral(info, new_dihedrals.potentials(), *dihedral) - for dihedral in missing_dihedrals - ) - == False - ) - - # Create a list of angle IDs for the modified angles. - modified_angles = [ - (AtomIdx(0), AtomIdx(1), AtomIdx(2)), - (AtomIdx(0), AtomIdx(1), AtomIdx(6)), - ] - - # Functional form of the modified angles. - expression = "100 [theta - 1.5708]^2" - - # Check that the original angles don't have the modified functional form. - for p in angles.potentials(): - idx0 = info.atom_idx(p.atom0()) - idx1 = info.atom_idx(p.atom1()) - idx2 = info.atom_idx(p.atom2()) - - if (idx0, idx1, idx2) in modified_angles: - assert str(p.function()) != expression - - # Check that the modified angles have the correct functional form. - for p in new_angles.potentials(): - idx0 = info.atom_idx(p.atom0()) - idx1 = info.atom_idx(p.atom1()) - idx2 = info.atom_idx(p.atom2()) - - if (idx0, idx1, idx2) in modified_angles: - assert str(p.function()) == expression - - -def test_acetone_to_propenol(): - """ - Test ghost atom modifications for acetone to propenol. This is a more - complex perturbation with a terminal junction at lambda = 0 and a planar - triple junction at lambda = 1. - """ - - # Load the system. 
- mols = sr.load_test_files("acepol.s3") - - # Store the orginal angles and dihedrals at lambda = 0 and lambda = 1. - angles0 = mols[0].property("angle0") - angles1 = mols[0].property("angle1") - dihedrals0 = mols[0].property("dihedral0") - dihedrals1 = mols[0].property("dihedral1") - - # Apply the ghost atom modifications. - new_mols = boresch(mols) - - # Get the new angles and dihedrals. - new_angles0 = new_mols[0].property("angle0") - new_angles1 = new_mols[0].property("angle1") - new_dihedrals0 = new_mols[0].property("dihedral0") - new_dihedrals1 = new_mols[0].property("dihedral1") - - # The number of angles should remain the same at lambda = 0. - assert angles0.num_functions() == new_angles0.num_functions() - - # The number of dihedrals should be one fewer at lambda = 0. - assert dihedrals0.num_functions() - 1 == new_dihedrals0.num_functions() - - # The number of angles should be one fewer at lambda = 1. - assert angles1.num_functions() - 1 == new_angles1.num_functions() - - # The number of dihedrals should be two fewer at lambda = 1. - assert dihedrals1.num_functions() - 2 == new_dihedrals1.num_functions() - - # Create dihedral IDs for the missing dihedrals at lambda = 0. - - from sire.legacy.Mol import AtomIdx - - missing_dihedrals0 = [ - (AtomIdx(8), AtomIdx(3), AtomIdx(9), AtomIdx(10)), - ] - - # Store the molecular info. - info = mols[0].info() - - # Check that the missing dihedrals are in the original dihedrals at lambda = 0. - assert ( - all( - check_dihedral(info, dihedrals0.potentials(), *dihedral) - for dihedral in missing_dihedrals0 - ) - == True - ) - - # Check that the missing dihedrals are not in the new dihedrals at lambda = 0. - assert ( - all( - check_dihedral(info, new_dihedrals0.potentials(), *dihedral) - for dihedral in missing_dihedrals0 - ) - == False - ) - - # Create dihedral IDs for the missing dihedrals at lambda = 1. 
- missing_dihedrals1 = [ - (AtomIdx(0), AtomIdx(1), AtomIdx(3), AtomIdx(7)), - (AtomIdx(2), AtomIdx(1), AtomIdx(3), AtomIdx(7)), - ] - - # Check that the missing dihedrals are in the original dihedrals at lambda = 1. - assert ( - all( - check_dihedral(info, dihedrals1.potentials(), *dihedral) - for dihedral in missing_dihedrals1 - ) - == True - ) - - # Check that the missing dihedrals are not in the new dihedrals at lambda = 1. - assert ( - all( - check_dihedral(info, new_dihedrals1.potentials(), *dihedral) - for dihedral in missing_dihedrals1 - ) - == False - ) - - # Create angle IDs for the removed angles at lambda = 1. - removed_angles = [ - (AtomIdx(1), AtomIdx(3), AtomIdx(7)), - ] - - # Check that the removed angles are in the original angles at lambda = 1. - assert ( - all(check_angle(info, angles1.potentials(), *angle) for angle in removed_angles) - == True - ) - - # Check that the removed angles are not in the new angles at lambda = 1. - assert ( - all( - check_angle(info, new_angles1.potentials(), *angle) - for angle in removed_angles - ) - == False - ) - - # Create angle IDs for the modified angles at lambda = 1. - modified_angles = [ - (AtomIdx(7), AtomIdx(3), AtomIdx(8)), - (AtomIdx(7), AtomIdx(3), AtomIdx(9)), - ] - - # Functional form of the modified angles. - expression = "100 [theta - 1.5708]^2" - - # Check that the original angles don't have the modified functional form. - for p in angles1.potentials(): - idx0 = info.atom_idx(p.atom0()) - idx1 = info.atom_idx(p.atom1()) - idx2 = info.atom_idx(p.atom2()) - - if (idx0, idx1, idx2) in modified_angles: - assert str(p.function()) != expression - - # Check that the modified angles have the correct functional form. 
- for p in new_angles1.potentials(): - idx0 = info.atom_idx(p.atom0()) - idx1 = info.atom_idx(p.atom1()) - idx2 = info.atom_idx(p.atom2()) - - if (idx0, idx1, idx2) in modified_angles: - assert str(p.function()) == expression - - -def check_angle(info, potentials, idx0, idx1, idx2): - """ - Check if an angle potential is in a list of potentials. - """ - - for p in potentials: - if ( - idx0 == info.atom_idx(p.atom0()) - and idx1 == info.atom_idx(p.atom1()) - and idx2 == info.atom_idx(p.atom2()) - ): - return True - - return False - - -def check_dihedral(info, potentials, idx0, idx1, idx2, idx3): - """ - Check if a dihedral potential is in a list of potentials. - """ - - for p in potentials: - if ( - idx0 == info.atom_idx(p.atom0()) - and idx1 == info.atom_idx(p.atom1()) - and idx2 == info.atom_idx(p.atom2()) - and idx3 == info.atom_idx(p.atom3()) - ): - return True - - return False From 2d16a12032c175faecee1fa4bd3fdce05918a1e8 Mon Sep 17 00:00:00 2001 From: Lester Hedges Date: Tue, 1 Jul 2025 09:46:43 +0100 Subject: [PATCH 116/170] Log ghost modifications. [ci skip] --- src/somd2/runner/_base.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/somd2/runner/_base.py b/src/somd2/runner/_base.py index 45b83ed1..432f6c7b 100644 --- a/src/somd2/runner/_base.py +++ b/src/somd2/runner/_base.py @@ -186,6 +186,7 @@ def __init__(self, system, config): elif self._config.ghost_modifications: from ghostly import modify + _logger.info("Applying Boresch modifications to ghost atom bonded terms") self._system = modify(self._system) # Check for a periodic space. From 3b01e2b83cea46ce5fd53d7140168ec3b9c3891b Mon Sep 17 00:00:00 2001 From: Lester Hedges Date: Wed, 2 Jul 2025 14:42:06 +0100 Subject: [PATCH 117/170] Set pressure based on the presence of solvent. 
[ci skip] --- src/somd2/runner/_base.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/src/somd2/runner/_base.py b/src/somd2/runner/_base.py index 432f6c7b..954e2a30 100644 --- a/src/somd2/runner/_base.py +++ b/src/somd2/runner/_base.py @@ -192,6 +192,14 @@ def __init__(self, system, config): # Check for a periodic space. self._has_space = self._check_space() + # Check for water. + try: + # The search will fail if there are no water molecules. + water = self._system["water"].molecules() + self._has_water = True + except: + self._has_water = False + # Check the end state contraints. self._check_end_state_constraints() @@ -505,7 +513,7 @@ def __init__(self, system, config): self._dynamics_kwargs = { "integrator": config.integrator, "temperature": config.temperature, - "pressure": config.pressure if self._has_space else None, + "pressure": config.pressure if self._has_water else None, "barostat_frequency": config.barostat_frequency, "timestep": config.timestep, "restraints": config.restraints, From 938372fc466e272dec79b5ed5585a384cd38197e Mon Sep 17 00:00:00 2001 From: Lester Hedges Date: Fri, 4 Jul 2025 15:40:22 +0100 Subject: [PATCH 118/170] Timeout must be a float in seconds. [ci skip] --- src/somd2/runner/_base.py | 2 +- src/somd2/runner/_repex.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/somd2/runner/_base.py b/src/somd2/runner/_base.py index 954e2a30..6ee49e07 100644 --- a/src/somd2/runner/_base.py +++ b/src/somd2/runner/_base.py @@ -1324,7 +1324,7 @@ def _checkpoint( # Acquire the file lock to ensure that the checkpoint files are in a consistent # state if read by another process. - with lock.acquire(timeout=self._config.timeout): + with lock.acquire(timeout=self._config.timeout.to("seconds")): # Save the end-state topologies for trajectory analysis and visualisation. 
if block == 0 and index == 0: diff --git a/src/somd2/runner/_repex.py b/src/somd2/runner/_repex.py index 3fccb2da..b8c51640 100644 --- a/src/somd2/runner/_repex.py +++ b/src/somd2/runner/_repex.py @@ -784,7 +784,7 @@ def run(self): # Guard the repex state and transition matrix saving with a file lock. lock = _FileLock(self._lock_file) - with lock.acquire(timeout=self._config.timeout): + with lock.acquire(timeout=self._config.timeout.to("seconds")): # Save the transition matrix. _logger.info("Saving replica exchange transition matrix") self._save_transition_matrix() @@ -798,7 +798,7 @@ def run(self): prod_end = time() lock = _FileLock(self._lock_file) - with lock.acquire(timeout=self._config.timeout): + with lock.acquire(timeout=self._config.timeout.to("seconds")): # Save the final transition matrix. _logger.info("Saving final replica exchange transition matrix") self._save_transition_matrix() From ed9fcb1a07e29e0326b26727e516b650e167f8d1 Mon Sep 17 00:00:00 2001 From: Lester Hedges Date: Wed, 9 Jul 2025 09:35:21 +0100 Subject: [PATCH 119/170] Use no cutoff for vacuum. [ci skip] --- src/somd2/runner/_base.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/somd2/runner/_base.py b/src/somd2/runner/_base.py index 6ee49e07..5cb8f1b1 100644 --- a/src/somd2/runner/_base.py +++ b/src/somd2/runner/_base.py @@ -576,11 +576,11 @@ def _check_space(self): return True else: _logger.info("No periodic space detected. Assuming vacuum simulation.") - if self._config.cutoff_type == "pme": + if self._config.cutoff_type != "none": _logger.info( - "Cannot use PME for non-periodic simulations. Using RF cutoff instead." + "Cannot use PME for non-periodic simulations. Using no cutoff instead." 
) - self._config.cutoff_type = "rf" + self._config.cutoff_type = "none" return False def _check_end_state_constraints(self): From 96060765844d381156ac3b2350b35309a60f26f9 Mon Sep 17 00:00:00 2001 From: Lester Hedges Date: Wed, 9 Jul 2025 11:57:42 +0100 Subject: [PATCH 120/170] Save end-state topologies in base class constructor. [ci skip] --- src/somd2/runner/_base.py | 33 +++++++++++++++++---------------- 1 file changed, 17 insertions(+), 16 deletions(-) diff --git a/src/somd2/runner/_base.py b/src/somd2/runner/_base.py index 5cb8f1b1..7cd4ed4a 100644 --- a/src/somd2/runner/_base.py +++ b/src/somd2/runner/_base.py @@ -508,6 +508,12 @@ def __init__(self, system, config): # Create the lock file name. self._lock_file = str(self._config.output_directory / "somd2.lock") + # Write the end-state topologies to the output directory. + mols0 = _sr.morph.link_to_reference(self._system) + mols1 = _sr.morph.link_to_perturbed(self._system) + _sr.save(mols0, self._filenames["topology0"]) + _sr.save(mols1, self._filenames["topology1"]) + # Create the default dynamics kwargs dictionary. These can be overloaded # as needed. self._dynamics_kwargs = { @@ -1326,24 +1332,19 @@ def _checkpoint( # state if read by another process. with lock.acquire(timeout=self._config.timeout.to("seconds")): - # Save the end-state topologies for trajectory analysis and visualisation. - if block == 0 and index == 0: + # Save the end-state GCMC topologies for trajectory analysis and visualisation. + if self._config.gcmc and block == 0 and index == 0: mols0 = _sr.morph.link_to_reference(system) mols1 = _sr.morph.link_to_perturbed(system) - _sr.save(mols0, self._filenames["topology0"]) - _sr.save(mols1, self._filenames["topology1"]) - - # If this is a GCMC simulation, then save the end state - # topologies to PDB format to allow analysis with grand. 
- if self._config.gcmc: - _sr.save( - mols0, - self._filenames["topology0"].replace(".prm7", ".pdb"), - ) - _sr.save( - mols1, - self._filenames["topology1"].replace(".prm7", ".pdb"), - ) + + _sr.save( + mols0, + self._filenames["topology0"].replace(".prm7", ".pdb"), + ) + _sr.save( + mols1, + self._filenames["topology1"].replace(".prm7", ".pdb"), + ) # Get the lambda value. lam = self._lambda_values[index] From 71e3c5d8fb417008c8a0a74dc13ac4a9f6fc7258 Mon Sep 17 00:00:00 2001 From: Lester Hedges Date: Thu, 10 Jul 2025 20:40:26 +0100 Subject: [PATCH 121/170] Add support for OPC water. [ci skip] --- src/somd2/runner/_base.py | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/src/somd2/runner/_base.py b/src/somd2/runner/_base.py index 7cd4ed4a..f6b6d915 100644 --- a/src/somd2/runner/_base.py +++ b/src/somd2/runner/_base.py @@ -161,10 +161,23 @@ def __init__(self, system, config): # Here we assume TIP3p for any 3-point water model. model = "tip3p" elif num_atoms == 4: - model = "tip4p" + # Check for OPC water. + try: + if ( + waters[0] + .search("element Xx") + .atoms()[0] + .charge() + .value() + < -1.1 + ): + model = "opc" + else: + model = "tip4p" + except: + model = "tip4p" elif num_atoms == 5: model = "tip5p" - try: self._system = _System( _setAmberWater(self._system._system, model) From a36f0dce3b9b370d8bc395a4bf89879593a6ba4b Mon Sep 17 00:00:00 2001 From: Lester Hedges Date: Fri, 15 Aug 2025 14:02:44 +0100 Subject: [PATCH 122/170] Add link to ghostly repo. [ci skip] --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 43f1454a..d31804d2 100644 --- a/README.md +++ b/README.md @@ -213,8 +213,8 @@ somd2 somd1.bss --pert-file somd1.pert --somd1-compatibility (This only shows the limited options required. Others will take default values and can be set accordingly.) 
If you want to load an existing system from a perturbation file and use the -new `somd2` ghost atom bonded-term modifications, then simply omit the -`--somd1-compatibility` option. +new `somd2` [ghost atom bonded-term modifications](https://github.com/OpenBioSim/ghostly), +then simply omit the `--somd1-compatibility` option. ## GPU oversubscription From 6731da8467d80fc237a529e433b63290dc469ae3 Mon Sep 17 00:00:00 2001 From: Lester Hedges Date: Fri, 29 Aug 2025 14:45:53 +0100 Subject: [PATCH 123/170] Use spawn start method for multiprocessing processes. [closes #72] --- README.md | 7 +++++++ src/somd2/runner/_runner.py | 5 ++++- 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index d31804d2..a6d53c9e 100644 --- a/README.md +++ b/README.md @@ -232,3 +232,10 @@ The number of contexts that can be run in parallel is then controlled by the More details on MPS, including tuning options, can be found in the following [techical blog](https://developer.nvidia.com/blog/maximizing-openmm-molecular-dynamics-throughput-with-nvidia-multi-process-service/). + +## Python API + +``SOMD2`` can also be used as a Python API, allowing it to be embedded +within other Python scripts. When doing so, it is important to wrap +code within an ``if __name__ == "__main__":`` block since multiprocessing +is used with the ``spawn`` start method. 
diff --git a/src/somd2/runner/_runner.py b/src/somd2/runner/_runner.py index 8f4676ad..ada8d1bf 100644 --- a/src/somd2/runner/_runner.py +++ b/src/somd2/runner/_runner.py @@ -198,9 +198,12 @@ def run(self): self._max_workers = 1 import concurrent.futures as _futures + import multiprocessing as _mp success = True - with _futures.ProcessPoolExecutor(max_workers=self.max_workers) as executor: + with _futures.ProcessPoolExecutor( + max_workers=self.max_workers, mp_context=_mp.get_context("spawn") + ) as executor: jobs = {} for index, lambda_value in enumerate(self._lambda_values): jobs[executor.submit(self.run_window, index)] = lambda_value From 1eef48b7b9f2e86a0a5495e11ab43681fd03e8a0 Mon Sep 17 00:00:00 2001 From: Lester Hedges Date: Wed, 3 Sep 2025 15:46:23 +0100 Subject: [PATCH 124/170] Patch in fix from #73. [ci skip] --- src/somd2/runner/_runner.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/src/somd2/runner/_runner.py b/src/somd2/runner/_runner.py index ada8d1bf..6b2254cb 100644 --- a/src/somd2/runner/_runner.py +++ b/src/somd2/runner/_runner.py @@ -882,7 +882,15 @@ def generate_lam_vals(lambda_base, increment=0.001): speed = time.to("ns") / days # Checkpoint. - self._checkpoint(system, index, 0, speed, is_final_block=True) + self._checkpoint( + system, + index, + 0, + speed, + is_final_block=True, + lambda_energy=lambda_energy, + lambda_grad=lambda_grad, + ) _logger.success( f"{_lam_sym} = {lambda_value:.5f} complete, speed = {speed:.2f} ns day-1" From 97556ac57231556e42e3753f739f118567242bef Mon Sep 17 00:00:00 2001 From: Lester Hedges Date: Thu, 4 Sep 2025 13:38:37 +0100 Subject: [PATCH 125/170] Remove checkpoint_frequency from allowed_diffs. 
[ref #74] --- src/somd2/runner/_base.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/somd2/runner/_base.py b/src/somd2/runner/_base.py index f6b6d915..00769e0c 100644 --- a/src/somd2/runner/_base.py +++ b/src/somd2/runner/_base.py @@ -1026,7 +1026,6 @@ def _compare_configs(config1, config2): "save_trajectory", "frame_frequency", "save_velocities", - "checkpoint_frequency", "platform", "max_threads", "max_gpus", From 6044747cf04ee3f819f26375b672d40ff6ffa0d2 Mon Sep 17 00:00:00 2001 From: Lester Hedges Date: Thu, 4 Sep 2025 13:42:35 +0100 Subject: [PATCH 126/170] Use warning level. --- src/somd2/config/_config.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/somd2/config/_config.py b/src/somd2/config/_config.py index 9bfb76e2..83cc451a 100644 --- a/src/somd2/config/_config.py +++ b/src/somd2/config/_config.py @@ -1240,7 +1240,7 @@ def checkpoint_frequency(self, checkpoint_frequency): "Checkpoint frequency is low. Should be greater min(energy_frequency, frame_frequency)" ) if t.value() > self._runtime.value(): - _logger.debug( + _logger.warning( "Checkpoint frequency < runtime, checkpointing will not occur before runtime is reached." ) t = _sr.u("0ps") From 7be15915220d517f231387f622d49d0a36a5c1dc Mon Sep 17 00:00:00 2001 From: Lester Hedges Date: Thu, 4 Sep 2025 15:20:08 +0100 Subject: [PATCH 127/170] Set start_block to zero when not checkpointing. [ref #74] --- src/somd2/runner/_repex.py | 9 ++++++--- src/somd2/runner/_runner.py | 9 ++++++--- 2 files changed, 12 insertions(+), 6 deletions(-) diff --git a/src/somd2/runner/_repex.py b/src/somd2/runner/_repex.py index b8c51640..d0910d04 100644 --- a/src/somd2/runner/_repex.py +++ b/src/somd2/runner/_repex.py @@ -564,9 +564,12 @@ def __init__(self, system, config): self._config.runtime = str(self._config.runtime - time) # Work out the current block number. 
- self._start_block = int( - round(time.value() / self._config.checkpoint_frequency.value(), 12) - ) + if self._config.checkpoint_frequency.value() > 0.0: + self._start_block = int( + round(time.value() / self._config.checkpoint_frequency.value(), 12) + ) + else: + self._start_block = 0 else: self._start_block = 0 diff --git a/src/somd2/runner/_runner.py b/src/somd2/runner/_runner.py index 6b2254cb..66f18ad1 100644 --- a/src/somd2/runner/_runner.py +++ b/src/somd2/runner/_runner.py @@ -376,9 +376,12 @@ def _run( return _sr.u("0ps") # Work out the current block number. - self._start_block = int( - round(time.value() / self._config.checkpoint_frequency.value(), 12) - ) + if self._config.checkpoint_frequency.value() > 0.0: + self._start_block = int( + round(time.value() / self._config.checkpoint_frequency.value(), 12) + ) + else: + self._start_block = 0 # Subtract the current time from the runtime. time = self._config.runtime - time From ef36f2a8998def41e90be7d0241a104753491701 Mon Sep 17 00:00:00 2001 From: Lester Hedges Date: Thu, 4 Sep 2025 16:03:58 +0100 Subject: [PATCH 128/170] Treat restarts for lambda windows independently. [ref #74] --- src/somd2/runner/_base.py | 91 +++++++++++++++++++++++++------------ src/somd2/runner/_repex.py | 4 +- src/somd2/runner/_runner.py | 4 +- 3 files changed, 67 insertions(+), 32 deletions(-) diff --git a/src/somd2/runner/_base.py b/src/somd2/runner/_base.py index 00769e0c..9ea4ffcc 100644 --- a/src/somd2/runner/_base.py +++ b/src/somd2/runner/_base.py @@ -389,6 +389,9 @@ def __init__(self, system, config): # Check the output directories and create names of output files. self._filenames = self._prepare_output() + # Store the current system as a reference. + self._reference_system = self._system.clone() + # Check for a valid restart. if self._config.restart: self._is_restart, self._system = self._check_restart() @@ -514,16 +517,41 @@ def __init__(self, system, config): # Store the initial system time. 
if isinstance(self._system, list): - self._initial_time = self._system[0].time() + self._initial_time = [] + for system in self._system: + if system is None: + self._initial_time.append(_sr.u("0 ps")) + else: + self._initial_time.append(system.time()) else: - self._initial_time = self._system.time() + self._initial_time = [self._system.time()] * len(self._lambda_values) + + # Check for missing systems in a multi-system simulation. + if isinstance(self._system, list): + ref_system = None + missing_systems = [] + for i, system in enumerate(self._system): + if system is not None: + ref_system = None + else: + missing_systems.append(i) + if ref_system is None: + ref_system = self._reference_system + + # Fill in any missing systems. + for i in missing_systems: + self._system[i] = ref_system.clone() # Create the lock file name. self._lock_file = str(self._config.output_directory / "somd2.lock") # Write the end-state topologies to the output directory. - mols0 = _sr.morph.link_to_reference(self._system) - mols1 = _sr.morph.link_to_perturbed(self._system) + if isinstance(self._system, list): + mols = self._system[0] + else: + mols = self._system + mols0 = _sr.morph.link_to_reference(mols) + mols1 = _sr.morph.link_to_perturbed(mols) _sr.save(mols0, self._filenames["topology0"]) _sr.save(mols1, self._filenames["topology1"]) @@ -937,17 +965,22 @@ def _check_restart(self): """ # List to store systems for each lambda value. - systems = [] + systems = [None] * len(self._lambda_values) for i, lambda_value in enumerate(self._lambda_values): # Try to load the checkpoint file. try: system = _sr.stream.load(self._filenames[i]["checkpoint"]) except: - _logger.warning( - f"Unable to load checkpoint file for {_lam_sym}={lambda_value:.5f}, starting from scratch." - ) - return False, self._system + if not self._config.replica_exchange: + _logger.warning( + f"Unable to load checkpoint file for {_lam_sym}={lambda_value:.5f}, starting from scratch." 
+ ) + # Repex requires all files to be present. + else: + msg = f"Unable to load checkpoint file for {_lam_sym}={lambda_value:.5f}." + _logger.error(msg) + raise ValueError(msg) else: # Check the system is the same as the reference system. are_same, reason = self._systems_are_same(self._system, system) @@ -960,15 +993,14 @@ def _check_restart(self): self._compare_configs( self._last_config, dict(system.property("config")) ) - except: + except Exception as e: config = dict(system.property("config")) _logger.debug( f"last config: {self._last_config}, current config: {config}" ) - _logger.error( - f"Config for {_lam_sym}={lambda_value} does not match previous config." - ) - raise + msg = f"Config for {_lam_sym}={lambda_value} does not match previous config: {str(e)}" + _logger.error(msg) + raise ValueError(msg) # Make sure the lambda value is consistent. else: lambda_restart = system.property("lambda") @@ -976,27 +1008,30 @@ def _check_restart(self): lambda_restart == lambda_value except: filename = self._filenames[i]["checkpoint"] - raise ValueError( - f"Lambda value from checkpoint file {filename} for {lambda_restart} " - f"does not match expected value {lambda_value}." + msg = ( + f"Lambda value from checkpoint file {filename} for {_lam_sym}={lambda_restart} " + f"does not match expected value {_lam_sym}={lambda_value}." ) + _logger.error(msg) + raise ValueError(msg) - # Append the system to the list. - systems.append(_sr.morph.link_to_reference(system)) + # Store the system to the list. + systems[i] = _sr.morph.link_to_perturbed(system) # If this is a GCMC simulation, then remove all ghost waters from each of the systems. if self._config.gcmc: _logger.info("Removing existing ghost waters from GCMC checkpoint systems") for i, system in enumerate(systems): - # Remove the ghost waters from the system. 
- try: - for mol in system["property is_ghost_water"].molecules(): - _logger.debug( - f"Removing ghost water molecule {mol.number()} for {_lam_sym}={self._lambda_values[i]:.5f}" - ) - system.remove(mol) - except: - pass + if system is not None: + # Remove the ghost waters from the system. + try: + for mol in system["property is_ghost_water"].molecules(): + _logger.debug( + f"Removing ghost water molecule {mol.number()} for {_lam_sym}={self._lambda_values[i]:.5f}" + ) + system.remove(mol) + except: + pass return True, systems diff --git a/src/somd2/runner/_repex.py b/src/somd2/runner/_repex.py index d0910d04..178d4260 100644 --- a/src/somd2/runner/_repex.py +++ b/src/somd2/runner/_repex.py @@ -1066,8 +1066,8 @@ def _equilibrate(self, index): system = dynamics.commit() # Reset the timer. - if self._initial_time.value() != 0: - system.set_time(self._initial_time) + if self._initial_time[i].value() != 0: + system.set_time(self._initial_time[i]) # Delete the dynamics object. self._dynamics_cache.delete(index) diff --git a/src/somd2/runner/_runner.py b/src/somd2/runner/_runner.py index 66f18ad1..b12ae785 100644 --- a/src/somd2/runner/_runner.py +++ b/src/somd2/runner/_runner.py @@ -501,8 +501,8 @@ def generate_lam_vals(lambda_base, increment=0.001): system = dynamics.commit() # Reset the timer. - if self._initial_time.value() != 0: - system.set_time(self._initial_time) + if self._initial_time[i].value() != 0: + system.set_time(self._initial_time[i]) # Perform minimisation at the end of equilibration only if the # timestep is increasing, or the constraint is changing. From 1274802d334da3fb2170414a6f6726649ac0a63f Mon Sep 17 00:00:00 2001 From: Lester Hedges Date: Mon, 8 Sep 2025 12:47:37 +0100 Subject: [PATCH 129/170] Handle time rounding issues. 
[ci skip] [ref #74] --- src/somd2/runner/_repex.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/src/somd2/runner/_repex.py b/src/somd2/runner/_repex.py index 178d4260..0821249d 100644 --- a/src/somd2/runner/_repex.py +++ b/src/somd2/runner/_repex.py @@ -601,7 +601,14 @@ def run(self): start = time() # Work out the number of repex cycles. - cycles = ceil(self._config.runtime / self._config.energy_frequency) + cycles = (self._config.runtime / self._config.energy_frequency).value() + + # Handle rounding errors to to internal time representation. + if abs(cycles - round(cycles)) < 1e-6: + cycles = int(round(cycles)) + # Round up to ensure we run at least the requested time. + else: + cycles = int(ceil(cycles)) if self._config.checkpoint_frequency.value() > 0.0: # Calculate the number of blocks and the remainder time. @@ -613,7 +620,7 @@ def run(self): self._config.checkpoint_frequency = str(self._config.runtime) num_blocks = int(frac) - rem = frac - num_blocks + rem = round(frac - num_blocks, 12) # Work out the number of repex cycles per block. frac = ( From cc80924e7f29eb0574c475f8c7fc664d9c94c18a Mon Sep 17 00:00:00 2001 From: Lester Hedges Date: Mon, 22 Sep 2025 14:08:30 +0100 Subject: [PATCH 130/170] Fix start time index. [ci skip] --- src/somd2/runner/_repex.py | 4 ++-- src/somd2/runner/_runner.py | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/somd2/runner/_repex.py b/src/somd2/runner/_repex.py index 0821249d..0e1b95d3 100644 --- a/src/somd2/runner/_repex.py +++ b/src/somd2/runner/_repex.py @@ -1073,8 +1073,8 @@ def _equilibrate(self, index): system = dynamics.commit() # Reset the timer. - if self._initial_time[i].value() != 0: - system.set_time(self._initial_time[i]) + if self._initial_time[index].value() != 0: + system.set_time(self._initial_time[index]) # Delete the dynamics object. 
self._dynamics_cache.delete(index) diff --git a/src/somd2/runner/_runner.py b/src/somd2/runner/_runner.py index b12ae785..f7cea6ed 100644 --- a/src/somd2/runner/_runner.py +++ b/src/somd2/runner/_runner.py @@ -501,8 +501,8 @@ def generate_lam_vals(lambda_base, increment=0.001): system = dynamics.commit() # Reset the timer. - if self._initial_time[i].value() != 0: - system.set_time(self._initial_time[i]) + if self._initial_time[index].value() != 0: + system.set_time(self._initial_time[index]) # Perform minimisation at the end of equilibration only if the # timestep is increasing, or the constraint is changing. From f9f0d199613ae51e95fbe9891a7f9d1f590cf8af Mon Sep 17 00:00:00 2001 From: Lester Hedges Date: Tue, 23 Sep 2025 15:09:06 +0100 Subject: [PATCH 131/170] Typos. [ci skip] --- README.md | 2 +- src/somd2/runner/_base.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index a6d53c9e..3f2361e2 100644 --- a/README.md +++ b/README.md @@ -133,7 +133,7 @@ somd2 --help | grep -A2 ' --gcmc' Simulation output will be written to the directory specified using the `--output-directory` parameter. This will contain a number of files, including -[Parquet filesa](https://en.wikipedia.org/wiki/Apache_Parquet) for the energy +[Parquet files](https://en.wikipedia.org/wiki/Apache_Parquet) for the energy trajectories of each λ window. These can be processed using [BioSimSpace](https://github.com/OpenBioSim/biosimspace) as follows: diff --git a/src/somd2/runner/_base.py b/src/somd2/runner/_base.py index 9ea4ffcc..5999c5a0 100644 --- a/src/somd2/runner/_base.py +++ b/src/somd2/runner/_base.py @@ -158,7 +158,7 @@ def __init__(self, system, config): num_atoms = waters[0].num_atoms() if num_atoms == 3: - # Here we assume TIP3p for any 3-point water model. + # Here we assume TIP3P for any 3-point water model. model = "tip3p" elif num_atoms == 4: # Check for OPC water. 
From 1037a7076e654906d495dc5e5114600c8c26dbb3 Mon Sep 17 00:00:00 2001 From: Lester Hedges Date: Wed, 24 Sep 2025 09:32:51 +0100 Subject: [PATCH 132/170] Save GCMC system topology to AMBER and PDB format. [ci skip] --- src/somd2/runner/_base.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/somd2/runner/_base.py b/src/somd2/runner/_base.py index 5999c5a0..b62541d0 100644 --- a/src/somd2/runner/_base.py +++ b/src/somd2/runner/_base.py @@ -1384,6 +1384,11 @@ def _checkpoint( mols0 = _sr.morph.link_to_reference(system) mols1 = _sr.morph.link_to_perturbed(system) + # Save to AMBER format. + _sr.save(mols0, self._filenames["topology0"]) + _sr.save(mols1, self._filenames["topology1"]) + + # Save to PDB format. _sr.save( mols0, self._filenames["topology0"].replace(".prm7", ".pdb"), From 21d93e2fd0ff4e32c85f4523d0eae69a192f47bb Mon Sep 17 00:00:00 2001 From: Lester Hedges Date: Thu, 25 Sep 2025 15:54:58 +0100 Subject: [PATCH 133/170] Create backup files to aid restarts following a crash. [ci skip] --- src/somd2/runner/_base.py | 55 +++++++++++++++++++++++++++++++++++-- src/somd2/runner/_repex.py | 25 +++++++++++++++++ src/somd2/runner/_runner.py | 3 ++ 3 files changed, 81 insertions(+), 2 deletions(-) diff --git a/src/somd2/runner/_base.py b/src/somd2/runner/_base.py index b62541d0..ccb9d771 100644 --- a/src/somd2/runner/_base.py +++ b/src/somd2/runner/_base.py @@ -382,7 +382,7 @@ def __init__(self, system, config): # Flag whether this is a GPU simulation. self._is_gpu = self._config.platform in ["cuda", "opencl", "hip"] - # Need to verify before doing any directory checks + # Need to verify before doing any directory checks. 
if self._config.restart: self._verify_restart_config() @@ -397,8 +397,9 @@ def __init__(self, system, config): self._is_restart, self._system = self._check_restart() else: self._is_restart = False + self._cleanup() - # Save config whenever 'configure' is called to keep it up to date + # Save config whenever 'configure' is called to keep it up to date. if self._config.write_config: _dict_to_yaml( self._config.as_dict(), @@ -1370,6 +1371,7 @@ def _checkpoint( """ from filelock import FileLock as _FileLock + from shutil import copyfile as _copyfile from somd2 import __version__, _sire_version, _sire_revisionid # Create the lock. @@ -1476,9 +1478,25 @@ def _checkpoint( ) system.set_property("lambda", lam) + # Backup the existing checkpoint file, if it exists. + path = _Path(self._filenames[index]["checkpoint"]) + if path.exists() and path.stat().st_size > 0: + _copyfile( + self._filenames[index]["checkpoint"], + str(self._filenames[index]["checkpoint"]) + ".bak", + ) + # Stream the final system to file. _sr.stream.save(system, self._filenames[index]["checkpoint"]) + # Backup the existing energy trajectory file, if it exists. + path = _Path(self._filenames[index]["energy_traj"]) + if path.exists() and path.stat().st_size > 0: + _copyfile( + self._filenames[index]["energy_traj"], + str(self._filenames[index]["energy_traj"]) + ".bak", + ) + # Create the final parquet file. _dataframe_to_parquet( df, @@ -1509,6 +1527,14 @@ def _checkpoint( ) system.set_property("lambda", lam) + # Backup the existing checkpoint file, if it exists. + path = _Path(self._filenames[index]["checkpoint"]) + if path.exists() and path.stat().st_size > 0: + _copyfile( + self._filenames[index]["checkpoint"], + str(self._filenames[index]["checkpoint"]) + ".bak", + ) + # Stream the checkpoint to file. _sr.stream.save(system, self._filenames[index]["checkpoint"]) @@ -1520,6 +1546,14 @@ def _checkpoint( _dataframe_to_parquet(df, metadata=metadata, filename=filename) # Append to the parquet file. 
else: + # Backup the existing energy trajectory file, if it exists. + path = _Path(self._filenames[index]["energy_traj"]) + if path.exists() and path.stat().st_size > 0: + _copyfile( + self._filenames[index]["energy_traj"], + str(self._filenames[index]["energy_traj"]) + ".bak", + ) + _parquet_append( filename, df.iloc[-self._energy_per_block :], @@ -1575,3 +1609,20 @@ def _save_energy_components(self, index, context): # Increment the sample number. self._nrg_sample += 1 + + def _cleanup(self): + """ + Clean up backup files from the working directory. + """ + + from glob import glob as _glob + + # Find all files with a .bak extension in the working directory. + backup_files = _glob(str(self._config.output_directory / "*.bak")) + + for file in backup_files: + path = _Path(file) + try: + path.unlink() + except Exception as e: + _logger.warning(f"Unable to delete backup file {file}: {str(e)}") diff --git a/src/somd2/runner/_repex.py b/src/somd2/runner/_repex.py index 0e1b95d3..00b04cad 100644 --- a/src/somd2/runner/_repex.py +++ b/src/somd2/runner/_repex.py @@ -23,6 +23,7 @@ from filelock import FileLock as _FileLock from numba import njit as _njit +from shutil import copyfile as _copyfile import numpy as _np import pickle as _pickle @@ -799,6 +800,13 @@ def run(self): _logger.info("Saving replica exchange transition matrix") self._save_transition_matrix() + # Backup the dynamics cache pickle file, if it exists. + if self._repex_state.exists(): + _copyfile( + self._repex_state, + self._repex_state.with_suffix(".pkl.bak"), + ) + # Pickle the dynamics cache. _logger.info("Saving replica exchange state") with open(self._repex_state, "wb") as f: @@ -813,6 +821,13 @@ def run(self): _logger.info("Saving final replica exchange transition matrix") self._save_transition_matrix() + # Backup the dynamics cache pickle file, if it exists. 
+ if self._repex_state.exists(): + _copyfile( + self._repex_state, + self._repex_state.with_suffix(".pkl.bak"), + ) + # Pickle final state of the dynamics cache. _logger.info("Saving final replica exchange state") with open(self._repex_state, "wb") as f: @@ -835,6 +850,9 @@ def run(self): f"Simulation finished. Run time: {(end - start) / 60:.2f} minutes" ) + # Delete all backup files from the working directory. + self._cleanup() + def _run_block( self, index, @@ -1367,6 +1385,13 @@ def _save_transition_matrix(self): else: t[i_state, i_state] = 1.0 + # Backup the existing transition matrix, if it exists. + if self._repex_matrix.exists(): + _copyfile( + self._repex_matrix, + self._repex_matrix.with_suffix(".txt.bak"), + ) + # Save the replica exchange swap acceptance matrix. _np.savetxt( self._repex_matrix, diff --git a/src/somd2/runner/_runner.py b/src/somd2/runner/_runner.py index f7cea6ed..bd403ffa 100644 --- a/src/somd2/runner/_runner.py +++ b/src/somd2/runner/_runner.py @@ -243,6 +243,9 @@ def run(self): f"Simulation finished. Run time: {(end - start) / 60:.2f} minutes" ) + # Cleanup backup files. + self._cleanup() + def run_window(self, index): """ Run a single lamdba window. From c979eec080e351cfcb6ae90c3b0417ae8cea0cd4 Mon Sep 17 00:00:00 2001 From: Lester Hedges Date: Thu, 25 Sep 2025 16:02:25 +0100 Subject: [PATCH 134/170] Add option to restart from backup files. 
[ci skip] --- src/somd2/config/_config.py | 18 ++++++++++++++++++ src/somd2/runner/_base.py | 24 ++++++++++++++++++++++++ 2 files changed, 42 insertions(+) diff --git a/src/somd2/config/_config.py b/src/somd2/config/_config.py index 83cc451a..a23b5a27 100644 --- a/src/somd2/config/_config.py +++ b/src/somd2/config/_config.py @@ -137,6 +137,7 @@ def __init__( rest2_selection=None, output_directory="output", restart=False, + use_backup=False, write_config=True, overwrite=False, somd1_compatibility=False, @@ -364,6 +365,12 @@ def __init__( restart: bool Whether to restart from a previous simulation using files found in 'output-directory'. + use_backup: bool + Whether to use backup files when restarting a simulation. If True, then + files from the last but one checkpoint will be used, rather than the most + recent checkpoint files. This can be useful if the most recent checkpoint + files are corrupted, or incomplete, e.g. you are recovering from a crash. + write_config: bool Whether to write the configuration options to a YAML file in the output directory. 
@@ -460,6 +467,7 @@ def __init__( self.rest2_scale = rest2_scale self.rest2_selection = rest2_selection self.restart = restart + self.use_backup = use_backup self.somd1_compatibility = somd1_compatibility self.pert_file = pert_file self.save_energy_components = save_energy_components @@ -1555,6 +1563,16 @@ def restart(self, restart): raise ValueError("'restart' must be of type 'bool'") self._restart = restart + @property + def use_backup(self): + return self._use_backup + + @use_backup.setter + def use_backup(self, use_backup): + if not isinstance(use_backup, bool): + raise ValueError("'use_backup' must be of type 'bool'") + self._use_backup = use_backup + @property def somd1_compatibility(self): return self._somd1_compatibility diff --git a/src/somd2/runner/_base.py b/src/somd2/runner/_base.py index ccb9d771..7d9c457b 100644 --- a/src/somd2/runner/_base.py +++ b/src/somd2/runner/_base.py @@ -394,6 +394,8 @@ def __init__(self, system, config): # Check for a valid restart. if self._config.restart: + if self._config.use_backup: + self._restore_backup_files() self._is_restart, self._system = self._check_restart() else: self._is_restart = False @@ -1610,6 +1612,28 @@ def _save_energy_components(self, index, context): # Increment the sample number. self._nrg_sample += 1 + def _restore_backup_files(self): + """ + Restore backup files in the working directory. + """ + + from glob import glob as _glob + from shutil import copyfile as _copyfile + + # Find all files with a .bak extension in the working directory. + backup_files = _glob(str(self._config.output_directory / "*.bak")) + + # Strip the .bak extension and copy to the original file name. + for file in backup_files: + path = _Path(file) + new_path = _Path(str(path)[:-4]) + try: + _copyfile(file, new_path) + except Exception as e: + msg = f"Unable to restore backup file {file}: {str(e)}" + _logger.error(msg) + raise IOError(msg) + def _cleanup(self): """ Clean up backup files from the working directory. 
From 12cffec210df480d10dafb66c5ead307fc9f155d Mon Sep 17 00:00:00 2001 From: Lester Hedges Date: Fri, 26 Sep 2025 15:08:36 +0100 Subject: [PATCH 135/170] Adjust default HMR factor. [ci skip] --- src/somd2/config/_config.py | 2 +- src/somd2/runner/_base.py | 7 +++++++ 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/src/somd2/config/_config.py b/src/somd2/config/_config.py index a23b5a27..5242a36a 100644 --- a/src/somd2/config/_config.py +++ b/src/somd2/config/_config.py @@ -91,7 +91,7 @@ def __init__( integrator="langevin_middle", cutoff_type="pme", cutoff="7.5 A", - h_mass_factor=1.5, + h_mass_factor=3, hmr=True, num_lambda=11, lambda_values=None, diff --git a/src/somd2/runner/_base.py b/src/somd2/runner/_base.py index 7d9c457b..a8c6d44f 100644 --- a/src/somd2/runner/_base.py +++ b/src/somd2/runner/_base.py @@ -326,6 +326,13 @@ def __init__(self, system, config): # Work out the current hydrogen mass factor. factor_non_water, factor_water = self._get_h_mass_factor(self._system) + # If using SOMD1 compatibility, then adjust the default value. + if self._config.somd1_compatibility and self._config.h_mass_factor == 3.0: + self._config.h_mass_factor = 1.5 + _logger.info( + "Using hydrogen mass repartitioning factor of 1.5 for SOMD1 compatibility." + ) + # We don't support repartiioning water molecules, so check those first. if factor_water is not None: if not isclose(factor_water, 1.0, abs_tol=1e-4): From 20f9e5b5a7ed9a6a4b57bcc59b6df2188e3383ac Mon Sep 17 00:00:00 2001 From: Lester Hedges Date: Fri, 26 Sep 2025 20:53:02 +0100 Subject: [PATCH 136/170] Add option to restrict workers for checkpointing. 
[ci skip] --- src/somd2/config/_config.py | 25 +++++++++++++++++++++++++ src/somd2/runner/_repex.py | 15 +++++++++++++-- 2 files changed, 38 insertions(+), 2 deletions(-) diff --git a/src/somd2/config/_config.py b/src/somd2/config/_config.py index 5242a36a..516effb2 100644 --- a/src/somd2/config/_config.py +++ b/src/somd2/config/_config.py @@ -119,6 +119,7 @@ def __init__( frame_frequency="20 ps", save_velocities=False, checkpoint_frequency="100 ps", + num_checkpoint_workers=None, num_energy_neighbours=None, null_energy="10000 kcal/mol", platform="auto", @@ -292,6 +293,14 @@ def __init__( min(energy_frequency, frame_frequency). If zero, then no checkpointing will be performed. + num_checkpoint_workers: int + The number of parallel workers to use when checkpointing during a replica + exchange simulation. By default, this is set to the number of concurrent + GPU contexts, i.e. the number of GPUs multiplied by the oversubscription + factor. The option can be used to reduce the number of workers, which + can be useful when the system size is large, i.e. when many large + trajectory files could be written simultaneously. + platform: str Platform to run simulation on. 
@@ -452,6 +461,7 @@ def __init__( self.frame_frequency = frame_frequency self.save_velocities = save_velocities self.checkpoint_frequency = checkpoint_frequency + self.num_checkpoint_workers = num_checkpoint_workers self.platform = platform self.max_threads = max_threads self.max_gpus = max_gpus @@ -1254,6 +1264,21 @@ def checkpoint_frequency(self, checkpoint_frequency): t = _sr.u("0ps") self._checkpoint_frequency = t + @property + def num_checkpoint_workers(self): + return self._num_checkpoint_workers + + @num_checkpoint_workers.setter + def num_checkpoint_workers(self, num_checkpoint_workers): + if num_checkpoint_workers is not None: + if not isinstance(num_checkpoint_workers, int): + try: + num_checkpoint_workers = int(num_checkpoint_workers) + except: + raise ValueError("'num_checkpoint_workers' must be of type 'int'") + if num_checkpoint_workers < 1: + raise ValueError("'num_checkpoint_workers' must be greater than 0") + @property def platform(self): return self._platform diff --git a/src/somd2/runner/_repex.py b/src/somd2/runner/_repex.py index 00b04cad..38785931 100644 --- a/src/somd2/runner/_repex.py +++ b/src/somd2/runner/_repex.py @@ -646,8 +646,17 @@ def run(self): # Store the number of concurrent workers. num_workers = self._num_gpus * self._config.oversubscription_factor + # Store the number of workers to use for checkpointing. + if self._config.num_checkpoint_workers is None: + num_checkpoint_workers = num_workers + else: + num_checkpoint_workers = min( + self._config.num_checkpoint_workers, num_workers + ) + # Work out the required number of batches. num_batches = ceil(self._config.num_lambda / num_workers) + num_checkpoint_batches = ceil(self._config.num_lambda / num_checkpoint_workers) # Create the replica list. replica_list = list(range(self._config.num_lambda)) @@ -749,9 +758,11 @@ def run(self): # Checkpoint. 
if is_checkpoint or i == cycles - 1: - for j in range(num_batches): + for j in range(num_checkpoint_batches): # Get the indices of the replicas in this batch. - replicas = replica_list[j * num_workers : (j + 1) * num_workers] + replicas = replica_list[ + j * num_checkpoint_workers : (j + 1) * num_checkpoint_workers + ] with ThreadPoolExecutor(max_workers=num_workers) as executor: try: for result, error in executor.map( From 573ca624390d0e6a8f5a3d1b1255754955da3842 Mon Sep 17 00:00:00 2001 From: Lester Hedges Date: Mon, 29 Sep 2025 09:28:36 +0100 Subject: [PATCH 137/170] Move lock file operations outside of checkpoint function. [ci skip] --- src/somd2/config/_config.py | 1 + src/somd2/runner/_base.py | 292 +++++++++++++++++------------------- src/somd2/runner/_repex.py | 55 ++++--- src/somd2/runner/_runner.py | 51 ++++--- 4 files changed, 204 insertions(+), 195 deletions(-) diff --git a/src/somd2/config/_config.py b/src/somd2/config/_config.py index 516effb2..52e221e7 100644 --- a/src/somd2/config/_config.py +++ b/src/somd2/config/_config.py @@ -1278,6 +1278,7 @@ def num_checkpoint_workers(self, num_checkpoint_workers): raise ValueError("'num_checkpoint_workers' must be of type 'int'") if num_checkpoint_workers < 1: raise ValueError("'num_checkpoint_workers' must be greater than 0") + self._num_checkpoint_workers = num_checkpoint_workers @property def platform(self): diff --git a/src/somd2/runner/_base.py b/src/somd2/runner/_base.py index a8c6d44f..99d4fbc2 100644 --- a/src/somd2/runner/_base.py +++ b/src/somd2/runner/_base.py @@ -1379,125 +1379,170 @@ def _checkpoint( Whether this is the final block of the simulation. """ - from filelock import FileLock as _FileLock from shutil import copyfile as _copyfile from somd2 import __version__, _sire_version, _sire_revisionid - # Create the lock. - lock = _FileLock(self._lock_file) + # Save the end-state GCMC topologies for trajectory analysis and visualisation. 
+ if self._config.gcmc and block == 0 and index == 0: + mols0 = _sr.morph.link_to_reference(system) + mols1 = _sr.morph.link_to_perturbed(system) - # Acquire the file lock to ensure that the checkpoint files are in a consistent - # state if read by another process. - with lock.acquire(timeout=self._config.timeout.to("seconds")): + # Save to AMBER format. + _sr.save(mols0, self._filenames["topology0"]) + _sr.save(mols1, self._filenames["topology1"]) - # Save the end-state GCMC topologies for trajectory analysis and visualisation. - if self._config.gcmc and block == 0 and index == 0: - mols0 = _sr.morph.link_to_reference(system) - mols1 = _sr.morph.link_to_perturbed(system) + # Save to PDB format. + _sr.save( + mols0, + self._filenames["topology0"].replace(".prm7", ".pdb"), + ) + _sr.save( + mols1, + self._filenames["topology1"].replace(".prm7", ".pdb"), + ) - # Save to AMBER format. - _sr.save(mols0, self._filenames["topology0"]) - _sr.save(mols1, self._filenames["topology1"]) + # Get the lambda value. + lam = self._lambda_values[index] - # Save to PDB format. - _sr.save( - mols0, - self._filenames["topology0"].replace(".prm7", ".pdb"), - ) - _sr.save( - mols1, - self._filenames["topology1"].replace(".prm7", ".pdb"), - ) + # Get the energy trajectory. + df = system.energy_trajectory(to_alchemlyb=True, energy_unit="kT") - # Get the lambda value. - lam = self._lambda_values[index] + # Set the lambda values at which energies were sampled. + if lambda_energy is None: + lambda_energy = self._lambda_values - # Get the energy trajectory. - df = system.energy_trajectory(to_alchemlyb=True, energy_unit="kT") + # Create the metadata. + metadata = { + "attrs": df.attrs, + "somd2 version": __version__, + "sire version": f"{_sire_version}+{_sire_revisionid}", + "lambda": str(lam), + "speed": speed, + "temperature": str(self._config.temperature.value()), + } - # Set the lambda values at which energies were sampled. 
- if lambda_energy is None: - lambda_energy = self._lambda_values + # Add the lambda gradient if available. + if lambda_grad is not None: + metadata["lambda_grad"] = lambda_grad + + if is_final_block: + # Assemble and save the final trajectory. + if self._config.save_trajectories: + # Save the final trajectory chunk to file. + if system.num_frames() > 0: + traj_filename = ( + self._filenames[index]["trajectory_chunk"] + f"{block}.dcd" + ) + _sr.save( + system.trajectory(), + traj_filename, + format=["DCD"], + ) - # Create the metadata. - metadata = { - "attrs": df.attrs, - "somd2 version": __version__, - "sire version": f"{_sire_version}+{_sire_revisionid}", - "lambda": str(lam), - "speed": speed, - "temperature": str(self._config.temperature.value()), - } + # Create the final topology file name. + topology0 = self._filenames["topology0"] - # Add the lambda gradient if available. - if lambda_grad is not None: - metadata["lambda_grad"] = lambda_grad - - if is_final_block: - # Assemble and save the final trajectory. - if self._config.save_trajectories: - # Save the final trajectory chunk to file. - if system.num_frames() > 0: - traj_filename = ( - self._filenames[index]["trajectory_chunk"] + f"{block}.dcd" - ) - _sr.save( - system.trajectory(), - traj_filename, - format=["DCD"], - ) + # Create the final trajectory file name. + traj_filename = self._filenames[index]["trajectory"] + + # Glob for the trajectory chunks. + from glob import glob - # Create the final topology file name. - topology0 = self._filenames["topology0"] + traj_chunks = sorted( + glob(f"{self._filenames[index]['trajectory_chunk']}*") + ) - # Create the final trajectory file name. - traj_filename = self._filenames[index]["trajectory"] + # If this is a restart, then we need to check for an existing + # trajectory file with the same name. If it exists and is non-empty, + # then copy it to a backup file and prepend it to the list of chunks. 
+ if self._config.restart: + path = _Path(traj_filename) + if path.exists() and path.stat().st_size > 0: + from shutil import copyfile - # Glob for the trajectory chunks. - from glob import glob + copyfile(traj_filename, f"{traj_filename}.bak") + traj_chunks = [f"{traj_filename}.bak"] + traj_chunks - traj_chunks = sorted( - glob(f"{self._filenames[index]['trajectory_chunk']}*") - ) + # Load the topology and chunked trajectory files. + mols = _sr.load([topology0] + traj_chunks) - # If this is a restart, then we need to check for an existing - # trajectory file with the same name. If it exists and is non-empty, - # then copy it to a backup file and prepend it to the list of chunks. - if self._config.restart: - path = _Path(traj_filename) - if path.exists() and path.stat().st_size > 0: - from shutil import copyfile + # Save the final trajectory to a single file. + _sr.save(mols.trajectory(), traj_filename, format=["DCD"]) - copyfile(traj_filename, f"{traj_filename}.bak") - traj_chunks = [f"{traj_filename}.bak"] + traj_chunks + # Now remove the chunked trajectory files. + for chunk in traj_chunks: + _Path(chunk).unlink() - # Load the topology and chunked trajectory files. - mols = _sr.load([topology0] + traj_chunks) + # Add config and lambda value to the system properties. + system.set_property("config", self._config.as_dict(sire_compatible=True)) + system.set_property("lambda", lam) - # Save the final trajectory to a single file. - _sr.save(mols.trajectory(), traj_filename, format=["DCD"]) + # Backup the existing checkpoint file, if it exists. + path = _Path(self._filenames[index]["checkpoint"]) + if path.exists() and path.stat().st_size > 0: + _copyfile( + self._filenames[index]["checkpoint"], + str(self._filenames[index]["checkpoint"]) + ".bak", + ) - # Now remove the chunked trajectory files. - for chunk in traj_chunks: - _Path(chunk).unlink() + # Stream the final system to file. 
+ _sr.stream.save(system, self._filenames[index]["checkpoint"]) - # Add config and lambda value to the system properties. - system.set_property( - "config", self._config.as_dict(sire_compatible=True) + # Backup the existing energy trajectory file, if it exists. + path = _Path(self._filenames[index]["energy_traj"]) + if path.exists() and path.stat().st_size > 0: + _copyfile( + self._filenames[index]["energy_traj"], + str(self._filenames[index]["energy_traj"]) + ".bak", ) - system.set_property("lambda", lam) - # Backup the existing checkpoint file, if it exists. - path = _Path(self._filenames[index]["checkpoint"]) - if path.exists() and path.stat().st_size > 0: - _copyfile( - self._filenames[index]["checkpoint"], - str(self._filenames[index]["checkpoint"]) + ".bak", + # Create the final parquet file. + _dataframe_to_parquet( + df, + metadata=metadata, + filename=self._filenames[index]["energy_traj"], + ) + + else: + # Update the starting block if necessary. + if block == 0: + block = self._start_block + + # Save the current trajectory chunk to file. + if self._config.save_trajectories: + if system.num_frames() > 0: + traj_filename = ( + self._filenames[index]["trajectory_chunk"] + f"{block}.dcd" ) + _sr.save( + system.trajectory(), + traj_filename, + format=["DCD"], + ) + + # Encode the configuration and lambda value as system properties. + system.set_property("config", self._config.as_dict(sire_compatible=True)) + system.set_property("lambda", lam) - # Stream the final system to file. - _sr.stream.save(system, self._filenames[index]["checkpoint"]) + # Backup the existing checkpoint file, if it exists. + path = _Path(self._filenames[index]["checkpoint"]) + if path.exists() and path.stat().st_size > 0: + _copyfile( + self._filenames[index]["checkpoint"], + str(self._filenames[index]["checkpoint"]) + ".bak", + ) + + # Stream the checkpoint to file. + _sr.stream.save(system, self._filenames[index]["checkpoint"]) + + # Create the parquet file name. 
+ filename = self._filenames[index]["energy_traj"] + # Create the parquet file. + if block == self._start_block: + _dataframe_to_parquet(df, metadata=metadata, filename=filename) + # Append to the parquet file. + else: # Backup the existing energy trajectory file, if it exists. path = _Path(self._filenames[index]["energy_traj"]) if path.exists() and path.stat().st_size > 0: @@ -1506,68 +1551,11 @@ def _checkpoint( str(self._filenames[index]["energy_traj"]) + ".bak", ) - # Create the final parquet file. - _dataframe_to_parquet( - df, - metadata=metadata, - filename=self._filenames[index]["energy_traj"], + _parquet_append( + filename, + df.iloc[-self._energy_per_block :], ) - else: - # Update the starting block if necessary. - if block == 0: - block = self._start_block - - # Save the current trajectory chunk to file. - if self._config.save_trajectories: - if system.num_frames() > 0: - traj_filename = ( - self._filenames[index]["trajectory_chunk"] + f"{block}.dcd" - ) - _sr.save( - system.trajectory(), - traj_filename, - format=["DCD"], - ) - - # Encode the configuration and lambda value as system properties. - system.set_property( - "config", self._config.as_dict(sire_compatible=True) - ) - system.set_property("lambda", lam) - - # Backup the existing checkpoint file, if it exists. - path = _Path(self._filenames[index]["checkpoint"]) - if path.exists() and path.stat().st_size > 0: - _copyfile( - self._filenames[index]["checkpoint"], - str(self._filenames[index]["checkpoint"]) + ".bak", - ) - - # Stream the checkpoint to file. - _sr.stream.save(system, self._filenames[index]["checkpoint"]) - - # Create the parquet file name. - filename = self._filenames[index]["energy_traj"] - - # Create the parquet file. - if block == self._start_block: - _dataframe_to_parquet(df, metadata=metadata, filename=filename) - # Append to the parquet file. - else: - # Backup the existing energy trajectory file, if it exists. 
- path = _Path(self._filenames[index]["energy_traj"]) - if path.exists() and path.stat().st_size > 0: - _copyfile( - self._filenames[index]["energy_traj"], - str(self._filenames[index]["energy_traj"]) + ".bak", - ) - - _parquet_append( - filename, - df.iloc[-self._energy_per_block :], - ) - def _save_energy_components(self, index, context): """ Internal function to save the energy components for each force group to file. diff --git a/src/somd2/runner/_repex.py b/src/somd2/runner/_repex.py index 38785931..b7ad61cf 100644 --- a/src/somd2/runner/_repex.py +++ b/src/somd2/runner/_repex.py @@ -758,29 +758,38 @@ def run(self): # Checkpoint. if is_checkpoint or i == cycles - 1: - for j in range(num_checkpoint_batches): - # Get the indices of the replicas in this batch. - replicas = replica_list[ - j * num_checkpoint_workers : (j + 1) * num_checkpoint_workers - ] - with ThreadPoolExecutor(max_workers=num_workers) as executor: - try: - for result, error in executor.map( - self._checkpoint, - replicas, - repeat(self._lambda_values), - repeat(block), - repeat(num_blocks + int(rem > 0)), - repeat(i == cycles - 1), - ): - if not result: - _logger.error( - f"Checkpoint failed for {_lam_sym} = {self._lambda_values[index]:.5f}: {error}" - ) - raise error - except KeyboardInterrupt: - _logger.error("Checkpoint cancelled. Exiting.") - _sys.exit(1) + # Create the lock. + lock = _FileLock(self._lock_file) + + # Acquire the file lock to ensure that the checkpoint files are + # in a consistent state if read by another process. + with lock.acquire(timeout=self._config.timeout.to("seconds")): + for j in range(num_checkpoint_batches): + # Get the indices of the replicas in this batch. 
+ replicas = replica_list[ + j + * num_checkpoint_workers : (j + 1) + * num_checkpoint_workers + ] + with ThreadPoolExecutor(max_workers=num_workers) as executor: + try: + for result, error in executor.map( + self._checkpoint, + replicas, + repeat(self._lambda_values), + repeat(block), + repeat(num_blocks + int(rem > 0)), + repeat(i == cycles - 1), + ): + if not result: + _logger.error( + f"Checkpoint failed for {_lam_sym} = " + "{self._lambda_values[index]:.5f}: {error}" + ) + raise error + except KeyboardInterrupt: + _logger.error("Checkpoint cancelled. Exiting.") + _sys.exit(1) if i < cycles: # Assemble and energy matrix from the results. diff --git a/src/somd2/runner/_runner.py b/src/somd2/runner/_runner.py index bd403ffa..ea67e80d 100644 --- a/src/somd2/runner/_runner.py +++ b/src/somd2/runner/_runner.py @@ -21,6 +21,7 @@ __all__ = ["Runner"] +from filelock import FileLock as _FileLock from time import time as _timer import numpy as _np @@ -717,16 +718,21 @@ def generate_lam_vals(lambda_base, increment=0.001): block - self._start_block ) == num_blocks - 1 and rem == 0 - # Checkpoint. - self._checkpoint( - system, - index, - block, - speed, - lambda_energy=lambda_energy, - lambda_grad=lambda_grad, - is_final_block=is_final_block, - ) + # Create the lock. + lock = _FileLock(self._lock_file) + + # Acquire the file lock to ensure that the checkpoint files are + # in a consistent state if read by another process. + with lock.acquire(timeout=self._config.timeout.to("seconds")): + self._checkpoint( + system, + index, + block, + speed, + lambda_energy=lambda_energy, + lambda_grad=lambda_grad, + is_final_block=is_final_block, + ) # Delete all trajectory frames from the Sire system within the # dynamics object. @@ -786,16 +792,21 @@ def generate_lam_vals(lambda_base, increment=0.001): # Calculate the speed in nanoseconds per day. speed = checkpoint_interval / block_time - # Checkpoint. 
- self._checkpoint( - system, - index, - block, - speed, - lambda_energy=lambda_energy, - lambda_grad=lambda_grad, - is_final_block=True, - ) + # Create the lock. + lock = _FileLock(self._lock_file) + + # Acquire the file lock to ensure that the checkpoint files are + # in a consistent state if read by another process. + with lock.acquire(timeout=self._config.timeout.to("seconds")): + self._checkpoint( + system, + index, + block, + speed, + lambda_energy=lambda_energy, + lambda_grad=lambda_grad, + is_final_block=True, + ) # Delete all trajectory frames from the Sire system within the # dynamics object. From c86ab1031c8bddfee5cdbfe41af057d7610a5cc0 Mon Sep 17 00:00:00 2001 From: Lester Hedges Date: Mon, 29 Sep 2025 10:10:38 +0100 Subject: [PATCH 138/170] Pass dynamics kwargs to GCMC sampler constructor. [ci skip] --- src/somd2/runner/_repex.py | 13 ++++++------- src/somd2/runner/_runner.py | 16 ++++++++++++++-- 2 files changed, 20 insertions(+), 9 deletions(-) diff --git a/src/somd2/runner/_repex.py b/src/somd2/runner/_repex.py index b7ad61cf..5a14f37a 100644 --- a/src/somd2/runner/_repex.py +++ b/src/somd2/runner/_repex.py @@ -200,6 +200,11 @@ def _create_dynamics( else: mols = system + # Overload the device and lambda value. + dynamics_kwargs["device"] = device + dynamics_kwargs["lambda_value"] = lam + dynamics_kwargs["rest2_scale"] = scale + if gcmc_kwargs is not None: from loch import GCMCSampler @@ -208,10 +213,9 @@ def _create_dynamics( # Create the GCMC sampler. gcmc_sampler = GCMCSampler( mols, - device=device, - lambda_value=lam, ghost_file=ghost_file, **gcmc_kwargs, + **dynamics_kwargs, ) # Get the modified GCMC system. @@ -224,11 +228,6 @@ def _create_dynamics( f"Created GCMC sampler for lambda {lam:.5f} on device {device}" ) - # Overload the device and lambda value. - dynamics_kwargs["device"] = device - dynamics_kwargs["lambda_value"] = lam - dynamics_kwargs["rest2_scale"] = scale - # Create the dynamics object. 
try: dynamics = mols.dynamics(**dynamics_kwargs) diff --git a/src/somd2/runner/_runner.py b/src/somd2/runner/_runner.py index ea67e80d..69f540a5 100644 --- a/src/somd2/runner/_runner.py +++ b/src/somd2/runner/_runner.py @@ -411,12 +411,24 @@ def generate_lam_vals(lambda_base, increment=0.001): from loch import GCMCSampler + # Copy the dynamics kwargs. + dynamics_kwargs = self._dynamics_kwargs.copy() + + # Overload the dynamics kwargs with the simulation options. + dynamics_kwargs.update( + { + "device": device, + "lambda_value": lambda_value, + "rest2_scale": rest2_scale, + } + ) + + # Create the GCMC sampler. gcmc_sampler = GCMCSampler( system, - device=int(device), - lambda_value=lambda_value, ghost_file=self._filenames[index]["gcmc_ghosts"], **self._gcmc_kwargs, + **dynamics_kwargs, ) # Get the GCMC system. From 86042a4c1fb85465bf5ea0de1ea2a6e6323ca45a Mon Sep 17 00:00:00 2001 From: Lester Hedges Date: Mon, 29 Sep 2025 10:15:43 +0100 Subject: [PATCH 139/170] Only pass required dynamics kwargs. [ci skip] --- src/somd2/runner/_repex.py | 4 +++- src/somd2/runner/_runner.py | 17 +++-------------- 2 files changed, 6 insertions(+), 15 deletions(-) diff --git a/src/somd2/runner/_repex.py b/src/somd2/runner/_repex.py index 5a14f37a..45abc5c7 100644 --- a/src/somd2/runner/_repex.py +++ b/src/somd2/runner/_repex.py @@ -213,9 +213,11 @@ def _create_dynamics( # Create the GCMC sampler. gcmc_sampler = GCMCSampler( mols, + device=device, + lambda_value=lam, + rest2_scale=scale, ghost_file=ghost_file, **gcmc_kwargs, - **dynamics_kwargs, ) # Get the modified GCMC system. diff --git a/src/somd2/runner/_runner.py b/src/somd2/runner/_runner.py index 69f540a5..f36270a0 100644 --- a/src/somd2/runner/_runner.py +++ b/src/somd2/runner/_runner.py @@ -411,24 +411,13 @@ def generate_lam_vals(lambda_base, increment=0.001): from loch import GCMCSampler - # Copy the dynamics kwargs. 
- dynamics_kwargs = self._dynamics_kwargs.copy() - - # Overload the dynamics kwargs with the simulation options. - dynamics_kwargs.update( - { - "device": device, - "lambda_value": lambda_value, - "rest2_scale": rest2_scale, - } - ) - - # Create the GCMC sampler. gcmc_sampler = GCMCSampler( system, + device=int(device), + lambda_value=lambda_value, + rest2_scale=rest2_scale, ghost_file=self._filenames[index]["gcmc_ghosts"], **self._gcmc_kwargs, - **dynamics_kwargs, ) # Get the GCMC system. From 21274ca0c0bb64cbe343ba707b58e56e02251b8b Mon Sep 17 00:00:00 2001 From: Lester Hedges Date: Mon, 29 Sep 2025 13:55:27 +0100 Subject: [PATCH 140/170] Decouple backup from checkpointing. [ci skip] --- src/somd2/runner/_base.py | 84 ++++++++++++++++++------------------- src/somd2/runner/_repex.py | 25 +++++++++++ src/somd2/runner/_runner.py | 4 ++ 3 files changed, 69 insertions(+), 44 deletions(-) diff --git a/src/somd2/runner/_base.py b/src/somd2/runner/_base.py index 99d4fbc2..c679236e 100644 --- a/src/somd2/runner/_base.py +++ b/src/somd2/runner/_base.py @@ -21,7 +21,9 @@ __all__ = ["RunnerBase"] +from glob import glob as _glob from pathlib import Path as _Path +from shutil import copyfile as _copyfile import sire as _sr from sire.system import System as _System @@ -1379,7 +1381,6 @@ def _checkpoint( Whether this is the final block of the simulation. """ - from shutil import copyfile as _copyfile from somd2 import __version__, _sire_version, _sire_revisionid # Save the end-state GCMC topologies for trajectory analysis and visualisation. @@ -1446,10 +1447,8 @@ def _checkpoint( traj_filename = self._filenames[index]["trajectory"] # Glob for the trajectory chunks. 
- from glob import glob - traj_chunks = sorted( - glob(f"{self._filenames[index]['trajectory_chunk']}*") + _glob(f"{self._filenames[index]['trajectory_chunk']}*") ) # If this is a restart, then we need to check for an existing @@ -1458,9 +1457,7 @@ def _checkpoint( if self._config.restart: path = _Path(traj_filename) if path.exists() and path.stat().st_size > 0: - from shutil import copyfile - - copyfile(traj_filename, f"{traj_filename}.bak") + _copyfile(traj_filename, f"{traj_filename}.bak") traj_chunks = [f"{traj_filename}.bak"] + traj_chunks # Load the topology and chunked trajectory files. @@ -1477,25 +1474,9 @@ def _checkpoint( system.set_property("config", self._config.as_dict(sire_compatible=True)) system.set_property("lambda", lam) - # Backup the existing checkpoint file, if it exists. - path = _Path(self._filenames[index]["checkpoint"]) - if path.exists() and path.stat().st_size > 0: - _copyfile( - self._filenames[index]["checkpoint"], - str(self._filenames[index]["checkpoint"]) + ".bak", - ) - # Stream the final system to file. _sr.stream.save(system, self._filenames[index]["checkpoint"]) - # Backup the existing energy trajectory file, if it exists. - path = _Path(self._filenames[index]["energy_traj"]) - if path.exists() and path.stat().st_size > 0: - _copyfile( - self._filenames[index]["energy_traj"], - str(self._filenames[index]["energy_traj"]) + ".bak", - ) - # Create the final parquet file. _dataframe_to_parquet( df, @@ -1524,14 +1505,6 @@ def _checkpoint( system.set_property("config", self._config.as_dict(sire_compatible=True)) system.set_property("lambda", lam) - # Backup the existing checkpoint file, if it exists. - path = _Path(self._filenames[index]["checkpoint"]) - if path.exists() and path.stat().st_size > 0: - _copyfile( - self._filenames[index]["checkpoint"], - str(self._filenames[index]["checkpoint"]) + ".bak", - ) - # Stream the checkpoint to file. 
_sr.stream.save(system, self._filenames[index]["checkpoint"]) @@ -1543,19 +1516,47 @@ def _checkpoint( _dataframe_to_parquet(df, metadata=metadata, filename=filename) # Append to the parquet file. else: - # Backup the existing energy trajectory file, if it exists. - path = _Path(self._filenames[index]["energy_traj"]) - if path.exists() and path.stat().st_size > 0: - _copyfile( - self._filenames[index]["energy_traj"], - str(self._filenames[index]["energy_traj"]) + ".bak", - ) - _parquet_append( filename, df.iloc[-self._energy_per_block :], ) + def _backup_checkpoint(self, index): + """ + Create a backup of the previous checkpoint files. + + Parameters + ---------- + + index : int + The index of the window or replica. + """ + + try: + # Backup the existing checkpoint file, if it exists. + path = _Path(self._filenames[index]["checkpoint"]) + if path.exists() and path.stat().st_size > 0: + _copyfile( + self._filenames[index]["checkpoint"], + str(self._filenames[index]["checkpoint"]) + ".bak", + ) + traj_filename = self._filenames[index]["trajectory"] + except Exception as e: + return False, e + + try: + # Backup the existing energy trajectory file, if it exists. + path = _Path(self._filenames[index]["energy_traj"]) + if path.exists() and path.stat().st_size > 0: + _copyfile( + self._filenames[index]["energy_traj"], + str(self._filenames[index]["energy_traj"]) + ".bak", + ) + except Exception as e: + return False, e + + return True, None + def _save_energy_components(self, index, context): """ Internal function to save the energy components for each force group to file. @@ -1612,9 +1613,6 @@ def _restore_backup_files(self): Restore backup files in the working directory. """ - from glob import glob as _glob - from shutil import copyfile as _copyfile - # Find all files with a .bak extension in the working directory. backup_files = _glob(str(self._config.output_directory / "*.bak")) @@ -1634,8 +1632,6 @@ def _cleanup(self): Clean up backup files from the working directory. 
""" - from glob import glob as _glob - # Find all files with a .bak extension in the working directory. backup_files = _glob(str(self._config.output_directory / "*.bak")) diff --git a/src/somd2/runner/_repex.py b/src/somd2/runner/_repex.py index 45abc5c7..dadb60ac 100644 --- a/src/somd2/runner/_repex.py +++ b/src/somd2/runner/_repex.py @@ -765,6 +765,31 @@ def run(self): # Acquire the file lock to ensure that the checkpoint files are # in a consistent state if read by another process. with lock.acquire(timeout=self._config.timeout.to("seconds")): + # First backup existing checkpoint files. + for j in range(num_checkpoint_batches): + # Get the indices of the replicas in this batch. + replicas = replica_list[ + j + * num_checkpoint_workers : (j + 1) + * num_checkpoint_workers + ] + with ThreadPoolExecutor(max_workers=num_workers) as executor: + try: + for result, error in executor.map( + self._backup_checkpoint, + replicas, + ): + if not result: + _logger.error( + f"Backup failed for {_lam_sym} = " + "{self._lambda_values[index]:.5f}: {error}" + ) + raise error + except KeyboardInterrupt: + _logger.error("Backup cancelled. Exiting.") + _sys.exit(1) + + # Now write the new checkpoint files. for j in range(num_checkpoint_batches): # Get the indices of the replicas in this batch. replicas = replica_list[ diff --git a/src/somd2/runner/_runner.py b/src/somd2/runner/_runner.py index f36270a0..c67ea9d3 100644 --- a/src/somd2/runner/_runner.py +++ b/src/somd2/runner/_runner.py @@ -725,6 +725,10 @@ def generate_lam_vals(lambda_base, increment=0.001): # Acquire the file lock to ensure that the checkpoint files are # in a consistent state if read by another process. with lock.acquire(timeout=self._config.timeout.to("seconds")): + # Backup any existing checkpoint files. + self._backup_checkpoint_files(index) + + # Write the checkpoint files. 
self._checkpoint( system, index, From 2c9cfbafbcf9dfaf8813f5c235c301fd28d37ce9 Mon Sep 17 00:00:00 2001 From: Lester Hedges Date: Mon, 29 Sep 2025 13:56:52 +0100 Subject: [PATCH 141/170] Rename previous trajectory file so it isn't cleaned up. [ci skip] --- src/somd2/runner/_base.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/somd2/runner/_base.py b/src/somd2/runner/_base.py index c679236e..f9cb8097 100644 --- a/src/somd2/runner/_base.py +++ b/src/somd2/runner/_base.py @@ -1457,8 +1457,8 @@ def _checkpoint( if self._config.restart: path = _Path(traj_filename) if path.exists() and path.stat().st_size > 0: - _copyfile(traj_filename, f"{traj_filename}.bak") - traj_chunks = [f"{traj_filename}.bak"] + traj_chunks + _copyfile(traj_filename, f"{traj_filename}.prev") + traj_chunks = [f"{traj_filename}.prev"] + traj_chunks # Load the topology and chunked trajectory files. mols = _sr.load([topology0] + traj_chunks) From 9b12054bbd089110e8fab1e293dba2e1c9a6ede0 Mon Sep 17 00:00:00 2001 From: Lester Hedges Date: Mon, 29 Sep 2025 14:02:11 +0100 Subject: [PATCH 142/170] Fix backup method name. [ci skip] --- src/somd2/runner/_runner.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/somd2/runner/_runner.py b/src/somd2/runner/_runner.py index c67ea9d3..e214ace5 100644 --- a/src/somd2/runner/_runner.py +++ b/src/somd2/runner/_runner.py @@ -726,7 +726,7 @@ def generate_lam_vals(lambda_base, increment=0.001): # in a consistent state if read by another process. with lock.acquire(timeout=self._config.timeout.to("seconds")): # Backup any existing checkpoint files. - self._backup_checkpoint_files(index) + self._backup_checkpoint(index) # Write the checkpoint files. self._checkpoint( From d9d8ad06cb431d4295e6d48fa35e2b6835db6942 Mon Sep 17 00:00:00 2001 From: Lester Hedges Date: Tue, 30 Sep 2025 15:14:11 +0100 Subject: [PATCH 143/170] Reset GCMC water state when restarting. 
[ci skip] --- src/somd2/runner/_base.py | 19 +++++++++++++++++++ src/somd2/runner/_repex.py | 17 +++++++++++++++++ src/somd2/runner/_runner.py | 35 +++++++++++++++++++++++++++++++++++ 3 files changed, 71 insertions(+) diff --git a/src/somd2/runner/_base.py b/src/somd2/runner/_base.py index f9cb8097..e7fbdfb8 100644 --- a/src/somd2/runner/_base.py +++ b/src/somd2/runner/_base.py @@ -1032,11 +1032,30 @@ def _check_restart(self): # If this is a GCMC simulation, then remove all ghost waters from each of the systems. if self._config.gcmc: + # List to store the indices of the current ghost waters. + self._restart_ghost_waters = [] + # List to store the current positions. + self._restart_positions = [] _logger.info("Removing existing ghost waters from GCMC checkpoint systems") for i, system in enumerate(systems): + # Store the positions of all atoms. + self._restart_positions.append(_sr.io.get_coords_array(system)) if system is not None: # Remove the ghost waters from the system. try: + # Get the water molecule indices. + waters = system.molecules().find(system["water"].molecules()) + + # Get the ghost waters and their indices. + ghost_waters = system["property is_ghost_water"].molecules() + ghost_waters = system.molecules().find(ghost_waters) + + # Store the indices of the ghost waters in the waters list. + idxs = [] + for index in ghost_waters: + idxs.append(waters.index(index)) + self._restart_ghost_waters.append(idxs) + for mol in system["property is_ghost_water"].molecules(): _logger.debug( f"Removing ghost water molecule {mol.number()} for {_lam_sym}={self._lambda_values[i]:.5f}" diff --git a/src/somd2/runner/_repex.py b/src/somd2/runner/_repex.py index dadb60ac..5ae74cd3 100644 --- a/src/somd2/runner/_repex.py +++ b/src/somd2/runner/_repex.py @@ -549,6 +549,23 @@ def __init__(self, system, config): output_directory=self._config.output_directory, ) + # Reset the state of the OpenMM contexts and GCMC samplers. 
+ for i in range(len(self._lambda_values)): + dynamics, gcmc_sampler = self._dynamics_cache.get(index) + + # Reset the OpenMM state. + dynamics.context().setState(self._dynamics_cache._openmm_states[i]) + + # Reset the GCMC water state. + if gcmc_sampler is not None: + gcmc_sampler.push() + gcmc_sampler._set_water_state( + dynamics.context(), + states=self._dynamics_cache._gcmc_states[i], + force=True, + ) + gcmc_sampler.pop() + # Conversion factor for reduced potential. kT = (_sr.units.k_boltz * self._config.temperature).to(_sr.units.kcal_per_mol) self._beta = 1.0 / kT diff --git a/src/somd2/runner/_runner.py b/src/somd2/runner/_runner.py index e214ace5..48d25a53 100644 --- a/src/somd2/runner/_runner.py +++ b/src/somd2/runner/_runner.py @@ -450,7 +450,9 @@ def generate_lam_vals(lambda_base, increment=0.001): raise RuntimeError(f"Minimisation failed: {e}") # Equilibration. + is_equilibrated = False if self._config.equilibration_time.value() > 0.0 and not is_restart: + is_equilibrated = True try: # Run without saving energies or frames. _logger.info(f"Equilibrating at {_lam_sym} = {lambda_value:.5f}") @@ -585,6 +587,39 @@ def generate_lam_vals(lambda_base, increment=0.001): # Bind the GCMC sampler to the dynamics object. dynamics._d._gcmc_sampler = gcmc_sampler + # If this is a restart, then we need to reset the GCMC water state + # to match that of the restart system. + if self._is_restart: + from openmm.unit import angstrom + + # First set all waters to non-ghosts. + gcmc_sampler.set_water_state( + dynamics.context(), + states=_np.ones(len(gcmc_sampler._water_indices)), + force=True, + ) + + # Now set the ghost waters. + gcmc_sampler.set_water_state( + dynamics.context(), + self._restart_ghost_waters[index], + states=_np.zeros(len(gcmc_sampler._water_indices)), + force=True, + ) + + # Finally, reset the context positions to match the restart system. 
+ dynamics.context().setPositions( + self._restart_positions[index] * angstrom + ) + # Otherwise, if we've performed equilibration, then we need to reset + # the water state in the new context to match the equilibrated system. + elif is_equilibrated: + # Reset the water state. + gcmc_sampler.set_water_state( + dynamics.context(), + force=True, + ) + # Set the number of neighbours used for the energy calculation. # If not None, then we add one to account for the extra windows # used for finite-difference gradient analysis. From 70451d75dad514318f9bc8d56cbc4704a69919c8 Mon Sep 17 00:00:00 2001 From: Lester Hedges Date: Wed, 1 Oct 2025 09:42:43 +0100 Subject: [PATCH 144/170] Reset default HMR factor for now. [ci skip] --- src/somd2/config/_config.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/somd2/config/_config.py b/src/somd2/config/_config.py index 52e221e7..a08bf1fe 100644 --- a/src/somd2/config/_config.py +++ b/src/somd2/config/_config.py @@ -91,7 +91,7 @@ def __init__( integrator="langevin_middle", cutoff_type="pme", cutoff="7.5 A", - h_mass_factor=3, + h_mass_factor=1.5, hmr=True, num_lambda=11, lambda_values=None, From 4da4c209a1efe476b24f42b87c740bef0c981526 Mon Sep 17 00:00:00 2001 From: Lester Hedges Date: Thu, 2 Oct 2025 11:56:07 +0100 Subject: [PATCH 145/170] Fix missing format strings. 
[ci skip] --- src/somd2/runner/_repex.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/somd2/runner/_repex.py b/src/somd2/runner/_repex.py index 5ae74cd3..9b6e840b 100644 --- a/src/somd2/runner/_repex.py +++ b/src/somd2/runner/_repex.py @@ -799,7 +799,7 @@ def run(self): if not result: _logger.error( f"Backup failed for {_lam_sym} = " - "{self._lambda_values[index]:.5f}: {error}" + f"{self._lambda_values[index]:.5f}: {error}" ) raise error except KeyboardInterrupt: @@ -827,7 +827,7 @@ def run(self): if not result: _logger.error( f"Checkpoint failed for {_lam_sym} = " - "{self._lambda_values[index]:.5f}: {error}" + f"{self._lambda_values[index]:.5f}: {error}" ) raise error except KeyboardInterrupt: From 18f79fbed654a05515937b75f689d648f1969840 Mon Sep 17 00:00:00 2001 From: Lester Hedges Date: Thu, 2 Oct 2025 14:14:29 +0100 Subject: [PATCH 146/170] Expose GCMC acceptance tolerance option. [ci skip] --- src/somd2/config/_config.py | 22 ++++++++++++++++++++++ src/somd2/runner/_base.py | 1 + 2 files changed, 23 insertions(+) diff --git a/src/somd2/config/_config.py b/src/somd2/config/_config.py index a08bf1fe..5f770c89 100644 --- a/src/somd2/config/_config.py +++ b/src/somd2/config/_config.py @@ -134,6 +134,7 @@ def __init__( gcmc_num_waters=20, gcmc_radius="4 A", gcmc_bulk_sampling_probability=0.1, + gcmc_tolerance=0.0, rest2_scale=1.0, rest2_selection=None, output_directory="output", @@ -349,6 +350,11 @@ def __init__( density, i.e. acting as a barostat. (This option has no affect when 'gcmc_selection=None'.) + gcmc_tolerance: float + The tolerance for the GCMC acceptance probability, i.e. the minimum probability + of acceptance for a move. This can be used to exclude low probability candidates + that can cause instabilities or crashes for the MD engine. + rest2_scale: float, list(float) The scaling factor for Replica Exchange with Solute Tempering (REST) simulations. 
This is the factor by which the temperature of the solute is scaled with respect to @@ -474,6 +480,7 @@ def __init__( self.gcmc_num_waters = gcmc_num_waters self.gcmc_radius = gcmc_radius self.gcmc_bulk_sampling_probability = gcmc_bulk_sampling_probability + self.gcmc_tolerance = gcmc_tolerance self.rest2_scale = rest2_scale self.rest2_selection = rest2_selection self.restart = restart @@ -1543,6 +1550,21 @@ def gcmc_bulk_sampling_probability(self, gcmc_bulk_sampling_probability): ) self._gcmc_bulk_sampling_probability = gcmc_bulk_sampling_probability + @property + def gcmc_tolerance(self): + return self._gcmc_tolerance + + @gcmc_tolerance.setter + def gcmc_tolerance(self, gcmc_tolerance): + if not isinstance(gcmc_tolerance, float): + try: + gcmc_tolerance = float(gcmc_tolerance) + except Exception: + raise ValueError("'gcmc_tolerance' must be a float") + if gcmc_tolerance < 0.0: + raise ValueError("'gcmc_tolerance' must be greater than or equal to 0.0") + self._gcmc_tolerance = gcmc_tolerance + @property def rest2_scale(self): return self._rest2_scale diff --git a/src/somd2/runner/_base.py b/src/somd2/runner/_base.py index e7fbdfb8..604e0c29 100644 --- a/src/somd2/runner/_base.py +++ b/src/somd2/runner/_base.py @@ -612,6 +612,7 @@ def __init__(self, system, config): "coulomb_power": self._config.coulomb_power, "shift_coulomb": str(self._config.shift_coulomb), "shift_delta": str(self._config.shift_delta), + "tolerance": self._config.gcmc_tolerance, "overwrite": self._config.overwrite, "no_logger": True, } From 5f06daabae2407f33509b5666a524feeeeab1719 Mon Sep 17 00:00:00 2001 From: Lester Hedges Date: Fri, 3 Oct 2025 09:32:34 +0100 Subject: [PATCH 147/170] Enforce frame_frequency <= checkpoint_frequency [ci skip] --- src/somd2/runner/_base.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/src/somd2/runner/_base.py b/src/somd2/runner/_base.py index 604e0c29..6ccfa499 100644 --- a/src/somd2/runner/_base.py +++ b/src/somd2/runner/_base.py @@ -87,6 
+87,19 @@ def __init__(self, system, config): _logger.info(f"somd2 version: {__version__}") _logger.info(f"sire version: {_sire_version}+{_sire_revisionid}") + # Make sure the frame frequency doesn't exceed the checkpoint frequency. + # This constraint is currently required to avoid issues with missing + # frames when restarting from a checkpoint. This could be fixed by + # temporarily adjusting the frame frequency for the first checkpoint + # interval after a restart. + if ( + self._config.frame_frequency > 0 + and self._config.frame_frequency > self._config.checkpoint_frequency + ): + msg = "'frame_frequency' cannot be greater than 'checkpoint_frequency'." + _logger.error(msg) + raise ValueError(msg) + # Check whether we need to apply a perturbation to the reference system. if self._config.pert_file is not None: _logger.info( From 64a05dbeb0d3c7965e3543ec966d0d076161d7df Mon Sep 17 00:00:00 2001 From: Lester Hedges Date: Fri, 3 Oct 2025 09:50:25 +0100 Subject: [PATCH 148/170] Handle case where no trajectory frames are written. [ci skip] --- src/somd2/runner/_base.py | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/src/somd2/runner/_base.py b/src/somd2/runner/_base.py index 6ccfa499..3a053dd5 100644 --- a/src/somd2/runner/_base.py +++ b/src/somd2/runner/_base.py @@ -87,13 +87,22 @@ def __init__(self, system, config): _logger.info(f"somd2 version: {__version__}") _logger.info(f"sire version: {_sire_version}+{_sire_revisionid}") + # Flag whether frames are being saved. + if ( + self._config.frame_frequency > 0 + and self._config.frame_frequency <= self._config.runtime + ): + self._save_frames = True + else: + self._save_frames = False + # Make sure the frame frequency doesn't exceed the checkpoint frequency. # This constraint is currently required to avoid issues with missing # frames when restarting from a checkpoint. 
This could be fixed by # temporarily adjusting the frame frequency for the first checkpoint # interval after a restart. if ( - self._config.frame_frequency > 0 + self._save_frames and self._config.frame_frequency > self._config.checkpoint_frequency ): msg = "'frame_frequency' cannot be greater than 'checkpoint_frequency'." @@ -1463,7 +1472,7 @@ def _checkpoint( # Assemble and save the final trajectory. if self._config.save_trajectories: # Save the final trajectory chunk to file. - if system.num_frames() > 0: + if self._save_frames and system.num_frames() > 0: traj_filename = ( self._filenames[index]["trajectory_chunk"] + f"{block}.dcd" ) @@ -1524,7 +1533,7 @@ def _checkpoint( # Save the current trajectory chunk to file. if self._config.save_trajectories: - if system.num_frames() > 0: + if self._save_frames and system.num_frames() > 0: traj_filename = ( self._filenames[index]["trajectory_chunk"] + f"{block}.dcd" ) From 68fa84476a6fa8052803fb2cfd32c651942cf665 Mon Sep 17 00:00:00 2001 From: Lester Hedges Date: Mon, 6 Oct 2025 12:25:20 +0100 Subject: [PATCH 149/170] Remove redundant GCMC validation. [ci skip] --- src/somd2/runner/_base.py | 55 +++++++++++++-------------------------- 1 file changed, 18 insertions(+), 37 deletions(-) diff --git a/src/somd2/runner/_base.py b/src/somd2/runner/_base.py index 3a053dd5..0722c7ce 100644 --- a/src/somd2/runner/_base.py +++ b/src/somd2/runner/_base.py @@ -439,43 +439,6 @@ def __init__(self, system, config): self._filenames[0]["config"], ) - # Save the end state topologies to the output directory. - if isinstance(self._system, list): - mols = self._system[0] - else: - mols = self._system - # Add ghost waters to the system. - if self._config.gcmc and self._has_space: - # Make sure that a pressure has not been set. - if self._config.pressure is not None: - msg = "GCMC simulations must be run in the NVT ensemble." 
- _logger.error(msg) - raise ValueError(msg) - - from loch import GCMCSampler - from numpy.random import default_rng - - # Create a random number generator. - rng = default_rng() - - # Check that the system is solvated with water molecules. This - # is required for GCMC simulations since the existing waters - # provide a template for the ghost waters. - try: - water = mols["water"].molecules()[0] - except: - msg = "No water molecules in the system. Cannot perform GCMC." - _logger.error(msg) - raise ValueError(msg) - - # Create the GCMC system. - mols = GCMCSampler._prepare_system( - mols, water, rng, self._config.gcmc_num_waters - ) - - # Store the excess chemical potential. - self._mu_ex = self._config.gcmc_excess_chemical_potential.value() - # Append only this number of lines from the end of the dataframe during checkpointing. self._energy_per_block = int( self._config.checkpoint_frequency / self._config.energy_frequency @@ -501,6 +464,21 @@ def __init__(self, system, config): _logger.error(msg) raise ValueError(msg) + if isinstance(self._system, list): + mols = self._system[0] + else: + mols = self._system + + # Check that the system is solvated with water molecules. This + # is required for GCMC simulations since the existing waters + # provide a template for the ghost waters. + try: + water = mols["water"].molecules()[0] + except: + msg = "No water molecules in the system. Cannot perform GCMC." + _logger.error(msg) + raise ValueError(msg) + # Make sure the frame frequency is a multiple of the energy frequency. # Get the ratio. @@ -549,6 +527,9 @@ def __init__(self, system, config): _logger.error(msg) raise ValueError(msg) + # Store the excess chemcical potential value. + self._mu_ex = self._config.gcmc_excess_chemical_potential.value() + # Store the initial system time. 
if isinstance(self._system, list): self._initial_time = [] From afc7571eeab15cef07fe18a53fa5d70799031d3b Mon Sep 17 00:00:00 2001 From: Lester Hedges Date: Mon, 6 Oct 2025 13:02:37 +0100 Subject: [PATCH 150/170] Reset logger attributes for spawned child processes. [ci skip] --- src/somd2/config/_config.py | 17 +++++++++++++++++ src/somd2/runner/_runner.py | 4 ++++ 2 files changed, 21 insertions(+) diff --git a/src/somd2/config/_config.py b/src/somd2/config/_config.py index 5f770c89..8ae2f0b9 100644 --- a/src/somd2/config/_config.py +++ b/src/somd2/config/_config.py @@ -1895,3 +1895,20 @@ def _create_parser(cls): ) return parser + + def _reset_logger(self, logger): + """ + Internal method to reset the logger. + + This can be used when a parallel process is spawned to ensure that + the logger is correctly configured. + """ + + import sys + + logger.remove() + logger.add(sys.stderr, level=self.log_level.upper(), enqueue=True) + if self.log_file is not None and self.output_directory is not None: + logger.add( + self.output_directory / self.log_file, level=self.log_level.upper() + ) diff --git a/src/somd2/runner/_runner.py b/src/somd2/runner/_runner.py index 48d25a53..3826d0a0 100644 --- a/src/somd2/runner/_runner.py +++ b/src/somd2/runner/_runner.py @@ -267,6 +267,10 @@ def run_window(self, index): The duration of the simulation. """ + # Since this method is called in a separate process with the "spawn" + # method, we need to re-set the logger. + self._config._reset_logger(_logger) + # Get the lambda value. lambda_value = self._lambda_values[index] From d8f8c22f22b953c7ab0bd493f362f1c495d0d50d Mon Sep 17 00:00:00 2001 From: Lester Hedges Date: Mon, 6 Oct 2025 16:40:49 +0100 Subject: [PATCH 151/170] Only run CI on default branches and for PRs. 
--- .github/workflows/main.yaml | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/.github/workflows/main.yaml b/.github/workflows/main.yaml index 14369869..66b2aa98 100644 --- a/.github/workflows/main.yaml +++ b/.github/workflows/main.yaml @@ -3,11 +3,9 @@ name: Build and test on: workflow_dispatch: push: - branches: - - "**" + branches: [devel, main] pull_request: - branches: - - "main" + branches: [devel, main] jobs: pre_job: From 0905caf4944281afc70a89e51a48e081efab7aac Mon Sep 17 00:00:00 2001 From: Lester Hedges Date: Tue, 7 Oct 2025 15:35:31 +0100 Subject: [PATCH 152/170] Save GCMC topology for all indices for non-repex simulations. --- src/somd2/runner/_base.py | 36 +++++++++++++++++++----------------- 1 file changed, 19 insertions(+), 17 deletions(-) diff --git a/src/somd2/runner/_base.py b/src/somd2/runner/_base.py index 0722c7ce..d31fe666 100644 --- a/src/somd2/runner/_base.py +++ b/src/somd2/runner/_base.py @@ -1407,23 +1407,25 @@ def _checkpoint( from somd2 import __version__, _sire_version, _sire_revisionid # Save the end-state GCMC topologies for trajectory analysis and visualisation. - if self._config.gcmc and block == 0 and index == 0: - mols0 = _sr.morph.link_to_reference(system) - mols1 = _sr.morph.link_to_perturbed(system) - - # Save to AMBER format. - _sr.save(mols0, self._filenames["topology0"]) - _sr.save(mols1, self._filenames["topology1"]) - - # Save to PDB format. - _sr.save( - mols0, - self._filenames["topology0"].replace(".prm7", ".pdb"), - ) - _sr.save( - mols1, - self._filenames["topology1"].replace(".prm7", ".pdb"), - ) + if self._config.gcmc: + # Only save for first replica if performing replica exchange. + if not self._config.replica_exchange or (block == 0 and index == 0): + mols0 = _sr.morph.link_to_reference(system) + mols1 = _sr.morph.link_to_perturbed(system) + + # Save to AMBER format. + _sr.save(mols0, self._filenames["topology0"]) + _sr.save(mols1, self._filenames["topology1"]) + + # Save to PDB format. 
+ _sr.save( + mols0, + self._filenames["topology0"].replace(".prm7", ".pdb"), + ) + _sr.save( + mols1, + self._filenames["topology1"].replace(".prm7", ".pdb"), + ) # Get the lambda value. lam = self._lambda_values[index] From bb89d20a46167612ad94a4c1d40e48561398201d Mon Sep 17 00:00:00 2001 From: Lester Hedges Date: Wed, 8 Oct 2025 10:18:46 +0100 Subject: [PATCH 153/170] Fix method name. --- src/somd2/runner/_runner.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/somd2/runner/_runner.py b/src/somd2/runner/_runner.py index 3826d0a0..1d8576c4 100644 --- a/src/somd2/runner/_runner.py +++ b/src/somd2/runner/_runner.py @@ -597,14 +597,14 @@ def generate_lam_vals(lambda_base, increment=0.001): from openmm.unit import angstrom # First set all waters to non-ghosts. - gcmc_sampler.set_water_state( + gcmc_sampler._set_water_state( dynamics.context(), states=_np.ones(len(gcmc_sampler._water_indices)), force=True, ) # Now set the ghost waters. - gcmc_sampler.set_water_state( + gcmc_sampler._set_water_state( dynamics.context(), self._restart_ghost_waters[index], states=_np.zeros(len(gcmc_sampler._water_indices)), @@ -619,7 +619,7 @@ def generate_lam_vals(lambda_base, increment=0.001): # the water state in the new context to match the equilibrated system. elif is_equilibrated: # Reset the water state. - gcmc_sampler.set_water_state( + gcmc_sampler._set_water_state( dynamics.context(), force=True, ) From 57c31736e7322fa5d800e96b0c20f486b59f8b63 Mon Sep 17 00:00:00 2001 From: Lester Hedges Date: Wed, 8 Oct 2025 15:29:29 +0100 Subject: [PATCH 154/170] Use new GCMCSampler.bind_dynamics() method. 
--- src/somd2/runner/_repex.py | 2 +- src/somd2/runner/_runner.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/somd2/runner/_repex.py b/src/somd2/runner/_repex.py index 9b6e840b..131c4e9a 100644 --- a/src/somd2/runner/_repex.py +++ b/src/somd2/runner/_repex.py @@ -242,7 +242,7 @@ def _create_dynamics( # dynamics object to reset the water state in its internal OpenMM # context following a crash recovery. if gcmc_kwargs is not None: - dynamics._d._gcmc_sampler = gcmc_sampler + gcmc_sampler.bind_dynamics(dynamics) # Append the dynamics object. self._dynamics.append(dynamics) diff --git a/src/somd2/runner/_runner.py b/src/somd2/runner/_runner.py index 1d8576c4..995d5b25 100644 --- a/src/somd2/runner/_runner.py +++ b/src/somd2/runner/_runner.py @@ -490,7 +490,7 @@ def generate_lam_vals(lambda_base, increment=0.001): # Equilibrate with GCMC moves. if gcmc_sampler is not None: # Bind the GCMC sampler to the dynamics object. - dynamics._d._gcmc_sampler = gcmc_sampler + gcmc_sampler.bind_dynamics(dynamics) _logger.info( f"Equilibrating with GCMC moves at {_lam_sym} = {lambda_value:.5f}" @@ -589,7 +589,7 @@ def generate_lam_vals(lambda_base, increment=0.001): gcmc_sampler.reset() # Bind the GCMC sampler to the dynamics object. - dynamics._d._gcmc_sampler = gcmc_sampler + gcmc_sampler.bind_dynamics(dynamics) # If this is a restart, then we need to reset the GCMC water state # to match that of the restart system. From d213ab079d3fd6d74fe2108eaa2dd2b9238d46f7 Mon Sep 17 00:00:00 2001 From: Lester Hedges Date: Thu, 9 Oct 2025 12:05:33 +0100 Subject: [PATCH 155/170] Add support for multi-conformational seeding. 
--- src/somd2/config/_config.py | 42 +++++++++++++++++++++++++++++++++ src/somd2/runner/_base.py | 46 +++++++++++++++++++++++++++++++++++++ src/somd2/runner/_repex.py | 19 +++++++++++++-- 3 files changed, 105 insertions(+), 2 deletions(-) diff --git a/src/somd2/config/_config.py b/src/somd2/config/_config.py index 8ae2f0b9..fae104d1 100644 --- a/src/somd2/config/_config.py +++ b/src/somd2/config/_config.py @@ -127,6 +127,7 @@ def __init__( max_gpus=None, oversubscription_factor=1, replica_exchange=False, + perturbed_system=None, gcmc=False, gcmc_selection=None, gcmc_excess_chemical_potential="-6.09 kcal/mol", @@ -320,6 +321,11 @@ def __init__( Whether to run replica exchange simulation. Currently this can only be used when GPU resources are available. + perturbed_system: str + The path to a stream file containing a Sire system for the equilibrated perturbed + end state (lambda = 1). This will be used as the starting conformation all lambda + windows > 0.5 when performing a replica exchange simulation. + gcmc: bool Whether to perform Grand Canonical Monte Carlo (GCMC) water insertions/deletions. @@ -473,6 +479,7 @@ def __init__( self.max_gpus = max_gpus self.oversubscription_factor = oversubscription_factor self.replica_exchange = replica_exchange + self.perturbed_system = perturbed_system self.gcmc = gcmc self.gcmc_selection = gcmc_selection self.gcmc_excess_chemical_potential = gcmc_excess_chemical_potential @@ -582,6 +589,13 @@ def as_dict(self, sire_compatible=False): self._charge_scale_factor ): d["lambda_schedule"] = "charge_scaled_morph" + + # Use the path for the perturbed_system option, since the system + # isn't serializable. 
+ if self.perturbed_system is not None: + d["perturbed_system"] = str(self._perturbed_system_file) + d.pop("perturbed_system_file", None) + return d @property @@ -1424,6 +1438,34 @@ def replica_exchange(self, replica_exchange): raise ValueError("'replica_exchange' must be of type 'bool'") self._replica_exchange = replica_exchange + @property + def perturbed_system(self): + return self._perturbed_system + + @perturbed_system.setter + def perturbed_system(self, perturbed_system): + if perturbed_system is not None: + if isinstance(perturbed_system, str): + import os + + if not os.path.exists(perturbed_system): + raise ValueError( + f"'perturbed_system' stream file does not exist: {perturbed_system}" + ) + + try: + self._perturbed_system = _sr.stream.load(perturbed_system) + self._perturbed_system_file = perturbed_system + except Exception as e: + raise ValueError( + f"Unable to load 'perturbed_system' stream file: {e}" + ) + else: + raise TypeError("'perturbed_system' must be of type 'str'") + else: + self._perturbed_system = None + self._perturbed_system_file = None + @property def gcmc(self): return self._gcmc diff --git a/src/somd2/runner/_base.py b/src/somd2/runner/_base.py index d31fe666..7fcc7d64 100644 --- a/src/somd2/runner/_base.py +++ b/src/somd2/runner/_base.py @@ -81,6 +81,21 @@ def __init__(self, system, config): self._config = config self._config._extra_args = {} + if self._config.replica_exchange and self._config.perturbed_system is not None: + # Make sure the number of positions is correct. + num_atoms = self._system.num_atoms() + num_pert_atoms = self._config.perturbed_system.num_atoms() + if num_atoms != num_pert_atoms: + msg = ( + f"Number of atoms in 'perturbed_system' ({num_pert_atoms}) does not match " + f"the number of atoms in the 'system' ({num_atoms})." 
+ ) + _logger.error(msg) + raise ValueError(msg) + self._perturbed_system = self._config.perturbed_system.clone() + else: + self._perturbed_system = None + # Log the versions of somd2 and sire. from somd2 import __version__, _sire_version, _sire_revisionid @@ -118,6 +133,10 @@ def __init__(self, system, config): from .._utils._somd1 import apply_pert self._system = apply_pert(self._system, self._config.pert_file) + if self._perturbed_system is not None: + self._perturbed_system = apply_pert( + self._perturbed_system, self._config.pert_file + ) except Exception as e: msg = f"Unable to apply perturbation to reference system: {e}" _logger.error(msg) @@ -133,6 +152,8 @@ def __init__(self, system, config): from .._utils._somd1 import reconstruct_system self._system = reconstruct_system(self._system) + if self._perturbed_system is not None: + self._perturbed_system = reconstruct_system(self._perturbed_system) # Make sure the system contains perturbable molecules. try: @@ -144,6 +165,8 @@ def __init__(self, system, config): # Link properties to the lambda = 0 end state. self._system = _sr.morph.link_to_reference(self._system) + if self._perturbed_system is not None: + self._perturbed_system = _sr.morph.link_to_reference(self._perturbed_system) # Set the default configuration options. @@ -166,6 +189,11 @@ def __init__(self, system, config): _logger.info("Applying SOMD1 perturbation compatibility.") self._system = make_compatible(self._system) self._system = _sr.morph.link_to_reference(self._system) + if self._perturbed_system is not None: + self._perturbed_system = make_compatible(self._perturbed_system) + self._perturbed_system = _sr.morph.link_to_reference( + self._perturbed_system + ) # Next, swap the water topology so that it is in AMBER format. 
@@ -206,6 +234,10 @@ def __init__(self, system, config): self._system = _System( _setAmberWater(self._system._system, model) ) + if self._perturbed_system is not None: + self._perturbed_system = _System( + _setAmberWater(self._perturbed_system._system, model) + ) _logger.info( "Converting water topology to AMBER format for SOMD1 compatibility." ) @@ -225,6 +257,8 @@ def __init__(self, system, config): _logger.info("Applying Boresch modifications to ghost atom bonded terms") self._system = modify(self._system) + if self._perturbed_system is not None: + self._perturbed_system = modify(self._perturbed_system) # Check for a periodic space. self._has_space = self._check_space() @@ -274,6 +308,10 @@ def __init__(self, system, config): # Create alchemical ions. if charge_diff != 0: self._system = self._create_alchemical_ions(self._system, charge_diff) + if self._perturbed_system is not None: + self._perturbed_system = self._create_alchemical_ions( + self._perturbed_system, charge_diff + ) # Set the lambda values. if self._config.lambda_values: @@ -387,11 +425,19 @@ def __init__(self, system, config): self._system = self._repartition_h_mass( self._system, new_factor ) + if self._perturbed_system is not None: + self._perturbed_system = self._repartition_h_mass( + self._perturbed_system, new_factor + ) else: self._system = self._repartition_h_mass( self._system, self._config.h_mass_factor ) + if self._perturbed_system is not None: + self._perturbed_system = self._repartition_h_mass( + self._perturbed_system, self._config.h_mass_factor + ) # Make sure the REST2 selection is valid. if self._config.rest2_selection is not None: diff --git a/src/somd2/runner/_repex.py b/src/somd2/runner/_repex.py index 131c4e9a..6b93d5a0 100644 --- a/src/somd2/runner/_repex.py +++ b/src/somd2/runner/_repex.py @@ -52,6 +52,7 @@ def __init__( dynamics_kwargs, gcmc_kwargs=None, output_directory=None, + perturbed_system=None, ): """ Constructor. 
@@ -79,6 +80,10 @@ def __init__( output_directory: pathlib.Path The directory for simulation output. + + perturbed_system: sire.system.System + The system for the perturbed state. If None, then the perturbed state + is not used. """ # Warn if the number of replicas is not a multiple of the number of GPUs. @@ -109,6 +114,7 @@ def __init__( dynamics_kwargs, gcmc_kwargs=gcmc_kwargs, output_directory=output_directory, + perturbed_system=perturbed_system, ) def __setstate__(self, state): @@ -149,6 +155,7 @@ def _create_dynamics( dynamics_kwargs, gcmc_kwargs=None, output_directory=None, + perturbed_system=None, ): """ Create the dynamics objects. @@ -176,6 +183,10 @@ def _create_dynamics( output_directory: pathlib.Path The directory for simulation output. + + perturbed_system: sire.system.System + The system for the perturbed state. If None, then the perturbed state + is not used. """ # Copy the dynamics keyword arguments. @@ -198,7 +209,10 @@ def _create_dynamics( mols = system[i] # This is a new simulation. else: - mols = system + if perturbed_system is not None and lam > 0.5: + mols = perturbed_system + else: + mols = system # Overload the device and lambda value. dynamics_kwargs["device"] = device @@ -512,7 +526,8 @@ def __init__(self, system, config): self._rest2_scale_factors, self._num_gpus, dynamics_kwargs, - self._gcmc_kwargs, + gcmc_kwargs=self._gcmc_kwargs, + perturbed_system=self._perturbed_system, output_directory=self._config.output_directory, ) else: From 42cdf2d1cf1f44170e3b21706eadade29e440da4 Mon Sep 17 00:00:00 2001 From: Lester Hedges Date: Thu, 9 Oct 2025 12:40:27 +0100 Subject: [PATCH 156/170] Fix invalid variable name. 
--- src/somd2/runner/_repex.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/somd2/runner/_repex.py b/src/somd2/runner/_repex.py index 6b93d5a0..4c25fe1a 100644 --- a/src/somd2/runner/_repex.py +++ b/src/somd2/runner/_repex.py @@ -566,7 +566,7 @@ def __init__(self, system, config): # Reset the state of the OpenMM contexts and GCMC samplers. for i in range(len(self._lambda_values)): - dynamics, gcmc_sampler = self._dynamics_cache.get(index) + dynamics, gcmc_sampler = self._dynamics_cache.get(i) # Reset the OpenMM state. dynamics.context().setState(self._dynamics_cache._openmm_states[i]) From 8c5544de125f3cb3541c2399fd4110b0b2d6ec19 Mon Sep 17 00:00:00 2001 From: Lester Hedges Date: Thu, 9 Oct 2025 13:19:22 +0100 Subject: [PATCH 157/170] Simplify multiconformational seeding. --- src/somd2/runner/_base.py | 55 +++++++++++++++----------------------- src/somd2/runner/_repex.py | 44 +++++++++++++++++++++--------- 2 files changed, 54 insertions(+), 45 deletions(-) diff --git a/src/somd2/runner/_base.py b/src/somd2/runner/_base.py index 7fcc7d64..292bd41c 100644 --- a/src/somd2/runner/_base.py +++ b/src/somd2/runner/_base.py @@ -85,6 +85,7 @@ def __init__(self, system, config): # Make sure the number of positions is correct. num_atoms = self._system.num_atoms() num_pert_atoms = self._config.perturbed_system.num_atoms() + if num_atoms != num_pert_atoms: msg = ( f"Number of atoms in 'perturbed_system' ({num_pert_atoms}) does not match " @@ -92,9 +93,28 @@ def __init__(self, system, config): ) _logger.error(msg) raise ValueError(msg) - self._perturbed_system = self._config.perturbed_system.clone() + + # Make sure the coordinates property is linked. + perturbed_system = _sr.morph.link_to_perturbed( + self._config.perturbed_system + ) + + # Store the positions. + self._perturbed_positions = _sr.io.get_coords_array(perturbed_system) + + # Store the box vectors. 
+ cell = self._config.perturbed_system.space().box_matrix() + c0 = cell.column0() + c1 = cell.column1() + c2 = cell.column2() + self._perturbed_box = ( + (c0.x().value(), c0.y().value(), c0.z().value()), + (c1.x().value(), c1.y().value(), c1.z().value()), + (c2.x().value(), c2.y().value(), c2.z().value()), + ) else: - self._perturbed_system = None + self._perturbed_positions = None + self._perturbed_box = None # Log the versions of somd2 and sire. from somd2 import __version__, _sire_version, _sire_revisionid @@ -133,10 +153,6 @@ def __init__(self, system, config): from .._utils._somd1 import apply_pert self._system = apply_pert(self._system, self._config.pert_file) - if self._perturbed_system is not None: - self._perturbed_system = apply_pert( - self._perturbed_system, self._config.pert_file - ) except Exception as e: msg = f"Unable to apply perturbation to reference system: {e}" _logger.error(msg) @@ -152,8 +168,6 @@ def __init__(self, system, config): from .._utils._somd1 import reconstruct_system self._system = reconstruct_system(self._system) - if self._perturbed_system is not None: - self._perturbed_system = reconstruct_system(self._perturbed_system) # Make sure the system contains perturbable molecules. try: @@ -165,8 +179,6 @@ def __init__(self, system, config): # Link properties to the lambda = 0 end state. self._system = _sr.morph.link_to_reference(self._system) - if self._perturbed_system is not None: - self._perturbed_system = _sr.morph.link_to_reference(self._perturbed_system) # Set the default configuration options. 
@@ -189,11 +201,6 @@ def __init__(self, system, config): _logger.info("Applying SOMD1 perturbation compatibility.") self._system = make_compatible(self._system) self._system = _sr.morph.link_to_reference(self._system) - if self._perturbed_system is not None: - self._perturbed_system = make_compatible(self._perturbed_system) - self._perturbed_system = _sr.morph.link_to_reference( - self._perturbed_system - ) # Next, swap the water topology so that it is in AMBER format. @@ -234,10 +241,6 @@ def __init__(self, system, config): self._system = _System( _setAmberWater(self._system._system, model) ) - if self._perturbed_system is not None: - self._perturbed_system = _System( - _setAmberWater(self._perturbed_system._system, model) - ) _logger.info( "Converting water topology to AMBER format for SOMD1 compatibility." ) @@ -257,8 +260,6 @@ def __init__(self, system, config): _logger.info("Applying Boresch modifications to ghost atom bonded terms") self._system = modify(self._system) - if self._perturbed_system is not None: - self._perturbed_system = modify(self._perturbed_system) # Check for a periodic space. self._has_space = self._check_space() @@ -308,10 +309,6 @@ def __init__(self, system, config): # Create alchemical ions. if charge_diff != 0: self._system = self._create_alchemical_ions(self._system, charge_diff) - if self._perturbed_system is not None: - self._perturbed_system = self._create_alchemical_ions( - self._perturbed_system, charge_diff - ) # Set the lambda values. 
if self._config.lambda_values: @@ -425,19 +422,11 @@ def __init__(self, system, config): self._system = self._repartition_h_mass( self._system, new_factor ) - if self._perturbed_system is not None: - self._perturbed_system = self._repartition_h_mass( - self._perturbed_system, new_factor - ) else: self._system = self._repartition_h_mass( self._system, self._config.h_mass_factor ) - if self._perturbed_system is not None: - self._perturbed_system = self._repartition_h_mass( - self._perturbed_system, self._config.h_mass_factor - ) # Make sure the REST2 selection is valid. if self._config.rest2_selection is not None: diff --git a/src/somd2/runner/_repex.py b/src/somd2/runner/_repex.py index 4c25fe1a..9b2062fa 100644 --- a/src/somd2/runner/_repex.py +++ b/src/somd2/runner/_repex.py @@ -52,7 +52,8 @@ def __init__( dynamics_kwargs, gcmc_kwargs=None, output_directory=None, - perturbed_system=None, + perturbed_positions=None, + perturbed_box=None, ): """ Constructor. @@ -81,8 +82,12 @@ def __init__( output_directory: pathlib.Path The directory for simulation output. - perturbed_system: sire.system.System - The system for the perturbed state. If None, then the perturbed state + perturbed_positions: numpy.ndarray + The positions for the perturbed state. If None, then the perturbed state + is not used. + + perturbed_box: numpy.ndarray + The box vectors for the perturbed state. If None, then the perturbed state is not used. """ @@ -114,7 +119,8 @@ def __init__( dynamics_kwargs, gcmc_kwargs=gcmc_kwargs, output_directory=output_directory, - perturbed_system=perturbed_system, + perturbed_positions=perturbed_positions, + perturbed_box=perturbed_box, ) def __setstate__(self, state): @@ -155,7 +161,8 @@ def _create_dynamics( dynamics_kwargs, gcmc_kwargs=None, output_directory=None, - perturbed_system=None, + perturbed_positions=None, + perturbed_box=None, ): """ Create the dynamics objects. 
@@ -184,8 +191,12 @@ def _create_dynamics( output_directory: pathlib.Path The directory for simulation output. - perturbed_system: sire.system.System - The system for the perturbed state. If None, then the perturbed state + perturbed_positions: numpy.ndarray + The positions for the perturbed state. If None, then the perturbed state + is not used. + + perturbed_box: numpy.ndarray + The box vectors for the perturbed state. If None, then the perturbed state is not used. """ @@ -209,10 +220,7 @@ def _create_dynamics( mols = system[i] # This is a new simulation. else: - if perturbed_system is not None and lam > 0.5: - mols = perturbed_system - else: - mols = system + mols = system # Overload the device and lambda value. dynamics_kwargs["device"] = device @@ -252,6 +260,17 @@ def _create_dynamics( _logger.error(msg) raise RuntimeError(msg) from e + # Update the box vectors and positions if the perturbed state is used. + if ( + perturbed_positions is not None + and perturbed_box is not None + and lam > 0.5 + ): + from openmm.unit import angstrom + + dynamics.context().setPeriodicBoxVectors(*perturbed_box * angstrom) + dynamics.context().setPositions(perturbed_positions * angstrom) + # Bind the GCMC sampler to the dynamics object. This allows the # dynamics object to reset the water state in its internal OpenMM # context following a crash recovery. @@ -527,7 +546,8 @@ def __init__(self, system, config): self._num_gpus, dynamics_kwargs, gcmc_kwargs=self._gcmc_kwargs, - perturbed_system=self._perturbed_system, + perturbed_positions=self._perturbed_positions, + perturbed_box=self._perturbed_box, output_directory=self._config.output_directory, ) else: From cad087bbeeddf6917ae91d8e6354694290968517 Mon Sep 17 00:00:00 2001 From: Lester Hedges Date: Thu, 9 Oct 2025 18:30:00 +0100 Subject: [PATCH 158/170] Pad trajectory chunk file names. 
--- src/somd2/runner/_base.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/somd2/runner/_base.py b/src/somd2/runner/_base.py index 292bd41c..5f031dbe 100644 --- a/src/somd2/runner/_base.py +++ b/src/somd2/runner/_base.py @@ -1492,7 +1492,7 @@ def _checkpoint( # Save the final trajectory chunk to file. if self._save_frames and system.num_frames() > 0: traj_filename = ( - self._filenames[index]["trajectory_chunk"] + f"{block}.dcd" + self._filenames[index]["trajectory_chunk"] + f"{block:05d}.dcd" ) _sr.save( system.trajectory(), @@ -1553,7 +1553,7 @@ def _checkpoint( if self._config.save_trajectories: if self._save_frames and system.num_frames() > 0: traj_filename = ( - self._filenames[index]["trajectory_chunk"] + f"{block}.dcd" + self._filenames[index]["trajectory_chunk"] + f"{block:05d}.dcd" ) _sr.save( system.trajectory(), From 192d76eae93a43cb1ff5522ebb93d5fd91c85083 Mon Sep 17 00:00:00 2001 From: Lester Hedges Date: Tue, 21 Oct 2025 11:03:59 +0100 Subject: [PATCH 159/170] Add note about temporary file limit issue. [ci skip] --- README.md | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/README.md b/README.md index 3f2361e2..53ccf697 100644 --- a/README.md +++ b/README.md @@ -239,3 +239,12 @@ More details on MPS, including tuning options, can be found in the following within other Python scripts. When doing so, it is important to to wrap code within a ``if __name__ == "__main__":`` block since multiprocessing is used with the ``spawn`` start method. + +**## Known issues** + +During a checkpoint cycle trajectory frames are stored in memory before being +paged to disk. When running replica exchange simulations with a large number +of replicas this can lead to exceeding the temporary file storage limit on +some systems, causing the simulation to hang. This can be resolved by either +reducing the frequency at which frames are stored, or checkpointing more. +(Frames are written to disk and cleared from memory at each checkpoint.) 
From 0d426e307199cc0a5f4e6d61478862114ea6757b Mon Sep 17 00:00:00 2001 From: Lester Hedges Date: Thu, 23 Oct 2025 10:47:01 +0100 Subject: [PATCH 160/170] Remove redundant attribute. --- src/somd2/runner/_repex.py | 7 ------- 1 file changed, 7 deletions(-) diff --git a/src/somd2/runner/_repex.py b/src/somd2/runner/_repex.py index 9b2062fa..d3c7e9a2 100644 --- a/src/somd2/runner/_repex.py +++ b/src/somd2/runner/_repex.py @@ -605,13 +605,6 @@ def __init__(self, system, config): kT = (_sr.units.k_boltz * self._config.temperature).to(_sr.units.kcal_per_mol) self._beta = 1.0 / kT - # Store the pressure times Avaogadro's number. - if self._config.pressure is not None: - NA = 6.02214076e23 / _sr.units.mole - self._pressure = (self._config.pressure * NA).value() - else: - self._pressure = None - # If restarting, subtract the time already run from the total runtime if self._config.restart: time = self._system[0].time() From cc1b3baca7aae6b984a133b5d9151167a8ecc3b3 Mon Sep 17 00:00:00 2001 From: Lester Hedges Date: Thu, 23 Oct 2025 10:51:18 +0100 Subject: [PATCH 161/170] Expose membrane barostat surface_tension configuration option. --- src/somd2/config/_config.py | 34 ++++++++++++++++++++++++++++++++++ src/somd2/runner/_base.py | 1 + 2 files changed, 35 insertions(+) diff --git a/src/somd2/config/_config.py b/src/somd2/config/_config.py index fae104d1..03ed989d 100644 --- a/src/somd2/config/_config.py +++ b/src/somd2/config/_config.py @@ -87,6 +87,7 @@ def __init__( timestep="4 fs", temperature="300 K", pressure="1 atm", + surface_tension=None, barostat_frequency=25, integrator="langevin_middle", cutoff_type="pme", @@ -167,6 +168,9 @@ def __init__( Simulation pressure. (Simulations will run in the NVT ensemble unless a pressure is specified.) + surface_tension: str + Surface tension to use for NPT simulations with a membrane barostat. + barostat_frequency: int The number of integration steps between barostat updates. 
@@ -440,6 +444,7 @@ def __init__( self.runtime = runtime self.temperature = temperature self.pressure = pressure + self.surface_tension = surface_tension self.barostat_frequency = barostat_frequency self.integrator = integrator self.cutoff_type = cutoff_type @@ -693,6 +698,35 @@ def barostat_frequency(self, barostat_frequency): self._barostat_frequency = barostat_frequency + @property + def surface_tension(self): + return self._surface_tension + + @surface_tension.setter + def surface_tension(self, surface_tension): + if surface_tension is not None and not isinstance(surface_tension, str): + raise TypeError("'surface_tension' must be of type 'str'") + + from sire.units import atm, angstrom + + if surface_tension is not None: + try: + st = _sr.u(surface_tension) + except: + raise ValueError( + f"Unable to parse 'surface_tension' as a Sire GeneralUnit: {surface_tension}" + ) + # Make sure we can handle a value of zero. + if st == 0: + st = 0 * atm * angstrom + elif not st.has_same_units(atm * angstrom): + raise ValueError("'surface_tension' units are invalid.") + + self._surface_tension = st + + else: + self._surface_tension = surface_tension + @property def integrator(self): return self._integrator diff --git a/src/somd2/runner/_base.py b/src/somd2/runner/_base.py index 5f031dbe..142dc3a1 100644 --- a/src/somd2/runner/_base.py +++ b/src/somd2/runner/_base.py @@ -611,6 +611,7 @@ def __init__(self, system, config): "integrator": config.integrator, "temperature": config.temperature, "pressure": config.pressure if self._has_water else None, + "surface_tension": config.surface_tension, "barostat_frequency": config.barostat_frequency, "timestep": config.timestep, "restraints": config.restraints, From ac0f3222b3ba4eb392376c080dd1806f36c0d35f Mon Sep 17 00:00:00 2001 From: Lester Hedges Date: Thu, 23 Oct 2025 13:41:29 +0100 Subject: [PATCH 162/170] Logging tweak. 
[ci skip] --- src/somd2/runner/_base.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/somd2/runner/_base.py b/src/somd2/runner/_base.py index 142dc3a1..f8011791 100644 --- a/src/somd2/runner/_base.py +++ b/src/somd2/runner/_base.py @@ -253,12 +253,12 @@ def __init__(self, system, config): # Ghost atoms are considered light when adding bond constraints. self._config._extra_args["ghosts_are_light"] = True - # Apply Boresch modifications to bonded terms involving ghost atoms to - # avoid spurious couplings to the physical system at the end states. + # Apply modifications to bonded terms involving ghost atoms to avoid + # spurious couplings to the physical system at the end states. elif self._config.ghost_modifications: from ghostly import modify - _logger.info("Applying Boresch modifications to ghost atom bonded terms") + _logger.info("Applying modifications to ghost atom bonded terms") self._system = modify(self._system) # Check for a periodic space. From 20cea14c65e4b299e166d1240868b103d6f7bdcf Mon Sep 17 00:00:00 2001 From: Lester Hedges Date: Thu, 23 Oct 2025 14:21:53 +0100 Subject: [PATCH 163/170] Add note about using Runner class via Python API. [ci skip] --- README.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/README.md b/README.md index 53ccf697..8638e273 100644 --- a/README.md +++ b/README.md @@ -242,6 +242,10 @@ is used with the ``spawn`` start method. **## Known issues** +If using the regular `Runner` class via the Python API, then you will need to +guard calls to its `run()` method within a `if __name__ == "__main__":` block +since it uses multiprocessing with the `spawn` start method. + During a checkpoint cycle trajectory frames are stored in memory before being paged to disk. 
When running replica exchange simulations with a large number of replicas this can lead to exceeding the temporary file storage limit on From f34f4b4b56ac7fcd8f0f489354d8446ea34e5840 Mon Sep 17 00:00:00 2001 From: Lester Hedges Date: Sat, 25 Oct 2025 11:03:29 +0100 Subject: [PATCH 164/170] Default to one frame per checkpoint. --- src/somd2/config/_config.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/somd2/config/_config.py b/src/somd2/config/_config.py index 03ed989d..aabad3e8 100644 --- a/src/somd2/config/_config.py +++ b/src/somd2/config/_config.py @@ -117,7 +117,7 @@ def __init__( equilibration_constraints=False, energy_frequency="1 ps", save_trajectories=True, - frame_frequency="20 ps", + frame_frequency="100 ps", save_velocities=False, checkpoint_frequency="100 ps", num_checkpoint_workers=None, From 13a30140c43bc92b0227bf706e2e740a4d3dd686 Mon Sep 17 00:00:00 2001 From: Lester Hedges Date: Sat, 25 Oct 2025 11:14:45 +0100 Subject: [PATCH 165/170] Expose option to adjust the trajectory page size. --- src/somd2/config/_config.py | 24 ++++++++++++++++++++++++ src/somd2/runner/_base.py | 8 ++++++++ 2 files changed, 32 insertions(+) diff --git a/src/somd2/config/_config.py b/src/somd2/config/_config.py index aabad3e8..9f5fd71c 100644 --- a/src/somd2/config/_config.py +++ b/src/somd2/config/_config.py @@ -147,6 +147,7 @@ def __init__( somd1_compatibility=False, pert_file=None, save_energy_components=False, + page_size=None, timeout="300 s", ): """ @@ -420,6 +421,10 @@ def __init__( Whether to save the energy contribution for each force when checkpointing. This is useful when debugging crashes. + page_size: int + The page size for trajectory handling in megabytes. If None, then Sire + will automatically set the page size. + timeout: str Timeout for the minimiser and file lock. 
@@ -503,6 +508,7 @@ def __init__( self.timeout = timeout self.num_energy_neighbours = num_energy_neighbours self.null_energy = null_energy + self.page_size = page_size self.write_config = write_config @@ -1733,6 +1739,24 @@ def save_energy_components(self, save_energy_components): raise ValueError("'save_energy_components' must be of type 'bool'") self._save_energy_components = save_energy_components + @property + def page_size(self): + return self._page_size + + @page_size.setter + def page_size(self, page_size): + if page_size is not None: + if not isinstance(page_size, int): + try: + page_size = int(page_size) + except: + raise ValueError("'page_size' must be of type 'int'") + + if page_size < 1: + raise ValueError("'page_size' must be greater than 0") + + self._page_size = page_size + @property def timeout(self): return self._timeout diff --git a/src/somd2/runner/_base.py b/src/somd2/runner/_base.py index f8011791..4246ce6c 100644 --- a/src/somd2/runner/_base.py +++ b/src/somd2/runner/_base.py @@ -605,6 +605,14 @@ def __init__(self, system, config): _sr.save(mols0, self._filenames["topology0"]) _sr.save(mols1, self._filenames["topology1"]) + # Update the trajectory page size. + if self._config.page_size is not None: + # Convert from MB to bytes. + page_size = int(self._config.page_size * 1024 * 1024) + + # Set the new page size. + _sr.base.PageCache.set_max_page_size(page_size) + # Create the default dynamics kwargs dictionary. These can be overloaded # as needed. self._dynamics_kwargs = { From 18516935461d0ff7141dd44b2e47365e1a05f487 Mon Sep 17 00:00:00 2001 From: Lester Hedges Date: Mon, 27 Oct 2025 11:13:21 +0000 Subject: [PATCH 166/170] Docstring clarification. 
[ci skip] --- src/somd2/config/_config.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/somd2/config/_config.py b/src/somd2/config/_config.py index 9f5fd71c..822b1471 100644 --- a/src/somd2/config/_config.py +++ b/src/somd2/config/_config.py @@ -212,7 +212,7 @@ def __init__( Factor by which to scale charges for charge scaled morph. swap_end_states: bool - Whether to perform the perturbation in the reverse direction. + Whether to swap the end states of the alchemical system. coulomb_power : float Power to use for the soft-core Coulomb interaction. This is used From fe2e20a2188a1ece85d5f50bf46c77c7808859eb Mon Sep 17 00:00:00 2001 From: Lester Hedges Date: Wed, 29 Oct 2025 10:01:39 +0000 Subject: [PATCH 167/170] Update to recommended RBFE shift_delta value. --- src/somd2/config/_config.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/somd2/config/_config.py b/src/somd2/config/_config.py index 822b1471..c048e6f3 100644 --- a/src/somd2/config/_config.py +++ b/src/somd2/config/_config.py @@ -102,7 +102,7 @@ def __init__( swap_end_states=False, coulomb_power=0.0, shift_coulomb="1 A", - shift_delta="2.25 A", + shift_delta="1.5 A", restraints=None, constraint="h_bonds", perturbable_constraint="h_bonds_not_heavy_perturbed", From f712e76eb0c8597a79116eab82a7d8167b0a1c84 Mon Sep 17 00:00:00 2001 From: Lester Hedges Date: Wed, 29 Oct 2025 10:07:28 +0000 Subject: [PATCH 168/170] Add swap_end_states option to GCMC kwargs. 
--- src/somd2/runner/_base.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/somd2/runner/_base.py b/src/somd2/runner/_base.py index 4246ce6c..b3ae16b0 100644 --- a/src/somd2/runner/_base.py +++ b/src/somd2/runner/_base.py @@ -659,6 +659,7 @@ def __init__(self, system, config): "coulomb_power": self._config.coulomb_power, "shift_coulomb": str(self._config.shift_coulomb), "shift_delta": str(self._config.shift_delta), + "swap_end_states": self._config.swap_end_states, "tolerance": self._config.gcmc_tolerance, "overwrite": self._config.overwrite, "no_logger": True, From 778d73da7f08fd6c7a25996ebc50f05b86b062ae Mon Sep 17 00:00:00 2001 From: Lester Hedges Date: Wed, 29 Oct 2025 11:00:59 +0000 Subject: [PATCH 169/170] Always save GCMC system, but only at first checkpoint block. --- src/somd2/runner/_base.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/somd2/runner/_base.py b/src/somd2/runner/_base.py index b3ae16b0..ded857cc 100644 --- a/src/somd2/runner/_base.py +++ b/src/somd2/runner/_base.py @@ -1453,8 +1453,8 @@ def _checkpoint( # Save the end-state GCMC topologies for trajectory analysis and visualisation. if self._config.gcmc: - # Only save for first replica if performing replica exchange. - if not self._config.replica_exchange or (block == 0 and index == 0): + # Only save for first block. + if block == 0: mols0 = _sr.morph.link_to_reference(system) mols1 = _sr.morph.link_to_perturbed(system) From c74e49b0cc55c8e0c01dad1beec4538c1b75e963 Mon Sep 17 00:00:00 2001 From: Lester Hedges Date: Fri, 31 Oct 2025 12:12:31 +0000 Subject: [PATCH 170/170] Add macOS specific environment and handle missing loch package. 
--- .github/workflows/main.yaml | 4 +++- README.md | 12 +++++++++--- environment_macos.yaml | 14 ++++++++++++++ src/somd2/config/_config.py | 8 ++++++++ src/somd2/runner/_repex.py | 6 +++++- src/somd2/runner/_runner.py | 6 +++++- 6 files changed, 44 insertions(+), 6 deletions(-) create mode 100644 environment_macos.yaml diff --git a/.github/workflows/main.yaml b/.github/workflows/main.yaml index 66b2aa98..4d6695ec 100644 --- a/.github/workflows/main.yaml +++ b/.github/workflows/main.yaml @@ -34,6 +34,8 @@ jobs: defaults: run: shell: ${{ matrix.platform.shell }} + env: + ENVIRONMENT_FILE: ${{ matrix.platform.name == 'macos' && 'environment_macos.yaml' || 'environment.yaml' }} steps: # - name: Checkout the repository (commit) @@ -51,7 +53,7 @@ jobs: auto-update-conda: true python-version: ${{ matrix.python-version }} activate-environment: somd2 - environment-file: environment.yaml + environment-file: ${{ env.ENVIRONMENT_FILE }} miniforge-version: latest run-post: ${{ matrix.platform.name != 'windows' }} # diff --git a/README.md b/README.md index 8638e273..52d90089 100644 --- a/README.md +++ b/README.md @@ -16,6 +16,9 @@ conda env create -f environment.yaml (We recommend using [Miniforge](https://github.com/conda-forge/miniforge).) +> [!NOTE] +> On macOS, you will need to use the `environment_macos.yaml` file instead. + Now install `somd2` into the environment: ``` @@ -33,8 +36,10 @@ somd2 --help In order to run an alchemical free-energy simulation you will need to first create a stream file containing the _perturbable_ system of interest. -This can be created using [BioSimSpace](https://github.com/OpenBioSim/biosimspace). For example, following the tutorial -[here](https://biosimspace.openbiosim.org/versions/2023.4.0/tutorials/hydration_freenrg.html). Once the system is created, it can be streamed to file using, e.g.: +This can be created using [BioSimSpace](https://github.com/OpenBioSim/biosimspace). 
+For example, following the tutorial +[here](https://biosimspace.openbiosim.org/versions/2023.4.0/tutorials/hydration_freenrg.html). +Once the system is created, it can be streamed to file using, e.g.: ```python import BioSimSpace as BSS @@ -127,7 +132,8 @@ somd2 --help | grep -A2 ' --gcmc' ``` > [!NOTE] -> GCMC is currently only supported when using the CUDA platform. +> GCMC is currently only supported when using the CUDA platform and isn't +> available on macOS, where the `pycuda` package is not available. ## Analysis diff --git a/environment_macos.yaml b/environment_macos.yaml new file mode 100644 index 00000000..750a5f24 --- /dev/null +++ b/environment_macos.yaml @@ -0,0 +1,14 @@ +name: somd2 + +channels: + - conda-forge + - openbiosim/label/dev + +dependencies: + - biosimspace + - git + - filelock + - loguru + - numba + - pip: + - git+https://github.com/openbiosim/ghostly diff --git a/src/somd2/config/_config.py b/src/somd2/config/_config.py index c048e6f3..7cf88873 100644 --- a/src/somd2/config/_config.py +++ b/src/somd2/config/_config.py @@ -1514,6 +1514,14 @@ def gcmc(self): def gcmc(self, gcmc): if not isinstance(gcmc, bool): raise ValueError("'gcmc' must be of type 'bool'") + + # GCMC isn't supported on macOS. + if gcmc: + import platform as _platform + + if _platform.system() == "Darwin": + raise ValueError("GCMC is not supported on macOS systems.") + self._gcmc = gcmc @property diff --git a/src/somd2/runner/_repex.py b/src/somd2/runner/_repex.py index d3c7e9a2..9e1f18fd 100644 --- a/src/somd2/runner/_repex.py +++ b/src/somd2/runner/_repex.py @@ -228,7 +228,11 @@ def _create_dynamics( dynamics_kwargs["rest2_scale"] = scale if gcmc_kwargs is not None: - from loch import GCMCSampler + try: + from loch import GCMCSampler + except: + msg = "loch is not installed. GCMC sampling cannot be performed." 
+ _logger.error(msg) ghost_file = str(output_directory / f"gcmc_ghosts_{lam:.5f}.txt") diff --git a/src/somd2/runner/_runner.py b/src/somd2/runner/_runner.py index 995d5b25..76f6b543 100644 --- a/src/somd2/runner/_runner.py +++ b/src/somd2/runner/_runner.py @@ -413,7 +413,11 @@ def generate_lam_vals(lambda_base, increment=0.001): if self._config.gcmc: _logger.info(f"Preparing GCMC sampler at {_lam_sym} = {lambda_value:.5f}") - from loch import GCMCSampler + try: + from loch import GCMCSampler + except: + msg = "loch is not installed. GCMC sampling cannot be performed." + _logger.error(msg) gcmc_sampler = GCMCSampler( system,