From e7882f0ca650ea84853436b91a398f362f32ae30 Mon Sep 17 00:00:00 2001 From: hannahbaumann Date: Thu, 25 Jun 2026 15:58:46 +0200 Subject: [PATCH 1/9] Initial change of lig and cofactor resname --- .../protocols/openmm_rfe/hybridtop_units.py | 14 ++- .../protocols/openmm_utils/molecule_utils.py | 109 ++++++++++++++++++ .../openmm_rfe/test_hybrid_top_protocol.py | 31 +++++ 3 files changed, 153 insertions(+), 1 deletion(-) create mode 100644 src/openfe/protocols/openmm_utils/molecule_utils.py diff --git a/src/openfe/protocols/openmm_rfe/hybridtop_units.py b/src/openfe/protocols/openmm_rfe/hybridtop_units.py index 5f4c1c9f3..03fbe4e9c 100644 --- a/src/openfe/protocols/openmm_rfe/hybridtop_units.py +++ b/src/openfe/protocols/openmm_rfe/hybridtop_units.py @@ -49,6 +49,7 @@ from openfe.protocols.openmm_utils.omm_settings import ( BasePartialChargeSettings, ) +from openfe.protocols.openmm_utils.molecule_utils import _get_offmol_resname, _set_offmol_resname from ...analysis import plotting from ...utils import log_system_probe, without_oechem_backend @@ -753,6 +754,17 @@ def run( alchem_comps = self._inputs["alchemical_components"] solvent_comp, protein_comp, small_mols = self._get_components(stateA, stateB) + alchemical = set(alchem_comps["stateA"]) | set(alchem_comps["stateB"]) + cofactors = [smc for smc in small_mols if smc not in alchemical] + + for smc, offmol in small_mols.items(): + if _get_offmol_resname(offmol) is not None: + continue + if smc in alchemical: + _set_offmol_resname(offmol, "LIG") + else: + _set_offmol_resname(offmol, f"CF{cofactors.index(smc) + 1}") + # Assign partial charges now to avoid any discrepancies later self._assign_partial_charges(settings["charge_settings"], small_mols) @@ -1535,7 +1547,7 @@ def _structural_analysis( from openfe_analysis import rmsd try: - data = rmsd.gather_rms_data(pdb_file, trj_file) + data = rmsd.gather_rms_data(pdb_file, trj_file, ligand_selection="resname LIG") # TODO: eventually change this to more specific exception types except Exception as e: return {"structural_analysis_error": str(e)} diff --git a/src/openfe/protocols/openmm_utils/molecule_utils.py b/src/openfe/protocols/openmm_utils/molecule_utils.py new file mode 100644 index 000000000..aec4a21d7 --- /dev/null +++ b/src/openfe/protocols/openmm_utils/molecule_utils.py @@ -0,0 +1,109 @@ +# This code is part of OpenFE and is licensed under the MIT license. +# For details, see https://github.com/OpenFreeEnergy/openfe +import logging +from typing import Any + +from openff.toolkit import Molecule as OFFMolecule + +logger = logging.getLogger(__name__) + + +def _set_offmol_metadata( + offmol: OFFMolecule, + key: Any, + val: Any | None, +) -> None: + """ + Set a given metadata entry for a whole Molecule. + + Parameters + ---------- + offmol : openff.toolkit.Molecule + The Molecule to set the metadata for. + key : Any + The metadata key. + val : Any + The value to set the metadata entry to. + """ + if val is None: + for a in offmol.atoms: + a.metadata.pop(key, None) + else: + for a in offmol.atoms: + a.metadata[key] = val + + +def _get_offmol_metadata(offmol: OFFMolecule, key: Any) -> Any | None: + """ + Get an offmol's given metadata entry and make sure it is + consistent across all atoms in the Molecule. + + Parameters + ---------- + offmol : openff.toolkit.Molecule + Molecule to get the metadata value from. + key: Any + The metadata entry key. + + Returns + ------- + value : Any | None + Metadata for the given key in the molecule. ``None`` if the + Molecule does not have that metadata entry set, or if + the value is inconsistent across all the atoms. + """ + value: Any | None = None + for a in offmol.atoms: + if value is None: + try: + value = a.metadata[key] + except KeyError: + return None + + if value != a.metadata[key]: + wmsg = f"Inconsistent metadata {key} in OFFMol: {offmol}" + logger.warning(wmsg) + return None + + return value + + +def _set_offmol_resname( + offmol: OFFMolecule, + resname: str | None, +) -> None: + """ + Helper method to set offmol residue names + + Parameters + ---------- + offmol : openff.toolkit.Molecule + Molecule to assign a residue name to. + resname : str | None + Residue name to be set. Set to None to clear it. + + Returns + ------- + None + """ + _set_offmol_metadata(offmol, "residue_name", resname) + + +def _get_offmol_resname(offmol: OFFMolecule) -> str | None: + """ + Helper method to get an offmol's residue name and make sure it is + consistent across all atoms in the Molecule. + + Parameters + ---------- + offmol : openff.toolkit.Molecule + Molecule to get the residue name from. + + Returns + ------- + resname : Optional[str] + Residue name of the molecule. ``None`` if the Molecule + does not have a residue name, or if the residue name is + inconsistent across all the atoms. + """ + return _get_offmol_metadata(offmol, "residue_name") \ No newline at end of file diff --git a/src/openfe/tests/protocols/openmm_rfe/test_hybrid_top_protocol.py b/src/openfe/tests/protocols/openmm_rfe/test_hybrid_top_protocol.py index a1f9fef2d..bd0ce84e4 100644 --- a/src/openfe/tests/protocols/openmm_rfe/test_hybrid_top_protocol.py +++ b/src/openfe/tests/protocols/openmm_rfe/test_hybrid_top_protocol.py @@ -11,6 +11,7 @@ from unittest import mock import gufe +import MDAnalysis as mda import mdtraj as mdt import numpy as np import openmm @@ -2401,3 +2402,33 @@ def test_dry_run_vacuum_write_frequency( assert reporter.velocity_interval == velocities_write_frequency.m else: assert reporter.velocity_interval == 0 + + +@pytest.fixture +def eg5_vac_inputs(eg5_ligands, eg5_cofactor): + ligA, ligB = eg5_ligands[0], eg5_ligands[1] + mapper = openfe.LomapAtomMapper() + mapping = next(mapper.suggest_mappings(ligA, ligB)) + stateA = openfe.ChemicalSystem({"ligand": ligA, "cofactor": eg5_cofactor}) + stateB = openfe.ChemicalSystem({"ligand": ligB, "cofactor": eg5_cofactor}) + return stateA, stateB, mapping + + +def _run_setup_dry(stateA, stateB, mapping, settings, tmp_path): + protocol = openmm_rfe.RelativeHybridTopologyProtocol(settings=settings) + dag = protocol.create(stateA=stateA, stateB=stateB, mapping=mapping) + setup_unit = _get_units(dag.protocol_units, HybridTopologySetupUnit)[0] + return setup_unit.run(dry=True, scratch_basepath=tmp_path, shared_basepath=tmp_path) + + +def test_ligand_separable_from_cofactor(eg5_vac_inputs, vac_settings, tmp_path): + vac_settings.output_settings.output_indices = "all" + stateA, stateB, mapping = eg5_vac_inputs + out = _run_setup_dry(stateA, stateB, mapping, vac_settings, tmp_path) + + u = mda.Universe(out["pdb_structure"]) + lig = u.select_atoms("resname LIG") + cof = u.select_atoms("resname CF1") + assert lig.n_atoms > 0 + assert cof.n_atoms > 0 + assert set(lig.indices).isdisjoint(cof.indices) \ No newline at end of file From a32cf025c9757c1a10f2fb04e1694a2730a9c10c Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 29 Jun 2026 08:16:13 +0000 Subject: [PATCH 2/9] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- src/openfe/protocols/openmm_rfe/hybridtop_units.py | 2 +- src/openfe/protocols/openmm_utils/molecule_utils.py | 2 +- .../tests/protocols/openmm_rfe/test_hybrid_top_protocol.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/openfe/protocols/openmm_rfe/hybridtop_units.py b/src/openfe/protocols/openmm_rfe/hybridtop_units.py index 03fbe4e9c..e92239668 100644 --- a/src/openfe/protocols/openmm_rfe/hybridtop_units.py +++ b/src/openfe/protocols/openmm_rfe/hybridtop_units.py @@ -46,10 +46,10 @@ from openmmtools import multistate import openfe +from openfe.protocols.openmm_utils.molecule_utils import _get_offmol_resname, _set_offmol_resname from openfe.protocols.openmm_utils.omm_settings import ( BasePartialChargeSettings, ) -from openfe.protocols.openmm_utils.molecule_utils import _get_offmol_resname, _set_offmol_resname from ...analysis import plotting from ...utils import log_system_probe, without_oechem_backend diff --git a/src/openfe/protocols/openmm_utils/molecule_utils.py b/src/openfe/protocols/openmm_utils/molecule_utils.py index aec4a21d7..a715af4e2 100644 --- a/src/openfe/protocols/openmm_utils/molecule_utils.py +++ b/src/openfe/protocols/openmm_utils/molecule_utils.py @@ -106,4 +106,4 @@ def _get_offmol_resname(offmol: OFFMolecule) -> str | None: does not have a residue name, or if the residue name is inconsistent across all the atoms. """ - return _get_offmol_metadata(offmol, "residue_name") \ No newline at end of file + return _get_offmol_metadata(offmol, "residue_name") diff --git a/src/openfe/tests/protocols/openmm_rfe/test_hybrid_top_protocol.py b/src/openfe/tests/protocols/openmm_rfe/test_hybrid_top_protocol.py index bd0ce84e4..0f4259ae6 100644 --- a/src/openfe/tests/protocols/openmm_rfe/test_hybrid_top_protocol.py +++ b/src/openfe/tests/protocols/openmm_rfe/test_hybrid_top_protocol.py @@ -2431,4 +2431,4 @@ def test_ligand_separable_from_cofactor(eg5_vac_inputs, vac_settings, tmp_path): cof = u.select_atoms("resname CF1") assert lig.n_atoms > 0 assert cof.n_atoms > 0 - assert set(lig.indices).isdisjoint(cof.indices) \ No newline at end of file + assert set(lig.indices).isdisjoint(cof.indices) From 369f9d5a07771e4086ad702b7557130b8441d63c Mon Sep 17 00:00:00 2001 From: hannahbaumann Date: Mon, 29 Jun 2026 11:42:02 +0200 Subject: [PATCH 3/9] Fix tests --- .../hybrid_topology_atoms.csv | 32 +++++++++---------- .../openmm_rfe/test_hybrid_top_protocol.py | 4 +-- 2 files changed, 18 insertions(+), 18 deletions(-) diff --git a/src/openfe/tests/data/openmm_rfe/benzene_toluene_hybrid_top/hybrid_topology_atoms.csv b/src/openfe/tests/data/openmm_rfe/benzene_toluene_hybrid_top/hybrid_topology_atoms.csv index 3ed6ee0e8..86ed7f043 100644 --- a/src/openfe/tests/data/openmm_rfe/benzene_toluene_hybrid_top/hybrid_topology_atoms.csv +++ b/src/openfe/tests/data/openmm_rfe/benzene_toluene_hybrid_top/hybrid_topology_atoms.csv @@ -1,17 +1,17 @@ ,serial,name,element,resSeq,resName,chainID,segmentID,formal_charge -0,,C1x,C,0,UNK,0,, -1,,C2x,C,0,UNK,0,, -2,,C3x,C,0,UNK,0,, -3,,C4x,C,0,UNK,0,, -4,,C5x,C,0,UNK,0,, -5,,C6x,C,0,UNK,0,, -6,,H1x,H,0,UNK,0,, -7,,H2x,H,0,UNK,0,, -8,,H3x,H,0,UNK,0,, -9,,H4x,H,0,UNK,0,, -10,,H5x,H,0,UNK,0,, -11,,H6x,H,0,UNK,0,, -12,,H1x,H,0,UNK,0,, -13,,H2x,H,0,UNK,0,, -14,,C1x,C,0,UNK,0,, -15,,H3x,H,0,UNK,0,, +0,,C1x,C,0,LIG,0,, +1,,C2x,C,0,LIG,0,, +2,,C3x,C,0,LIG,0,, +3,,C4x,C,0,LIG,0,, +4,,C5x,C,0,LIG,0,, +5,,C6x,C,0,LIG,0,, +6,,H1x,H,0,LIG,0,, +7,,H2x,H,0,LIG,0,, +8,,H3x,H,0,LIG,0,, +9,,H4x,H,0,LIG,0,, +10,,H5x,H,0,LIG,0,, +11,,H6x,H,0,LIG,0,, +12,,H1x,H,0,LIG,0,, +13,,H2x,H,0,LIG,0,, +14,,C1x,C,0,LIG,0,, +15,,H3x,H,0,LIG,0,, diff --git a/src/openfe/tests/protocols/openmm_rfe/test_hybrid_top_protocol.py b/src/openfe/tests/protocols/openmm_rfe/test_hybrid_top_protocol.py index 0f4259ae6..6d8e110a4 100644 --- a/src/openfe/tests/protocols/openmm_rfe/test_hybrid_top_protocol.py +++ b/src/openfe/tests/protocols/openmm_rfe/test_hybrid_top_protocol.py @@ -508,7 +508,7 @@ def test_dry_run_ligand( ): # this might be a bit time consuming solv_settings.simulation_settings.sampler_method = method - solv_settings.output_settings.output_indices = "resname UNK" + solv_settings.output_settings.output_indices = "resname LIG" protocol = openmm_rfe.RelativeHybridTopologyProtocol( settings=solv_settings, @@ -1092,7 +1092,7 @@ def test_dry_run_complex( ): # this will be very time consuming solv_settings.simulation_settings.sampler_method = method - solv_settings.output_settings.output_indices = "protein or resname UNK" + solv_settings.output_settings.output_indices = "protein or resname LIG" protocol = openmm_rfe.RelativeHybridTopologyProtocol( settings=solv_settings, From 69a6a0807fe9fbbc5d1cfc023208b2eb73b50cdd Mon Sep 17 00:00:00 2001 From: hannahbaumann Date: Mon, 29 Jun 2026 13:48:46 +0200 Subject: [PATCH 4/9] Add news entry --- news/fix_unk_resnames.rst | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) create mode 100644 news/fix_unk_resnames.rst diff --git a/news/fix_unk_resnames.rst b/news/fix_unk_resnames.rst new file mode 100644 index 000000000..8f252ccef --- /dev/null +++ b/news/fix_unk_resnames.rst @@ -0,0 +1,23 @@ +**Added:** + +* + +**Changed:** + +* Small molecules in RelativeHybridTopology topologies (including the output PDB) are now named LIG (alchemical ligand) and CF1, CF2… (cofactors) instead of UNK. + +**Deprecated:** + +* + +**Removed:** + +* + +**Fixed:** + +* Fixed inflated ligand RMSD in the RelativeHybridTopology protocol's structural analysis for systems containing cofactors; the ligand RMSD is now computed for the alchemical ligand alone rather than conflating it with cofactors that shared the UNK residue name. + +**Security:** + +* From 2609bc4fcbe5f117e9de2874b561dcab6e6d9d4d Mon Sep 17 00:00:00 2001 From: hannahbaumann Date: Mon, 29 Jun 2026 14:06:11 +0200 Subject: [PATCH 5/9] Add more tests --- .../openmm_rfe/test_hybrid_top_protocol.py | 47 +++++++++++++++++++ 1 file changed, 47 insertions(+) diff --git a/src/openfe/tests/protocols/openmm_rfe/test_hybrid_top_protocol.py b/src/openfe/tests/protocols/openmm_rfe/test_hybrid_top_protocol.py index 6d8e110a4..c03cd71c4 100644 --- a/src/openfe/tests/protocols/openmm_rfe/test_hybrid_top_protocol.py +++ b/src/openfe/tests/protocols/openmm_rfe/test_hybrid_top_protocol.py @@ -51,6 +51,12 @@ HAS_NAGL, HAS_OPENEYE, ) +from openfe.protocols.openmm_utils.molecule_utils import ( + _get_offmol_metadata, + _set_offmol_metadata, + _set_offmol_resname, + _get_offmol_resname, +) def _get_units(protocol_units, unit_type): @@ -2432,3 +2438,44 @@ def test_ligand_separable_from_cofactor(eg5_vac_inputs, vac_settings, tmp_path): assert lig.n_atoms > 0 assert cof.n_atoms > 0 assert set(lig.indices).isdisjoint(cof.indices) + + +def test_get_metadata_inconsistent_warns(caplog): + mol = Molecule.from_smiles("CC") + _set_offmol_metadata(mol, "residue_name", "LIG") + mol.atoms[0].metadata["residue_name"] = "COF" + + with caplog.at_level(logging.WARNING): + result = _get_offmol_metadata(mol, "residue_name") + + assert result is None + assert "Inconsistent metadata" in caplog.text + + +def test_set_metadata_none_clears(): + mol = Molecule.from_smiles("CC") + _set_offmol_metadata(mol, "residue_name", "LIG") + _set_offmol_metadata(mol, "residue_name", None) + assert all("residue_name" not in a.metadata for a in mol.atoms) + + +def test_existing_resname_preserved(eg5_ligands, eg5_cofactor, vac_settings, tmp_path): + vac_settings.output_settings.output_indices = "all" + + ligA, ligB = eg5_ligands[0], eg5_ligands[1] + off = eg5_cofactor.to_openff() + _set_offmol_resname(off, "MYC") + cof = openfe.SmallMoleculeComponent.from_openff(off) + + mapper = openfe.setup.KartografAtomMapper() + mapping = next(mapper.suggest_mappings(ligA, ligB)) + stateA = openfe.ChemicalSystem({"ligand": ligA, "cofactor": cof}) + stateB = openfe.ChemicalSystem({"ligand": ligB, "cofactor": cof}) + + out = _run_setup_dry(stateA, stateB, mapping, vac_settings, tmp_path) + + u = mda.Universe(out["pdb_structure"]) + resnames = {r.resname for r in u.residues} + assert "MYC" in resnames + assert "CF1" not in resnames + assert "LIG" in resnames \ No newline at end of file From 0844937cbf1da39371a2fa2274cf39bfd5f67396 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 29 Jun 2026 12:12:56 +0000 Subject: [PATCH 6/9] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- .../tests/protocols/openmm_rfe/test_hybrid_top_protocol.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/openfe/tests/protocols/openmm_rfe/test_hybrid_top_protocol.py b/src/openfe/tests/protocols/openmm_rfe/test_hybrid_top_protocol.py index c03cd71c4..9a5dec58d 100644 --- a/src/openfe/tests/protocols/openmm_rfe/test_hybrid_top_protocol.py +++ b/src/openfe/tests/protocols/openmm_rfe/test_hybrid_top_protocol.py @@ -53,9 +53,9 @@ ) from openfe.protocols.openmm_utils.molecule_utils import ( _get_offmol_metadata, + _get_offmol_resname, _set_offmol_metadata, _set_offmol_resname, - _get_offmol_resname, ) @@ -2478,4 +2478,4 @@ def test_existing_resname_preserved(eg5_ligands, eg5_cofactor, vac_settings, tmp resnames = {r.resname for r in u.residues} assert "MYC" in resnames assert "CF1" not in resnames - assert "LIG" in resnames \ No newline at end of file + assert "LIG" in resnames From 11b9c2c9dcbfb2d213799a2d7713a291f040fac9 Mon Sep 17 00:00:00 2001 From: Hannah Baumann <43765638+hannahbaumann@users.noreply.github.com> Date: Thu, 2 Jul 2026 11:06:40 +0200 Subject: [PATCH 7/9] Update src/openfe/tests/protocols/openmm_rfe/test_hybrid_top_protocol.py Co-authored-by: Irfan Alibay --- .../tests/protocols/openmm_rfe/test_hybrid_top_protocol.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/src/openfe/tests/protocols/openmm_rfe/test_hybrid_top_protocol.py b/src/openfe/tests/protocols/openmm_rfe/test_hybrid_top_protocol.py index 9a5dec58d..40857e73d 100644 --- a/src/openfe/tests/protocols/openmm_rfe/test_hybrid_top_protocol.py +++ b/src/openfe/tests/protocols/openmm_rfe/test_hybrid_top_protocol.py @@ -2475,7 +2475,6 @@ def test_existing_resname_preserved(eg5_ligands, eg5_cofactor, vac_settings, tmp out = _run_setup_dry(stateA, stateB, mapping, vac_settings, tmp_path) u = mda.Universe(out["pdb_structure"]) - resnames = {r.resname for r in u.residues} - assert "MYC" in resnames - assert "CF1" not in resnames - assert "LIG" in resnames + assert "MYC" in u.residues.resnames + assert "CF1" not in u.residues.resnames + assert "LIG" in u.residues.resnames From 12b9840868838b7cda1e2cf370ae4d5e8280bde6 Mon Sep 17 00:00:00 2001 From: hannahbaumann Date: Thu, 2 Jul 2026 11:52:39 +0200 Subject: [PATCH 8/9] Address review comments --- news/fix_unk_resnames.rst | 2 +- .../protocols/openmm_rfe/hybridtop_units.py | 17 +++- ...molecule_utils.py => offmolecule_utils.py} | 0 .../openmm_rfe/test_hybrid_top_protocol.py | 80 ++++++++++++++----- .../tests/protocols/test_openmmutils.py | 27 ++++++- 5 files changed, 101 insertions(+), 25 deletions(-) rename src/openfe/protocols/openmm_utils/{molecule_utils.py => offmolecule_utils.py} (100%) diff --git a/news/fix_unk_resnames.rst b/news/fix_unk_resnames.rst index 8f252ccef..0dd6b8302 100644 --- a/news/fix_unk_resnames.rst +++ b/news/fix_unk_resnames.rst @@ -4,7 +4,7 @@ **Changed:** -* Small molecules in RelativeHybridTopology topologies (including the output PDB) are now named LIG (alchemical ligand) and CF1, CF2… (cofactors) instead of UNK. +* Small molecules in RelativeHybridTopology topologies (including the output PDB) are now named LIG (alchemical ligand) and CF1, CF2… (cofactors) instead of UNK. If a residue name was already assigned, the assigned one is kept. **Deprecated:** diff --git a/src/openfe/protocols/openmm_rfe/hybridtop_units.py b/src/openfe/protocols/openmm_rfe/hybridtop_units.py index e92239668..1cefbb3a4 100644 --- a/src/openfe/protocols/openmm_rfe/hybridtop_units.py +++ b/src/openfe/protocols/openmm_rfe/hybridtop_units.py @@ -46,7 +46,7 @@ from openmmtools import multistate import openfe -from openfe.protocols.openmm_utils.molecule_utils import _get_offmol_resname, _set_offmol_resname +from openfe.protocols.openmm_utils.offmolecule_utils import _get_offmol_resname, _set_offmol_resname from openfe.protocols.openmm_utils.omm_settings import ( BasePartialChargeSettings, ) @@ -765,6 +765,9 @@ def run( else: _set_offmol_resname(offmol, f"CF{cofactors.index(smc) + 1}") + # Record the resname the alchemical ligand actually ended up with + alchem_resname = _get_offmol_resname(small_mols[mapping.componentA]) + # Assign partial charges now to avoid any discrepancies later self._assign_partial_charges(settings["charge_settings"], small_mols) @@ -822,6 +825,7 @@ def run( "positions": positions_outfile, "pdb_structure": self.shared_basepath / settings["output_settings"].output_structure, "selection_indices": selection_indices, + "alchemical_resname": alchem_resname, } if dry: @@ -1517,6 +1521,7 @@ def _structural_analysis( trj_file: pathlib.Path, output_directory: pathlib.Path, dry: bool, + ligand_resname: str = "LIG", ) -> dict[str, str | pathlib.Path]: """ Run structural analysis using ``openfe-analysis``. @@ -1532,6 +1537,8 @@ def _structural_analysis( will be stored. dry : bool Whether or not we are running a dry run. + ligand_resname: str + The residue name of the hybrid topology ligand. Default: LIG Returns ------- @@ -1547,7 +1554,7 @@ def _structural_analysis( from openfe_analysis import rmsd try: - data = rmsd.gather_rms_data(pdb_file, trj_file, ligand_selection="resname LIG") + data = rmsd.gather_rms_data(pdb_file, trj_file, ligand_selection=f"resname {ligand_resname}") # TODO: eventually change this to more specific exception types except Exception as e: return {"structural_analysis_error": str(e)} @@ -1585,6 +1592,7 @@ def run( pdb_file: pathlib.Path, trajectory: pathlib.Path, checkpoint: pathlib.Path, + ligand_resname: str = "LIG", dry: bool = False, verbose: bool = True, scratch_basepath: pathlib.Path | None = None, @@ -1600,6 +1608,8 @@ def run( Path to the MultiStateReporter generated NetCDF file. checkpoint : pathlib.Path Path to the checkpoint file generated by MultiStateReporter. + ligand_resname: str + The residue name of the hybrid topology ligand. Default: LIG dry : bool Do a dry run of the calculation, creating all necessary hybrid system components (topology, system, sampler, etc...) but without @@ -1653,6 +1663,7 @@ def run( trj_file=trajectory, output_directory=self.shared_basepath, dry=dry, + ligand_resname=ligand_resname, ) # Return relevant things @@ -1681,6 +1692,8 @@ def _execute( pdb_file=pdb_file, trajectory=trajectory, checkpoint=checkpoint, + ligand_resname=setup_results.outputs.get("alchemical_resname", + "LIG"), # Adding this for backward compatibility scratch_basepath=ctx.scratch, shared_basepath=ctx.shared, ) diff --git a/src/openfe/protocols/openmm_utils/molecule_utils.py b/src/openfe/protocols/openmm_utils/offmolecule_utils.py similarity index 100% rename from src/openfe/protocols/openmm_utils/molecule_utils.py rename to src/openfe/protocols/openmm_utils/offmolecule_utils.py diff --git a/src/openfe/tests/protocols/openmm_rfe/test_hybrid_top_protocol.py b/src/openfe/tests/protocols/openmm_rfe/test_hybrid_top_protocol.py index 40857e73d..516d3e752 100644 --- a/src/openfe/tests/protocols/openmm_rfe/test_hybrid_top_protocol.py +++ b/src/openfe/tests/protocols/openmm_rfe/test_hybrid_top_protocol.py @@ -51,7 +51,7 @@ HAS_NAGL, HAS_OPENEYE, ) -from openfe.protocols.openmm_utils.molecule_utils import ( +from openfe.protocols.openmm_utils.offmolecule_utils import ( _get_offmol_metadata, _get_offmol_resname, _set_offmol_metadata, @@ -2440,25 +2440,6 @@ def test_ligand_separable_from_cofactor(eg5_vac_inputs, vac_settings, tmp_path): assert set(lig.indices).isdisjoint(cof.indices) -def test_get_metadata_inconsistent_warns(caplog): - mol = Molecule.from_smiles("CC") - _set_offmol_metadata(mol, "residue_name", "LIG") - mol.atoms[0].metadata["residue_name"] = "COF" - - with caplog.at_level(logging.WARNING): - result = _get_offmol_metadata(mol, "residue_name") - - assert result is None - assert "Inconsistent metadata" in caplog.text - - -def test_set_metadata_none_clears(): - mol = Molecule.from_smiles("CC") - _set_offmol_metadata(mol, "residue_name", "LIG") - _set_offmol_metadata(mol, "residue_name", None) - assert all("residue_name" not in a.metadata for a in mol.atoms) - - def test_existing_resname_preserved(eg5_ligands, eg5_cofactor, vac_settings, tmp_path): vac_settings.output_settings.output_indices = "all" @@ -2478,3 +2459,62 @@ def test_existing_resname_preserved(eg5_ligands, eg5_cofactor, vac_settings, tmp assert "MYC" in u.residues.resnames assert "CF1" not in u.residues.resnames assert "LIG" in u.residues.resnames + + +def test_hybrid_ligand_takes_componentA_resname( + eg5_ligands, eg5_cofactor, vac_settings, tmp_path +): + vac_settings.output_settings.output_indices = "all" + + ligA, ligB = eg5_ligands[0], eg5_ligands[1] + offA, offB = ligA.to_openff(), ligB.to_openff() + _set_offmol_resname(offA, "LGA") + _set_offmol_resname(offB, "LGB") + ligA_named = openfe.SmallMoleculeComponent.from_openff(offA) + ligB_named = openfe.SmallMoleculeComponent.from_openff(offB) + + mapper = openfe.setup.KartografAtomMapper() + mapping = next(mapper.suggest_mappings(ligA_named, ligB_named)) + stateA = openfe.ChemicalSystem({"ligand": ligA_named, "cofactor": eg5_cofactor}) + stateB = openfe.ChemicalSystem({"ligand": ligB_named, "cofactor": eg5_cofactor}) + + out = _run_setup_dry(stateA, stateB, mapping, vac_settings, tmp_path) + + u = mda.Universe(out["pdb_structure"]) + lig = u.select_atoms("resname LGA") + cof = u.select_atoms("resname CF1") + + # the setup unit records componentA's resname for the analysis unit + assert out["alchemical_resname"] == "LGA" + + # the whole hybrid ligand is under componentA's resname + assert lig.n_atoms > 0 + assert u.select_atoms("resname LGB").n_atoms == 0 + # user-set names are kept, not overwritten with the LIG default + assert u.select_atoms("resname LIG").n_atoms == 0 + assert "UNK" not in {r.resname for r in u.residues} + # cofactor stays separate + assert cof.n_atoms > 0 + assert set(lig.indices).isdisjoint(cof.indices) + + +def test_structural_analysis_uses_ligand_resname(): + captured = {} + + def fake_gather(pdb, trj, ligand_selection="resname UNK"): + captured["ligand_selection"] = ligand_selection + return { + "protein_RMSD": [], "ligand_RMSD": [], "ligand_wander": [], + "protein_2D_RMSD": [], "time(ps)": [], + } + + with mock.patch("openfe_analysis.rmsd.gather_rms_data", side_effect=fake_gather): + HybridTopologyMultiStateAnalysisUnit._structural_analysis( + pdb_file=Path("dummy.pdb"), + trj_file=Path("dummy.nc"), + output_directory=Path("."), + dry=True, + ligand_resname="LGA", + ) + + assert captured["ligand_selection"] == "resname LGA" \ No newline at end of file diff --git a/src/openfe/tests/protocols/test_openmmutils.py b/src/openfe/tests/protocols/test_openmmutils.py index ffacad813..e230afbce 100644 --- a/src/openfe/tests/protocols/test_openmmutils.py +++ b/src/openfe/tests/protocols/test_openmmutils.py @@ -2,6 +2,7 @@ # For details, see https://github.com/OpenFreeEnergy/openfe import copy import gzip +import logging import os import sys from importlib import resources @@ -9,9 +10,7 @@ from unittest import mock import numpy as np -import pooch import pytest -from gufe import BaseSolventComponent from gufe.components.errors import ComponentValidationError from gufe.settings import OpenMMSystemGeneratorFFSettings, ThermoSettings from numpy.testing import assert_allclose, assert_equal @@ -44,6 +43,11 @@ HAS_NAGL, HAS_OPENEYE, ) +from openfe.protocols.openmm_utils.offmolecule_utils import ( + _get_offmol_metadata, + _set_offmol_metadata, + _set_offmol_resname, +) from ..conftest import HAS_INTERNET @@ -1189,3 +1193,22 @@ def test_forward_backwards_failure(simulation_nc): ret = ana.get_forward_and_reverse_analysis() assert ret is None + + +def test_get_metadata_inconsistent_warns(caplog): + mol = OFFMol.from_smiles("CC") + _set_offmol_metadata(mol, "residue_name", "LIG") + mol.atoms[0].metadata["residue_name"] = "COF" + + with caplog.at_level(logging.WARNING): + result = _get_offmol_metadata(mol, "residue_name") + + assert result is None + assert "Inconsistent metadata" in caplog.text + + +def test_set_metadata_none_clears(): + mol = OFFMol.from_smiles("CC") + _set_offmol_metadata(mol, "residue_name", "LIG") + _set_offmol_metadata(mol, "residue_name", None) + assert all("residue_name" not in a.metadata for a in mol.atoms) From c68b88eca8c8228e26ba4f97b0f3c85d37ebe4c8 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Thu, 2 Jul 2026 09:54:04 +0000 Subject: [PATCH 9/9] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- src/openfe/protocols/openmm_rfe/hybridtop_units.py | 9 ++++++--- .../openmm_rfe/test_hybrid_top_protocol.py | 13 +++++++------ 2 files changed, 13 insertions(+), 9 deletions(-) diff --git a/src/openfe/protocols/openmm_rfe/hybridtop_units.py b/src/openfe/protocols/openmm_rfe/hybridtop_units.py index 1cefbb3a4..2e5436c18 100644 --- a/src/openfe/protocols/openmm_rfe/hybridtop_units.py +++ b/src/openfe/protocols/openmm_rfe/hybridtop_units.py @@ -1554,7 +1554,9 @@ def _structural_analysis( from openfe_analysis import rmsd try: - data = rmsd.gather_rms_data(pdb_file, trj_file, ligand_selection=f"resname {ligand_resname}") + data = rmsd.gather_rms_data( + pdb_file, trj_file, ligand_selection=f"resname {ligand_resname}" + ) # TODO: eventually change this to more specific exception types except Exception as e: return {"structural_analysis_error": str(e)} @@ -1692,8 +1694,9 @@ def _execute( pdb_file=pdb_file, trajectory=trajectory, checkpoint=checkpoint, - ligand_resname=setup_results.outputs.get("alchemical_resname", - "LIG"), # Adding this for backward compatibility + ligand_resname=setup_results.outputs.get( + "alchemical_resname", "LIG" + ), # Adding this for backward compatibility scratch_basepath=ctx.scratch, shared_basepath=ctx.shared, ) diff --git a/src/openfe/tests/protocols/openmm_rfe/test_hybrid_top_protocol.py b/src/openfe/tests/protocols/openmm_rfe/test_hybrid_top_protocol.py index 516d3e752..662b46ae1 100644 --- a/src/openfe/tests/protocols/openmm_rfe/test_hybrid_top_protocol.py +++ b/src/openfe/tests/protocols/openmm_rfe/test_hybrid_top_protocol.py @@ -2461,9 +2461,7 @@ def test_existing_resname_preserved(eg5_ligands, eg5_cofactor, vac_settings, tmp assert "LIG" in u.residues.resnames -def test_hybrid_ligand_takes_componentA_resname( - eg5_ligands, eg5_cofactor, vac_settings, tmp_path -): +def test_hybrid_ligand_takes_componentA_resname(eg5_ligands, eg5_cofactor, vac_settings, tmp_path): vac_settings.output_settings.output_indices = "all" ligA, ligB = eg5_ligands[0], eg5_ligands[1] @@ -2504,8 +2502,11 @@ def test_structural_analysis_uses_ligand_resname(): def fake_gather(pdb, trj, ligand_selection="resname UNK"): captured["ligand_selection"] = ligand_selection return { - "protein_RMSD": [], "ligand_RMSD": [], "ligand_wander": [], - "protein_2D_RMSD": [], "time(ps)": [], + "protein_RMSD": [], + "ligand_RMSD": [], + "ligand_wander": [], + "protein_2D_RMSD": [], + "time(ps)": [], } with mock.patch("openfe_analysis.rmsd.gather_rms_data", side_effect=fake_gather): @@ -2517,4 +2518,4 @@ def fake_gather(pdb, trj, ligand_selection="resname UNK"): ligand_resname="LGA", ) - assert captured["ligand_selection"] == "resname LGA" \ No newline at end of file + assert captured["ligand_selection"] == "resname LGA"