NOTE(review): reconstructed from a whitespace-collapsed copy of this patch.
Blank context lines in the __init__.py hunk are a best guess and the `index`
hashes are carried over from the original; regenerate both against the
repository before applying.

diff --git a/src/smileyllama/score/__init__.py b/src/smileyllama/score/__init__.py
index 5d40bd4..706a798 100644
--- a/src/smileyllama/score/__init__.py
+++ b/src/smileyllama/score/__init__.py
@@ -6,6 +6,7 @@
 from .iminer_props import *
 from .vina import *
 from .normalizer import *
+from .clogs import *
 from .registry import REGISTRY
 
 
diff --git a/src/smileyllama/score/clogs.py b/src/smileyllama/score/clogs.py
new file mode 100644
index 0000000..0b46f6a
--- /dev/null
+++ b/src/smileyllama/score/clogs.py
@@ -0,0 +1,163 @@
+import os, math
+from typing import Optional, List
+from pathlib import Path
+
+import numpy as np
+
+from .base import Score
+from ..utils import safe_run_command
+
+
+class CLogS(Score):
+    """Score for aqueous solubility (cLogS) using the clogs_alteri executable.
+
+    Computes cLogS values for a list of SMILES by writing them to a .smi file,
+    invoking the clogs_alteri tool (which modifies the file in-place to append
+    tab-separated cLogS values), and parsing the results.
+    """
+
+    def __init__(
+        self,
+        exec_path: str = '/global/scratch/users/gbalteri/CACHE/calc_cLogS/clogs_alteri',
+        *,
+        wdir: Optional[os.PathLike] = None,
+    ):
+        """Initialize CLogS scorer.
+
+        Parameters
+        ----------
+        exec_path : str, optional
+            Path to the clogs_alteri executable.
+        wdir : os.PathLike, optional
+            Working directory for intermediate files. If None, must be set later.
+
+        Raises
+        ------
+        AssertionError
+            If ``exec_path`` is not an existing file.
+        """
+        super().__init__()
+        # Explicit check instead of ``assert`` so it still runs under
+        # ``python -O``; exception type and message are unchanged.
+        if not os.path.isfile(exec_path):
+            raise AssertionError(f'{exec_path} not found')
+        self.exec = exec_path
+        if wdir is not None:
+            self.set_working_dir(wdir)
+
+    def write_smiles_file(self, smiles: List[str], path: os.PathLike):
+        """Write SMILES strings to a .smi file (one per line).
+
+        Parameters
+        ----------
+        smiles : list of str
+            SMILES strings.
+        path : os.PathLike
+            Output file path.
+        """
+        with open(path, 'w') as f:
+            for smi in smiles:
+                f.write(f'{smi}\n')
+
+    @staticmethod
+    def _fits_on_one_line(smi) -> bool:
+        """Return True if ``smi`` can be written as exactly one .smi record.
+
+        Non-strings, empty strings, and strings containing a line break or a
+        tab are rejected: they would add, blank out, or split rows of the
+        line-oriented, tab-separated file that results are read back from.
+        """
+        return (
+            isinstance(smi, str)
+            and smi != ''
+            and not any(ch in smi for ch in '\n\r\t')
+        )
+
+    def parse_results(self, path: os.PathLike, n: int) -> np.ndarray:
+        """Parse cLogS results from the modified .smi file.
+
+        After execution, the file has a header line ``SMILES\\tcLogS`` followed
+        by tab-separated SMILES and score lines.
+
+        Parameters
+        ----------
+        path : os.PathLike
+            Path to the results file (modified in-place by clogs_alteri).
+        n : int
+            Expected number of results.
+
+        Returns
+        -------
+        numpy.ndarray
+            Array of cLogS scores. NaN for any lines that failed to parse.
+        """
+        scores = [math.nan] * n
+        try:
+            with open(path) as f:
+                lines = f.readlines()
+        except FileNotFoundError:
+            # Best effort: a missing results file yields all-NaN scores.
+            return np.array(scores)
+        # First line is the header "SMILES\tcLogS"; zip() stops after n rows.
+        for i, line in zip(range(n), lines[1:]):
+            parts = line.strip().split('\t')
+            if len(parts) < 2:
+                continue
+            try:
+                scores[i] = float(parts[1])
+            except ValueError:
+                pass  # non-numeric score column: keep NaN
+        return np.array(scores)
+
+    def compute_batch(self, smiles: List[str]) -> np.ndarray:
+        """Compute cLogS scores for a batch of molecules.
+
+        Parameters
+        ----------
+        smiles : list of str
+            List of SMILES strings.
+
+        Returns
+        -------
+        numpy.ndarray
+            Array of cLogS scores, one per input in input order. NaN for
+            entries that were not sent to the tool or failed to parse.
+
+        Raises
+        ------
+        RuntimeError
+            If no working directory has been set.
+        """
+        wdir = getattr(self, 'wdir', None)
+        if wdir is None:
+            raise RuntimeError(
+                'CLogS working directory is not set; '
+                'pass wdir= or call set_working_dir() first'
+            )
+        scores = np.full(len(smiles), math.nan)
+        # Only entries that occupy exactly one row are sent to the tool, so
+        # the file written here has exactly one row per kept entry.
+        keep = [i for i, smi in enumerate(smiles) if self._fits_on_one_line(smi)]
+        if not keep:
+            # Nothing to score (also covers an empty batch): skip the tool.
+            return scores
+        smi_path = Path(wdir) / 'input.smi'
+        self.write_smiles_file([smiles[i] for i in keep], smi_path)
+        safe_run_command([self.exec, str(smi_path)])
+        scores[keep] = self.parse_results(smi_path, len(keep))
+        return scores
+
+    def compute(self, smiles: str) -> float:
+        """Compute cLogS score for a single molecule.
+
+        Parameters
+        ----------
+        smiles : str
+            SMILES string.
+
+        Returns
+        -------
+        float
+            cLogS value.
+        """
+        return self.compute_batch([smiles])[0].item()
diff --git a/tests/test_clogs.py b/tests/test_clogs.py
new file mode 100644
index 0000000..fd57e40
--- /dev/null
+++ b/tests/test_clogs.py
@@ -0,0 +1,30 @@
+import os, shutil
+import unittest
+from pathlib import Path
+
+from smileyllama.score.clogs import CLogS
+from smileyllama.utils import run_score_test
+
+cfg = [
+    ("clogs", CLogS, '/global/scratch/users/gbalteri/CACHE/calc_cLogS/clogs_alteri'),
+]
+
+test_smiles = [
+    'c1ccccn1', 'x', 'CN1C=C2C=C(C(=CC2=N1)Cl)NC3=NC(=O)N(C(=O)N3CC4=CC(=C(C=C4F)F)F)CC5=NN(C=N5)C',
+    'C' * 150
+]
+
+
+def test_clogs():
+    for tag, cls, exec_path in cfg:
+        if not os.path.isfile(exec_path):
+            # The executable lives at a site-specific path; report a skip
+            # (pytest honours unittest.SkipTest) instead of failing in the
+            # constructor on machines that do not have it.
+            raise unittest.SkipTest(f'{exec_path} not found')
+        print(f"===== Testing {tag} =====")
+        workdir = Path(__file__).parent / f'_test_{tag}'
+        if workdir.is_dir():
+            shutil.rmtree(workdir)
+        score_obj = cls(exec_path=exec_path)
+        print(tag, run_score_test(score_obj, test_smiles, wdir=workdir))