Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions src/smileyllama/score/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
from .iminer_props import *
from .vina import *
from .normalizer import *
from .clogs import *

from .registry import REGISTRY

Expand Down
121 changes: 121 additions & 0 deletions src/smileyllama/score/clogs.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,121 @@
import os, math
from typing import Optional, List
from pathlib import Path

import numpy as np

from .base import Score
from ..utils import safe_run_command


class CLogS(Score):
    """Score for aqueous solubility (cLogS) using the clogs_alteri executable.

    Computes cLogS values for a list of SMILES by writing them to a .smi file,
    invoking the clogs_alteri tool (which modifies the file in-place to append
    tab-separated cLogS values), and parsing the results.

    Notes
    -----
    Results are matched to inputs purely by line position.
    NOTE(review): this assumes the tool emits exactly one row per input line,
    in the original order, even for SMILES it cannot process -- confirm.
    Every batch reuses the same ``input.smi`` file inside the working
    directory, so each call overwrites the previous call's file.
    """

    def __init__(
        self,
        exec_path: str = '/global/scratch/users/gbalteri/CACHE/calc_cLogS/clogs_alteri',
        *,
        wdir: Optional[os.PathLike] = None,
    ):
        """Initialize CLogS scorer.

        Parameters
        ----------
        exec_path : str, optional
            Path to the clogs_alteri executable.
        wdir : os.PathLike, optional
            Working directory for intermediate files. If None, must be set later.

        Raises
        ------
        AssertionError
            If ``exec_path`` does not point to an existing file.
        """
        super().__init__()
        # Raise explicitly instead of using ``assert``: assert statements are
        # removed under ``python -O``, which would silently skip this check.
        # AssertionError is kept so existing callers see the same exception.
        if not os.path.isfile(exec_path):
            raise AssertionError(f'{exec_path} not found')
        self.exec = exec_path
        if wdir is not None:
            self.set_working_dir(wdir)

    def write_smiles_file(self, smiles: List[str], path: os.PathLike):
        """Write SMILES strings to a .smi file (one per line).

        Any existing file at ``path`` is overwritten.

        Parameters
        ----------
        smiles : list of str
            SMILES strings.
        path : os.PathLike
            Output file path.
        """
        with open(path, 'w') as f:
            f.writelines(f'{smi}\n' for smi in smiles)

    def parse_results(self, path: os.PathLike, n: int) -> np.ndarray:
        """Parse cLogS results from the modified .smi file.

        After execution, the file has a header line ``SMILES\\tcLogS`` followed
        by tab-separated SMILES and score lines.

        Parameters
        ----------
        path : os.PathLike
            Path to the results file (modified in-place by clogs_alteri).
        n : int
            Expected number of results.

        Returns
        -------
        numpy.ndarray
            Array of ``n`` cLogS scores, ordered by row position in the file.
            NaN for any row that is missing, has fewer than two tab-separated
            fields, or whose second field is not a number. All NaN if the
            file does not exist.
        """
        scores = [math.nan] * n
        try:
            with open(path) as f:
                # The first line is always treated as the header and skipped.
                next(f, None)
                # Stream the file and stop after n data rows instead of
                # loading every line into memory; extra rows are ignored.
                for i, line in zip(range(n), f):
                    parts = line.strip().split('\t')
                    if len(parts) < 2:
                        # No score column on this row: keep NaN.
                        continue
                    try:
                        scores[i] = float(parts[1])
                    except ValueError:
                        # Non-numeric score field: keep NaN for this row.
                        pass
        except FileNotFoundError:
            # No results file at all: every score stays NaN.
            pass
        return np.array(scores)

    def compute_batch(self, smiles: List[str]) -> np.ndarray:
        """Compute cLogS scores for a batch of molecules.

        Writes the batch to ``input.smi`` in the working directory, runs the
        executable on that file, and parses the scores back out of it.

        Parameters
        ----------
        smiles : list of str
            List of SMILES strings.

        Returns
        -------
        numpy.ndarray
            Array of cLogS scores, one per input SMILES (NaN where no score
            could be parsed). Empty array for an empty batch.
        """
        if len(smiles) == 0:
            # Nothing to score: return the same empty float array the parser
            # would produce, without launching the external tool.
            return np.array([], dtype=float)
        # Path() is a no-op when wdir is already a Path and makes a plain
        # string working directory usable with the ``/`` operator.
        smi_path = Path(self.wdir) / 'input.smi'
        self.write_smiles_file(smiles, smi_path)
        safe_run_command([self.exec, str(smi_path)])
        return self.parse_results(smi_path, len(smiles))

    def compute(self, smiles: str) -> float:
        """Compute cLogS score for a single molecule.

        Delegates to :meth:`compute_batch` with a one-element batch.

        Parameters
        ----------
        smiles : str
            SMILES string.

        Returns
        -------
        float
            cLogS value (NaN if no score could be parsed).
        """
        return self.compute_batch([smiles])[0].item()
24 changes: 24 additions & 0 deletions tests/test_clogs.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
import os, shutil
from pathlib import Path

from smileyllama.score.clogs import CLogS
from smileyllama.utils import run_score_test

# Scorer configurations exercised by test_clogs, one tuple per scorer:
# (tag used for log output and the work-directory name, scorer class,
#  path to the external executable handed to the scorer's constructor).
# NOTE(review): the executable path is an absolute, user-specific location,
# so this test can presumably only run where that path exists -- confirm.
cfg = [
("clogs", CLogS, '/global/scratch/users/gbalteri/CACHE/calc_cLogS/clogs_alteri'),
]

# SMILES inputs passed to run_score_test for every configured scorer.
# NOTE(review): 'x' does not look like a valid SMILES and 'C' * 150 is a
# 150-atom chain; presumably these probe failure and size edge cases -- confirm.
test_smiles = [
'c1ccccn1', 'x', 'CN1C=C2C=C(C(=CC2=N1)Cl)NC3=NC(=O)N(C(=O)N3CC4=CC(=C(C=C4F)F)F)CC5=NN(C=N5)C',
'C' * 150
]


def test_clogs():
    """Run the shared score test for every scorer listed in ``cfg``.

    Each scorer gets its own ``_test_<tag>`` work directory next to this
    file; a directory left over from a previous run is deleted first. The
    outcome of ``run_score_test`` is printed rather than asserted on.
    """
    for entry in cfg:
        label, scorer_cls, binary = entry
        print(f"===== Testing {label} =====")

        # Start from a clean slate so stale intermediate files cannot leak in.
        scratch = Path(__file__).parent / f'_test_{label}'
        if scratch.is_dir():
            shutil.rmtree(scratch)

        scorer = scorer_cls(exec_path=binary)
        outcome = run_score_test(scorer, test_smiles, wdir=scratch)
        print(label, outcome)
Loading