Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions Code/GraphMol/Descriptors/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,11 @@ if(RDK_BUILD_OSMORDRED)
OsmordredTopologicalConnectivityShape.cpp
OsmordredMatrixAutocorrEStateFragments.cpp)

# smarts291: Abraham SMARTS-based features (291 features)
include_directories(${CMAKE_CURRENT_SOURCE_DIR}/smarts291)
set(SMARTS291_HDRS smarts291/SMARTS291.h smarts291/abraham_queries.h)
set(SMARTS291_SOURCES smarts291/abraham_integration.cpp smarts291/abraham_queries.cpp)

include_directories(Descriptors LAPACK::LAPACK ${LAPACK_INCLUDE_DIRS})
endif(RDK_BUILD_OSMORDRED)

Expand All @@ -35,6 +40,7 @@ rdkit_library(Descriptors
OxidationNumbers.cpp
DCLV.cpp
${OSMORDRED_SOURCES}
${SMARTS291_SOURCES}
${DESC3D_SOURCES}
LINK_LIBRARIES DataStructs Fingerprints PartialCharges SmilesParse FileParsers Subgraphs SubstructMatch MolTransforms GraphMol
EigenSolvers RDGeneral)
Expand All @@ -51,6 +57,7 @@ rdkit_headers(Crippen.h BCUT.h Lipinski.h
OxidationNumbers.h
DCLV.h
${OSMORDRED_HDRS}
${SMARTS291_HDRS}
${DESC3D_HDRS}
DEST GraphMol/Descriptors)

Expand Down Expand Up @@ -95,6 +102,7 @@ rdkit_catch_test(descriptorsTestCatch catch_tests.cpp LINK_LIBRARIES Descriptors

if(RDK_BUILD_OSMORDRED)
rdkit_catch_test(testOsmordred test_osmordred.cpp LINK_LIBRARIES Descriptors SmilesParse FileParsers)
rdkit_catch_test(testSMARTS291 smarts291/test_smarts291.cpp LINK_LIBRARIES Descriptors SmilesParse FileParsers)
endif(RDK_BUILD_OSMORDRED)

if(RDK_BUILD_PYTHON_WRAPPERS)
Expand Down
60 changes: 60 additions & 0 deletions Code/GraphMol/Descriptors/Wrap/rdMolDescriptors.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@

#include <GraphMol/Descriptors/MolDescriptors.h>
#include <GraphMol/Descriptors/Osmordred.h>
#include <GraphMol/Descriptors/smarts291/SMARTS291.h>
#include <GraphMol/Descriptors/AtomFeat.h>
#include <GraphMol/Descriptors/OxidationNumbers.h>
#include <GraphMol/Fingerprints/AtomPairs.h>
Expand Down Expand Up @@ -2226,5 +2227,64 @@ BOOST_PYTHON_MODULE(rdMolDescriptors) {
python::def("HasOsmordredSupport", hasOsmordredSupport,
"Returns True if the RDKit is compiled with osmordred support, False otherwise.\n"
"If false, all osmordred functions return zero or empty vectors.");

// =========================================================================
// SMARTS291: Abraham SMARTS-based features (291 features)
// =========================================================================
python::def("HasSMARTS291Support", RDKit::Descriptors::SMARTS291::hasSMARTS291Support,
"Check if SMARTS291 support is available.\n"
"Returns: True if SMARTS291 features can be computed.\n");

python::def("CalcAbrahamFeatures", RDKit::Descriptors::Osmordred::calcAbrahamsFeatures,
"Calculate 291 Abraham SMARTS-based features for molecular property prediction.\n"
"Returns: vector of 291 double values (241 base SMARTS + 50 golden ratio features)\n"
"These features are used for physicochemical property prediction (V, E, L, B, S, A).\n");

// Batch wrapper for SMARTS291 with multi-threading (accepts Python list of SMILES strings)
auto smarts291_batch_impl = +[](python::list smiles_py, char param, int n_jobs) {
std::vector<std::string> smiles_list;
smiles_list.reserve(python::len(smiles_py));
for (int i = 0; i < python::len(smiles_py); ++i) {
python::object obj = smiles_py[i];
if (obj.is_none()) {
smiles_list.push_back(""); // Empty string for invalid SMILES
} else {
smiles_list.push_back(python::extract<std::string>(obj));
}
}
return RDKit::Descriptors::SMARTS291::extractSMARTS291Batch(smiles_list, param, n_jobs);
};
python::def("ExtractSMARTS291Batch", smarts291_batch_impl,
(python::arg("smiles_list"), python::arg("param")='A', python::arg("n_jobs")=0),
"Extract 291 SMARTS-based Abraham features from SMILES in parallel.\n"
"Input: list of SMILES strings, param (model type: 'A' default), n_jobs (0=auto)\n"
"Output: list of 291-feature vectors (241 base + 50 golden features)\n"
"Uses parallel processing when n_jobs > 0 (0 = auto-detect CPU count).\n");

// Batch wrapper for SMARTS291 from Mol objects (accepts Python list of Mol objects)
auto smarts291_from_mols_impl = +[](python::list mols_py, char param, int n_jobs) {
std::vector<const RDKit::ROMol*> mols;
mols.reserve(python::len(mols_py));
for (int i = 0; i < python::len(mols_py); ++i) {
python::object obj = mols_py[i];
if (obj.is_none()) {
mols.push_back(nullptr);
} else {
mols.push_back(python::extract<const RDKit::ROMol*>(obj));
}
}
return RDKit::Descriptors::SMARTS291::extractSMARTS291FromMolsBatch(mols, param, n_jobs);
};
python::def("ExtractSMARTS291FromMolsBatch", smarts291_from_mols_impl,
(python::arg("mols"), python::arg("param")='A', python::arg("n_jobs")=0),
"Extract 291 SMARTS-based Abraham features from Mol objects in parallel.\n"
"Input: list of RDKit Mol objects, param (model type: 'A' default), n_jobs (0=auto)\n"
"Output: list of 291-feature vectors (NaN for invalid molecules).\n");

python::def("GetSMARTS291FeatureNames", RDKit::Descriptors::SMARTS291::getSMARTS291FeatureNames,
(python::arg("param")='A'),
"Get the 291 SMARTS feature names (241 base + 50 golden for specified model).\n"
"Returns: vector of 291 strings with feature names.\n");

#endif // osmordred
}
92 changes: 92 additions & 0 deletions Code/GraphMol/Descriptors/smarts291/SMARTS291.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,92 @@
// Copyright (c) 2025, Guillaume Godin Osmo Labs, PBC's and others
// All rights reserved.
//
// SMARTS291 - Abraham SMARTS-based Features
//
// This module provides 291 SMARTS-based features for molecular property prediction.
// The features consist of:
// - 241 base features: SMARTS pattern counts (sorted alphabetically)
// - 50 golden features: Ratio features derived from base features
//
// These features are designed for Abraham parameter prediction (A, B, E, L, S, V)
// and are used as input to machine learning models for physicochemical property prediction.

#ifndef SMARTS291_H
#define SMARTS291_H

#include <RDGeneral/export.h>
#include <GraphMol/ROMol.h>
#include <vector>
#include <string>

namespace RDKit {
namespace Descriptors {
namespace SMARTS291 {

// Public API of the SMARTS291 feature set: 241 base SMARTS-count features plus
// 50 "golden" ratio features, for a total of 291 values per molecule.
// NOTE(review): Descriptors::Osmordred also declares generateGoldenFeaturesA and
// generateGoldenFeaturesS; code that pulls in both namespaces with
// using-directives would make unqualified calls ambiguous.

// Check if SMARTS291 support is available
// Returns: true if SMARTS291 features can be computed
RDKIT_DESCRIPTORS_EXPORT bool hasSMARTS291Support();

// Extract 241 base SMARTS features
// These are SMARTS pattern match counts, sorted alphabetically by feature name
// mol: molecule the SMARTS patterns are matched against
// Returns: vector of 241 double values (count of matches for each SMARTS pattern)
RDKIT_DESCRIPTORS_EXPORT std::vector<double> extractBaseFeatures(const RDKit::ROMol& mol);

// Generate 50 golden features from base features
// These are ratio features: baseFeatures[i] / baseFeatures[j]
// Different Abraham parameters (A, B, E, L, S, V) use different golden feature definitions;
// the function-name suffix selects the parameter.
// baseFeatures: presumably the 241-value vector produced by extractBaseFeatures
//               - TODO confirm how other sizes are handled
// Returns: vector of 50 double values (0.0 if denominator is 0)
RDKIT_DESCRIPTORS_EXPORT std::vector<double> generateGoldenFeaturesA(const std::vector<double>& baseFeatures);
RDKIT_DESCRIPTORS_EXPORT std::vector<double> generateGoldenFeaturesS(const std::vector<double>& baseFeatures);
RDKIT_DESCRIPTORS_EXPORT std::vector<double> generateGoldenFeaturesB(const std::vector<double>& baseFeatures);
RDKIT_DESCRIPTORS_EXPORT std::vector<double> generateGoldenFeaturesE(const std::vector<double>& baseFeatures);
RDKIT_DESCRIPTORS_EXPORT std::vector<double> generateGoldenFeaturesL(const std::vector<double>& baseFeatures);
RDKIT_DESCRIPTORS_EXPORT std::vector<double> generateGoldenFeaturesV(const std::vector<double>& baseFeatures);

// Extract all 291 SMARTS features for a given Abraham parameter
// (one function per parameter; the suffix names the parameter)
// Returns: vector of 291 double values (241 base + 50 golden)
RDKIT_DESCRIPTORS_EXPORT std::vector<double> extractSMARTS291_A(const RDKit::ROMol& mol);
RDKIT_DESCRIPTORS_EXPORT std::vector<double> extractSMARTS291_S(const RDKit::ROMol& mol);
RDKIT_DESCRIPTORS_EXPORT std::vector<double> extractSMARTS291_B(const RDKit::ROMol& mol);
RDKIT_DESCRIPTORS_EXPORT std::vector<double> extractSMARTS291_E(const RDKit::ROMol& mol);
RDKIT_DESCRIPTORS_EXPORT std::vector<double> extractSMARTS291_L(const RDKit::ROMol& mol);
RDKIT_DESCRIPTORS_EXPORT std::vector<double> extractSMARTS291_V(const RDKit::ROMol& mol);

// Batch extraction from SMILES list
// smiles_list: one SMILES string per molecule
// param: Abraham parameter whose golden features are appended (defaults to 'A')
// n_jobs: worker count (defaults to 0)
// NOTE(review): the Python docstring describes n_jobs == 0 as "auto-detect CPU
// count" - confirm against the implementation.
// Returns: one feature vector per input entry
RDKIT_DESCRIPTORS_EXPORT std::vector<std::vector<double>> extractSMARTS291Batch(
const std::vector<std::string>& smiles_list, char param = 'A', int n_jobs = 0);

// Batch extraction from Mol objects
// mols: non-owning molecule pointers; entries may be nullptr (the Python wrapper
//       passes nullptr for None)
// param, n_jobs: same meaning as for extractSMARTS291Batch
// NOTE(review): the Python docstring states invalid molecules yield NaN-filled
// vectors - confirm against the implementation.
// Returns: one feature vector per input entry
RDKIT_DESCRIPTORS_EXPORT std::vector<std::vector<double>> extractSMARTS291FromMolsBatch(
const std::vector<const RDKit::ROMol*>& mols, char param = 'A', int n_jobs = 0);

// Get feature names
// getBaseFeatureNames: names of the base features
// getGoldenFeatureNames: names of the golden features for the given Abraham parameter
// getSMARTS291FeatureNames: names of all 291 features (241 base + 50 golden for param)
RDKIT_DESCRIPTORS_EXPORT std::vector<std::string> getBaseFeatureNames();
RDKIT_DESCRIPTORS_EXPORT std::vector<std::string> getGoldenFeatureNames(char param = 'A');
RDKIT_DESCRIPTORS_EXPORT std::vector<std::string> getSMARTS291FeatureNames(char param = 'A');

} // namespace SMARTS291

// Legacy Osmordred namespace for compatibility
// NOTE(review): generateGoldenFeaturesA/S are declared here and in
// Descriptors::SMARTS291; presumably these forward to the SMARTS291 versions -
// confirm in the implementation file.
namespace Osmordred {

// Extract 241 base features using SMARTS patterns
// Returns: vector of 241 double values
RDKIT_DESCRIPTORS_EXPORT std::vector<double> extractAbrahamBaseFeatures(const RDKit::ROMol& mol);

// Generate 50 golden features (ratio features)
// baseFeatures: presumably the 241-value vector from extractAbrahamBaseFeatures - TODO confirm
// NOTE(review): the "Ridge" variant has no counterpart in the SMARTS291
// namespace; its feature definition is not documented in this header.
RDKIT_DESCRIPTORS_EXPORT std::vector<double> generateGoldenFeaturesA(const std::vector<double>& baseFeatures);
RDKIT_DESCRIPTORS_EXPORT std::vector<double> generateGoldenFeaturesS(const std::vector<double>& baseFeatures);
RDKIT_DESCRIPTORS_EXPORT std::vector<double> generateGoldenFeaturesRidge(const std::vector<double>& baseFeatures);

// Calculate 291 Abraham features (241 base + 50 golden for A model)
// This is the function exposed to Python as CalcAbrahamFeatures.
RDKIT_DESCRIPTORS_EXPORT std::vector<double> calcAbrahamsFeatures(const RDKit::ROMol& mol);

// Only declared when the build defines HAVE_ABRAHAM_MODELS.
#ifdef HAVE_ABRAHAM_MODELS
// Full Abraham parameter prediction (requires trained models)
RDKIT_DESCRIPTORS_EXPORT std::vector<double> calcAbrahams(const RDKit::ROMol& mol);
#endif

} // namespace Osmordred
} // namespace Descriptors
} // namespace RDKit

#endif // SMARTS291_H
Loading