Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion AUTHORS
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ the authors tag in the respective file header.
- Achal Bajpai
- Aditya Muzumdar
- Ahmed Khalil
- Alen Saric
- Alen Šarić
- Alexandra Scherbart
- Alexandra Zerck
- Amanda Wein
Expand Down
28 changes: 11 additions & 17 deletions src/openms/include/OpenMS/CHEMISTRY/DigestionEnzyme.h
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
//
// --------------------------------------------------------------------------
// $Maintainer: Xiao Liang $
// $Authors: Xiao Liang $
// $Authors: Xiao Liang, Alen Šarić $
// --------------------------------------------------------------------------
//

Expand All @@ -25,13 +25,14 @@ namespace OpenMS

@brief Base class for digestion enzymes
*/
class OPENMS_DLLAPI DigestionEnzyme
{
public:
class OPENMS_DLLAPI DigestionEnzyme
{

/** @name Constructors
/** @name Constructors
*/
//@{
//@{
public:

/// Copy constructor
DigestionEnzyme(const DigestionEnzyme&) = default;

Expand All @@ -44,14 +45,6 @@ namespace OpenMS
const std::set<String>& synonyms = std::set<String>(),
String regex_description = "");

/// Detailed constructor 2
explicit DigestionEnzyme(const String& name,
String cut_before,
const String& nocut_after = "",
String sense = "C",
const std::set<String>& synonyms = std::set<String>(),
String regex_description = "");

/// Destructor
virtual ~DigestionEnzyme();
//@}
Expand Down Expand Up @@ -128,8 +121,6 @@ namespace OpenMS

protected:

/// default constructor
DigestionEnzyme();

// basic
String name_;
Expand All @@ -139,6 +130,10 @@ namespace OpenMS
std::set<String> synonyms_;

String regex_description_;

/// default constructor
DigestionEnzyme();

};

OPENMS_DLLAPI std::ostream& operator<<(std::ostream& os, const DigestionEnzyme& enzyme);
Expand All @@ -164,4 +159,3 @@ namespace std
}
};
} // namespace std

39 changes: 31 additions & 8 deletions src/openms/include/OpenMS/CHEMISTRY/DigestionEnzymeProtein.h
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
//
// --------------------------------------------------------------------------
// $Maintainer: Xiao Liang $
// $Authors: Xiao Liang $
// $Authors: Xiao Liang, Alen Šarić $
// --------------------------------------------------------------------------
//

Expand All @@ -17,13 +17,25 @@ namespace OpenMS
/**
@ingroup Chemistry

@brief Representation of a digestion enzyme for proteins (protease)
* @brief Constructs a DigestionEnzymeProtein from amino acid cleavage rules.
*
* @param name Name of the enzyme
* @param cut_before Set of amino acids before/after which cleavage occurs (e.g. "KR" for Trypsin)
* @param sense Whether cleavage is C-terminal or N-terminal
* @param nocut_after Set of amino acids that inhibit cleavage (e.g. "P" for Trypsin)
* @param synonyms Optional synonyms for the enzyme
* @param regex_description Optional description of the regex
*
* @throw Exception::MissingInformation if cut_before is empty
* @throw Exception::InvalidParameter if cut_before or nocut_after contain non-uppercase amino acid characters
*
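* @note If cut_before does not already end in 'X', an 'X' (to match any amino acid) is appended to it; an 'X' at any other position of cut_before is rejected with Exception::InvalidParameter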
*/
class OPENMS_DLLAPI DigestionEnzymeProtein :
public DigestionEnzyme
{
public:

/// Selects the form of the cleavage regex built from residue sets:
/// C_TERM produces a lookbehind on the cut_before residues (optionally followed by a negative lookahead on nocut_after),
/// N_TERM produces a lookahead on the cut_before residues (optionally preceded by a negative lookbehind on nocut_after).
enum class Sense {C_TERM,N_TERM};
/** @name Constructors
*/
//@{
Expand All @@ -44,16 +56,23 @@ namespace OpenMS
explicit DigestionEnzymeProtein(const String& name,
const String& cleavage_regex,
const std::set<String>& synonyms = std::set<String>(),
String regex_description = "",
const String& regex_description = "",
EmpiricalFormula n_term_gain = EmpiricalFormula("H"),
EmpiricalFormula c_term_gain = EmpiricalFormula("OH"),
String psi_id = "",
String xtandem_id = "",
const String& psi_id = "",
const String& xtandem_id = "",
Int comet_id = -1,
Int msgf_id = -1,
Int omssa_id = -1);

/// Destructor
/**
  @brief Constructs a protease from sets of residues instead of a ready-made cleavage regex.

  The cleavage regex handed to the DigestionEnzyme base class is generated by buildRegex_()
  from @p cut_before, @p nocut_after and @p sense.

  @param name Name of the enzyme
  @param cut_before Upper-case one-letter residue codes that define the cleavage set (must not be empty)
  @param sense Whether the C-terminal or the N-terminal form of the regex is generated
  @param nocut_after Upper-case one-letter residue codes that inhibit cleavage; may be empty
  @param synonyms Synonyms of the enzyme
  @param regex_description Description of the regex

  @throw Exception::MissingInformation if @p cut_before is empty
  @throw Exception::InvalidParameter if @p cut_before or @p nocut_after contains a character outside 'A'..'Z',
         or if @p cut_before contains 'X' anywhere but at its end
*/
explicit DigestionEnzymeProtein(const String& name,
const String& cut_before,
Sense sense,
const String& nocut_after = "",
const std::set<String>& synonyms = std::set<String>(),
const String& regex_description = "");

/// Destructor
~DigestionEnzymeProtein() override;
//@}

Expand Down Expand Up @@ -159,10 +178,14 @@ namespace OpenMS

Int omssa_id_;

/**
  @brief Builds the cleavage regular expression from sets of one-letter residue codes.

  Declared static: the function works on its arguments only, and it is invoked from a
  constructor's mem-initializer list, i.e. before the base class sub-object exists.
  Calling a non-static member function at that point is undefined behaviour.

  @param cut_before Set of amino acids that define where a cut in a given sequence is placed
  @param nocut_after Set of amino acids that invalidate a cut even though an amino acid from @p cut_before has been met
  @param sense The sense (C- or N-terminal), i.e. how the sequence has to be read

  @return The assembled regular expression

  @throw Exception::MissingInformation if @p cut_before is empty
  @throw Exception::InvalidParameter if @p cut_before or @p nocut_after contains a character outside 'A'..'Z',
         or if @p cut_before contains 'X' anywhere but at its end
*/
static String buildRegex_(String cut_before, const String& nocut_after, const DigestionEnzymeProtein::Sense& sense);
};


OPENMS_DLLAPI std::ostream& operator<<(std::ostream& os, const DigestionEnzymeProtein& enzyme);

/// Protease is an alternative name for DigestionEnzymeProtein.
using Protease = DigestionEnzymeProtein;
}

Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
//
// --------------------------------------------------------------------------
// $Maintainer: Hannes Roest $
// $Authors: Hannes Roest, Luis Jacob Keller, Alen Saric$
// $Authors: Hannes Roest, Luis Jacob Keller, Alen Šarić$
// --------------------------------------------------------------------------

#pragma once
Expand Down
55 changes: 1 addition & 54 deletions src/openms/source/CHEMISTRY/DigestionEnzyme.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
//
// --------------------------------------------------------------------------
// $Maintainer: Xiao Liang $
// $Authors: Xiao Liang $
// $Authors: Xiao Liang, Alen Šarić $
// --------------------------------------------------------------------------
//

Expand Down Expand Up @@ -36,58 +36,6 @@ namespace OpenMS
{
}

/**
  @brief Detailed constructor 2: derives the cleavage regex from residue sets.

  For sense "c" the regex is a lookbehind on the cut_before residues, optionally followed by a
  negative lookahead on the nocut_after residues. For sense "n" it is an optional negative
  lookbehind on the nocut_after residues followed by a lookahead on the cut_before residues.

  @param name Name of the enzyme
  @param cut_before Residues forming the cleavage set; "X" is appended unless the string already ends with it
  @param nocut_after Residues that inhibit cleavage; may be empty
  @param sense "C" or "N" (compared case-insensitively)
  @param synonyms Synonyms of the enzyme
  @param regex_description Description of the regex

  @throw Exception::MissingInformation if @p cut_before is empty or @p sense is neither "C" nor "N"
*/
DigestionEnzyme::DigestionEnzyme(const String& name,
                                 String cut_before,
                                 const String& nocut_after,
                                 String sense,
                                 const std::set<String>& synonyms,
                                 String regex_description) :
  name_(name),
  synonyms_(synonyms),
  regex_description_(std::move(regex_description))
{
  //TODO check if all letters are A-Z?
  if (cut_before.empty())
  {
    //Maybe assertion?
    throw Exception::MissingInformation(
      __FILE__,
      __LINE__,
      OPENMS_PRETTY_FUNCTION,
      "No cleavage position given when trying to construct a DigestionEnzyme.");
  }
  else if (!cut_before.hasSuffix("X"))
  {
    //TODO think about this
    cut_before = cut_before + "X";
  }

  const String lowered_sense(sense.toLower());
  cleavage_regex_ = "";
  if (lowered_sense == "c")
  {
    // Fixed: the lookbehind group is now closed with ')'.
    cleavage_regex_ += "(?<=[" + cut_before + "])";
    if (!nocut_after.empty())
    {
      // Fixed: the character class is now opened with '[' so that it matches its closing ']'.
      cleavage_regex_ += "(?![" + nocut_after + "])";
    }
  }
  else if (lowered_sense == "n")
  {
    if (!nocut_after.empty())
    {
      cleavage_regex_ += "(?<![" + nocut_after + "])";
    }
    // Fixed: the lookahead group is now closed with ')'.
    cleavage_regex_ += "(?=[" + cut_before + "])";
  }
  else
  {
    throw Exception::MissingInformation(
      __FILE__,
      __LINE__,
      OPENMS_PRETTY_FUNCTION,
      "Cannot infer cleavage sense when constructing DigestionEnzyme. Has to be N or C.");
  }
}

DigestionEnzyme::~DigestionEnzyme() = default;

void DigestionEnzyme::setName(const String& name)
Expand Down Expand Up @@ -196,4 +144,3 @@ namespace OpenMS
}

}

89 changes: 81 additions & 8 deletions src/openms/source/CHEMISTRY/DigestionEnzymeProtein.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,13 @@
//
// --------------------------------------------------------------------------
// $Maintainer: Xiao Liang $
// $Authors: Xiao Liang $
// $Authors: Xiao Liang, Alen Šarić $
// --------------------------------------------------------------------------
//

#include <OpenMS/CHEMISTRY/DigestionEnzymeProtein.h>

#include <algorithm>
#include <iostream>

using namespace std;
Expand Down Expand Up @@ -42,24 +43,33 @@ namespace OpenMS
// Detailed constructor: forwards name, cleavage_regex, synonyms and regex_description to the
// DigestionEnzyme base class and stores the terminal gains and the psi/xtandem/comet/msgf/omssa
// identifiers in the corresponding members.
// NOTE(review): this span is a diff hunk. The by-value `String` parameters with `std::move`
// initializers and the `const String&` parameters with copy initializers are the two versions of
// the same lines; only one set belongs in the final file.
DigestionEnzymeProtein::DigestionEnzymeProtein(const String& name,
const String& cleavage_regex,
const std::set<String>& synonyms,
String regex_description,
const String& regex_description,
EmpiricalFormula n_term_gain,
EmpiricalFormula c_term_gain,
String psi_id,
String xtandem_id,
const String& psi_id,
const String& xtandem_id,
Int comet_id,
Int msgf_id,
Int omssa_id) :
DigestionEnzyme(name, cleavage_regex, synonyms, std::move(regex_description)),
DigestionEnzyme(name, cleavage_regex, synonyms,regex_description),
n_term_gain_(std::move(n_term_gain)),
c_term_gain_(std::move(c_term_gain)),
psi_id_(std::move(psi_id)),
xtandem_id_(std::move(xtandem_id)),
psi_id_(psi_id),
xtandem_id_(xtandem_id),
comet_id_(comet_id),
msgf_id_(msgf_id),
omssa_id_(omssa_id)
{
}
/**
  @brief Constructs a protease from residue sets; the cleavage regex is generated by buildRegex_().

  Delegates to the detailed constructor so that every remaining member receives that
  constructor's default value (terminal gains "H"/"OH", empty psi/xtandem ids, -1 for the
  comet/msgf/omssa ids). Initializing only the base class would leave the integer ids uninitialized.

  @param name Name of the enzyme
  @param cut_before Residues forming the cleavage set
  @param sense C- or N-terminal form of the regex
  @param nocut_after Residues that inhibit cleavage; may be empty
  @param synonyms Synonyms of the enzyme
  @param regex_description Description of the regex

  @throw Exception::MissingInformation if @p cut_before is empty
  @throw Exception::InvalidParameter if @p cut_before or @p nocut_after contains a character outside 'A'..'Z',
         or if @p cut_before contains 'X' anywhere but at its end
*/
DigestionEnzymeProtein::DigestionEnzymeProtein(const String& name,
                                               const String& cut_before,
                                               Sense sense,
                                               const String& nocut_after,
                                               const std::set<String>& synonyms,
                                               const String& regex_description) :
  // buildRegex_ reads nothing but its arguments.
  DigestionEnzymeProtein(name, buildRegex_(cut_before, nocut_after, sense), synonyms, regex_description)
{
}

DigestionEnzymeProtein::~DigestionEnzymeProtein() = default;

Expand Down Expand Up @@ -210,6 +220,70 @@ namespace OpenMS
return false;
}

/**
  @brief Builds the cleavage regular expression from sets of one-letter residue codes.

  @param cut_before Residues that define the cleavage set; taken by value because it is modified locally
  @param nocut_after Residues that inhibit cleavage; may be empty
  @param sense Selects the C-terminal or the N-terminal form of the expression

  @throw Exception::MissingInformation if @p cut_before is empty
  @throw Exception::InvalidParameter if @p cut_before or @p nocut_after contains a character outside 'A'..'Z',
         or if @p cut_before contains 'X' anywhere but at its end
*/
String DigestionEnzymeProtein::buildRegex_(String cut_before, const String& nocut_after, const DigestionEnzymeProtein::Sense& sense)
{
  // Only the upper-case letters 'A'..'Z' are accepted as residue codes.
  const auto is_invalid_residue = [](const char residue) { return residue < 'A' || residue > 'Z'; };

  if (cut_before.empty())
  {
    throw Exception::MissingInformation(
      __FILE__, __LINE__, OPENMS_PRETTY_FUNCTION,
      "No cleavage position given when trying to construct a DigestionEnzyme.");
  }

  // Reject the first offending character of the cleavage set ...
  for (const char residue : cut_before)
  {
    if (is_invalid_residue(residue))
    {
      throw Exception::InvalidParameter(
        __FILE__, __LINE__, OPENMS_PRETTY_FUNCTION,
        "Amino Acids for cleavage contain unknown character: " + String(residue));
    }
  }

  // ... and of the inhibiting set.
  for (const char residue : nocut_after)
  {
    if (is_invalid_residue(residue))
    {
      throw Exception::InvalidParameter(
        __FILE__, __LINE__, OPENMS_PRETTY_FUNCTION,
        "Amino Acids to stop cleavage contain unknown character: " + String(residue));
    }
  }
Comment on lines +225 to +250

Copy link
Copy Markdown
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

document these exceptions in the constructor using

@throw Exception::InvalidParameter if ....


if (!cut_before.hasSuffix("X"))
{
if(cut_before.find('X') != std::string::npos){
throw Exception::InvalidParameter(__FILE__,__LINE__,OPENMS_PRETTY_FUNCTION,"cut_before must not contain X in the set of cleavage points, as this creates a Protease which would cleave everywhere.");
}
cut_before += "X";
}

Comment on lines +252 to +259

Copy link
Copy Markdown
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I know this is copied, but we may as well do it right when touching it:
has_suffix is a bad test, that you want is to find all 'X'.

Also document (in the C'tor) that 'X' is added to the cleavage site

String result = "";
if (sense == DigestionEnzymeProtein::Sense::C_TERM)
{
result = "(?<=[" + cut_before + "])";
if (!nocut_after.empty())
{
result += "(?!" + nocut_after + "])";
}
}
else if (sense == DigestionEnzymeProtein::Sense::N_TERM)
{
if (!nocut_after.empty())
{
result = "(?<![" + nocut_after + "])";
}
result += "(?=[" + cut_before + "])";
}
else
{
throw Exception::MissingInformation(
__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION,
"Cannot infer cleavage sense. Has to be N or C.");
}

return result;
}

ostream& operator<<(ostream& os, const DigestionEnzymeProtein& enzyme)
{
os << static_cast<const DigestionEnzyme&>(enzyme) << " "
Expand All @@ -218,4 +292,3 @@ namespace OpenMS
}

}

Loading