diff --git a/AUTHORS b/AUTHORS index 0ab2634af79..b88bbae9025 100644 --- a/AUTHORS +++ b/AUTHORS @@ -8,7 +8,7 @@ the authors tag in the respective file header. - Achal Bajpai - Aditya Muzumdar - Ahmed Khalil - - Alen Saric + - Alen Šarić - Alexandra Scherbart - Alexandra Zerck - Amanda Wein diff --git a/src/openms/include/OpenMS/CHEMISTRY/DigestionEnzyme.h b/src/openms/include/OpenMS/CHEMISTRY/DigestionEnzyme.h index d9f95631555..bc963acccab 100644 --- a/src/openms/include/OpenMS/CHEMISTRY/DigestionEnzyme.h +++ b/src/openms/include/OpenMS/CHEMISTRY/DigestionEnzyme.h @@ -3,7 +3,7 @@ // // -------------------------------------------------------------------------- // $Maintainer: Xiao Liang $ -// $Authors: Xiao Liang $ +// $Authors: Xiao Liang, Alen Šarić $ // -------------------------------------------------------------------------- // @@ -25,13 +25,14 @@ namespace OpenMS @brief Base class for digestion enzymes */ - class OPENMS_DLLAPI DigestionEnzyme - { - public: + class OPENMS_DLLAPI DigestionEnzyme + { - /** @name Constructors + /** @name Constructors */ - //@{ + //@{ + public: + /// Copy constructor DigestionEnzyme(const DigestionEnzyme&) = default; @@ -44,14 +45,6 @@ namespace OpenMS const std::set& synonyms = std::set(), String regex_description = ""); - /// Detailed constructor 2 - explicit DigestionEnzyme(const String& name, - String cut_before, - const String& nocut_after = "", - String sense = "C", - const std::set& synonyms = std::set(), - String regex_description = ""); - /// Destructor virtual ~DigestionEnzyme(); //@} @@ -128,8 +121,6 @@ namespace OpenMS protected: - /// default constructor - DigestionEnzyme(); // basic String name_; @@ -139,6 +130,10 @@ namespace OpenMS std::set synonyms_; String regex_description_; + + /// default constructor + DigestionEnzyme(); + }; OPENMS_DLLAPI std::ostream& operator<<(std::ostream& os, const DigestionEnzyme& enzyme); @@ -164,4 +159,3 @@ namespace std } }; } // namespace std - diff --git a/src/openms/include/OpenMS/CHEMISTRY/DigestionEnzymeProtein.h b/src/openms/include/OpenMS/CHEMISTRY/DigestionEnzymeProtein.h index 394100355fe..4119b386856 100644 --- a/src/openms/include/OpenMS/CHEMISTRY/DigestionEnzymeProtein.h +++ b/src/openms/include/OpenMS/CHEMISTRY/DigestionEnzymeProtein.h @@ -3,7 +3,7 @@ // // -------------------------------------------------------------------------- // $Maintainer: Xiao Liang $ -// $Authors: Xiao Liang $ +// $Authors: Xiao Liang, Alen Šarić $ // -------------------------------------------------------------------------- // @@ -17,13 +17,25 @@ namespace OpenMS /** @ingroup Chemistry - @brief Representation of a digestion enzyme for proteins (protease) + * @brief Constructs a DigestionEnzymeProtein from amino acid cleavage rules. +* +* @param name Name of the enzyme +* @param cut_before Set of amino acids before/after which cleavage occurs (e.g. "KR" for Trypsin) +* @param sense Whether cleavage is C-terminal or N-terminal +* @param nocut_after Set of amino acids that inhibit cleavage (e.g. "P" for Trypsin) +* @param synonyms Optional synonyms for the enzyme +* @param regex_description Optional description of the regex +* +* @throw Exception::MissingInformation if cut_before is empty +* @throw Exception::InvalidParameter if cut_before or nocut_after contain non-uppercase amino acid characters +* +* @note 'X' is automatically appended to cut_before to match any amino acid */ class OPENMS_DLLAPI DigestionEnzymeProtein : public DigestionEnzyme { public: - + enum class Sense {C_TERM,N_TERM}; /** @name Constructors */ //@{ @@ -44,16 +56,23 @@ namespace OpenMS explicit DigestionEnzymeProtein(const String& name, const String& cleavage_regex, const std::set& synonyms = std::set(), - String regex_description = "", + const String& regex_description = "", EmpiricalFormula n_term_gain = EmpiricalFormula("H"), EmpiricalFormula c_term_gain = EmpiricalFormula("OH"), - String psi_id = "", - String xtandem_id = "", + const String& psi_id = "", + const String& xtandem_id = "", Int comet_id = -1, Int msgf_id = -1, Int omssa_id = -1); - /// Destructor + explicit DigestionEnzymeProtein(const String& name, + const String& cut_before, + Sense sense, + const String& nocut_after = "", + const std::set& synonyms = std::set(), + const String& regex_description = ""); + + /// Destructor ~DigestionEnzymeProtein() override; //@} @@ -159,10 +178,14 @@ namespace OpenMS Int omssa_id_; + // @param cut_before: a set of Amino Acids, before which a cut in a given sequence should be set + // @param nocut_after: a set of Amino Acids, which disvalidate a cut, even though a given Amino Acids from cut_before has been met + // @param sense: the sense, as to how the sequence has to be read. + String buildRegex_(String cut_before, const String& nocut_after,const DigestionEnzymeProtein::Sense& sense); }; + OPENMS_DLLAPI std::ostream& operator<<(std::ostream& os, const DigestionEnzymeProtein& enzyme); typedef DigestionEnzymeProtein Protease; } - diff --git a/src/openms/include/OpenMS/PROCESSING/RESAMPLING/LinearResamplerAlign.h b/src/openms/include/OpenMS/PROCESSING/RESAMPLING/LinearResamplerAlign.h index fe812bfce07..0e3716834e2 100644 --- a/src/openms/include/OpenMS/PROCESSING/RESAMPLING/LinearResamplerAlign.h +++ b/src/openms/include/OpenMS/PROCESSING/RESAMPLING/LinearResamplerAlign.h @@ -3,7 +3,7 @@ // // -------------------------------------------------------------------------- // $Maintainer: Hannes Roest $ -// $Authors: Hannes Roest, Luis Jacob Keller, Alen Saric$ +// $Authors: Hannes Roest, Luis Jacob Keller, Alen Šarić$ // -------------------------------------------------------------------------- #pragma once diff --git a/src/openms/source/CHEMISTRY/DigestionEnzyme.cpp b/src/openms/source/CHEMISTRY/DigestionEnzyme.cpp index 1ea653c265a..9cd43884ee8 100644 --- a/src/openms/source/CHEMISTRY/DigestionEnzyme.cpp +++ b/src/openms/source/CHEMISTRY/DigestionEnzyme.cpp @@ -3,7 +3,7 @@ // // -------------------------------------------------------------------------- // $Maintainer: Xiao Liang $ -// $Authors: Xiao Liang $ +// $Authors: Xiao Liang, Alen Šarić $ // -------------------------------------------------------------------------- // @@ -36,58 +36,6 @@ namespace OpenMS { } - DigestionEnzyme::DigestionEnzyme(const String& name, - String cut_before, - const String& nocut_after, - String sense, - const std::set& synonyms, - String regex_description) : - name_(name), - synonyms_(synonyms), - regex_description_(std::move(regex_description)) - { - //TODO check if all letters are A-Z? - if (cut_before.empty()) - { - //Maybe assertion? - throw Exception::MissingInformation( - __FILE__, - __LINE__, - OPENMS_PRETTY_FUNCTION, - "No cleavage position given when trying to construct a DigestionEnzyme."); - } - else if (!cut_before.hasSuffix("X")) - { - //TODO think about this - cut_before = cut_before + "X"; - } - cleavage_regex_ = ""; - if (sense.toLower() == "c") - { - cleavage_regex_ += "(?<=[" + cut_before + "]"; - if (!nocut_after.empty()) - { - cleavage_regex_ += "(?!" + nocut_after + "])"; - } - } - else if (sense.toLower() == "n") - { - if (!nocut_after.empty()) - { - cleavage_regex_ += "(? +#include #include using namespace std; @@ -42,24 +43,33 @@ namespace OpenMS DigestionEnzymeProtein::DigestionEnzymeProtein(const String& name, const String& cleavage_regex, const std::set& synonyms, - String regex_description, + const String& regex_description, EmpiricalFormula n_term_gain, EmpiricalFormula c_term_gain, - String psi_id, - String xtandem_id, + const String& psi_id, + const String& xtandem_id, Int comet_id, Int msgf_id, Int omssa_id) : - DigestionEnzyme(name, cleavage_regex, synonyms, std::move(regex_description)), + DigestionEnzyme(name, cleavage_regex, synonyms,regex_description), n_term_gain_(std::move(n_term_gain)), c_term_gain_(std::move(c_term_gain)), - psi_id_(std::move(psi_id)), - xtandem_id_(std::move(xtandem_id)), + psi_id_(psi_id), + xtandem_id_(xtandem_id), comet_id_(comet_id), msgf_id_(msgf_id), omssa_id_(omssa_id) { } + DigestionEnzymeProtein::DigestionEnzymeProtein(const String& name, + const String& cut_before, + Sense sense, + const String& nocut_after, + const std::set& synonyms, + const String& regex_description): + DigestionEnzyme(name, buildRegex_(cut_before, nocut_after, sense), synonyms, regex_description) + { + } DigestionEnzymeProtein::~DigestionEnzymeProtein() = default; @@ -210,6 +220,70 @@ namespace OpenMS return false; } + String DigestionEnzymeProtein::buildRegex_(String cut_before, const String& nocut_after, const DigestionEnzymeProtein::Sense& sense) + { + if (cut_before.empty()) + { + throw Exception::MissingInformation( + __FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, + "No cleavage position given when trying to construct a DigestionEnzyme."); + } + + for(char c : cut_before) + { + if (c > 'Z' || c < 'A') + { + throw Exception::InvalidParameter( + __FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, + "Amino Acids for cleavage contain unknown character: " + String(c)); + } + } + + for(char c : nocut_after) + { + if (c > 'Z' || c < 'A') + { + throw Exception::InvalidParameter( + __FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, + "Amino Acids to stop cleavage contain unknown character: " + String(c)); + } + } + + if (!cut_before.hasSuffix("X")) + { + if(cut_before.find('X') != std::string::npos){ + throw Exception::InvalidParameter(__FILE__,__LINE__,OPENMS_PRETTY_FUNCTION,"cut_before must not contain X in the set of cleavage points, as this creates a Protease which would cleave everywhere."); + } + cut_before += "X"; + } + + String result = ""; + if (sense == DigestionEnzymeProtein::Sense::C_TERM) + { + result = "(?<=[" + cut_before + "])"; + if (!nocut_after.empty()) + { + result += "(?!" + nocut_after + "])"; + } + } + else if (sense == DigestionEnzymeProtein::Sense::N_TERM) + { + if (!nocut_after.empty()) + { + result = "(?(enzyme) << " " @@ -218,4 +292,3 @@ namespace OpenMS } } - diff --git a/src/openms/source/FORMAT/PepXMLFile.cpp b/src/openms/source/FORMAT/PepXMLFile.cpp index d2e1da05995..a6466ac2ea4 100644 --- a/src/openms/source/FORMAT/PepXMLFile.cpp +++ b/src/openms/source/FORMAT/PepXMLFile.cpp @@ -1254,7 +1254,7 @@ namespace OpenMS value = attributeAsDouble_(attributes, "value"); peptide_hit_.setMetaValue("Comet:lnrSp", value); // name: Comet:lnrSp peptide_hit_.setMetaValue("COMET:lnRankSP", value); // name: COMET:lnRankSP - } + } else if (name == "deltLCn") { value = attributeAsDouble_(attributes, "value"); @@ -1263,7 +1263,7 @@ namespace OpenMS else if (name == "lnExpect") { value = attributeAsDouble_(attributes, "value"); - peptide_hit_.setMetaValue("COMET:lnExpect", value); // name: Comet:lnExpect + peptide_hit_.setMetaValue("COMET:lnExpect", value); // name: Comet:lnExpect } else if (name == "IonFrac") { @@ -1275,7 +1275,7 @@ namespace OpenMS { value = attributeAsDouble_(attributes, "value"); peptide_hit_.setMetaValue("COMET:lnNumSP", value); // name: Comet:lnNumSP - } + } } else if (parse_unknown_scores_) { @@ -1295,7 +1295,7 @@ namespace OpenMS //TODO warn about non-numeric score? Or even do not catch the conversion error? peptide_hit_.setMetaValue(name, attributeAsString_(attributes, "value")); // Any other generic score (fallback String) } - + } } } @@ -1350,7 +1350,7 @@ namespace OpenMS { bool current_prot_is_decoy = protein.hasPrefix(decoy_prefix_); auto current_type = peptide_hit_.getTargetDecoyType(); - + if (current_type == PeptideHit::TargetDecoyType::UNKNOWN) { // No annotation yet, set based on current protein @@ -1403,8 +1403,8 @@ namespace OpenMS current_peptide_.setSpectrumReference( String("scan=") + String(scannr_)); } //TODO else error? - - + + if (!experiment_label_.empty()) { current_peptide_.setExperimentLabel(experiment_label_); @@ -1650,7 +1650,7 @@ namespace OpenMS { bool current_prot_is_decoy = protein.hasPrefix(decoy_prefix_); auto current_type = peptide_hit_.getTargetDecoyType(); - + if (current_type == PeptideHit::TargetDecoyType::UNKNOWN) { // No annotation yet, set based on current protein @@ -1664,7 +1664,7 @@ namespace OpenMS // Peptide matches both target and decoy proteins peptide_hit_.setTargetDecoyType(PeptideHit::TargetDecoyType::TARGET_DECOY); } - + hit.setTargetDecoyType(current_prot_is_decoy ? ProteinHit::TargetDecoyType::DECOY : ProteinHit::TargetDecoyType::TARGET); @@ -1926,16 +1926,17 @@ namespace OpenMS String cut_before = attributeAsString_(attributes, "cut"); String no_cut_after = attributeAsString_(attributes, "no_cut"); String sense = attributeAsString_(attributes, "sense"); - params_.digestion_enzyme = DigestionEnzymeProtein(DigestionEnzyme( + const DigestionEnzymeProtein::Sense sen = (sense.toLower() == "c") ? DigestionEnzymeProtein::Sense::C_TERM : DigestionEnzymeProtein::Sense::N_TERM; + params_.digestion_enzyme = DigestionEnzymeProtein( "user-defined," + enzyme_ + "," + cut_before + "," + no_cut_after + "," + sense, - cut_before, no_cut_after, sense)); + cut_before, sen, no_cut_after); } else if (element == "enzymatic_search_constraint") // parent: "search_summary" { //TODO we should not overwrite the enzyme here! Luckily in most files it is the same // enzyme as in sample_enzyme or something useless like "default". /// - enzyme_ = attributeAsString_(attributes, "enzyme"); + enzyme_ = attributeAsString_(attributes, "enzyme"); if (enzyme_ == "stricttrypsin") enzyme_ = "Trypsin/P"; // MSFragger synonyme if (ProteaseDB::getInstance()->hasEnzyme(enzyme_)) diff --git a/src/tests/class_tests/openms/executables.cmake b/src/tests/class_tests/openms/executables.cmake index a07bb25f18a..6aa4aa0d073 100644 --- a/src/tests/class_tests/openms/executables.cmake +++ b/src/tests/class_tests/openms/executables.cmake @@ -419,6 +419,7 @@ set(chemistry_executables_list CoarseIsotopeDistribution_test CrossLinksDB_test DecoyGenerator_test + DigestionEnzyme_test DigestionEnzymeProtein_test ElementDB_test Element_test @@ -611,7 +612,7 @@ set(transformations_executables_list EmgFitter1D_test EmgModel_test ExtendedIsotopeFitter1D_test - ExtendedIsotopeModel_test + ExtendedIsotopeModel_test FeatureFinderAlgorithmPickedHelperStructs_test FeatureFinderAlgorithmPicked_test FeatureFinderIdentificationAlgorithm_test diff --git a/src/tests/class_tests/openms/source/DigestionEnzymeProtein_test.cpp b/src/tests/class_tests/openms/source/DigestionEnzymeProtein_test.cpp index 46e987b5015..e607888bc73 100644 --- a/src/tests/class_tests/openms/source/DigestionEnzymeProtein_test.cpp +++ b/src/tests/class_tests/openms/source/DigestionEnzymeProtein_test.cpp @@ -3,7 +3,7 @@ // // -------------------------------------------------------------------------- // $Maintainer: Xiao Liang $ -// $Authors: Xiao Liang $ +// $Authors: Xiao Liang, Alen Šarić $ // -------------------------------------------------------------------------- // @@ -275,5 +275,40 @@ END_SECTION delete e_ptr; -END_TEST +START_SECTION((DigestionEnzymeProtein(const String& name, String cut_before, Sense sense, const String& nocut_after, const std::set& synonyms, String regex_description))) +{ + DigestionEnzymeProtein trypsin_style("TrypsinStyle", "K", DigestionEnzymeProtein::Sense::C_TERM, "P"); + TEST_EQUAL(trypsin_style.getRegEx(), "(?<=[KX])(?!P])") + + DigestionEnzymeProtein arg_c("Arg-C_Style", "R", DigestionEnzymeProtein::Sense::C_TERM); + TEST_EQUAL(arg_c.getRegEx(), "(?<=[RX])") + + DigestionEnzymeProtein n_term_test("N-Term_Style", "D", DigestionEnzymeProtein::Sense::N_TERM, "E"); + TEST_EQUAL(n_term_test.getRegEx(), "(? +#include + +////////////////////////////////////////// + +#include +#include //Needed for Empty initialization +#include + +using namespace OpenMS; +using namespace std; + +////////////////////////////////////////// + +START_TEST(DigestionEnzyme,"$ID") + +////////////////////////////////////////// + +START_SECTION(bool setValueFromFile(const String& key, const String& value)) + DigestionEnzymeProtein enzyme; + + // Test the Name Setting. + TEST_TRUE(enzyme.setValueFromFile("test:Name","Trypsin")) + TEST_EQUAL(enzyme.getName(),"Trypsin") + + // Test the RegEx Setting. + TEST_TRUE(enzyme.setValueFromFile("test:RegEx","Reg")) + TEST_EQUAL(enzyme.getRegEx(), "Reg") + + // Test the RegExDescription Setting. + TEST_TRUE(enzyme.setValueFromFile("test:RegExDescription","Desc")) + TEST_EQUAL(enzyme.getRegExDescription(),"Desc") + + // Test Synonym Setting + TEST_TRUE(enzyme.setValueFromFile("syn:Synonyms:","Trypsin")) + TEST_TRUE(enzyme.setValueFromFile("test:Synonyms:","TrypsinI")) + + // Since Synonyms are a set, test using set functions. + TEST_EQUAL(enzyme.getSynonyms().count("Trypsin"),1) + TEST_EQUAL(enzyme.getSynonyms().size(),2) + + // Test incorrect keys. + TEST_FALSE(enzyme.setValueFromFile("test","Tryp-Like")) +END_SECTION + +START_SECTION(bool operator==(const String& cleavage_regex) const) + DigestionEnzymeProtein enzyme; + enzyme.setRegEx("Verify"); + + TEST_TRUE(enzyme == "Verify") + TEST_FALSE(enzyme == "Accept") +END_SECTION + +START_SECTION(bool operator!=(const String& cleavage_regex) const) + DigestionEnzymeProtein enzyme; + enzyme.setRegEx("Verify"); + + TEST_TRUE(enzyme != "Accept") + TEST_FALSE(enzyme != "Verify") +END_SECTION + +// < compares the names of the enzymes. +START_SECTION(bool operator<(const DigestionEnzyme& enzyme) const) + DigestionEnzymeProtein e1,e2; + + e1.setName("A_Enzyme"); + e2.setName("B_ENZYME"); + + TEST_TRUE(e1 < e2) + TEST_FALSE(e2 < e1) + + // Safety test: when names are same, neither greater nor smaller, whatever the regex. + DigestionEnzymeProtein e3; + e3.setName("A_Enzyme"); + e3.setRegEx("Greater"); + + TEST_FALSE(e1 < e3) + TEST_FALSE(e3 < e1) +END_SECTION + +START_SECTION(std::ostream& operator<<(std::ostream& os, const DigestionEnzyme& enzyme)) + DigestionEnzymeProtein enzyme; + enzyme.setName("TestEnzyme"); + enzyme.setRegEx("[K]"); + enzyme.setRegExDescription("cuts at K"); + + stringstream ss; + ss << enzyme; + String output = ss.str(); + + TEST_TRUE(output.hasSubstring("digestion enzyme:TestEnzyme")) + TEST_TRUE(output.hasSubstring("(cleavage: [K] - cuts at K)")) +END_SECTION + +END_TEST diff --git a/src/tests/class_tests/openms/source/LinearResamplerAlign_test.cpp b/src/tests/class_tests/openms/source/LinearResamplerAlign_test.cpp index b1335321802..51f14a2bcc1 100644 --- a/src/tests/class_tests/openms/source/LinearResamplerAlign_test.cpp +++ b/src/tests/class_tests/openms/source/LinearResamplerAlign_test.cpp @@ -3,7 +3,7 @@ // // -------------------------------------------------------------------------- // $Maintainer: Hannes Roest $ -// $Authors: Hannes Roest, Luis Jacob Keller, Alen Saric$ +// $Authors: Hannes Roest, Luis Jacob Keller, Alen Šarić$ // -------------------------------------------------------------------------- #include