From 7a2221a1fd483842ae052955cfced195b1ba6a73 Mon Sep 17 00:00:00 2001 From: Simon Van de Vyver Date: Wed, 20 May 2026 16:35:29 +0200 Subject: [PATCH 1/3] add extra option to pept2data to map taxa to a specific rank --- api/src/controllers/mpa/pept2data.rs | 28 ++++++++++++++++++++++++++-- 1 file changed, 26 insertions(+), 2 deletions(-) diff --git a/api/src/controllers/mpa/pept2data.rs b/api/src/controllers/mpa/pept2data.rs index 9402719..a2c292d 100644 --- a/api/src/controllers/mpa/pept2data.rs +++ b/api/src/controllers/mpa/pept2data.rs @@ -3,6 +3,7 @@ use axum::{extract::State, Json}; use itertools::Itertools; use serde::{Deserialize, Serialize}; use index::{ProteinInfo, SearchResult}; +use datastore::LineageStore; use crate::{ controllers::{generate_handlers, mpa::default_equate_il, mpa::default_tryptic, mpa::default_report_taxa, api::default_cutoff, api::default_validate_taxa}, helpers::{ @@ -33,6 +34,8 @@ pub struct Parameters { cutoff: usize, #[serde(default = "default_report_taxa")] report_taxa: bool, + #[serde(default)] + taxa_rank: Option, #[serde(default = "default_validate_taxa")] validate_taxa: bool, filter: Option, @@ -67,7 +70,7 @@ pub struct Data { async fn handler( State(AppState { index, datastore, .. }): State, - Parameters { mut peptides, equate_il, tryptic, cutoff, report_taxa, validate_taxa, filter }: Parameters + Parameters { mut peptides, equate_il, tryptic, cutoff, report_taxa, taxa_rank, validate_taxa, filter }: Parameters ) -> Result { if peptides.is_empty() { return Ok(Data { peptides: Vec::new() }); @@ -103,6 +106,7 @@ async fn handler( }; let crap_filter = CrapFilter::new(); + let taxa_rank_idx = taxa_rank.as_ref().and_then(|rank| LineageStore::rank_to_idx(rank.to_lowercase().as_str())); Ok(Data { peptides: result @@ -121,6 +125,26 @@ async fn handler( let taxa: Vec = filtered_proteins.iter().map(|protein| protein.taxon).unique().collect(); + let taxa_at_rank: Option> = if report_taxa { + match taxa_rank_idx { + Some(idx) => Some(taxa + .iter() + .filter_map(|taxon_id| { + let lineage = get_lineage_array(*taxon_id, LineageVersion::V2, lineage_store); + lineage.get(idx).and_then(|taxon| *taxon).map(|taxon_id| taxon_id as u32) + }) + .unique() + .collect()), + None => Some(taxa.clone()) + } + } else { + None + }; + + println!("taxa_rank_idx: {:?}", taxa_rank_idx); + println!("taxa: {:?}", taxa); + println!("taxa_at_rank: {:?}", taxa_at_rank); + let lca = calculate_lca( taxa.clone(), LineageVersion::V2, @@ -136,7 +160,7 @@ async fn handler( lca: Some(lca as u32), lineage, fa: calculate_fa(&filtered_proteins), - taxa: if report_taxa { Some(taxa) } else { None }, + taxa: taxa_at_rank, crap_filtered, }) }) From f16f366258ddde0d8f48b506c987237142488c90 Mon Sep 17 00:00:00 2001 From: Simon Van de Vyver Date: Thu, 4 Jun 2026 14:04:34 +0200 Subject: [PATCH 2/3] add taxa2rank endpoint --- api/src/controllers/api/mod.rs | 1 + api/src/controllers/api/taxa2rank.rs | 80 ++++++++++++++++++++++++++++ api/src/controllers/mpa/pept2data.rs | 26 ++------- api/src/routes.rs | 6 ++- 4 files changed, 89 insertions(+), 24 deletions(-) create mode 100644 api/src/controllers/api/taxa2rank.rs diff --git a/api/src/controllers/api/mod.rs b/api/src/controllers/api/mod.rs index 8072460..9c9c221 100644 --- a/api/src/controllers/api/mod.rs +++ b/api/src/controllers/api/mod.rs @@ -10,6 +10,7 @@ pub mod pept2taxa; pub mod peptinfo; pub mod protinfo; pub mod taxa2lca; +pub mod taxa2rank; pub mod taxa2tree; pub mod taxonomy; diff --git a/api/src/controllers/api/taxa2rank.rs b/api/src/controllers/api/taxa2rank.rs new file mode 100644 index 0000000..07c5368 --- /dev/null +++ b/api/src/controllers/api/taxa2rank.rs @@ -0,0 +1,80 @@ +use std::collections::HashMap; + +use axum::{extract::State, Json}; +use itertools::Itertools; +use serde::{Deserialize, Serialize}; + +use datastore::LineageStore; +use crate::{ + controllers::generate_handlers, + helpers::lineage_helper::{get_lineage_array, LineageVersion}, + AppState +}; +use crate::errors::ApiError; + +#[derive(Deserialize)] +pub struct Parameters { + /// Vector of taxa vectors, one per peptide + taxa: Vec>, + /// The rank to map taxa to (e.g., "species", "genus", "family") + rank: String, +} + +#[derive(Serialize)] +pub struct RankMappingResult { + /// The mapped taxa at the specified rank + mapped_taxa: Vec>, +} + +/// Maps taxa to a specific taxonomic rank with caching for duplicate taxa. +/// Uses a HashMap to cache lineage lookups, which is more efficient when there are many duplicates. +async fn handler( + State(AppState { datastore, .. }): State, + Parameters { taxa, rank }: Parameters, +) -> Result { + + let rank_lowercase = rank.to_lowercase(); + let rank_idx = LineageStore::rank_to_idx(&rank_lowercase) + .ok_or_else(|| ApiError::UnknownRankError(format!("Invalid rank: {}", rank)))?; + + let lineage_store = datastore.lineage_store(); + + // Build a cache of taxon_id -> taxon_id_at_rank mappings + let mut cache: HashMap> = HashMap::new(); + + let mapped_taxa: Vec> = taxa + .iter() + .map(|taxa_vec| { + taxa_vec + .iter() + .filter_map(|taxon_id| { + let mapped_taxon = cache.entry(*taxon_id).or_insert_with(|| { + let lineage = get_lineage_array(*taxon_id, LineageVersion::V2, lineage_store); + lineage + .get(rank_idx) + .and_then(|taxon| *taxon) + .map(|taxon_id| taxon_id as u32) + }); + + *mapped_taxon + }) + .unique() + .collect() + }) + .collect(); + + Ok(RankMappingResult { + mapped_taxa, + }) +} + +// Default handler without cache +generate_handlers!( + async fn json_handler( + state => State, + params => Parameters + ) -> Result, ApiError> { + Ok(Json(handler(state, params).await?)) + } +); + diff --git a/api/src/controllers/mpa/pept2data.rs b/api/src/controllers/mpa/pept2data.rs index a2c292d..c62d9c1 100644 --- a/api/src/controllers/mpa/pept2data.rs +++ b/api/src/controllers/mpa/pept2data.rs @@ -3,7 +3,6 @@ use axum::{extract::State, Json}; use itertools::Itertools; use serde::{Deserialize, Serialize}; use index::{ProteinInfo, SearchResult}; -use datastore::LineageStore; use crate::{ controllers::{generate_handlers, mpa::default_equate_il, mpa::default_tryptic, mpa::default_report_taxa, api::default_cutoff, api::default_validate_taxa}, helpers::{ @@ -34,8 +33,6 @@ pub struct Parameters { cutoff: usize, #[serde(default = "default_report_taxa")] report_taxa: bool, - #[serde(default)] - taxa_rank: Option, #[serde(default = "default_validate_taxa")] validate_taxa: bool, filter: Option, @@ -70,7 +67,7 @@ pub struct Data { async fn handler( State(AppState { index, datastore, .. }): State, - Parameters { mut peptides, equate_il, tryptic, cutoff, report_taxa, taxa_rank, validate_taxa, filter }: Parameters + Parameters { mut peptides, equate_il, tryptic, cutoff, report_taxa, validate_taxa, filter }: Parameters ) -> Result { if peptides.is_empty() { return Ok(Data { peptides: Vec::new() }); @@ -106,7 +103,6 @@ async fn handler( }; let crap_filter = CrapFilter::new(); - let taxa_rank_idx = taxa_rank.as_ref().and_then(|rank| LineageStore::rank_to_idx(rank.to_lowercase().as_str())); Ok(Data { peptides: result @@ -125,26 +121,12 @@ async fn handler( let taxa: Vec = filtered_proteins.iter().map(|protein| protein.taxon).unique().collect(); - let taxa_at_rank: Option> = if report_taxa { - match taxa_rank_idx { - Some(idx) => Some(taxa - .iter() - .filter_map(|taxon_id| { - let lineage = get_lineage_array(*taxon_id, LineageVersion::V2, lineage_store); - lineage.get(idx).and_then(|taxon| *taxon).map(|taxon_id| taxon_id as u32) - }) - .unique() - .collect()), - None => Some(taxa.clone()) - } + let taxa_to_return: Option> = if report_taxa { + Some(taxa.clone()) } else { None }; - println!("taxa_rank_idx: {:?}", taxa_rank_idx); - println!("taxa: {:?}", taxa); - println!("taxa_at_rank: {:?}", taxa_at_rank); - let lca = calculate_lca( taxa.clone(), LineageVersion::V2, @@ -160,7 +142,7 @@ async fn handler( lca: Some(lca as u32), lineage, fa: calculate_fa(&filtered_proteins), - taxa: taxa_at_rank, + taxa: taxa_to_return, crap_filtered, }) }) diff --git a/api/src/routes.rs b/api/src/routes.rs index 4efae4e..b933ff2 100644 --- a/api/src/routes.rs +++ b/api/src/routes.rs @@ -15,7 +15,7 @@ use crate::{ controllers::{ api::{ pept2ec, pept2funct, pept2go, pept2interpro, pept2lca, pept2prot, pept2taxa, peptinfo, protinfo, taxa2lca, - taxa2tree, taxonomy + taxa2rank, taxa2tree, taxonomy }, datasets::sampledata, mpa::{pept2data}, @@ -103,7 +103,9 @@ fn create_api_v2_routes() -> Router { "/taxa2tree", get(taxa2tree::get_json_handler_v2).post(taxa2tree::post_json_handler_v2), "/taxonomy", - get(taxonomy::get_json_handler_v2).post(taxonomy::post_json_handler_v2) + get(taxonomy::get_json_handler_v2).post(taxonomy::post_json_handler_v2), + "/taxa2rank", + get(taxa2rank::get_json_handler).post(taxa2rank::post_json_handler) ) .route("/taxa2tree.html", get(taxa2tree::get_html_handler_v2).post(taxa2tree::post_html_handler_v2)) } From c682c9f0f43ce9c3b311542c135c1dfa368ac6dc Mon Sep 17 00:00:00 2001 From: Simon Van de Vyver Date: Thu, 4 Jun 2026 14:09:07 +0200 Subject: [PATCH 3/3] simplification in pept2data --- api/src/controllers/mpa/pept2data.rs | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/api/src/controllers/mpa/pept2data.rs b/api/src/controllers/mpa/pept2data.rs index c62d9c1..9402719 100644 --- a/api/src/controllers/mpa/pept2data.rs +++ b/api/src/controllers/mpa/pept2data.rs @@ -121,12 +121,6 @@ async fn handler( let taxa: Vec = filtered_proteins.iter().map(|protein| protein.taxon).unique().collect(); - let taxa_to_return: Option> = if report_taxa { - Some(taxa.clone()) - } else { - None - }; - let lca = calculate_lca( taxa.clone(), LineageVersion::V2, @@ -142,7 +136,7 @@ async fn handler( lca: Some(lca as u32), lineage, fa: calculate_fa(&filtered_proteins), - taxa: taxa_to_return, + taxa: if report_taxa { Some(taxa) } else { None }, crap_filtered, }) })