diff --git a/api/src/controllers/api/mod.rs b/api/src/controllers/api/mod.rs
index 8072460..9c9c221 100644
--- a/api/src/controllers/api/mod.rs
+++ b/api/src/controllers/api/mod.rs
@@ -10,6 +10,7 @@ pub mod pept2taxa;
 pub mod peptinfo;
 pub mod protinfo;
 pub mod taxa2lca;
+pub mod taxa2rank;
 pub mod taxa2tree;
 pub mod taxonomy;
 
diff --git a/api/src/controllers/api/taxa2rank.rs b/api/src/controllers/api/taxa2rank.rs
new file mode 100644
index 0000000..07c5368
--- /dev/null
+++ b/api/src/controllers/api/taxa2rank.rs
@@ -0,0 +1,91 @@
+use std::collections::HashMap;
+
+use axum::{extract::State, Json};
+use itertools::Itertools;
+use serde::{Deserialize, Serialize};
+
+use datastore::LineageStore;
+use crate::{
+    controllers::generate_handlers,
+    helpers::lineage_helper::{get_lineage_array, LineageVersion},
+    AppState
+};
+use crate::errors::ApiError;
+
+/// Request parameters of the `taxa2rank` endpoint.
+#[derive(Deserialize)]
+pub struct Parameters {
+    /// Vector of taxa vectors, one per peptide
+    taxa: Vec<Vec<u32>>,
+    /// The rank to map taxa to (e.g., "species", "genus", "family")
+    rank: String,
+}
+
+/// Response body of the `taxa2rank` endpoint.
+#[derive(Serialize)]
+pub struct RankMappingResult {
+    /// The mapped taxa at the specified rank
+    mapped_taxa: Vec<Vec<u32>>,
+}
+
+/// Maps taxa to a specific taxonomic rank with caching for duplicate taxa.
+/// Uses a HashMap to cache lineage lookups, which is more efficient when there are many duplicates.
+///
+/// Each inner vector of `taxa` is mapped independently: taxa whose lineage has no entry at the
+/// requested rank are dropped and duplicates are removed, keeping first-occurrence order.
+///
+/// # Errors
+///
+/// Returns [`ApiError::UnknownRankError`] if the lowercased `rank` is not a known rank name.
+async fn handler(
+    State(AppState { datastore, .. }): State<AppState>,
+    Parameters { taxa, rank }: Parameters,
+) -> Result<RankMappingResult, ApiError> {
+
+    let rank_lowercase = rank.to_lowercase();
+    let rank_idx = LineageStore::rank_to_idx(&rank_lowercase)
+        .ok_or_else(|| ApiError::UnknownRankError(format!("Invalid rank: {}", rank)))?;
+
+    let lineage_store = datastore.lineage_store();
+
+    // Build a cache of taxon_id -> taxon_id_at_rank mappings
+    let mut cache: HashMap<u32, Option<u32>> = HashMap::new();
+
+    let mapped_taxa: Vec<Vec<u32>> = taxa
+        .iter()
+        .map(|taxa_vec| {
+            taxa_vec
+                .iter()
+                .filter_map(|taxon_id| {
+                    let mapped_taxon = cache.entry(*taxon_id).or_insert_with(|| {
+                        let lineage = get_lineage_array(*taxon_id, LineageVersion::V2, lineage_store);
+                        lineage
+                            .get(rank_idx)
+                            .and_then(|taxon| *taxon)
+                            // Checked conversion: an id that does not fit in `u32` is treated as
+                            // "no taxon at this rank" instead of being wrapped by an `as` cast.
+                            .and_then(|id| u32::try_from(id).ok())
+                    });
+
+                    *mapped_taxon
+                })
+                .unique()
+                .collect()
+        })
+        .collect();
+
+    Ok(RankMappingResult {
+        mapped_taxa,
+    })
+}
+
+// Default handler without cache
+generate_handlers!(
+    async fn json_handler(
+        state => State<AppState>,
+        params => Parameters
+    ) -> Result<Json<RankMappingResult>, ApiError> {
+        Ok(Json(handler(state, params).await?))
+    }
+);
+
diff --git a/api/src/routes.rs b/api/src/routes.rs
index 4efae4e..b933ff2 100644
--- a/api/src/routes.rs
+++ b/api/src/routes.rs
@@ -15,7 +15,7 @@ use crate::{
     controllers::{
         api::{
             pept2ec, pept2funct, pept2go, pept2interpro, pept2lca, pept2prot, pept2taxa, peptinfo, protinfo, taxa2lca,
-            taxa2tree, taxonomy
+            taxa2rank, taxa2tree, taxonomy
         },
         datasets::sampledata,
         mpa::{pept2data},
@@ -103,7 +103,9 @@ fn create_api_v2_routes() -> Router {
         "/taxa2tree",
         get(taxa2tree::get_json_handler_v2).post(taxa2tree::post_json_handler_v2),
         "/taxonomy",
-        get(taxonomy::get_json_handler_v2).post(taxonomy::post_json_handler_v2)
+        get(taxonomy::get_json_handler_v2).post(taxonomy::post_json_handler_v2),
+        "/taxa2rank",
+        get(taxa2rank::get_json_handler).post(taxa2rank::post_json_handler)
     )
     .route("/taxa2tree.html", get(taxa2tree::get_html_handler_v2).post(taxa2tree::post_html_handler_v2))
 }