From bef003341f2cf1ad5edc199ff02e9e878c6083f6 Mon Sep 17 00:00:00 2001 From: Lukas von Ziegler Date: Mon, 23 Feb 2026 16:24:44 +0100 Subject: [PATCH 01/10] extended aa score calculation --- NEWS.md | 1 + R/calculate_aa_scores.R | 33 +++++++++++++++++++++++++++++---- 2 files changed, 30 insertions(+), 4 deletions(-) diff --git a/NEWS.md b/NEWS.md index eecd62cc..10b9faaf 100644 --- a/NEWS.md +++ b/NEWS.md @@ -5,6 +5,7 @@ * `calculate_peptide_abundance()` new function to calculate peptide abundances from precursor abundances. * `fit_drc_4p()` received the `show_progress` argument that is by default `TRUE` and allows the user to show or hide progress bars. This closes issue #278. * `assign_missingness()` gained the `completeness_MNAR_reference` argument, allowing users to control how complete a condition must be to be considered sufficiently observed when assigning MNAR missingness. This closes issue #200. +* `calculate_aa_scores()` gained a `methods` argument, allowing user to choose between additive or multiplicative score caluclation mode. The function now returns min-max normalized scores in addition to raw values. ## Bug fixes diff --git a/R/calculate_aa_scores.R b/R/calculate_aa_scores.R index 5bdf151d..58030bf4 100644 --- a/R/calculate_aa_scores.R +++ b/R/calculate_aa_scores.R @@ -19,6 +19,8 @@ #' data frame. Default is not retaining additional columns \code{retain_columns = NULL}. Specific #' columns can be retained by providing their names (not in quotations marks, just like other #' column names, but in a vector). +#' @param method a character argument selecting the method used for score calculation. +#' supported are "multiplicative" = -log10(adj_pval) * abs(diff) (default) or "additive" = -log10(adj_pval) + abs(diff) #' #' @return A data frame that contains the aggregated scores per amino acid position, enabling to #' draw fingerprints for each individual protein. 
@@ -51,12 +53,18 @@ calculate_aa_scores <- function(data, adj_pval = adj_pval, start_position, end_position, - retain_columns = NULL) { + retain_columns = NULL, + method = "multiplicative") { output <- data %>% dplyr::ungroup() %>% dplyr::distinct({{ protein }}, {{ diff }}, {{ adj_pval }}, {{ start_position }}, {{ end_position }}) %>% tidyr::drop_na({{ diff }}, {{ adj_pval }}) %>% - dplyr::mutate(score = -log10({{ adj_pval }}) * abs({{ diff }})) %>% + dplyr::mutate( + score = dplyr::case_when( + method == "multiplicative" ~ -log10({{ adj_pval }}) * abs({{ diff }}), + method == "additive" ~ -log10({{ adj_pval }}) + abs({{ diff }}) + ) + ) %>% dplyr::rowwise() %>% dplyr::mutate(residue = list(seq({{ start_position }}, {{ end_position }}))) %>% tidyr::unnest("residue") %>% @@ -64,17 +72,34 @@ calculate_aa_scores <- function(data, dplyr::mutate(amino_acid_score = mean(.data$score)) %>% dplyr::distinct({{ protein }}, .data$residue, .data$amino_acid_score) + # normalization (per protein) + output <- output %>% + dplyr::group_by({{ protein }}) %>% + dplyr::mutate( + amino_acid_score_normalized = { + min_val <- min(amino_acid_score, na.rm = TRUE) + max_val <- max(amino_acid_score, na.rm = TRUE) + if (max_val == min_val) { + 1 # avoid division by zero; constant protein gets 1 + } else { + (amino_acid_score - min_val) / (max_val - min_val) + } + } + ) %>% + dplyr::ungroup() if (!missing(retain_columns)) { output <- data %>% dplyr::select(!!enquo(retain_columns), colnames(output)[!colnames(output) %in% c( "residue", - "amino_acid_score" + "amino_acid_score", + "amino_acid_score_normalized" )]) %>% dplyr::distinct() %>% dplyr::right_join(output, by = colnames(output)[!colnames(output) %in% c( "residue", - "amino_acid_score" + "amino_acid_score", + "amino_acid_score_normalized" )]) } From 582899feb55e895e182a4a247f578a2178a05cda Mon Sep 17 00:00:00 2001 From: Lukas von Ziegler Date: Mon, 2 Mar 2026 11:38:03 +0100 Subject: [PATCH 02/10] fixed small issue and, updated docs and 
test --- R/calculate_aa_scores.R | 6 +++--- man/calculate_aa_scores.Rd | 6 +++++- tests/testthat/test-workflow.R | 2 +- 3 files changed, 9 insertions(+), 5 deletions(-) diff --git a/R/calculate_aa_scores.R b/R/calculate_aa_scores.R index 58030bf4..438575c8 100644 --- a/R/calculate_aa_scores.R +++ b/R/calculate_aa_scores.R @@ -77,12 +77,12 @@ calculate_aa_scores <- function(data, dplyr::group_by({{ protein }}) %>% dplyr::mutate( amino_acid_score_normalized = { - min_val <- min(amino_acid_score, na.rm = TRUE) - max_val <- max(amino_acid_score, na.rm = TRUE) + min_val <- min(.data$amino_acid_score, na.rm = TRUE) + max_val <- max(.data$amino_acid_score, na.rm = TRUE) if (max_val == min_val) { 1 # avoid division by zero; constant protein gets 1 } else { - (amino_acid_score - min_val) / (max_val - min_val) + (.data$amino_acid_score - min_val) / (max_val - min_val) } } ) %>% diff --git a/man/calculate_aa_scores.Rd b/man/calculate_aa_scores.Rd index 8a8247b7..9e0ff4e1 100644 --- a/man/calculate_aa_scores.Rd +++ b/man/calculate_aa_scores.Rd @@ -11,7 +11,8 @@ calculate_aa_scores( adj_pval = adj_pval, start_position, end_position, - retain_columns = NULL + retain_columns = NULL, + method = "multiplicative" ) } \arguments{ @@ -33,6 +34,9 @@ precursor.} data frame. Default is not retaining additional columns \code{retain_columns = NULL}. Specific columns can be retained by providing their names (not in quotations marks, just like other column names, but in a vector).} + +\item{method}{a character argument selecting the method used for score calculation. 
+supported are "multiplicative" = -log10(adj_pval) * abs(diff) (default) or "additive" = -log10(adj_pval) + abs(diff)} } \value{ A data frame that contains the aggregated scores per amino acid position, enabling to diff --git a/tests/testthat/test-workflow.R b/tests/testthat/test-workflow.R index 0bc4cdd2..d528f4ab 100644 --- a/tests/testthat/test-workflow.R +++ b/tests/testthat/test-workflow.R @@ -852,7 +852,7 @@ test_that("calculate_aa_scores works", { expect_is(aa_fingerprint, "data.frame") expect_equal(nrow(aa_fingerprint), 45) - expect_equal(ncol(aa_fingerprint), 3) + expect_equal(ncol(aa_fingerprint), 4) }) # Test for random forest imputation From 558808f7a2ff191cf5e9cce23f4c4996b0a7767e Mon Sep 17 00:00:00 2001 From: lukasvonziegler Date: Mon, 2 Mar 2026 11:24:58 +0000 Subject: [PATCH 03/10] Style code (GHA) --- R/calculate_aa_scores.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R/calculate_aa_scores.R b/R/calculate_aa_scores.R index 438575c8..b0684a92 100644 --- a/R/calculate_aa_scores.R +++ b/R/calculate_aa_scores.R @@ -62,7 +62,7 @@ calculate_aa_scores <- function(data, dplyr::mutate( score = dplyr::case_when( method == "multiplicative" ~ -log10({{ adj_pval }}) * abs({{ diff }}), - method == "additive" ~ -log10({{ adj_pval }}) + abs({{ diff }}) + method == "additive" ~ -log10({{ adj_pval }}) + abs({{ diff }}) ) ) %>% dplyr::rowwise() %>% @@ -80,7 +80,7 @@ calculate_aa_scores <- function(data, min_val <- min(.data$amino_acid_score, na.rm = TRUE) max_val <- max(.data$amino_acid_score, na.rm = TRUE) if (max_val == min_val) { - 1 # avoid division by zero; constant protein gets 1 + 1 # avoid division by zero; constant protein gets 1 } else { (.data$amino_acid_score - min_val) / (max_val - min_val) } From aafec5f5e6a9187a0d191a5056a64211dccbf62b Mon Sep 17 00:00:00 2001 From: lukasvonziegler <43876912+lukasvonziegler@users.noreply.github.com> Date: Tue, 3 Mar 2026 12:40:56 +0100 Subject: [PATCH 04/10] spelling fixes in 
R/calculate_aa_scores.R Co-authored-by: Jan-Philipp Quast <59307482+jpquast@users.noreply.github.com> --- R/calculate_aa_scores.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/calculate_aa_scores.R b/R/calculate_aa_scores.R index b0684a92..7da706a3 100644 --- a/R/calculate_aa_scores.R +++ b/R/calculate_aa_scores.R @@ -20,7 +20,7 @@ #' columns can be retained by providing their names (not in quotations marks, just like other #' column names, but in a vector). #' @param method a character argument selecting the method used for score calculation. -#' supported are "multiplicative" = -log10(adj_pval) * abs(diff) (default) or "additive" = -log10(adj_pval) + abs(diff) +#' Supported are "multiplicative" = `-log10(adj_pval) * abs(diff)` (default) or "additive" = `-log10(adj_pval) + abs(diff)`. #' #' @return A data frame that contains the aggregated scores per amino acid position, enabling to #' draw fingerprints for each individual protein. From 78c3efbaccb21088ba627ce5c7dcf59edff112ba Mon Sep 17 00:00:00 2001 From: lukasvonziegler <43876912+lukasvonziegler@users.noreply.github.com> Date: Tue, 3 Mar 2026 12:41:23 +0100 Subject: [PATCH 05/10] Improve wording in NEWS.md Co-authored-by: Jan-Philipp Quast <59307482+jpquast@users.noreply.github.com> --- NEWS.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/NEWS.md b/NEWS.md index 10b9faaf..d49349ed 100644 --- a/NEWS.md +++ b/NEWS.md @@ -5,7 +5,7 @@ * `calculate_peptide_abundance()` new function to calculate peptide abundances from precursor abundances. * `fit_drc_4p()` received the `show_progress` argument that is by default `TRUE` and allows the user to show or hide progress bars. This closes issue #278. * `assign_missingness()` gained the `completeness_MNAR_reference` argument, allowing users to control how complete a condition must be to be considered sufficiently observed when assigning MNAR missingness. This closes issue #200. 
-* `calculate_aa_scores()` gained a `methods` argument, allowing user to choose between additive or multiplicative score caluclation mode. The function now returns min-max normalized scores in addition to raw values. +* `calculate_aa_scores()` gained a `method` argument, allowing users to choose between the multiplicative (default) and the new additive score calculation mode. The function now also returns min-max normalised scores in addition to raw values. ## Bug fixes From 251a754f65d8ca7d93853e24f0028de30dc86611 Mon Sep 17 00:00:00 2001 From: Lukas von Ziegler Date: Tue, 3 Mar 2026 12:57:27 +0100 Subject: [PATCH 06/10] fixed typos in qc_sample_correlation --- R/qc_sample_correlation.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R/qc_sample_correlation.R b/R/qc_sample_correlation.R index 55c06008..c0287bdd 100644 --- a/R/qc_sample_correlation.R +++ b/R/qc_sample_correlation.R @@ -136,7 +136,7 @@ qc_sample_correlation <- function(data, heatmap_interactive <- heatmaply::heatmaply( correlation, - main = "Correlation based hirachical clustering of samples", + main = "Correlation based hierarchical clustering of samples", col_side_colors = annotation, col_side_palette = c( annotation_colours[[1]], @@ -190,7 +190,7 @@ qc_sample_correlation <- function(data, cluster_cols = stats::as.hclust(dendrogram_column), annotation = annotation, annotation_colors = annotation_colours, - main = "Correlation based hierachical clustering of samples", + main = "Correlation based hierarchical clustering of samples", color = viridis_colours, silent = TRUE ) From 5aea526d6dcee30013437a42fbe1901561bb646c Mon Sep 17 00:00:00 2001 From: jpquast Date: Tue, 3 Mar 2026 13:44:02 +0100 Subject: [PATCH 07/10] Fix Note in check --- R/calculate_go_enrichment.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/calculate_go_enrichment.R b/R/calculate_go_enrichment.R index cc119cde..41149e00 100644 --- a/R/calculate_go_enrichment.R +++ b/R/calculate_go_enrichment.R @@
-385,7 +385,7 @@ if you used the right organism ID.", prefix = "\n", initial = "")) groups_to_skip <- cont_table %>% dplyr::group_by({{ group }}) %>% dplyr::summarise(n_levels = dplyr::n_distinct({{ is_significant }}), .groups = "drop") %>% - dplyr::filter(n_levels < 2) %>% + dplyr::filter(.data$n_levels < 2) %>% dplyr::pull({{ group }}) cont_table <- cont_table %>% From 6aec249a7c100988ffb0cc80e1c9d11502c9760d Mon Sep 17 00:00:00 2001 From: lukasvonziegler <43876912+lukasvonziegler@users.noreply.github.com> Date: Tue, 3 Mar 2026 16:31:34 +0100 Subject: [PATCH 08/10] added method validation to R/calculate_aa_scores.R Co-authored-by: Jan-Philipp Quast <59307482+jpquast@users.noreply.github.com> --- R/calculate_aa_scores.R | 3 +++ 1 file changed, 3 insertions(+) diff --git a/R/calculate_aa_scores.R b/R/calculate_aa_scores.R index 7da706a3..f87f53e1 100644 --- a/R/calculate_aa_scores.R +++ b/R/calculate_aa_scores.R @@ -55,6 +55,9 @@ calculate_aa_scores <- function(data, end_position, retain_columns = NULL, method = "multiplicative") { +# validate method input +method <- match.arg(method, c("multiplicative", "additive")) + output <- data %>% dplyr::ungroup() %>% dplyr::distinct({{ protein }}, {{ diff }}, {{ adj_pval }}, {{ start_position }}, {{ end_position }}) %>% From d1d3c46f2b399257848b627bd675d6b106ee88b3 Mon Sep 17 00:00:00 2001 From: lukasvonziegler <43876912+lukasvonziegler@users.noreply.github.com> Date: Tue, 3 Mar 2026 16:32:49 +0100 Subject: [PATCH 09/10] Use more appropriate method logic selection in R/calculate_aa_scores.R Co-authored-by: Jan-Philipp Quast <59307482+jpquast@users.noreply.github.com> --- R/calculate_aa_scores.R | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/R/calculate_aa_scores.R b/R/calculate_aa_scores.R index f87f53e1..e3491f84 100644 --- a/R/calculate_aa_scores.R +++ b/R/calculate_aa_scores.R @@ -63,10 +63,11 @@ method <- match.arg(method, c("multiplicative", "additive")) dplyr::distinct({{ protein }}, {{
diff }}, {{ adj_pval }}, {{ start_position }}, {{ end_position }}) %>% tidyr::drop_na({{ diff }}, {{ adj_pval }}) %>% dplyr::mutate( - score = dplyr::case_when( - method == "multiplicative" ~ -log10({{ adj_pval }}) * abs({{ diff }}), - method == "additive" ~ -log10({{ adj_pval }}) + abs({{ diff }}) - ) + score = if (method == "multiplicative") { + -log10({{ adj_pval }}) * abs({{ diff }}) + } else if (method == "additive") { + -log10({{ adj_pval }}) + abs({{ diff }}) + } ) %>% dplyr::rowwise() %>% dplyr::mutate(residue = list(seq({{ start_position }}, {{ end_position }}))) %>% From e423abf7ab0074d9bf9b9193e34fb8caa1941e74 Mon Sep 17 00:00:00 2001 From: lukasvonziegler Date: Tue, 3 Mar 2026 15:34:32 +0000 Subject: [PATCH 10/10] Style code (GHA) --- R/calculate_aa_scores.R | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/R/calculate_aa_scores.R b/R/calculate_aa_scores.R index e3491f84..a6418d10 100644 --- a/R/calculate_aa_scores.R +++ b/R/calculate_aa_scores.R @@ -55,8 +55,8 @@ calculate_aa_scores <- function(data, end_position, retain_columns = NULL, method = "multiplicative") { -# validate method input -method <- match.arg(method, c("multiplicative", "additive")) + # validate method input + method <- match.arg(method, c("multiplicative", "additive")) output <- data %>% dplyr::ungroup() %>% @@ -64,10 +64,10 @@ method <- match.arg(method, c("multiplicative", "additive")) tidyr::drop_na({{ diff }}, {{ adj_pval }}) %>% dplyr::mutate( score = if (method == "multiplicative") { - -log10({{ adj_pval }}) * abs({{ diff }}) - } else if (method == "additive") { - -log10({{ adj_pval }}) + abs({{ diff }}) - } + -log10({{ adj_pval }}) * abs({{ diff }}) + } else if (method == "additive") { + -log10({{ adj_pval }}) + abs({{ diff }}) + } ) %>% dplyr::rowwise() %>% dplyr::mutate(residue = list(seq({{ start_position }}, {{ end_position }}))) %>%