diff --git a/.Rbuildignore b/.Rbuildignore index 674edf6..5e74ab2 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -9,3 +9,4 @@ ^pkgdown$ ^\.travis\.yml$ ^\.github$ +^tmp$ diff --git a/.gitignore b/.gitignore index 4fb4af4..3fa650d 100644 --- a/.gitignore +++ b/.gitignore @@ -5,3 +5,4 @@ inst/doc docs/ docs +tmp/ diff --git a/DESCRIPTION b/DESCRIPTION old mode 100644 new mode 100755 index 9efe068..ceae92c --- a/DESCRIPTION +++ b/DESCRIPTION @@ -49,7 +49,8 @@ Imports: memoise, httr2, tidyjson, - tibblify + tibblify, + testthat Suggests: knitr, rmarkdown @@ -57,3 +58,4 @@ VignetteBuilder: knitr URL: https://github.com/patterninstitute/ensemblr, https://www.pattern.institute/ensemblr/ BugReports: https://github.com/patterninstitute/ensemblr/issues Config/Needs/website: patterninstitute/chic +Roxygen: list(markdown = TRUE) diff --git a/NAMESPACE b/NAMESPACE old mode 100644 new mode 100755 index 7ae2656..6ef8923 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,33 +1,77 @@ # Generated by roxygen2: do not edit by hand export("%>%") +export(._get_divisions) +export(._get_rest_version) +export(._get_software_version) +export(._get_variation_sources) export(genomic_range) +export(get_alignment_by_region) export(get_analyses) +export(get_analysis_info) export(get_assemblies) +export(get_assembly_info) +export(get_biotypes) +export(get_biotypes_by_name) +export(get_biotypes_groups) +export(get_cafe_genetree_by_id) +export(get_cafe_genetree_by_species_id) +export(get_cafe_genetree_by_symbol) +export(get_compara_methods) +export(get_compara_species_sets) +export(get_comparas) +export(get_consequence_types) export(get_cytogenetic_bands) +export(get_data) +export(get_data_version2) export(get_data_versions) export(get_divisions) +export(get_eg_version) export(get_ensembl_genomes_version) +export(get_external_dbs) +export(get_genetree_by_id) +export(get_genetree_by_species_id) +export(get_genetree_by_symbol) +export(get_genome_info) +export(get_genome_info_by_accession) 
+export(get_genome_info_by_assembly) +export(get_genome_info_by_division) +export(get_genome_info_by_taxonomy) +export(get_homology_by_species_id) +export(get_homology_by_symbol) export(get_id) export(get_individuals) export(get_karyotypes) +export(get_ld_by_variant) +export(get_ld_values_by_region) export(get_ld_variants_by_pair) export(get_ld_variants_by_pair_combn) export(get_ld_variants_by_range) export(get_ld_variants_by_window) +export(get_pairwise_ld_values) +export(get_population_individuals) export(get_populations) +export(get_region_info) export(get_rest_version) +export(get_rest_version2) export(get_software_version) +export(get_software_version2) export(get_species) +export(get_species_info) +export(get_species_populations) export(get_toplevel_sequence_info) export(get_toplevel_sequences) export(get_variant_consequences) export(get_variation_sources) export(get_versioning) +export(get_versioning2) export(get_xrefs_by_ensembl_id) export(get_xrefs_by_gene) +export(get_xrefs_by_id) +export(get_xrefs_by_name) +export(get_xrefs_by_symbol) export(is_ensembl_reachable) +export(ping_service) importFrom(magrittr,"%>%") -importFrom(memoise,memoise) importFrom(rlang,.data) importFrom(tibble,tibble) diff --git a/R/analysis.R b/R/analysis.R index 6fabebc..2806924 100644 --- a/R/analysis.R +++ b/R/analysis.R @@ -1,7 +1,8 @@ analysis_tbl <- function( - species_name = character(), - database = character(), - analysis = character()) { + species_name = character(), + database = character(), + analysis = character() +) { tbl <- tibble::tibble( species_name = species_name, database = database, @@ -11,14 +12,13 @@ analysis_tbl <- function( } json_list_to_analysis_tbl <- function(species_name, json_list) { + tbl <- purrr::imap_dfr( json_list, - .f = ~ analysis_tbl( - species_name = species_name, - database = .x, - analysis = .y + .f = ~ analysis_tbl(species_name = species_name, + database = .x, + analysis = .y) ) - ) # Drop rows if all columns except species_name are NA 
return(tidyr::drop_na(tbl, -species_name)) @@ -54,6 +54,7 @@ get_analyses <- function(species_name, verbose = FALSE, warnings = TRUE, progress_bar = TRUE) { + # Assert species_name argument. assert_species_name(species_name) # Assert verbose argument. @@ -67,8 +68,8 @@ get_analyses <- function(species_name, e <- urltools::url_encode resource_urls <- glue::glue( - "/info/analysis/", - "{e(species_name)}?" + '/info/analysis/', + '{e(species_name)}?' ) # Usually we'd use purrr::map here but we opted for plyr::llply @@ -89,13 +90,11 @@ get_analyses <- function(species_name, ) # Only keep those responses that responded successfully, i.e. with status == "OK". - responses_ok <- purrr::keep(responses, ~ identical(.x$status, "OK")) + responses_ok <- purrr::keep(responses, ~ identical(.x$status, 'OK')) # If none of the responses were successful then return an empty linkage # disequilibrium tibble. - if (rlang::is_empty(responses_ok)) { - return(analysis_tbl()) - } + if (rlang::is_empty(responses_ok)) return(analysis_tbl()) tbl <- purrr::imap_dfr( .x = responses_ok, @@ -106,4 +105,5 @@ get_analyses <- function(species_name, analysis <- rlang::expr(analysis) return(dplyr::arrange(tbl, species_name, !!database, !!analysis)) + } diff --git a/R/api-versioning.R b/R/api-versioning.R new file mode 100755 index 0000000..fe07a13 --- /dev/null +++ b/R/api-versioning.R @@ -0,0 +1,90 @@ +##----Re-wrote the already existing functions from `versioning.R`---- + +#' Get REST API version +#' +#' Retrieve the version of the Ensembl REST API currently in use. +#' The version format is `major.minor.point`. +#' @return A numeric value representing the REST API version components: +#' `major`, `minor`, and `point`. 
+#' @source \url{https://github.com/Ensembl/ensembl-rest/wiki/API-Versioning} +#' @examples +#' \dontrun{ +#' rest_version <- get_rest_version2() +#' print(rest_version) +#' } +#' @export +get_rest_version2 <- function() { + res <- "info/rest" + version <- get(res)[[1]] |> + httr2::resp_body_json() + version$release +} + +#' Get data version +#' +#' Retrieve the version(s) of the Ensembl data that the REST API is accessing. +#' @return A numeric value representing the data release version. +#' @source \url{https://github.com/Ensembl/ensembl-rest/wiki/API-Versioning} +#' @examples +#' \dontrun{ +#' data_version <- get_data_version2() +#' print(data_version) +#' } +#' @export +get_data_version2 <- function() { + res <- "info/data" + version <- get(res)[[1]] |> + httr2::resp_body_json() + version$release |> as.numeric() +} + +#' Get API software version +#' +#' Retrieve the version of the Ensembl software the REST API is using. +#' @return A numeric value representing the software version. +#' @source \url{https://github.com/Ensembl/ensembl-rest/wiki/API-Versioning} +#' @examples +#' \dontrun{ +#' software_version <- get_software_version2() +#' print(software_version) +#' } +#' @export +get_software_version2 <- function() { + res <- "info/software" + version <- get(res)[[1]] |> + httr2::resp_body_json() + version$release |> as.numeric() +} + +#' Get Ensembl REST versions +#' +#' Retrieve the versions of the different entities involved in the +#' REST API requests. When accessing the Ensembl REST API, you are actually +#' accessing three interconnected entities: +#' \itemize{ +#' \item Ensembl databases (\code{data}). +#' \item Perl API (\code{software}). +#' \item REST API (\code{rest}). +#' } +#' \figure{ensembl_api_versioning_wo_fonts.svg} +#' +#' @return A named list of three elements: \code{data}, \code{software} and +#' \code{rest}. 
+#' +#' @examples +#' # Get the versions of the different entities involved in the REST API +#' # requests. +#' get_versioning() +#' +#' @export +get_versioning2 <- function() { + data_version <- get_data_version2() + software_version <- get_software_version2() + rest_version <- get_rest_version2() + + api_versions <- list(data = data_version, + software = software_version, + rest = rest_version) + + return(api_versions) +} diff --git a/R/archive.R b/R/archive.R index de151ab..ff875f5 100644 --- a/R/archive.R +++ b/R/archive.R @@ -1,13 +1,14 @@ id_tbl <- function( - id = character(), - id_latest = character(), - type = character(), - id_version = integer(), - release = integer(), - is_current = logical(), - genome_assembly_name = character(), - peptide = character(), - possible_replacement = list()) { + id = character(), + id_latest = character(), + type = character(), + id_version = integer(), + release = integer(), + is_current = logical(), + genome_assembly_name = character(), + peptide = character(), + possible_replacement = list() +) { tbl <- tibble::tibble( id = id, id_latest = id_latest, @@ -24,16 +25,17 @@ id_tbl <- function( } json_list_to_id_tbl <- function(json_list) { + tbl <- id_tbl( - id = purrr::pluck(json_list, "id", .default = NA_character_), - id_latest = purrr::pluck(json_list, "latest", .default = NA_character_), - type = purrr::pluck(json_list, "type", .default = NA_character_), - id_version = as.integer(purrr::pluck(json_list, "version", .default = NA_integer_)), - release = as.integer(purrr::pluck(json_list, "release", .default = NA_integer_)), - is_current = purrr::pluck(json_list, "is_current", .default = NA) == "1", - genome_assembly_name = purrr::pluck(json_list, "assembly", .default = NA_character_), - peptide = purrr::pluck(json_list, "peptide", .default = NA_character_), - possible_replacement = purrr::pluck(json_list, "possible_replacement", .default = list(character())) + id = purrr::pluck(json_list, 'id', .default = NA_character_), 
+ id_latest = purrr::pluck(json_list, 'latest', .default = NA_character_), + type = purrr::pluck(json_list, 'type', .default = NA_character_), + id_version = as.integer(purrr::pluck(json_list, 'version', .default = NA_integer_)), + release = as.integer(purrr::pluck(json_list, 'release', .default = NA_integer_)), + is_current = purrr::pluck(json_list, 'is_current', .default = NA) == '1', + genome_assembly_name = purrr::pluck(json_list, 'assembly', .default = NA_character_), + peptide = purrr::pluck(json_list, 'peptide', .default = NA_character_), + possible_replacement = purrr::pluck(json_list, 'possible_replacement', .default = list(character())) ) return(tbl) @@ -71,13 +73,15 @@ json_list_to_id_tbl <- function(json_list) { #' } #' #' @examples -#' get_id(c("ENSDARE00000830915", "ENSG00000248378", "ENSDART00000033574", "ENSP00000000233")) +#' get_id(c('ENSDARE00000830915', 'ENSG00000248378', 'ENSDART00000033574', 'ENSP00000000233')) #' #' @export get_id <- function(id, verbose = FALSE, warnings = TRUE, progress_bar = TRUE) { + + # Assert verbose argument. assertthat::assert_that(assertthat::is.flag(verbose)) # Assert warnings argument. @@ -89,8 +93,8 @@ get_id <- function(id, e <- urltools::url_encode resource_urls <- glue::glue( - "/archive/id/", - "{e(id)}?" + '/archive/id/', + '{e(id)}?' ) # Usually we'd use purrr::map here but we opted for plyr::llply @@ -111,13 +115,11 @@ get_id <- function(id, ) # Only keep those responses that responded successfully, i.e. with status == "OK". - responses_ok <- purrr::keep(responses, ~ identical(.x$status, "OK")) + responses_ok <- purrr::keep(responses, ~ identical(.x$status, 'OK')) # If none of the responses were successful then return an empty linkage # disequilibrium tibble. 
- if (rlang::is_empty(responses_ok)) { - return(id_tbl()) - } + if (rlang::is_empty(responses_ok)) return(id_tbl()) return( purrr::imap_dfr( diff --git a/R/assert.R b/R/assert.R index 0110d7a..55d21c3 100644 --- a/R/assert.R +++ b/R/assert.R @@ -1,14 +1,13 @@ assert_division <- function(division) { + if ( !((rlang::is_character(division) && - all(division %in% (divisions <- get_divisions()))) + all(division %in% (divisions <- get_divisions()))) ) ) { possible_values <- concatenate::cc_or(divisions, oxford = TRUE) - msg <- glue::glue( - "`division` must be one or more Ensembl divisions:\n", - "{possible_values}." - ) + msg <- glue::glue('`division` must be one or more Ensembl divisions:\n', + '{possible_values}.') rlang::abort(msg) } @@ -16,10 +15,11 @@ assert_division <- function(division) { } assert_variant_id <- function(variant_id) { + # Is variant_id NULL? assertthat::assert_that( !rlang::is_null(variant_id), - msg = "`variant_id` cannot be NULL." + msg = '`variant_id` cannot be NULL.' ) # Is variant_id empty? @@ -28,7 +28,7 @@ assert_variant_id <- function(variant_id) { # Is variant_id a character vector? assertthat::assert_that( rlang::is_character(variant_id), - msg = "`variant_id` must be a character vector." + msg = '`variant_id` must be a character vector.' ) # Does variant_id contain NAs? @@ -38,16 +38,17 @@ assert_variant_id <- function(variant_id) { } assert_genomic_window_size <- function(genomic_window_size) { + # Is genomic_window_size NULL? assertthat::assert_that( !rlang::is_null(genomic_window_size), - msg = "`genomic_window_size` cannot be NULL." + msg = '`genomic_window_size` cannot be NULL.' ) # Is genomic_window_size an integer vector? assertthat::assert_that( rlang::is_integer(genomic_window_size), - msg = "`genomic_window_size` must be an integer vector." + msg = '`genomic_window_size` must be an integer vector.' ) # Does genomic_window_size contain NAs? 
@@ -56,17 +57,18 @@ assert_genomic_window_size <- function(genomic_window_size) { # Is genomic_window_size in [1, 500]? assertthat::assert_that( all(dplyr::between(genomic_window_size, 1L, 500L)), - msg = "`genomic_window_size` contains values outside the range [1, 500]." + msg = '`genomic_window_size` contains values outside the range [1, 500].' ) return(TRUE) } assert_species_name <- function(species_name) { + # Is species_name NULL? assertthat::assert_that( !rlang::is_null(species_name), - msg = "`species_name` cannot be NULL." + msg = '`species_name` cannot be NULL.' ) # Is species_name empty? @@ -75,7 +77,7 @@ assert_species_name <- function(species_name) { # Is species_name a character vector? assertthat::assert_that( rlang::is_character(species_name), - msg = "`species_name` must be a character vector." + msg = '`species_name` must be a character vector.' ) # Does species_name contain NAs? @@ -85,10 +87,11 @@ assert_species_name <- function(species_name) { } assert_population <- function(population) { + # Is population NULL? assertthat::assert_that( !rlang::is_null(population), - msg = "`population` cannot be NULL." + msg = '`population` cannot be NULL.' ) # Is population empty? @@ -97,7 +100,7 @@ assert_population <- function(population) { # Is population a character vector? assertthat::assert_that( rlang::is_character(population), - msg = "`population` must be a character vector." + msg = '`population` must be a character vector.' ) # Does population contain NAs? @@ -107,16 +110,17 @@ assert_population <- function(population) { } assert_d_prime <- function(d_prime) { + # Is d_prime NULL? assertthat::assert_that( !rlang::is_null(d_prime), - msg = "`d_prime` cannot be NULL." + msg = '`d_prime` cannot be NULL.' ) # Is d_prime a double vector? assertthat::assert_that( rlang::is_double(d_prime), - msg = "`d_prime` must be a double vector." + msg = '`d_prime` must be a double vector.' ) # Does d_prime contain NAs? 
@@ -125,23 +129,24 @@ assert_d_prime <- function(d_prime) { # Is d_prime in [0, 1]? assertthat::assert_that( all(dplyr::between(d_prime, 0, 1)), - msg = "`d_prime` contains values outside the range [0, 1]." + msg = '`d_prime` contains values outside the range [0, 1].' ) return(TRUE) } assert_r_squared <- function(r_squared) { + # Is r_squared NULL? assertthat::assert_that( !rlang::is_null(r_squared), - msg = "`r_squared` cannot be NULL." + msg = '`r_squared` cannot be NULL.' ) # Is r_squared a double vector? assertthat::assert_that( rlang::is_double(r_squared), - msg = "`r_squared` must be a double vector." + msg = '`r_squared` must be a double vector.' ) # Does r_squared contain NAs? @@ -150,17 +155,18 @@ assert_r_squared <- function(r_squared) { # Is r_squared in [0, 1]? assertthat::assert_that( all(dplyr::between(r_squared, 0.05, 1)), - msg = "`r_squared` contains values outside the range [0.05, 1]." + msg = '`r_squared` contains values outside the range [0.05, 1].' ) return(TRUE) } assert_genomic_range <- function(genomic_range) { + # Is genomic_range NULL? assertthat::assert_that( !rlang::is_null(genomic_range), - msg = "`genomic_range` cannot be NULL." + msg = '`genomic_range` cannot be NULL.' ) # Is genomic_range empty? @@ -169,7 +175,7 @@ assert_genomic_range <- function(genomic_range) { # Is genomic_range a character vector? assertthat::assert_that( rlang::is_character(genomic_range), - msg = "`genomic_range` must be a character vector." + msg = '`genomic_range` must be a character vector.' ) # Does genomic_range contain NAs? 
@@ -179,8 +185,7 @@ assert_genomic_range <- function(genomic_range) { assertthat::assert_that( all(genomic_range_lgl), msg = glue::glue("The following are not well formatted genomic ranges:', - ' {concatenate::cc_and(genomic_range[genomic_range_lgl])}.") - ) + ' {concatenate::cc_and(genomic_range[genomic_range_lgl])}.")) return(TRUE) } diff --git a/R/callback_warning.R b/R/callback_warning.R new file mode 100755 index 0000000..584ebd8 --- /dev/null +++ b/R/callback_warning.R @@ -0,0 +1,8 @@ +callback_warning <- function() { + warning( + "This happens when `callback` is NULL. We are working on it. + If you are curious, please check out the following for more info: + https://github.com/Ensembl/ensembl-rest/wiki/CORS-And-JSONP#json-p or + https://httr2.r-lib.org/reference/req_perform_stream.html" + ) +} diff --git a/R/consequence_types.R b/R/consequence_types.R index ccd5445..d54c033 100644 --- a/R/consequence_types.R +++ b/R/consequence_types.R @@ -41,24 +41,22 @@ #' @md #' @export get_variant_consequences <- function(verbose = FALSE, warnings = TRUE) { + response <- request( - resource_url = "/info/variation/consequence_types", + resource_url = '/info/variation/consequence_types', verbose = verbose, - warnings = warnings - ) + warnings = warnings) - if (!identical(response$status, "OK")) { - rlang::abort( - "Could not get a successful response\n", - "Response code was {response$response_code}." + if (!identical(response$status, 'OK')) + rlang::abort(glue::glue('Could not get a successful response\n', + 'Response code was {response$response_code}.') 
) - } tbl <- tibble::tibble( - SO_accession = purrr::pluck(response, "content", "SO_accession", .default = NA_character_), - SO_term = purrr::pluck(response, "content", "SO_term", .default = NA_character_), - label = purrr::pluck(response, "content", "label", .default = NA_character_), - description = purrr::pluck(response, "content", "description", .default = NA_character_) + SO_accession = purrr::pluck(response, 'content', 'SO_accession', .default = NA_character_), + SO_term = purrr::pluck(response, 'content', 'SO_term', .default = NA_character_), + label = purrr::pluck(response, 'content', 'label', .default = NA_character_), + description = purrr::pluck(response, 'content', 'description', .default = NA_character_) ) return(tbl) diff --git a/R/cross_references.R b/R/cross_references.R index cacd18e..dbcc4e3 100644 --- a/R/cross_references.R +++ b/R/cross_references.R @@ -9,7 +9,9 @@ xrefs_details_tbl <- function(species_name = character(), info_type = character(), info_text = character(), synonyms = list(), - description = character()) { + description = character() + ) { + tbl <- tibble::tibble( species_name = species_name, gene = gene, @@ -28,19 +30,20 @@ xrefs_details_tbl <- function(species_name = character(), } json_list_to_xrefs_details_tbl <- function(species_name, gene, ensembl_db, json_list) { + tbl <- xrefs_details_tbl( species_name = species_name, gene = gene, ensembl_db = ensembl_db, - primary_id = purrr::pluck(json_list, "primary_id", .default = NA_character_), - display_id = purrr::pluck(json_list, "display_id", .default = NA_character_), - external_db_name = purrr::pluck(json_list, "dbname", .default = NA_character_), - external_db_display_name = purrr::pluck(json_list, "db_display_name", .default = NA_character_), - version = purrr::pluck(json_list, "version", .default = NA_character_), - info_type = purrr::pluck(json_list, "info_type", .default = NA_character_), - info_text = purrr::pluck(json_list, "info_text", .default = NA_character_), - 
synonyms = purrr::pluck(json_list, "synonyms", .default = list()), - description = purrr::pluck(json_list, "description", .default = NA_character_) + primary_id = purrr::pluck(json_list, 'primary_id', .default = NA_character_), + display_id = purrr::pluck(json_list, 'display_id', .default = NA_character_), + external_db_name = purrr::pluck(json_list, 'dbname', .default = NA_character_), + external_db_display_name = purrr::pluck(json_list, 'db_display_name', .default = NA_character_), + version = purrr::pluck(json_list, 'version', .default = NA_character_), + info_type = purrr::pluck(json_list, 'info_type', .default = NA_character_), + info_text = purrr::pluck(json_list, 'info_text', .default = NA_character_), + synonyms = purrr::pluck(json_list, 'synonyms', .default = list()), + description = purrr::pluck(json_list, 'description', .default = NA_character_) ) # Convert empty strings to NA_character_ @@ -104,17 +107,18 @@ json_list_to_xrefs_details_tbl <- function(species_name, gene, ensembl_db, json_ #' #' @examples #' # Get cross references that relate to gene BRCA2 -#' get_xrefs_by_gene(species_name = "human", gene = "BRCA2") +#' get_xrefs_by_gene(species_name = 'human', gene = 'BRCA2') #' #' @md #' @export get_xrefs_by_gene <- function(species_name, gene, - ensembl_db = "core", - external_db = "", + ensembl_db = 'core', + external_db = '', verbose = FALSE, warnings = TRUE, progress_bar = TRUE) { + # Assert species_name argument. 
assert_species_name(species_name) @@ -131,12 +135,12 @@ get_xrefs_by_gene <- function(species_name, assertthat::assert_that(assertthat::is.flag(progress_bar)) error_msg <- glue::glue( - "All arguments must have consistent lengths, ", - "only values of length one are recycled:\n", - "* Length of `species_name`: {length(species_name)}\n", - "* Length of `gene`: {length(gene)}\n", - "* Length of `ensembl_db`: {length(ensembl_db)}\n", - "* Length of `external_db`: {length(external_db)}\n" + 'All arguments must have consistent lengths, ', + 'only values of length one are recycled:\n', + '* Length of `species_name`: {length(species_name)}\n', + '* Length of `gene`: {length(gene)}\n', + '* Length of `ensembl_db`: {length(ensembl_db)}\n', + '* Length of `external_db`: {length(external_db)}\n' ) if (!are_vec_recyclable(species_name, gene, ensembl_db, external_db)) { @@ -147,35 +151,30 @@ get_xrefs_by_gene <- function(species_name, # The order of names here should be same as passed to # vctrs::vec_recycle_common() - names(recycled_args) <- c("species_name", "gene", "ensembl_db", "external_db") - - resource_urls <- glue::glue( - "/xrefs/name/", - "{recycled_args$species_name}/", - "{recycled_args$gene}?", - "db_type={recycled_args$ensembl_db};", - '{p("external_db", recycled_args$external_db)}' - ) + names(recycled_args) <- c('species_name', 'gene', 'ensembl_db', 'external_db') + + resource_urls <- glue::glue('/xrefs/name/', + '{recycled_args$species_name}/', + '{recycled_args$gene}?', + 'db_type={recycled_args$ensembl_db};', + '{p("external_db", recycled_args$external_db)}') # Usually we'd use purrr::map here but we opted for plyr::llply # for a no frills alternative with progress bar support. 
- progress <- dplyr::if_else(progress_bar && interactive(), "text", "none") + progress <- dplyr::if_else(progress_bar && interactive(), 'text', 'none') responses <- plyr::llply( .data = resource_urls, .fun = request, verbose = verbose, warnings = warnings, - .progress = progress - ) + .progress = progress) # Only keep those responses that responded successfully, i.e. with status == "OK". - responses_ok <- purrr::keep(responses, ~ identical(.x$status, "OK")) + responses_ok <- purrr::keep(responses, ~ identical(.x$status, 'OK')) # If none of the responses were successful then return an empty linkage # disequilibrium tibble. - if (rlang::is_empty(responses_ok)) { - return(xrefs_details_tbl()) - } + if (rlang::is_empty(responses_ok)) return(xrefs_details_tbl()) return( purrr::imap_dfr( @@ -188,20 +187,23 @@ get_xrefs_by_gene <- function(species_name, ) ) ) + } xrefs_details_tbl2 <- function(species_name = character(), - ensembl_id = character(), - ensembl_db = character(), - primary_id = character(), - display_id = character(), - external_db_name = character(), - external_db_display_name = character(), - version = character(), - info_type = character(), - info_text = character(), - synonyms = list(), - description = character()) { + ensembl_id = character(), + ensembl_db = character(), + primary_id = character(), + display_id = character(), + external_db_name = character(), + external_db_display_name = character(), + version = character(), + info_type = character(), + info_text = character(), + synonyms = list(), + description = character() +) { + tbl <- tibble::tibble( species_name = species_name, ensembl_id = ensembl_id, @@ -220,19 +222,20 @@ xrefs_details_tbl2 <- function(species_name = character(), } json_list_to_xrefs_details_tbl2 <- function(species_name, ensembl_id, ensembl_db, json_list) { + tbl <- xrefs_details_tbl2( species_name = species_name, ensembl_id = ensembl_id, ensembl_db = ensembl_db, - primary_id = purrr::pluck(json_list, "primary_id", .default = 
NA_character_), - display_id = purrr::pluck(json_list, "display_id", .default = NA_character_), - external_db_name = purrr::pluck(json_list, "dbname", .default = NA_character_), - external_db_display_name = purrr::pluck(json_list, "db_display_name", .default = NA_character_), - version = purrr::pluck(json_list, "version", .default = NA_character_), - info_type = purrr::pluck(json_list, "info_type", .default = NA_character_), - info_text = purrr::pluck(json_list, "info_text", .default = NA_character_), - synonyms = purrr::pluck(json_list, "synonyms", .default = list()), - description = purrr::pluck(json_list, "description", .default = NA_character_) + primary_id = purrr::pluck(json_list, 'primary_id', .default = NA_character_), + display_id = purrr::pluck(json_list, 'display_id', .default = NA_character_), + external_db_name = purrr::pluck(json_list, 'dbname', .default = NA_character_), + external_db_display_name = purrr::pluck(json_list, 'db_display_name', .default = NA_character_), + version = purrr::pluck(json_list, 'version', .default = NA_character_), + info_type = purrr::pluck(json_list, 'info_type', .default = NA_character_), + info_text = purrr::pluck(json_list, 'info_text', .default = NA_character_), + synonyms = purrr::pluck(json_list, 'synonyms', .default = list()), + description = purrr::pluck(json_list, 'description', .default = NA_character_) ) # Convert empty strings to NA_character_ @@ -348,20 +351,21 @@ json_list_to_xrefs_details_tbl2 <- function(species_name, ensembl_id, ensembl_db #' [/xrefs/id/:id](https://rest.ensembl.org/documentation/info/xref_id). 
#' #' @examples -#' get_xrefs_by_ensembl_id("human", "ENSG00000248378") +#' get_xrefs_by_ensembl_id('human', 'ENSG00000248378') #' -#' get_xrefs_by_ensembl_id("human", "ENSG00000248378", all_levels = TRUE) +#' get_xrefs_by_ensembl_id('human', 'ENSG00000248378', all_levels = TRUE) #' @md #' @export get_xrefs_by_ensembl_id <- function(species_name, ensembl_id, all_levels = FALSE, - ensembl_db = "core", - external_db = "", - feature = "", + ensembl_db = 'core', + external_db = '', + feature = '', verbose = FALSE, warnings = TRUE, progress_bar = TRUE) { + # Assert species_name argument. assert_species_name(species_name) @@ -382,60 +386,54 @@ get_xrefs_by_ensembl_id <- function(species_name, # Assert progress_bar argument. assertthat::assert_that(assertthat::is.flag(progress_bar)) - all_levels <- ifelse(all_levels, "1", "0") + all_levels <- ifelse(all_levels, '1', '0') error_msg <- glue::glue( - "All arguments must have consistent lengths, ", - "only values of length one are recycled:\n", - "* Length of `species_name`: {length(species_name)}\n", - "* Length of `ensembl_id`: {length(ensembl_id)}\n", - "* Length of `all_levels`: {length(all_levels)}\n", - "* Length of `ensembl_db`: {length(ensembl_db)}\n", - "* Length of `external_db`: {length(external_db)}\n", - "* Length of `feature`: {length(feature)}\n" + 'All arguments must have consistent lengths, ', + 'only values of length one are recycled:\n', + '* Length of `species_name`: {length(species_name)}\n', + '* Length of `ensembl_id`: {length(ensembl_id)}\n', + '* Length of `all_levels`: {length(all_levels)}\n', + '* Length of `ensembl_db`: {length(ensembl_db)}\n', + '* Length of `external_db`: {length(external_db)}\n', + '* Length of `feature`: {length(feature)}\n' ) - if (!are_vec_recyclable( - species_name, - ensembl_id, - all_levels, - ensembl_db, - external_db, - feature - )) { + if (!are_vec_recyclable(species_name, + ensembl_id, + all_levels, + ensembl_db, + external_db, + feature)) { rlang::abort(error_msg) } - 
recycled_args <- vctrs::vec_recycle_common( - species_name, - ensembl_id, - all_levels, - ensembl_db, - external_db, - feature - ) + recycled_args <- vctrs::vec_recycle_common(species_name, + ensembl_id, + all_levels, + ensembl_db, + external_db, + feature) # The order of names here should be same as passed to # vctrs::vec_recycle_common() names(recycled_args) <- c( - "species_name", - "ensembl_id", - "all_levels", - "ensembl_db", - "external_db", - "feature" + 'species_name', + 'ensembl_id', + 'all_levels', + 'ensembl_db', + 'external_db', + 'feature' ) - resource_urls <- glue::glue( - "/xrefs/id/", - "{recycled_args$ensembl_id}?", - ';{p("all_levels", recycled_args$all_levels)}', - ';{p("db_type", recycled_args$ensembl_db)}', - ';{p("external_db", recycled_args$external_db)}', - ';{p("object_type", recycled_args$feature)}', - ';{p("species", recycled_args$species_name)}' - ) + resource_urls <- glue::glue('/xrefs/id/', + '{recycled_args$ensembl_id}?', + ';{p("all_levels", recycled_args$all_levels)}', + ';{p("db_type", recycled_args$ensembl_db)}', + ';{p("external_db", recycled_args$external_db)}', + ';{p("object_type", recycled_args$feature)}', + ';{p("species", recycled_args$species_name)}') # Usually we'd use purrr::map here but we opted for plyr::llply # for a no frills alternative with progress bar support. @@ -455,15 +453,13 @@ get_xrefs_by_ensembl_id <- function(species_name, ) # Only keep those responses that responded successfully, i.e. with status == "OK". - responses_ok <- purrr::keep(responses, ~ identical(.x$status, "OK")) + responses_ok <- purrr::keep(responses, ~ identical(.x$status, 'OK')) # If none of the responses were successful then return an empty linkage # disequilibrium tibble. 
- if (rlang::is_empty(responses_ok)) { - return(xrefs_details_tbl2()) - } + if (rlang::is_empty(responses_ok)) return(xrefs_details_tbl2()) - # return(tibble::as_tibble(responses_ok[[1]]$content, .name_repair = 'unique')) + #return(tibble::as_tibble(responses_ok[[1]]$content, .name_repair = 'unique')) return(purrr::imap_dfr( .x = responses_ok, @@ -474,6 +470,7 @@ get_xrefs_by_ensembl_id <- function(species_name, json_list = .x$content ) )) + } #' xrefs_symbol_species_symbol_tbl <- diff --git a/R/data.R b/R/data.R index 8efd023..3f09f9a 100644 --- a/R/data.R +++ b/R/data.R @@ -15,4 +15,4 @@ #' downloaded from Ensembl.} #' } #' @source \url{https://rest.ensembl.org/} -"rest_api_endpoints" +'rest_api_endpoints' diff --git a/R/divisions.R b/R/divisions.R index 39026f2..616af00 100644 --- a/R/divisions.R +++ b/R/divisions.R @@ -14,17 +14,15 @@ #' #' @export get_divisions <- function(verbose = FALSE, warnings = TRUE) { + response <- request( - resource_url = "/info/divisions?", + resource_url = '/info/divisions?', verbose = verbose, - warnings = warnings - ) + warnings = warnings) - if (!identical(response$status, "OK")) { - rlang::abort( - "Could not get a successful response\n", - "Response code was {response$response_code}." + if (!identical(response$status, 'OK')) + rlang::abort(glue::glue('Could not get a successful response\n', + 'Response code was {response$response_code}.') 
) - } - return(purrr::pluck(response, "content")) + return(purrr::pluck(response, 'content')) } diff --git a/R/ensembl-endpoints.R b/R/ensembl-endpoints.R new file mode 100755 index 0000000..3087f62 --- /dev/null +++ b/R/ensembl-endpoints.R @@ -0,0 +1,1781 @@ +# These functions will perform validation on input parameters that are tailored +# to each specific endpoint, and return already an R object that is sensible +# for the type of information being returned by the endpoint +# -------------------------------------------------------- # +# Endpoints ==== +## Comparative Genomics ===== + +#' Get cafe gene tree by id +#' +#' Retrieves a cafe tree of the gene tree using the gene tree stable identifier +#' +#' @param id A string representing the gene tree stable identifier. +#' @param callback String \emph{(optional)} Name of the callback subroutine +#' to be returned by the requested JSONP response. Required ONLY when using +#' JSONP as the serialisation method. Please +#' see also [the user guide](http://github.com/Ensembl/ensembl-rest/wiki). +#' @param compara String \emph{(optional)} Name of the compara database to use. +#' Multiple comparas exist on a server for separate species divisions. +#' Default is "vertebrates". +#' @param nh_format String \emph{(optional)} The format of a NH (New Hampshire) +#' request. Available only with the default setting to allow us to return +#' the cafe tree with Taxa names appended with number of members +#' and the p_value. Example: "homo_sapiens_3_0.123" where 3 is the number +#' of members and 0.123 is the p value. +#' +#' @return A list of parsed JSON responses containing the cafe tree +#' for the provided gene tree stable identifier. +#' +#' @note +#' See more about the implemented endpoint [get_cafe_genetree_by_id()] +#' on the following [GET cafe/genetree/id/:id](https://rest.ensembl.org/documentation/info/cafe_tree) +#' from the official [Ensembl Rest API](https://rest.ensembl.org/). 
#'
#' @export
#' @examples
#' get_cafe_genetree_by_id("ENSGT00390000003602")
#' get_cafe_genetree_by_id("ENSGT00390000003602",
#'                         callback = "randomlygeneratedname")
#' get_cafe_genetree_by_id("ENSGT00390000003602",
#'                         compara = "vertebrates")
#' get_cafe_genetree_by_id("ENSGT00390000003602",
#'                         nh_format = "homo_sapiens_3_0.123")
#'
get_cafe_genetree_by_id <- function(id, callback = "myrandomfunctionname",
                                    compara = "vertebrates",
                                    nh_format = "simple") {
  if (missing(id)) {
    stop("The 'id' parameter is required.")
  }

  # `compara` is accepted for interface compatibility but is never forwarded
  # to the request. Notify the caller only when they actually supplied it, and
  # use message() (stderr, silenceable with suppressMessages()) rather than
  # cat(), which would pollute stdout on every call.
  if (!missing(compara)) {
    message(
      "`compara` seems to not be working for this endpoint, ",
      "so this parameter is ignored for the moment.\n",
      "More detail on the parameter: ",
      "https://rest.ensembl.org/documentation/info/cafe_tree"
    )
  }

  # A NULL callback short-circuits the request: warn and return an empty list
  # without contacting the server.
  if (is.null(callback)) {
    warning("Callback is null. Returning an empty response.")
    return(list())
  }

  # Leading slash added so the resource path has the same shape as every other
  # endpoint wrapper in this file.
  response <- get(
    res = "/cafe/genetree/id/{id}",
    id = id,
    nh_format = nh_format,
    .headers = req_headers(content_type = "application/json")
  )

  response
}

#' Get cafe gene tree by symbol
#'
#' Retrieves the cafe tree of the gene tree that contains the gene identified
#' by a symbol
#'
#' @param species A string representing the species name (e.g., "homo_sapiens").
#' @param symbol A string representing the gene symbol (e.g., "BRCA2").
#'
#' @return A list of parsed JSON responses containing the cafe tree
#' for the provided species and gene symbol.
#'
#' @note
#' See more about the implemented endpoint [get_cafe_genetree_by_symbol()]
#' on the following [GET cafe/genetree/member/symbol/:species/:symbol](https://rest.ensembl.org/documentation/info/cafe_tree_member_symbol)
#' from the official [Ensembl Rest API](https://rest.ensembl.org/).
#'
#' @export
#' @examples
#' get_cafe_genetree_by_symbol("homo_sapiens", "BRCA2")
get_cafe_genetree_by_symbol <- function(species, symbol) {
  if (missing(species) || missing(symbol)) {
    stop("Both 'species' and 'symbol' parameters are required.")
  }

  # `symbol` is forwarded exactly once; the previous call listed it twice.
  response <- get(
    res = "/cafe/genetree/member/symbol/{species}/{symbol}",
    species = species,
    symbol = symbol,
    .headers = req_headers(content_type = "application/json")
  )

  response
}

#' Get cafe gene tree by species id
#'
#' Retrieves the cafe tree of the gene tree that contains the
#' gene/transcript/translation stable identifier in the given species
#'
#' @param species A string representing the species name (e.g., "homo_sapiens").
#' @param id A string representing the gene, transcript, or translation
#' stable identifier.
#'
#' @return A list of parsed JSON responses containing the cafe tree for
#' the provided species and stable identifier.
#'
#' @note
#' See more about the implemented endpoint [get_cafe_genetree_by_species_id()]
#' on the following [GET cafe/genetree/member/id/:species/:id](https://rest.ensembl.org/documentation/info/cafe_tree_species_member_id)
#' from the official [Ensembl Rest API](https://rest.ensembl.org/).
#'
#' @export
#' @examples
#' get_cafe_genetree_by_species_id("homo_sapiens", "ENST00000380152")
get_cafe_genetree_by_species_id <- function(species, id) {
  if (missing(species) || missing(id)) {
    stop("Both 'species' and 'id' parameters are required.")
  }

  response <- get(
    res = "/cafe/genetree/member/id/{species}/{id}",
    species = species,
    id = id,
    .headers = req_headers(content_type = "application/json")
  )

  response
}

#' Get gene tree by id
#'
#' Retrieves a gene tree for a gene tree stable identifier
#'
#' @param id A string representing the gene tree stable identifier.
#'
#' @return A list of parsed JSON responses containing the gene tree for
#' the provided gene tree stable identifier.
#'
#' @note
#' See more about the implemented endpoint [get_genetree_by_id()]
#' on the following [GET genetree/id/:id](https://rest.ensembl.org/documentation/info/genetree)
#' from the official [Ensembl Rest API](https://rest.ensembl.org/).
#'
#' @export
#' @examples
#' get_genetree_by_id("ENSGT00390000003602")
get_genetree_by_id <- function(id) {
  if (missing(id)) {
    stop("The 'id' parameter is required.")
  }

  response <- get(
    res = "/genetree/id/{id}",
    id = id,
    .headers = req_headers(content_type = "application/json")
  )

  # Return the value explicitly: ending on the assignment would return it
  # invisibly, so a bare call at the console would print nothing.
  response
}

#' Get gene tree by symbol
#'
#' Retrieves the gene tree that contains the gene identified by a symbol
#'
#' @param species A string representing the species name (e.g., "homo_sapiens").
#' @param symbol A string representing the gene symbol (e.g., "BRCA2").
#'
#' @return A list of parsed JSON responses containing the gene tree
#' for the provided species and gene symbol.
#'
#' @note
#' See more about the implemented endpoint [get_genetree_by_symbol()]
#' on the following [GET genetree/member/symbol/:species/:symbol](https://rest.ensembl.org/documentation/info/genetree_member_symbol)
#' from the official [Ensembl Rest API](https://rest.ensembl.org/).
#'
#' @export
#' @examples
#' get_genetree_by_symbol("homo_sapiens", "BRCA2")
get_genetree_by_symbol <- function(species, symbol) {
  if (missing(species) || missing(symbol)) {
    stop("Both 'species' and 'symbol' parameters are required.")
  }

  response <- get(
    res = "/genetree/member/symbol/{species}/{symbol}",
    species = species,
    symbol = symbol,
    .headers = req_headers(content_type = "application/json")
  )

  # Visible return, consistent with the cafe gene tree wrappers.
  response
}

#' Get gene tree by species id
#'
#' Retrieves the gene tree that contains the gene/transcript/translation
#' stable identifier in the given species
#'
#' @param species A string representing the species name (e.g., "homo_sapiens").
#' @param id A string representing the gene, transcript, or translation stable
#' identifier.
#'
#' @return A list of parsed JSON responses containing the gene tree
#' for the provided species and stable identifier.
#'
#' @note
#' See more about the implemented endpoint [get_genetree_by_species_id()]
#' on the following [GET genetree/member/id/:species/:id](https://rest.ensembl.org/documentation/info/genetree_species_member_id)
#' from the official [Ensembl Rest API](https://rest.ensembl.org/).
#'
#' @export
#' @examples
#' get_genetree_by_species_id("homo_sapiens", "ENST00000380152")
get_genetree_by_species_id <- function(species, id) {
  if (missing(species) || missing(id)) {
    stop("Both 'species' and 'id' parameters are required.")
  }

  response <- get(
    res = "/genetree/member/id/{species}/{id}",
    species = species,
    id = id,
    .headers = req_headers(content_type = "application/json")
  )

  # Visible return: ending on the assignment would hide the result.
  response
}

#' Get alignment by region
#'
#' Retrieves genomic alignments as separate blocks based on a region and species
#'
#' @param species A string representing the species name (e.g., "homo_sapiens").
#' @param region A string representing the genomic region (e.g., "3:1000-2000").
#'
#' @return Currently `NULL`, invisibly: the function is a placeholder that
#' validates its arguments and warns, but sends no request yet.
#'
#' @note
#' See more about the implemented endpoint [get_alignment_by_region()]
#' on the following [GET alignment/region/:species/:region](https://rest.ensembl.org/documentation/info/genomic_alignment_region)
#' from the official [Ensembl Rest API](https://rest.ensembl.org/).
#'
#' @export
#' @examples
#' get_alignment_by_region("homo_sapiens", "3:1000-2000")
get_alignment_by_region <- function(species, region) {
  # Validate first so a bad call fails with the argument error only, instead
  # of a warning followed by an error.
  if (missing(species) || missing(region)) {
    stop("Both 'species' and 'region' parameters are required.")
  }

  warning(
    "This function is still under development: ",
    "no request is sent and NULL is returned."
  )

  # The request is intentionally disabled until the endpoint wrapper works.
  # response <- get(res = "/alignment/region/{species}/{region}",
  #   species = species, region = region,
  #   .headers = req_headers(content_type = "application/json")
  # )
  invisible(NULL)
}

#' Get homologous by species id
#'
#' Retrieves homology information (orthologs) by species and Ensembl gene ID
#'
#' @param species A string representing the species name (e.g., "homo_sapiens").
#' @param id A string representing the Ensembl gene ID.
#'
#' @return A list of parsed JSON responses containing homology information
#' for the provided species and Ensembl gene ID.
#'
#' @note
#' See more about the implemented endpoint [get_homology_by_species_id()]
#' on the following [GET homology/id/:species/:id](https://rest.ensembl.org/documentation/info/homology_species_gene_id)
#' from the official [Ensembl Rest API](https://rest.ensembl.org/).
#'
#' @export
#' @examples
#' get_homology_by_species_id("homo_sapiens", "ENSG00000157764")
get_homology_by_species_id <- function(species, id) {
  if (missing(species) || missing(id)) {
    stop("Both 'species' and 'id' parameters are required.")
  }

  response <- get(
    res = "/homology/id/{species}/{id}",
    species = species,
    id = id,
    .headers = req_headers(content_type = "application/json")
  )

  # Visible return: ending on the assignment would hide the result.
  response
}

#' Get homologous by symbol
#'
#' Retrieves homology information (orthologs) by species and symbol
#'
#' @param species A string representing the species name (e.g., "homo_sapiens").
#' @param symbol A string representing the gene symbol (e.g., "BRCA2").
#'
#' @return A list of parsed JSON responses containing homology information
#' for the provided species and gene symbol.
#'
#' @note
#' See more about the implemented endpoint [get_homology_by_symbol()]
#' on the following [GET homology/symbol/:species/:symbol](https://rest.ensembl.org/documentation/info/homology_symbol)
#' from the official [Ensembl Rest API](https://rest.ensembl.org/).
#'
#' @export
#' @examples
#' get_homology_by_symbol("homo_sapiens", "BRCA2")
get_homology_by_symbol <- function(species, symbol) {
  if (missing(species) || missing(symbol)) {
    stop("Both 'species' and 'symbol' parameters are required.")
  }

  # The resource is passed by name (`res =`), like every other wrapper in this
  # file, rather than relying on positional matching.
  response <- get(
    res = "/homology/symbol/{species}/{symbol}",
    species = species,
    symbol = symbol,
    .headers = req_headers(content_type = "application/json")
  )

  # Visible return: ending on the assignment would hide the result.
  response
}

# -------------------------------------------------------- #
## Cross References ====

#' Get external linked references by symbol
#'
#' Looks up an external symbol and returns all Ensembl objects linked to it
#'
#' This can be a display name for a gene/transcript/translation, a synonym,
#' or an externally linked reference.
#' If a gene's transcript is linked to the supplied symbol, the service will
#' return both gene and transcript (it supports transient links).
#'
#' @param species A string representing the species name (e.g., "homo_sapiens").
#' @param symbol A string representing the external symbol (e.g., "BRCA2").
#'
#' @return A list of parsed JSON responses containing Ensembl objects linked
#' to the provided external symbol.
#'
#' See more about the implemented endpoint [get_xrefs_by_symbol()]
#' on the following [GET xrefs/symbol/:species/:symbol](https://rest.ensembl.org/documentation/info/xref_external)
#' from the official [Ensembl Rest API](https://rest.ensembl.org/).
#'
#' @export
#' @examples
#' get_xrefs_by_symbol("homo_sapiens", "BRCA2")
get_xrefs_by_symbol <- function(species, symbol) {
  if (missing(species) || missing(symbol)) {
    stop("Both 'species' and 'symbol' parameters are required.")
  }

  response <- get(
    res = "/xrefs/symbol/{species}/{symbol}",
    species = species,
    symbol = symbol,
    .headers = req_headers(content_type = "application/json")
  )

  # Visible return: ending on the assignment would hide the result.
  response
}

#' Get external linked references by id
#'
#' Performs lookups of Ensembl Identifiers and retrieves their external
#' references in other databases
#'
#' @param id A string representing the Ensembl Identifier (e.g., "ENSG00000157764").
#'
#' @return A list of parsed JSON responses containing external references
#' for the provided Ensembl identifier.
#'
#' @note
#' See more about the implemented endpoint [get_xrefs_by_id()]
#' on the following [GET xrefs/id/:id](https://rest.ensembl.org/documentation/info/xref_id)
#' from the official [Ensembl Rest API](https://rest.ensembl.org/).
#'
#' @export
#' @examples
#' get_xrefs_by_id("ENSG00000157764")
get_xrefs_by_id <- function(id) {
  if (missing(id)) {
    stop("The 'id' parameter is required.")
  }

  response <- get(
    res = "/xrefs/id/{id}",
    id = id,
    .headers = req_headers(content_type = "application/json")
  )

  # Visible return: ending on the assignment would hide the result.
  response
}

#' Get external linked references by name
#'
#' Performs a lookup based upon the primary accession or display label
#' of an external reference
#'
#' @param species A string representing the species name (e.g., "homo_sapiens").
#' @param name A string representing the primary accession or display label
#' of the external reference.
#'
#' @return A list of parsed JSON responses containing information about
#' the provided external reference.
#'
#' See more about the implemented endpoint [get_xrefs_by_name()]
#' on the following [GET xrefs/name/:species/:name](https://rest.ensembl.org/documentation/info/xref_name)
#' from the official [Ensembl Rest API](https://rest.ensembl.org/).
#'
#' @export
#' @examples
#' get_xrefs_by_name("homo_sapiens", "P38398")
get_xrefs_by_name <- function(species, name) {
  if (missing(species) || missing(name)) {
    stop("Both 'species' and 'name' parameters are required.")
  }

  response <- get(
    res = "/xrefs/name/{species}/{name}",
    species = species,
    name = name,
    .headers = req_headers(content_type = "application/json")
  )

  # Visible return: ending on the assignment would hide the result.
  response
}

# -------------------------------------------------------- #
## Information ====

#' Get the names of analyses involved in generating Ensembl data
#'
#' Retrieves a list of analysis names associated with generating Ensembl
#' data for a given species.
#'
#' @param species A string representing the species name or alias
#' (e.g., "homo_sapiens").
#' @param callback (Optional) A string representing the name of the callback
#' subroutine for JSONP responses. When `NULL`, no request is sent.
#'
#' @return A list of analysis names related to the specified species, or an
#' empty list (with a warning) when `callback` is `NULL`.
#'
#' @note
#' See more about the implemented endpoint [get_analysis_info()]
#' on the following [GET info/analysis/:species](https://rest.ensembl.org/documentation/info/analysis)
#' from the official [Ensembl Rest API](https://rest.ensembl.org/).
#'
#' @export
#' @examples
#' get_analysis_info("homo_sapiens")
#' get_analysis_info("homo_sapiens", callback = "randomlygeneratedname")
get_analysis_info <- function(species, callback = "randomlygeneratedname") {
  if (missing(species)) {
    stop("'species' parameter is required.")
  }

  if (!is.null(callback)) {
    # Forward only the arguments that are not NULL.
    query_params <- Filter(
      Negate(is.null),
      list(species = species, callback = callback)
    )
    headers <- req_headers(content_type = "application/json")

    response <- do.call(
      get,
      c(list(res = "/info/analysis/{species}", .headers = headers),
        query_params)
    )
  } else {
    warning("Callback is null. Returning an empty response.")
    response <- list()
  }

  response
}

#' Get the available assemblies for a species
#'
#' Retrieves a list of available assemblies for a given species,
#' including toplevel sequences, chromosomes, and optionally cytogenetic
#' bands and synonyms.
#'
#' @param species A string representing the species name or alias
#' (e.g., "homo_sapiens").
#' @param bands (Optional) A boolean (0 or 1) indicating whether to
#' include karyotype band information. Default is 0.
#' @param synonyms (Optional) A boolean (0 or 1) indicating whether to
#' include information about known synonyms. Default is 0.
#' @param callback (Optional) A string representing the name of the callback
#' subroutine for JSONP responses. When `NULL`, no request is sent.
#'
#' @return A list of parsed JSON responses containing information about
#' the available assemblies for the specified species, or an empty list
#' (with a warning) when `callback` is `NULL`.
#'
#' @note
#' See more about the implemented endpoint [get_assembly_info()]
#' on the following [GET info/assembly/:species](https://rest.ensembl.org/documentation/info/assembly_info)
#' from the official [Ensembl Rest API](https://rest.ensembl.org/).
#'
#' @export
#' @examples
#' get_assembly_info("homo_sapiens")
#' get_assembly_info("homo_sapiens", bands = 1)
#' get_assembly_info("homo_sapiens", synonyms = 1,
#'                   callback = "randomlygeneratedname")
get_assembly_info <- function(species, bands = 0, synonyms = 0,
                              callback = "randomlygeneratedname") {
  if (missing(species)) {
    stop("'species' parameter is required.")
  }

  if (!is.null(callback)) {
    # The former `.params = params` argument referenced an object that is not
    # defined anywhere in this function, so it is no longer passed.
    response <- get(
      res = "/info/assembly/{species}",
      species = species,
      bands = bands,
      synonyms = synonyms,
      .headers = req_headers(content_type = "application/json")
    )
  } else {
    warning("Callback is null. Returning an empty response.")
    response <- list()
  }

  response
}

#' Get information about a specific toplevel sequence region for a species
#'
#' Retrieves information about the specified toplevel sequence region for
#' a given species, with optional details on karyotype bands and synonyms.
#'
#' @param species A string representing the species name or alias (e.g., "homo_sapiens").
#' @param region_name A string representing the name of the toplevel sequence region (e.g., "X").
#' @param bands (Optional) A boolean (0 or 1) indicating whether to include
#' karyotype band information. Default is 0.
#' @param synonyms (Optional) A boolean (0 or 1) indicating whether to include
#' information about known synonyms. Default is 0.
#' @param callback (Optional) A string representing the name of the callback
#' subroutine for JSONP responses. When `NULL`, no request is sent.
#'
#' @return A list of parsed JSON responses containing information about
#' the specified sequence region for the given species, or an empty list
#' (with a warning) when `callback` is `NULL`.
#'
#' @note
#' See more about the implemented endpoint [get_region_info()]
#' on the following [GET info/assembly/:species/:region_name](https://rest.ensembl.org/documentation/info/assembly_stats)
#' from the official [Ensembl Rest API](https://rest.ensembl.org/).
#'
#' @export
#' @examples
#' get_region_info("homo_sapiens", "X")
#' get_region_info("homo_sapiens", "X", bands = 1)
#' get_region_info("homo_sapiens", "X", synonyms = 1, callback = "randomlygeneratedname")
get_region_info <- function(species, region_name, bands = 0, synonyms = 0,
                            callback = "randomlygeneratedname") {
  if (missing(species) || missing(region_name)) {
    stop("'species' and 'region_name' parameters are required.")
  }

  if (!is.null(callback)) {
    response <- get(
      res = "/info/assembly/{species}/{region_name}",
      species = species,
      region_name = region_name,
      bands = bands,
      synonyms = synonyms,
      .headers = req_headers(content_type = "application/json")
    )
  } else {
    warning("Callback is null. Returning an empty response.")
    response <- list()
  }

  response
}

#' Get the functional classifications of gene models for a species
#'
#' Retrieves the list of functional classifications (biotypes) of gene models
#' that Ensembl associates with a particular species.
#' Useful for restricting the type of genes/transcripts retrieved by other endpoints.
#'
#' @param species A string representing the species name or alias (e.g., "homo_sapiens").
#' @param callback (Optional) A string representing the name of the callback subroutine
#' for JSONP responses. When `NULL`, no request is sent.
#'
#' @return A list of parsed JSON responses containing the biotypes for the specified species,
#' or an empty list (with a warning) when `callback` is `NULL`.
#'
#' @note
#' See more about the implemented endpoint [get_biotypes()]
#' on the following [GET info/biotypes/:species](https://rest.ensembl.org/documentation/info/biotypes)
#' from the official [Ensembl Rest API](https://rest.ensembl.org/).
#'
#' @export
#' @examples
#' get_biotypes("homo_sapiens")
#' get_biotypes("homo_sapiens", callback = "randomlygeneratedname")
get_biotypes <- function(species, callback = "randomlygeneratedname") {
  if (missing(species)) {
    stop("'species' parameter is required.")
  }

  if (!is.null(callback)) {
    response <- get(
      res = "/info/biotypes/{species}",
      species = species,
      .headers = req_headers(content_type = "application/json")
    )
  } else {
    warning("Callback is null. Returning an empty response.")
    response <- list()
  }

  response
}

#' Get properties of biotypes within a group
#'
#' Retrieves a list of available biotype groups or, if a group is specified,
#' the properties of biotypes within that group.
#' Optionally, the object type (gene or transcript) can be used to filter the results.
#'
#' @param group (Optional) A string representing the biotype group (e.g., "coding").
#' If not provided, the available biotype groups are returned.
#' @param object_type (Optional) A string specifying the object type ("gene" or "transcript").
#' @param callback (Optional) A string representing the name of the callback subroutine for JSONP responses.
#'
#' @return A list of parsed JSON responses containing the properties of biotypes within
#' the specified group or all biotype groups if no group is provided.
#'
#' @note
#' See more about the implemented endpoint [get_biotypes_groups()]
#' on the following [GET info/biotypes/groups/:group/:object_type](https://rest.ensembl.org/documentation/info/biotypes_groups)
#' from the official [Ensembl Rest API](https://rest.ensembl.org/).
#'
#' @export
#' @examples
#' get_biotypes_groups()
#' get_biotypes_groups(group = "coding")
#' get_biotypes_groups(group = "coding", object_type = "gene")
#' get_biotypes_groups(group = "coding", object_type = "gene", callback = "randomlygeneratedname")
get_biotypes_groups <- function(group = '', object_type = '',
                                callback = "randomlygeneratedname") {
  # TODO: Create an issue:
  # To handle the case in which `group` parameter is null or empty
  # while `object_type` is not null nor empty.

  # Guard clause: without a callback nothing is requested.
  if (is.null(callback)) {
    warning("Callback is null. Returning an empty response.")
    return(list())
  }

  # Keep only the arguments that were actually provided (non-NULL).
  supplied <- Filter(
    Negate(is.null),
    list(group = group, object_type = object_type, callback = callback)
  )
  json_headers <- req_headers(content_type = "application/json")
  fixed_args <- list(
    res = "/info/biotypes/groups/{group}/{object_type}",
    .headers = json_headers
  )

  result <- do.call(get, c(fixed_args, supplied))
  result
}

#' Get properties of biotypes by name
#'
#' Retrieves the properties of biotypes with a given name. Optionally, the object type (gene or transcript) can be provided for filtering.
#'
#' @param name A string representing the biotype name (e.g., "protein_coding").
#' @param object_type (Optional) A string specifying the object type ("gene" or "transcript").
#' @param callback (Optional) A string representing the name of the callback subroutine for JSONP responses.
#' When `NULL`, no request is sent.
#'
#' @return A list of parsed JSON responses containing the properties of biotypes with the given name,
#' or an empty list (with a warning) when `callback` is `NULL`.
#'
#' @note
#' See more about the implemented endpoint [get_biotypes_by_name()]
#' on the following [GET info/biotypes/name/:name/:object_type](https://rest.ensembl.org/documentation/info/biotypes_name)
#' from the official [Ensembl Rest API](https://rest.ensembl.org/).
#'
#' @export
#' @examples
#' get_biotypes_by_name("protein_coding")
#' get_biotypes_by_name("protein_coding", object_type = "gene")
#' get_biotypes_by_name("protein_coding", object_type = "gene", callback = "randomlygeneratedname")
get_biotypes_by_name <- function(name, object_type = "",
                                 callback = "randomlygeneratedname") {
  if (missing(name)) {
    stop("'name' parameter is required.")
  }

  if (!is.null(callback)) {
    # Forward only the arguments that are not NULL.
    query_params <- Filter(
      Negate(is.null),
      list(name = name, object_type = object_type, callback = callback)
    )
    headers <- req_headers(content_type = "application/json")

    # This wrapper documents the biotypes-by-name endpoint; the previous
    # resource path pointed at `/info/compara/species_sets/...` instead.
    response <- do.call(
      get,
      c(list(res = "/info/biotypes/name/{name}/{object_type}",
             .headers = headers),
        query_params)
    )
  } else {
    warning("Callback is null. Returning an empty response.")
    response <- list()
  }

  response
}

#' Get list of compara methods
#'
#' Retrieves a list of all compara analyses available (an analysis defines the type
#' of comparative data). Optional filtering by class or compara database can be applied.
#'
#' @param callback (Optional) A string representing the name of the callback subroutine
#' for JSONP responses.
#' @param class (Optional) A string specifying the class of the method to query for.
#' Regular expression patterns are supported (e.g., "GenomicAlign").
#' @param compara (Optional) A string representing the name of the compara database
#' to use (e.g., "vertebrates").
#'
#' @return A list of parsed JSON responses containing all available compara methods.
#'
#' @note
#' See more about the implemented endpoint [get_compara_methods()]
#' on the following [GET info/compara/methods](https://rest.ensembl.org/documentation/info/compara_methods)
#' from the official [Ensembl Rest API](https://rest.ensembl.org/).
#'
#' @export
#' @examples
#' get_compara_methods()
#' get_compara_methods(class = "GenomicAlign")
#' get_compara_methods(compara = "vertebrates", class = "GenomicAlign")
get_compara_methods <- function(callback = "randomlygeneratedname",
                                class = NULL, compara = NULL) {
  # Guard clause: without a callback nothing is requested.
  if (is.null(callback)) {
    warning("Callback is null. Returning an empty response.")
    return(list())
  }

  # Optional filters are forwarded only when supplied (non-NULL).
  filters <- Filter(
    Negate(is.null),
    list(class = class, compara = compara, callback = callback)
  )
  json_headers <- req_headers(content_type = "application/json")
  request_args <- c(
    list(res = "/info/compara/methods", .headers = json_headers),
    filters
  )

  result <- do.call(get, request_args)
  return(result)
}

#' Get collections of species analysed with a specified compara method
#'
#' Retrieves a list of all collections of species analysed with the specified compara method.
#' The compara method must be one of the methods returned by the `/info/compara/methods` endpoint.
#'
#' @param method A string representing the compara method to filter by (e.g., "EPO").
#' @param callback (Optional) A string representing the name of the callback subroutine for JSONP responses.
#' @param compara (Optional) A string representing the name of the compara database to use (e.g., "vertebrates").
#'
#' @return A list of parsed JSON responses containing all collections of species analysed with the specified compara method.
#'
#' @note
#' See more about the implemented endpoint [get_compara_species_sets()]
#' on the following [GET info/compara/species_sets/:method](https://rest.ensembl.org/documentation/info/compara_species_sets)
#' from the official [Ensembl Rest API](https://rest.ensembl.org/).
#'
#' @export
#' @examples
#' get_compara_species_sets("EPO")
#' get_compara_species_sets("EPO", compara = "vertebrates")
get_compara_species_sets <- function(method, callback = "randomlygeneratedname",
                                     compara = NULL) {
  if (missing(method)) {
    stop("'method' parameter is required.")
  }

  # Guard clause: without a callback nothing is requested.
  if (is.null(callback)) {
    warning("Callback is null. Returning an empty response.")
    return(list())
  }

  # Forward only the arguments that are not NULL.
  supplied <- Filter(
    Negate(is.null),
    list(method = method, compara = compara, callback = callback)
  )
  json_headers <- req_headers(content_type = "application/json")
  request_args <- c(
    list(res = "/info/compara/species_sets/{method}", .headers = json_headers),
    supplied
  )

  result <- do.call(get, request_args)
  result
}

#' Get a list of all available comparative genomics databases and their data release
#'
#' Retrieves a list of all available comparative genomics databases and their data release.
#' This endpoint is deprecated, and users are advised to use the `/info/genomes/division` endpoint instead.
#'
#' @param callback (Optional) A string representing the name of the callback subroutine
#' for JSONP responses.
#'
#' @return A list of parsed JSON responses containing all available comparative genomics
#' databases and their data release.
#'
#' See more about the implemented endpoint [get_comparas()]
#' on the following [GET info/comparas](https://rest.ensembl.org/documentation/info/comparas)
#' from the official [Ensembl Rest API](https://rest.ensembl.org/).
#'
#' @export
#' @examples
#' get_comparas()
get_comparas <- function(callback = "randomlygeneratedname") {
  # Guard clause: without a callback nothing is requested.
  if (is.null(callback)) {
    warning("Callback is null. Returning an empty response.")
    return(list())
  }

  result <- get(
    res = "/info/comparas",
    callback = callback,
    .headers = req_headers(content_type = "application/json")
  )
  result
}

#' Get a list of available data releases on the Ensembl REST server
#'
#' Retrieves a list of the data releases available on the Ensembl REST server.
#' It may return more than one release if the server has an infrequent,
#' non-standard configuration.
#'
#' @param callback (Optional) A string representing the name of the callback subroutine for JSONP responses.
#'
#' @return A list of parsed JSON responses containing the available data releases on the Ensembl REST server.
#'
#' @note
#' See more about the implemented endpoint [get_data()]
#' on the following [GET info/data](https://rest.ensembl.org/documentation/info/data)
#' from the official [Ensembl Rest API](https://rest.ensembl.org/).
#'
#' @export
#' @examples
#' get_data()
get_data <- function(callback = "randomlygeneratedname") {
  # Guard clause: without a callback nothing is requested.
  if (is.null(callback)) {
    warning("Callback is null. Returning an empty response.")
    return(list())
  }

  result <- get(
    res = "/info/data",
    callback = callback,
    .headers = req_headers(content_type = "application/json")
  )
  result
}

#' Get the Ensembl Genomes version of the databases backing the service
#'
#' Retrieves the Ensembl Genomes version of the databases supporting the current service.
#'
#' @param callback (Optional) A string representing the name of the callback subroutine for JSONP responses.
#'
#' @return A parsed JSON response containing the Ensembl Genomes version of the databases.
+#' +#' See more about the implemented endpoint [get_eg_version()] +#' on the following [GET info/eg_version](https://rest.ensembl.org/documentation/info/eg_version) +#' from the official [Ensembl Rest API](https://rest.ensembl.org/). +#' +#' @export +#' @examples +#' get_eg_version() +get_eg_version <- function(callback = "randomlygeneratedname") { + if (!is.null(callback)) { + response <- get( + res = "/info/eg_version", + callback = callback, + .headers = req_headers(content_type = "application/json") + ) + } else { + warning("Callback is null. Returning an empty response.") + response <- list() + } + + response +} + +#' Get external databases for a species +#' +#' Retrieves a list of all available external sources for a given species. +#' +#' @param species A string representing the species name or alias (e.g., "homo_sapiens"). +#' @param callback (Optional) A string representing the name of the callback subroutine +#' for JSONP responses. +#' @param feature (Optional) A string representing the feature to filter external DB entries +#' (e.g., "dna_align_feature", "protein_align_feature", "unmapped_object", "xref", "seq_region_synonym"). +#' @param filter (Optional) A string to restrict external DB searches to a single source +#' or pattern (e.g., "HGNC", "GO%"). +#' +#' @return A parsed JSON response containing a list of external sources associated +#' with the given species. +#' +#' See more about the implemented endpoint [get_external_dbs()] +#' on the following [GET info/external_dbs/:species](https://rest.ensembl.org/documentation/info/external_dbs) +#' from the official [Ensembl Rest API](https://rest.ensembl.org/). 
+#' +#' @export +#' @examples +#' get_external_dbs("homo_sapiens") +#' get_external_dbs("homo_sapiens", feature = "xref", filter = "HGNC") +get_external_dbs <- function(species, callback = "randomlygeneratedname", + feature = NULL, filter = NULL) { + if (missing(species)) { + stop("'species' parameter is required.") + } + + if (!is.null(callback)) { + query_params <- list() + if (!is.null(species)) query_params$species <- species + if (!is.null(feature)) query_params$feature <- feature + if (!is.null(filter)) query_params$filter <- filter + if (!is.null(callback)) query_params$callback <- callback + + headers <- req_headers(content_type = "application/json") + + response <- do.call(get, + c(list( + res = "/info/external_dbs/{species}", + .headers = headers), + query_params)) + + } else { + warning("Callback is null. Returning an empty response.") + response <- list() + } + + response +} + +#' Get list of all Ensembl divisions +#' +#' Retrieves a list of all available Ensembl divisions for which information is accessible. +#' +#' @param callback (Optional) A string representing the name of the callback subroutine for JSONP responses. +#' +#' @return A parsed JSON response containing the list of Ensembl divisions. +#' +#' See more about the implemented endpoint [._get_divisions()] +#' on the following [GET info/divisions](https://rest.ensembl.org/documentation/info/info_divisions) +#' from the official [Ensembl Rest API](https://rest.ensembl.org/). +#' +#' @export +#' @examples +#' ._get_divisions() +#' ._get_divisions(callback = "randomlygeneratedname") +._get_divisions <- function(callback = "randomlygeneratedname") { + + if (!is.null(callback)) { + response <- get( + res = "/info/divisions", + callback = callback, + .headers = req_headers(content_type = "application/json") + ) + } else { + warning("Callback is null. 
Returning an empty response.") + response <- list() + } + + response +} + +#' Get genome information +#' +#' Retrieves detailed information about a given genome based on its production name. +#' +#' @param name (Required) A string representing the production name of the genome +#' (e.g., "arabidopsis_thaliana"). +#' @param callback (Optional) A string representing the name of the callback subroutine +#' for JSONP responses. +#' @param expand (Optional) A boolean value (0 or 1). If set to 1, expands +#' the information to include details of sequences (can be very large). +#' Default is NULL. +#' +#' @return A parsed JSON response containing information about the specified genome. +#' +#' See more about the implemented endpoint [get_genome_info()] +#' on the following [GET info/genomes/:genome_name](https://rest.ensembl.org/documentation/info/info_genome) +#' from the official [Ensembl Rest API](https://rest.ensembl.org/). +#' +#' @export +#' @examples +#' get_genome_info(name = "arabidopsis_thaliana") +#' get_genome_info(name = "arabidopsis_thaliana", expand = 1) +#' get_genome_info(name = "arabidopsis_thaliana", callback = "randomlygeneratedname") +get_genome_info <- function(name, callback = "randomlygeneratedname", + expand = NULL) { + if (missing(name)) { + stop("The 'name' parameter is required.") + } + + if (!is.null(callback)) { + query_params <- list() + if (!is.null(name)) query_params$name <- name + if (!is.null(callback)) query_params$callback <- callback + if (!is.null(expand)) query_params$expand <- expand + + headers <- req_headers(content_type = "application/json") + + response <- do.call(get, + c(list( + res = "/info/genomes/{name}", + .headers = headers), + query_params)) + + } else { + warning("Callback is null. Returning an empty response.") + response <- list() + } + + response +} + +#' Get genome information by INSDC accession +#' +#' Retrieves detailed information about genomes containing a specified INSDC accession. 
+#' +#' @param accession (Required) A string representing the INSDC sequence accession (optionally versioned), e.g., "U00096". +#' @param callback (Optional) A string representing the name of the callback subroutine for JSONP responses. +#' @param expand (Optional) A boolean value (0 or 1). If set to 1, expands the information to include details of sequences (can be very large). +#' +#' @return A parsed JSON response containing information about genomes with the specified INSDC accession. +#' +#' See more about the implemented endpoint [get_genome_info_by_accession()] +#' on the following [GET info/genomes/accession/:accession](https://rest.ensembl.org/documentation/info/info_genomes_accession) +#' from the official [Ensembl Rest API](https://rest.ensembl.org/). +#' +#' @export +#' @examples +#' get_genome_info_by_accession(accession = "U00096") +#' get_genome_info_by_accession(accession = "U00096", expand = 1) +#' get_genome_info_by_accession(accession = "U00096", callback = "randomlygeneratedname") +get_genome_info_by_accession <- function(accession, callback = "randomlygeneratedname", + expand = NULL) { + if (missing(accession)) { + stop("The 'accession' parameter is required.") + } + + if (!is.null(callback)) { + query_params <- list() + if (!is.null(accession)) query_params$accession <- accession + if (!is.null(callback)) query_params$callback <- callback + if (!is.null(expand)) query_params$expand <- expand + + headers <- req_headers(content_type = "application/json") + + response <- do.call(get, + c(list( + res = "/info/genomes/accession/{accession}", + .headers = headers), + query_params)) + + } else { + warning("Callback is null. Returning an empty response.") + response <- list() + } + + response +} + +#' Get genome information by assembly ID +#' +#' Retrieves information about a genome associated with a specified assembly ID. +#' +#' @param assembly_id (Required) A string representing the INSDC assembly ID (optionally versioned, e.g., "GCA_902167145.1"). 
+#' @param callback (Optional) A string representing the name of the callback subroutine for JSONP responses. +#' @param expand (Optional) A boolean value (0 or 1). If set to 1, expands the information +#' to include details of sequences (can be very large). +#' +#' @return A parsed JSON response containing information about the genome associated +#' with the specified assembly ID. +#' +#' See more about the implemented endpoint [get_genome_info_by_assembly()] +#' on the following [GET info/genomes/assembly/:assembly_id](https://rest.ensembl.org/documentation/info/info_genomes_assembly) +#' from the official [Ensembl Rest API](https://rest.ensembl.org/). +#' +#' @export +#' @examples +#' get_genome_info_by_assembly(assembly_id = "GCA_902167145.1") +#' get_genome_info_by_assembly(assembly_id = "GCA_902167145.1", expand = 1) +#' get_genome_info_by_assembly(assembly_id = "GCA_902167145.1", callback = "randomlygeneratedname") +get_genome_info_by_assembly <- function(assembly_id, callback = "randomlygeneratedname", + expand = NULL) { + if (missing(assembly_id)) { + stop("The 'assembly_id' parameter is required.") + } + + if (!is.null(callback)) { + query_params <- list() + if (!is.null(assembly_id)) query_params$assembly_id <- assembly_id + if (!is.null(callback)) query_params$callback <- callback + if (!is.null(expand)) query_params$expand <- expand + + headers <- req_headers(content_type = "application/json") + + response <- do.call(get, + c(list( + res = "/info/genomes/assembly/{assembly_id}", + .headers = headers), + query_params)) + + } else { + warning("Callback is null. Returning an empty response.") + response <- list() + } + + response +} + +#' Get genome information for a specific division +#' +#' Retrieves information about all genomes in a given division. +#' Note: The response may be very large for divisions like Ensembl Bacteria. +#' +#' @param division_name (Required) A string representing the name of the division (e.g., "EnsemblPlants"). 
+#' @param callback (Optional) A string representing the name of the callback subroutine for JSONP responses. +#' @param expand (Optional) A boolean value (0 or 1). If set to 1, expands the information to include details of sequences (can be very large). +#' +#' @return A parsed JSON response containing information about genomes in the specified division. +#' +#' See more about the implemented endpoint [get_genome_info_by_division()] +#' on the following [GET info/genomes/division/:division_name](https://rest.ensembl.org/documentation/info/info_genomes_division) +#' from the official [Ensembl Rest API](https://rest.ensembl.org/). +#' +#' @export +#' @examples +#' get_genome_info_by_division(division_name = "EnsemblPlants") +#' get_genome_info_by_division(division_name = "EnsemblPlants", expand = 1) +#' get_genome_info_by_division(division_name = "EnsemblPlants", callback = "randomlygeneratedname") +get_genome_info_by_division <- function(division_name, callback = "randomlygeneratedname", + expand = NULL) { + if (missing(division_name)) { + stop("The 'division_name' parameter is required.") + } + + if (!is.null(callback)) { + query_params <- list() + if (!is.null(division_name)) query_params$division_name <- division_name + if (!is.null(callback)) query_params$callback <- callback + if (!is.null(expand)) query_params$expand <- expand + + headers <- req_headers(content_type = "application/json") + + response <- do.call(get, + c(list( + res = "/info/genomes/division/{division_name}", + .headers = headers), + query_params)) + + } else { + warning("Callback is null. Returning an empty response.") + response <- list() + } + + response +} + +#' Get genome information by taxonomy node +#' +#' Retrieves information about all genomes beneath a given node of the taxonomy. +#' +#' @param taxon_name (Required) A string representing the taxon name or NCBI taxonomy ID (e.g., "Homo sapiens"). 
+#' @param callback (Optional) A string representing the name of the callback subroutine for JSONP responses. +#' @param expand (Optional) A boolean value (0 or 1). If set to 1, expands the information +#' to include details of sequences (can be very large). +#' +#' @return A parsed JSON response containing information about genomes beneath the specified taxonomy node. +#' +#' See more about the implemented endpoint [get_genome_info_by_taxonomy()] +#' on the following [GET info/genomes/taxonomy/:taxon_name](https://rest.ensembl.org/documentation/info/info_genomes_taxonomy) +#' from the official [Ensembl Rest API](https://rest.ensembl.org/). +#' +#' @export +#' @examples +#' get_genome_info_by_taxonomy(taxon_name = "Homo sapiens") +#' get_genome_info_by_taxonomy(taxon_name = "Homo sapiens", expand = 1) +#' get_genome_info_by_taxonomy(taxon_name = "Homo sapiens", callback = "randomlygeneratedname") +get_genome_info_by_taxonomy <- function(taxon_name, callback = "randomlygeneratedname", + expand = NULL) { + if (missing(taxon_name)) { + stop("The 'taxon_name' parameter is required.") + } + + if (!is.null(callback)) { + query_params <- list() + if (!is.null(taxon_name)) query_params$taxon_name <- gsub(" ", "%20", taxon_name) + if (!is.null(callback)) query_params$callback <- callback + if (!is.null(expand)) query_params$expand <- expand + + headers <- req_headers(content_type = "application/json") + + response <- do.call(get, + c(list( + res = "/info/genomes/taxonomy/{taxon_name}", + .headers = headers), + query_params)) + + } else { + warning("Callback is null. Returning an empty response.") + response <- list() + } + + response +} + +#' Check Service Status +#' +#' Sends a ping request to the server to check if the service is alive. +#' +#' @param callback (Optional) A string representing the name of the callback +#' subroutine for JSONP responses. +#' +#' @return A parsed JSON response indicating the status of the service. 
+#' +#' See more about the implemented endpoint [ping_service()] +#' on the following [GET info/ping](https://rest.ensembl.org/documentation/info/ping) +#' from the official [Ensembl Rest API](https://rest.ensembl.org/). +#' +#' @export +#' @examples +#' ping_service() +#' ping_service(callback = "randomlygeneratedname") +ping_service <- function(callback = "randomlygeneratedname") { + if (!is.null(callback)) { + response <- get( + res = "/info/ping", + callback = callback, + .headers = req_headers(content_type = "application/json") + ) + } else { + warning("Callback is null. Returning an empty response.") + response <- list() + } + + response +} + +#' Get REST API Version +#' +#' Retrieves the current version of the Ensembl REST API. +#' +#' @param callback (Optional) A string representing the name of the callback +#' subroutine for JSONP responses. +#' +#' @return A parsed JSON response containing the REST API version information. +#' +#' See more about the implemented endpoint [._get_rest_version()] +#' on the following [GET info/rest](https://rest.ensembl.org/documentation/info/rest) +#' from the official [Ensembl Rest API](https://rest.ensembl.org/). +#' +#' @export +#' @examples +#' ._get_rest_version() +#' ._get_rest_version(callback = "randomlygeneratedname") +._get_rest_version <- function(callback = "randomlygeneratedname") { + + if (!is.null(callback)) { + response <- get( + res = "/info/rest", + callback = callback, + .headers = req_headers(content_type = "application/json") + ) + } else { + warning("Callback is null. Returning an empty response.") + response <- list() + } + + response +} + +#' Get Software Version +#' +#' Retrieves the current version of the Ensembl API used by the REST server. +#' +#' @param callback (Optional) A string representing the name of the callback subroutine for JSONP responses. +#' +#' @return A parsed JSON response containing the Ensembl API version information. 
+#' +#' See more about the implemented endpoint [._get_software_version()] +#' on the following [GET info/software](https://rest.ensembl.org/documentation/info/software) +#' from the official [Ensembl Rest API](https://rest.ensembl.org/). +#' +#' @export +#' @examples +#' ._get_software_version() +#' ._get_software_version(callback = "randomlygeneratedname") +._get_software_version <- function(callback = "randomlygeneratedname") { + if (!is.null(callback)) { + response <- get( + res = "/info/software", + callback = callback, + .headers = req_headers(content_type = "application/json") + ) + } else { + warning("Callback is null. Returning an empty response.") + response <- list() + } + + response +} + +#' Get Species Information +#' +#' Retrieves a list of all available species, their aliases, available adaptor groups, and data release. +#' +#' @param callback (Optional) A string representing the name of the callback subroutine for JSONP responses. +#' @param division (Optional) A string to filter by Ensembl or Ensembl Genomes division (default is "EnsemblVertebrates"). +#' @param hide_strain_info (Optional) A boolean flag to show/hide strain and +#' strain_collection information (default is 0, which shows strain info). +#' @param strain_collection (Optional) A string to filter by strain collection (e.g., "mouse"). +#' +#' @return A parsed JSON response containing species information. +#' +#' See more about the implemented endpoint [get_species_info()] +#' on the following [GET info/species](https://rest.ensembl.org/documentation/info/species) +#' from the official [Ensembl Rest API](https://rest.ensembl.org/). 
+#' +#' @export +#' @examples +#' get_species_info() +#' get_species_info(division = "EnsemblPlants") +#' get_species_info(hide_strain_info = 1) +#' get_species_info(strain_collection = "mouse") +get_species_info <- function(callback = "randomlygeneratedname", + division = "EnsemblVertebrates", + hide_strain_info = 0, strain_collection = NULL) { + + if (!is.null(callback)) { + query_params <- list() + if (!is.null(callback)) query_params$callback <- callback + if (!is.null(division)) query_params$division <- division + if (!is.null(hide_strain_info)) query_params$hide_strain_info <- hide_strain_info + if (!is.null(strain_collection)) query_params$strain_collection <- strain_collection + + headers <- req_headers(content_type = "application/json") + + response <- do.call(get, + c(list( + res = "/info/species", + .headers = headers), + query_params)) + + } else { + warning("Callback is null. Returning an empty response.") + response <- list() + } + + response +} + +#' Get Variation Sources for a Species +#' +#' Retrieves the variation sources used in Ensembl for a given species. +#' +#' @param species (Required) A string representing the species name or alias (e.g., "homo_sapiens"). +#' @param callback (Optional) A string representing the name of the callback subroutine for JSONP responses. +#' @param filter (Optional) A string to restrict the variation source searches to a single source +#' (e.g., "dbSNP", "ClinVar", "OMIM", "UniProt", "HGMD"). +#' +#' @return A parsed JSON response containing the variation sources for the specified species. +#' +#' See more about the implemented endpoint [._get_variation_sources()] +#' on the following [GET info/variation/:species](https://rest.ensembl.org/documentation/info/variation) +#' from the official [Ensembl Rest API](https://rest.ensembl.org/). 
+#' +#' @export +#' @examples +#' ._get_variation_sources("homo_sapiens") +#' ._get_variation_sources("homo_sapiens", filter = "ClinVar") +#' ._get_variation_sources("homo_sapiens", callback = "randomlygeneratedname") +._get_variation_sources <- function(species, callback = "randomlygeneratedname", + filter = NULL) { + if (missing(species)) { + stop("The 'species' parameter is required.") + } + + if (!is.null(callback)) { + query_params <- list() + if (!is.null(callback)) query_params$callback <- callback + if (!is.null(species)) query_params$species <- species + if (!is.null(filter)) query_params$filter <- filter + + headers <- req_headers(content_type = "application/json") + + response <- do.call(get, + c(list( + res = "/info/variation/{species}", + .headers = headers), + query_params)) + + } else { + warning("Callback is null. Returning an empty response.") + response <- list() + } + + response +} + +#' Get Variant Consequence Types +#' +#' Retrieves a list of all variant consequence types available in Ensembl. +#' +#' @param callback (Optional) A string representing the name of the callback subroutine for JSONP responses. +#' @param rank (Optional) A boolean (0 or 1) to include consequence ranking in the response. Default is 0. +#' +#' @return A parsed JSON response containing the list of variant consequence types. +#' +#' See more about the implemented endpoint [get_consequence_types()] +#' on the following [GET info/variation/consequence_types](https://rest.ensembl.org/documentation/info/variation_consequence_types) +#' from the official [Ensembl Rest API](https://rest.ensembl.org/). 
+#' +#' @export +#' @examples +#' get_consequence_types() +#' get_consequence_types(rank = 1) +#' get_consequence_types(callback = "randomCallback") +get_consequence_types <- function(callback = "randomlygeneratedname", + rank = NULL) { + + if (!is.null(callback)) { + query_params <- list() + if (!is.null(callback)) query_params$callback <- callback + if (!is.null(rank)) query_params$rank <- rank + + headers <- req_headers(content_type = "application/json") + + response <- do.call(get, + c(list( + res = "/info/variation/consequence_types", + .headers = headers), + query_params)) + + } else { + warning("Callback is null. Returning an empty response.") + response <- list() + } + + response +} + +#' Get Population Individuals +#' +#' Retrieves a list of all individuals for a specified population from a species in Ensembl. +#' +#' @param population_name (Required) A string representing the name of the population (e.g., "1000GENOMES:phase_3:ASW"). +#' @param species (Required) A string representing the species name or alias (e.g., "human"). +#' @param callback (Optional) A string representing the name of the callback subroutine for JSONP responses. +#' +#' @return A parsed JSON response containing the list of individuals for the specified population. +#' +#' See more about the implemented endpoint [get_population_individuals()] +#' on the following [GET info/variation/populations/:species/:population_name](https://rest.ensembl.org/documentation/info/variation_population_name) +#' from the official [Ensembl Rest API](https://rest.ensembl.org/).
+#' +#' @export +#' @examples +#' get_population_individuals(species = "human", population_name = "1000GENOMES:phase_3:ASW") +#' get_population_individuals(species = "homo_sapiens", population_name = "1000GENOMES:phase_3:YRI") +#' get_population_individuals( +#' species = "human", +#' population_name = "1000GENOMES:phase_3:CEU", +#' callback = "randomCallback" +#' ) +get_population_individuals <- function(species, population_name, + callback = "randomlygeneratedname") { + if (missing(species) || missing(population_name)) { + stop("Both 'species' and 'population_name' are required parameters.") + } + + if (!is.null(callback)) { + query_params <- list() + if (!is.null(species)) query_params$species <- species + if (!is.null(population_name)) query_params$population_name <- population_name + if (!is.null(callback)) query_params$callback <- callback + + headers <- req_headers(content_type = "application/json") + + response <- + do.call(get, + c(list( + res = "/info/variation/populations/{species}/{population_name}", + .headers = headers), + query_params) + ) + + } else { + warning("Callback is null. Returning an empty response.") + response <- list() + } + + response +} + +#' Get Populations for a Species +#' +#' Retrieves a list of all populations for a specified species in Ensembl. +#' +#' @param species (Required) A string representing the species name or alias (e.g., "homo_sapiens"). +#' @param callback (Optional) A string representing the name of the callback subroutine for JSONP responses. +#' @param filter (Optional) A string to restrict populations returned +#' (e.g., "LD" to filter populations with linkage disequilibrium data). +#' +#' @return A parsed JSON response containing the list of populations for the specified species. 
+#' +#' See more about the implemented endpoint [get_species_populations()] +#' on the following [GET info/variation/populations/:species](https://rest.ensembl.org/documentation/info/variation_populations) +#' from the official [Ensembl Rest API](https://rest.ensembl.org/). +#' +#' @export +#' @examples +#' get_species_populations(species = "homo_sapiens") +#' get_species_populations(species = "human", filter = "LD") +#' get_species_populations( +#' species = "homo_sapiens", +#' callback = "randomlygeneratedname", +#' filter = "LD" +#' ) +get_species_populations <- function(species, callback = "randomlygeneratedname", + filter = NULL) { + if (missing(species)) { + stop("'species' is a required parameter.") + } + + if (!is.null(callback)) { + query_params <- list() + query_params$species <- species + if (!is.null(filter)) query_params$filter <- filter + if (!is.null(callback)) query_params$callback <- callback + + headers <- req_headers(content_type = "application/json") + + response <- + do.call(get, + c(list( + res = "/info/variation/populations/{species}", + .headers = headers), + query_params)) + + } else { + warning("Callback is null. Returning an empty response.") + response <- list() + } + + response +} + +# -------------------------------------------------------- # +## Linkage Disequilibrium ==== + +#' Get Linkage Disequilibrium (LD) values +#' +#' Computes and retrieves LD values between a given variant and all other +#' variants within a window of up to 500 kb in the specified population. +#' +#' @param species A string representing the species name +#' or alias. +#' @param id A string representing the variant ID +#' (e.g., rs56116432). +#' @param population_name A string representing the population +#' for which to compute LD. +#' Use GET /info/variation/populations/:species?filter=LD to +#' retrieve a list of all populations with LD data. This endpoint is already +#' implemented. 
Please use [get_species_populations()] with filter "LD" +#' to retrieve valid populations. +#' @param attribs \emph{(Optional)} A boolean indicating whether +#' to add variation attributes such as chromosome, start, end, strand, +#' consequence type, and clinical significance. Default is NULL. +#' @param callback \emph{(Optional)} A string representing the name of the +#' callback subroutine for JSONP responses. +#' @param d_prime \emph{(Optional)} A float value (0-1) to filter results +#' by D' (linkage disequilibrium measure). Only returns pairs with D' ≥ +#' the specified value. Default is NULL. +#' @param r2 \emph{(Optional)} A float value (0-1) to filter results by +#' r² (correlation coefficient). Only returns pairs with r² ≥ the specified +#' value. Default is NULL. +#' @param window_size \emph{(Optional)} An integer specifying the window size +#' in kb (max 500). Defaults to 500 kb. +#' +#' @return A list of parsed JSON responses containing the LD values for +#' the provided variant. +#' +#' @note +#' See more about the implemented endpoint [get_ld_by_variant()] +#' on the following [GET ld/:species/:id/:population_name](https://rest.ensembl.org/documentation/info/ld) +#' from the official [Ensembl Rest API](https://rest.ensembl.org/).
+#' +#' @export +#' @examples +#' get_ld_by_variant("homo_sapiens", "rs56116432", "1000GENOMES:phase_3:KHV") +#' get_ld_by_variant("homo_sapiens", "rs56116432", "1000GENOMES:phase_3:KHV", +#' d_prime = 0.8, r2 = 0.85) +#' get_ld_by_variant("homo_sapiens", "rs56116432", "1000GENOMES:phase_3:KHV", +#' window_size = 250) +#' +get_ld_by_variant <- function(species, id, population_name, attribs = NULL, + callback = "randomlygeneratedname", + d_prime = NULL, r2 = NULL, + window_size = 500) { + if (missing(species) || missing(id) || missing(population_name)) { + stop("'species', 'id', and 'population_name' parameters are all required.") + } + + if (!is.null(window_size) && (window_size < 1 || window_size > 500)) { + stop("'window_size' parameter must be between 1 and 500 kb.") + } + + if (!is.null(callback)) { + query_params <- list() + query_params$species <- species + query_params$id <- id + query_params$population_name <- population_name + if (!is.null(attribs)) query_params$attribs <- as.integer(attribs) + if (!is.null(d_prime)) query_params$d_prime <- d_prime + if (!is.null(r2)) query_params$r2 <- r2 + if (!is.null(window_size)) query_params$window_size <- window_size + + headers <- req_headers(content_type = "application/json") + + response <- do.call(get, c(list( + res = "/ld/{species}/{id}/{population_name}", + .headers = headers), + query_params)) + + } else { + warning("Callback is null. Returning an empty response.") + response <- list() + } + + response +} + +#' Get Pairwise Linkage Disequilibrium (LD) Values +#' +#' Computes and retrieves LD values between two given variants. +#' +#' @param species A string representing the species name or +#' alias (e.g., "homo_sapiens"). +#' @param id1 A string representing the first variant ID +#' (e.g., "rs6792369"). +#' @param id2 A string representing the second variant ID +#' (e.g., "rs1042779"). +#' @param callback \emph{(Optional)} A string representing the name of +#' the callback +#' subroutine for JSONP responses. 
+#' @param d_prime \emph{(Optional)} A float value (0-1) to filter results +#' by D' (linkage disequilibrium measure). +#' Only returns pairs with D' ≥ the specified value. +#' @param r2 \emph{(Optional)} A float value (0-1) to filter results by r² +#' (correlation coefficient). Only returns pairs with r² ≥ the specified value. +#' @param population_name \emph{(Optional)} A string representing the population +#' for which to compute LD. Use [get_species_populations()] with filter "LD" +#' to retrieve valid populations. +#' +#' @return A parsed JSON response containing the LD values for the specified +#' variant pair. +#' +#' See more about the implemented endpoint [get_pairwise_ld_values()] +#' on the following [GET ld/:species/pairwise/:id1/:id2](https://rest.ensembl.org/documentation/info/ld_pairwise) +#' from the official [Ensembl Rest API](https://rest.ensembl.org/). +#' +#' @export +#' @examples +#' get_pairwise_ld_values(species = "homo_sapiens", id1 = "rs6792369", +#' id2 = "rs1042779") +#' get_pairwise_ld_values(species = "homo_sapiens", id1 = "rs6792369", +#' id2 = "rs1042779", r2 = 0.85) +#' get_pairwise_ld_values(species = "homo_sapiens", id1 = "rs6792369", +#' id2 = "rs1042779", d_prime = 1.0, +#' population_name = "1000GENOMES:phase_3:KHV") +get_pairwise_ld_values <- function(species, id1, id2, + callback = "randomlygeneratedname", + d_prime = NULL, r2 = NULL, + population_name = NULL) { + if (missing(species) || missing(id1) || missing(id2)) { + stop("'species', 'id1', and 'id2' parameters are all required.") + } + + if (!is.null(callback)) { + query_params <- list() + query_params$species <- species + query_params$id1 <- id1 + query_params$id2 <- id2 + if (!is.null(d_prime)) query_params$d_prime <- d_prime + if (!is.null(r2)) query_params$r2 <- r2 + if (!is.null(population_name)) query_params$population_name <- population_name + + headers <- req_headers(content_type = "application/json") + + response <- do.call(get, c(list( + res = 
"/ld/{species}/pairwise/{id1}/{id2}", + .headers = headers), + query_params)) + + } else { + warning("Callback is null. Returning an empty response.") + response <- list() + } + + response +} + +#' Get Linkage Disequilibrium (LD) Values for a Genomic Region +#' +#' Computes and retrieves LD values between all pairs of variants within +#' a defined genomic region. +#' +#' @param species A string representing the species name or alias +#' (e.g., "homo_sapiens"). +#' @param region A string defining the genomic region in the format +#' "chr:start..end". +#' The maximum region size allowed is 500 kb. If the region overlaps +#' the MHC region, the maximum is 10 kb. +#' @param population_name A string representing the population for which +#' LD should be computed. +#' Use [get_species_populations()] with filter "LD" to retrieve valid +#' populations. +#' @param callback \emph{(Optional)} A string representing the name of the +#' callback subroutine for JSONP responses. +#' @param d_prime \emph{(Optional)} A float value (0-1) to filter results +#' by D' (linkage disequilibrium measure). +#' Only returns pairs with D' ≥ the specified value. +#' @param r2 \emph{(Optional)} A float value (0-1) to filter results by r² +#' (correlation coefficient). +#' Only returns pairs with r² ≥ the specified value. +#' +#' @return A parsed JSON response containing the LD values for all variant +#' pairs within the specified region. +#' +#' See more about the implemented endpoint [get_ld_values_by_region()] +#' on the following [GET ld/:species/region/:region/:population_name](https://rest.ensembl.org/documentation/info/ld_region) +#' from the official [Ensembl Rest API](https://rest.ensembl.org/). 
+#' +#' @export +#' @examples +#' get_ld_values_by_region(species = "homo_sapiens", +#' region = "6:25837556..25843455", +#' population_name = "1000GENOMES:phase_3:KHV") +#' get_ld_values_by_region(species = "homo_sapiens", +#' region = "6:25837556..25843455", +#' population_name = "1000GENOMES:phase_3:KHV", +#' r2 = 0.85) +#' get_ld_values_by_region(species = "homo_sapiens", +#' region = "6:25837556..25843455", +#' population_name = "1000GENOMES:phase_3:KHV", +#' d_prime = 1.0) +get_ld_values_by_region <- function(species, region, population_name, + callback = "randomlygeneratedname", + d_prime = NULL, r2 = NULL) { + if (missing(species) || missing(region) || missing(population_name)) { + stop("'species', 'region', and 'population_name' parameters are all required.") + } + + if (!is.null(callback)) { + query_params <- list() + query_params$species <- species + query_params$region <- region + query_params$population_name <- population_name + if (!is.null(d_prime)) query_params$d_prime <- d_prime + if (!is.null(r2)) query_params$r2 <- r2 + + headers <- req_headers(content_type = "application/json") + + response <- do.call(get, c(list( + res = "/ld/{species}/region/{region}/{population_name}", + .headers = headers), + query_params)) + + } else { + warning("Callback is null. 
Returning an empty response.") + response <- list() + } + + response +} + + +# -------------------------------------------------------- # +## Lookup ==== + +# -------------------------------------------------------- # +## Mapping ==== + +# -------------------------------------------------------- # +## Ontologies and taxonomy ==== + +# -------------------------------------------------------- # +## Overlap ==== + +# -------------------------------------------------------- # +## Phenotype annotations ==== + +# -------------------------------------------------------- # +## Regulation ==== + +# -------------------------------------------------------- # +## Transcript Haplotypes ==== + +# -------------------------------------------------------- # +## VEP ==== + +# -------------------------------------------------------- # +## Variation ==== + +# -------------------------------------------------------- # +## Variation GA4GH ==== diff --git a/R/eqtl.R b/R/eqtl.R index ed9972f..4b13998 100644 --- a/R/eqtl.R +++ b/R/eqtl.R @@ -1,6 +1,7 @@ eqtl_tissue_tbl <- function( - species_name = character(), - tissue = character()) { + species_name = character(), + tissue = character() +) { tbl <- tibble::tibble( species_name = species_name, tissue = tissue @@ -10,6 +11,7 @@ eqtl_tissue_tbl <- function( } json_list_to_eqtl_tissue_tbl <- function(species_name, json_list) { + tissues <- names(json_list) tbl <- eqtl_tissue_tbl( species_name = species_name, @@ -19,7 +21,7 @@ json_list_to_eqtl_tissue_tbl <- function(species_name, json_list) { # Drop rows if all columns except species_name are NA tbl2 <- tidyr::drop_na(tbl, -species_name) - tissue <- rlang::sym("tissue") + tissue <- rlang::sym('tissue') # Sort alphabetically by tissue tbl3 <- dplyr::arrange(tbl2, species_name, !!tissue) return(tbl3) diff --git a/R/genomic_range.R b/R/genomic_range.R index f484bf4..6f0f95e 100644 --- a/R/genomic_range.R +++ b/R/genomic_range.R @@ -17,96 +17,74 @@ #' #' @export genomic_range <- function(chr, 
start, end, starting_position_index = 1L) { - if (!(identical(starting_position_index, 0L) || identical(starting_position_index, 1L))) { + + if (!(identical(starting_position_index, 0L) || identical(starting_position_index, 1L))) stop("starting_position_index must be either 0L or 1L.") - } - if (!is.character(chr)) { + if (!is.character(chr)) stop("chr needs to a character vector.") - } - if (identical(length(chr), 0L)) { + if (identical(length(chr), 0L)) stop("chr is empty, must have at least one chromosome name.") - } - if (!is.integer(start)) { + if (!is.integer(start)) stop("start needs to an integer vector.") - } - if (identical(length(start), 0L)) { + if (identical(length(start), 0L)) stop("start is empty, must have at least one start position.") - } - if (!is.integer(end)) { + if (!is.integer(end)) stop("end needs to an integer vector.") - } - if (identical(length(end), 0L)) { + if (identical(length(end), 0L)) stop("end is empty, must have at least one end position.") - } n_chr <- length(chr) n_start <- length(start) n_end <- length(end) - if (!(identical(n_start, n_end) && identical(n_start, n_chr))) { # identical(n_end, n_chr) == TRUE follows. - stop( - "chr, start and end vectors should be of same length: ", - "len(chr) = ", n_chr, ", ", - "len(start) = ", n_start, ", and ", - "len(end) = ", n_end, "." - ) - } + if (!(identical(n_start, n_end) && identical(n_start, n_chr))) # identical(n_end, n_chr) == TRUE follows. + stop("chr, start and end vectors should be of same length: ", + "len(chr) = ", n_chr, ", ", + "len(start) = ", n_start, ", and ", + "len(end) = ", n_end, ".") is_start_below_starting_pos <- start < starting_position_index - if (any(is_start_below_starting_pos)) { - stop( - "All start positions must be greater than ", starting_position_index, ", these are not: ", - concatenate::cc_and(start[is_start_below_starting_pos], oxford = TRUE), "." 
- ) - } + if (any(is_start_below_starting_pos)) + stop("All start positions must be greater than ", starting_position_index, ", these are not: ", + concatenate::cc_and(start[is_start_below_starting_pos], oxford = TRUE), ".") is_end_below_starting_pos <- end < starting_position_index - if (any(is_end_below_starting_pos)) { - stop( - "All end positions must be greater than ", starting_position_index, ", these are not: ", - concatenate::cc_and(end[is_end_below_starting_pos], oxford = TRUE), "." - ) - } + if (any(is_end_below_starting_pos)) + stop("All end positions must be greater than ", starting_position_index, ", these are not: ", + concatenate::cc_and(end[is_end_below_starting_pos], oxford = TRUE), ".") # Generate genomic ranges strings. gen_ranges <- sprintf("%s:%d..%d", chr, start, end) # When is start greater than end? (should not happen.) start_gr_end <- start > end - if (any(start_gr_end)) { - stop( - "start positions cannot be larger than end positions: ", - concatenate::cc_and(gen_ranges[start_gr_end], oxford = TRUE), "." - ) - } + if (any(start_gr_end)) + stop("start positions cannot be larger than end positions: ", + concatenate::cc_and(gen_ranges[start_gr_end], oxford = TRUE), ".") # Check that all genomic ranges' strings conform to criteria of is_genomic_range. is_gen_ranges <- is_genomic_range(gen_ranges) - if (!all(is_gen_ranges)) { - stop( - "The following are not well-formed genomic ranges: ", - concatenate::cc_and(gen_ranges[!is_gen_ranges], oxford = TRUE), "." 
- ) - } + if (!all(is_gen_ranges)) + stop("The following are not well-formed genomic ranges: ", + concatenate::cc_and(gen_ranges[!is_gen_ranges], oxford = TRUE), ".") return(gen_ranges) -} -is_genomic_range <- function(genomic_range) { - stringr::str_detect(genomic_range, "\\w+:\\d+\\.\\.\\d+") } +is_genomic_range <- function(genomic_range) + stringr::str_detect(genomic_range, '\\w+:\\d+\\.\\.\\d+') + split_genomic_range <- function(genomic_range) { - split_coordinates <- stringr::str_match( - genomic_range, - "^(\\w+):(\\d+)\\.\\.(\\d+)$" - )[, -1, drop = FALSE] - colnames(split_coordinates) <- c("chromosome", "start", "end") + split_coordinates <- stringr::str_match(genomic_range, + '^(\\w+):(\\d+)\\.\\.(\\d+)$')[, -1, drop = FALSE] + + colnames(split_coordinates) <- c('chromosome', 'start', 'end') return(split_coordinates) } diff --git a/R/get.R b/R/get.R new file mode 100755 index 0000000..5abf18c --- /dev/null +++ b/R/get.R @@ -0,0 +1,50 @@ +#' The function for the GET method +#' +#' The [get()] function is a wrapper around the `reqs` function that performs +#' GET requests to the Ensembl API, handling rate limiting automatically. +#' +#' @param res The resource (path) for the API request, can include variables +#' in curly braces `{}` that will be replaced with the corresponding parameter. +#' @param ... Additional named parameters to be included in the request URL. +#' @param .headers An S3 list with class `ensemblr_req_hdr`. Use the helper +#' [req_headers()] to create such an object. +#' @param rate The maximum number of requests per second to allow. +#' Defaults to 15 per minute (15/60). +#' @param verbose Logical, if TRUE, enables detailed logging of request and response details. +#' +#' @return A list of responses, one for each request made. 
+#' +#' @keywords internal +get <- function(res, ..., .headers = req_headers(), rate = 15/60, verbose = FALSE) { + requests <- reqs(res, ..., .headers = .headers) + requests <- purrr::map(requests, httr2::req_throttle, rate = rate) + + responses <- vector("list", length(requests)) + for (i in seq_along(requests)) { + if (verbose) message(glue::glue("Performing request {i}/{length(requests)}...")) + repeat { + response <- httr2::req_perform(requests[[i]]) + status_code <- httr2::resp_status(response) + + if (status_code == 429) { + retry_after <- as.numeric(httr2::resp_headers(response)$`Retry-After`) + if (is.na(retry_after)) { + warning("Rate limit exceeded, but `Retry-After` header is missing. Defaulting to 60 seconds.") + retry_after <- 60 + } + message(glue::glue("Rate limit reached for request {i}, waiting {retry_after} seconds...")) + Sys.sleep(retry_after) + } else { + if (status_code != 200) { + warning(glue::glue("Request {i} failed with status code {status_code}.")) + } + responses[[i]] <- response + break + } + } + } + + if (verbose) message("All requests completed.") + return(responses) +} + diff --git a/R/get_assemblies.R b/R/get_assemblies.R index baafcd4..07c5169 100644 --- a/R/get_assemblies.R +++ b/R/get_assemblies.R @@ -72,18 +72,17 @@ parse_assembly_details <- function(species_name, lst) { #' get_assemblies() #' #' # Get details about the Mouse and the Fruit Fly genomes -#' get_assemblies(c("mus_musculus", "drosophila_melanogaster")) +#' get_assemblies(c('mus_musculus', 'drosophila_melanogaster')) #' #' @md #' @export -get_assemblies <- function(species_name = "homo_sapiens", +get_assemblies <- function(species_name = 'homo_sapiens', verbose = FALSE, warnings = TRUE, progress_bar = TRUE) { - resource_urls <- glue::glue( - "/info/assembly/", - "{species_name}?bands=0" - ) + + resource_urls <- glue::glue('/info/assembly/', + '{species_name}?bands=0') responses <- request_parallel( @@ -94,10 +93,8 @@ get_assemblies <- function(species_name = 
"homo_sapiens", ) # Only keep those responses that responded successfully, i.e. with status == "OK". - responses_ok <- purrr::keep(responses, ~ identical(.x$status, "OK") && !rlang::is_empty(.x$content)) - if (rlang::is_empty(responses_ok)) { - return(assembly_details()) - } + responses_ok <- purrr::keep(responses, ~ identical(.x$status, 'OK') && !rlang::is_empty(.x$content)) + if (rlang::is_empty(responses_ok)) return(assembly_details()) return( purrr::imap_dfr( @@ -108,4 +105,6 @@ get_assemblies <- function(species_name = "homo_sapiens", ) ) ) + } + diff --git a/R/get_cytogenetic_bands.R b/R/get_cytogenetic_bands.R index 6ad26a6..e692b18 100644 --- a/R/get_cytogenetic_bands.R +++ b/R/get_cytogenetic_bands.R @@ -6,42 +6,36 @@ cytogenetic_bands_tbl <- function(species_name = character(), end = integer(), stain = character(), strand = integer()) { - tibble::tibble( - species_name = species_name, - assembly_name = assembly_name, - cytogenetic_band = cytogenetic_band, - chromosome = chromosome, - start = start, - end = end, - stain = stain, - strand = strand - ) + + tibble::tibble(species_name = species_name, + assembly_name = assembly_name, + cytogenetic_band = cytogenetic_band, + chromosome = chromosome, + start = start, + end = end, + stain = stain, + strand = strand) } #' @importFrom rlang .data parse_cytogenetic_bands <- function(species_name, lst) { - if (rlang::is_empty(lst$top_level_region$bands)) { - return(cytogenetic_bands_tbl()) - } + + if(rlang::is_empty(lst$top_level_region$bands)) return(cytogenetic_bands_tbl()) lst$top_level_region$bands %>% dplyr::bind_rows() %>% tibble::as_tibble() %>% tibble::add_column(species_name = species_name, .before = 1L) %>% - dplyr::rename( - cytogenetic_band = .data$id, - chromosome = .data$seq_region_name - ) %>% - dplyr::relocate( - "species_name", - "assembly_name", - "cytogenetic_band", - "chromosome", - "start", - "end", - "stain", - "strand" - ) + dplyr::rename(cytogenetic_band = .data$id, + chromosome = 
.data$seq_region_name) %>% + dplyr::relocate('species_name', + 'assembly_name', + 'cytogenetic_band', + 'chromosome', + 'start', + 'end', + 'stain', + 'strand') } #' Get cytogenetic bands by species @@ -85,18 +79,17 @@ parse_cytogenetic_bands <- function(species_name, lst) { #' get_cytogenetic_bands() #' #' # Get toplevel sequences for Mus musculus -#' get_cytogenetic_bands("mus_musculus") +#' get_cytogenetic_bands('mus_musculus') #' #' @md #' @export -get_cytogenetic_bands <- function(species_name = "homo_sapiens", - verbose = FALSE, - warnings = TRUE, - progress_bar = TRUE) { - resource_urls <- glue::glue( - "/info/assembly/", - "{species_name}?bands=1" - ) +get_cytogenetic_bands <- function(species_name = 'homo_sapiens', + verbose = FALSE, + warnings = TRUE, + progress_bar = TRUE) { + + resource_urls <- glue::glue('/info/assembly/', + '{species_name}?bands=1') responses <- request_parallel( @@ -108,20 +101,16 @@ get_cytogenetic_bands <- function(species_name = "homo_sapiens", # Only keep those responses that responded successfully, i.e. with status == "OK". 
responses_ok <- - purrr::keep( - responses, - ~ identical(.x$status, "OK") && - !rlang::is_empty(.x$content) - ) - if (rlang::is_empty(responses_ok)) { + purrr::keep(responses, + ~ identical(.x$status, 'OK') && + !rlang::is_empty(.x$content)) + if (rlang::is_empty(responses_ok)) return(cytogenetic_bands_tbl()) - } return(purrr::imap_dfr( .x = responses_ok, - .f = ~ parse_cytogenetic_bands( - species_name = species_name[.y], - lst = .x$content - ) + .f = ~ parse_cytogenetic_bands(species_name = species_name[.y], + lst = .x$content) )) + } diff --git a/R/get_ensembl_genomes_version.R b/R/get_ensembl_genomes_version.R index b10f0b8..4f128dc 100644 --- a/R/get_ensembl_genomes_version.R +++ b/R/get_ensembl_genomes_version.R @@ -12,11 +12,11 @@ #' #' @export get_ensembl_genomes_version <- function(verbose = FALSE, warnings = TRUE) { - response <- request("/info/eg_version", verbose = verbose, warnings = warnings) - if (identical(response$status, "OK")) { + response <- request('/info/eg_version', verbose = verbose, warnings = warnings) + + if(identical(response$status, 'OK')) return(response$content$version) - } else { + else return(NA_integer_) - } } diff --git a/R/get_eqtl_pval_by_gene.R b/R/get_eqtl_pval_by_gene.R index 0305b77..989c65e 100644 --- a/R/get_eqtl_pval_by_gene.R +++ b/R/get_eqtl_pval_by_gene.R @@ -1,43 +1,45 @@ eqtl_tbl <- function(species_name = character(), - ensembl_id = character(), - variant_id = character(), - tissue = character(), - display_consequence = character(), - seq_region_name = character(), - seq_region_start = integer(), - seq_region_end = integer(), - beta = double(), - pvalue = double()) { - tibble::tibble( - species_name = species_name, - ensembl_id = ensembl_id, - variant_id = variant_id, - tissue = tissue, - display_consequence = display_consequence, - seq_region_name = seq_region_name, - seq_region_start = seq_region_start, - seq_region_end = seq_region_end, - beta = beta, - pvalue = pvalue - ) + ensembl_id = character(), + 
variant_id = character(), + tissue = character(), + display_consequence = character(), + seq_region_name = character(), + seq_region_start = integer(), + seq_region_end = integer(), + beta = double(), + pvalue = double() + ) { + + tibble::tibble(species_name = species_name, + ensembl_id = ensembl_id, + variant_id = variant_id, + tissue = tissue, + display_consequence = display_consequence, + seq_region_name = seq_region_name, + seq_region_start = seq_region_start, + seq_region_end = seq_region_end, + beta = beta, + pvalue = pvalue) + } #' @importFrom rlang .data to_eqtl_tbl <- function(species_name, ensembl_id, tbl) { + tbl %>% - tibble::as_tibble() %>% - tidyr::pivot_wider( - id_cols = c( - "snp", - "tissue", - "display_consequence", - "seq_region_name", - "seq_region_start", - "seq_region_end" - ), - names_from = "statistic", - values_from = "value" - ) %>% + tibble::as_tibble() %>% + tidyr::pivot_wider( + id_cols = c( + 'snp', + 'tissue', + 'display_consequence', + 'seq_region_name', + 'seq_region_start', + 'seq_region_end' + ), + names_from = 'statistic', + values_from = 'value' + ) %>% dplyr::mutate(seq_region_start = as.integer(.data$seq_region_start), seq_region_end = as.integer(.data$seq_region_end)) %>% tibble::add_column(species_name = species_name, ensembl_id = ensembl_id, .before = 1L) %>% dplyr::rename(variant_id = .data$snp, pvalue = .data$`p-value`) @@ -84,7 +86,7 @@ to_eqtl_tbl <- function(species_name, ensembl_id, tbl) { #' [/eqtl/id/:species/:stable_id](https://rest.ensembl.org/documentation/info/species_id). 
#' #' @examples -#' get_eqtl_pval_by_gene("ENSG00000248378") +#' get_eqtl_pval_by_gene('ENSG00000248378') #' #' @md #' @export diff --git a/R/get_eqtl_pval_by_variant.R b/R/get_eqtl_pval_by_variant.R index c52d9a3..b8ce6d2 100644 --- a/R/get_eqtl_pval_by_variant.R +++ b/R/get_eqtl_pval_by_variant.R @@ -12,19 +12,21 @@ eqtl_tbl2 <- function(species_name = character(), beta = beta, pvalue = pvalue ) + } #' @importFrom rlang .data to_eqtl_tbl2 <- function(species_name, variant_id, tbl) { + tbl %>% tibble::as_tibble() %>% tidyr::pivot_wider( id_cols = c( - "gene", - "tissue" + 'gene', + 'tissue' ), - names_from = "statistic", - values_from = "value" + names_from = 'statistic', + values_from = 'value' ) %>% tibble::add_column(species_name = species_name, variant_id = variant_id, .before = 1L) %>% dplyr::rename(ensembl_id = .data$gene, pvalue = .data$`p-value`) @@ -65,7 +67,7 @@ to_eqtl_tbl2 <- function(species_name, variant_id, tbl) { #' [/eqtl/variant_name/:species/:variant_name](https://rest.ensembl.org/documentation/info/species_variant). 
#' #' @examples -#' get_eqtl_pval_by_variant("rs80100814") +#' get_eqtl_pval_by_variant('rs80100814') #' #' @md #' @export diff --git a/R/get_homology_info.R b/R/get_homology_info.R deleted file mode 100644 index 23b62d4..0000000 --- a/R/get_homology_info.R +++ /dev/null @@ -1,50 +0,0 @@ -#' @importFrom tibblify tspec_object -#' @importFrom tibblify tib_df -#' @importFrom tibblify tib_row -#' @importFrom tibblify tib_dbl -#' @importFrom tibblify tib_chr -#' @importFrom tibblify tib_int -spec_homology_info_symbol <- function() { - tspec_object( - tib_df( - "data", - tib_df( - "homologies", - tib_dbl("dn_ds"), - tib_row( - "target", - tib_chr("protein_id"), - tib_chr("species"), - tib_chr("cigar_line"), - tib_chr("align_seq"), - tib_chr("id"), - tib_int("taxon_id"), - tib_dbl("perc_pos"), - tib_dbl("perc_id"), - ), - tib_chr("method_link_type"), - tib_row( - "source", - tib_int("taxon_id"), - tib_dbl("perc_pos"), - tib_chr("id"), - tib_dbl("perc_id"), - tib_chr("cigar_line"), - tib_chr("species"), - tib_chr("protein_id"), - tib_chr("align_seq"), - ), - tib_chr("type"), - tib_chr("taxonomy_level"), - ), - tib_chr("id"), - ), - ) -} - -get_homology_info_by_symbol <- function(species_name, symbol = NULL) { - resource <- glue::glue("/homology/symbol/{species_name}/{symbol}") - lst <- get2(resource, `content-type` = "application/json") - - purrr::map(lst, ~ tibblify::tibblify(.x, spec_homology_info_symbol())) -} diff --git a/R/get_karyotypes.R b/R/get_karyotypes.R index 527c609..2969552 100644 --- a/R/get_karyotypes.R +++ b/R/get_karyotypes.R @@ -3,8 +3,8 @@ parse_karyotypes <- function(species_name, lst) { species_name = species_name, chromosome = lst$karyotype, ) %>% - dplyr::left_join(lst$top_level_region, by = c(chromosome = "name")) %>% - dplyr::relocate("species_name", "coord_system", "chromosome", "length") + dplyr::left_join(lst$top_level_region, by = c(chromosome = 'name')) %>% + dplyr::relocate('species_name', 'coord_system', 'chromosome', 'length') } #' Get the 
karyotype of a species @@ -33,21 +33,19 @@ parse_karyotypes <- function(species_name, lst) { #' #' @examples #' # Get the karyotype of Caenorhabditis elegans -#' get_karyotypes("caenorhabditis_elegans") +#' get_karyotypes('caenorhabditis_elegans') #' #' # Get the karyotype of the Giant panda -#' get_karyotypes("ailuropoda_melanoleuca") +#' get_karyotypes('ailuropoda_melanoleuca') #' #' @md #' @export -get_karyotypes <- function(species_name = "homo_sapiens", +get_karyotypes <- function(species_name = 'homo_sapiens', verbose = FALSE, warnings = TRUE, progress_bar = TRUE) { - resource_urls <- glue::glue( - "/info/assembly/", - "{species_name}?bands=0" - ) + resource_urls <- glue::glue('/info/assembly/', + '{species_name}?bands=0') responses <- request_parallel( @@ -59,12 +57,10 @@ get_karyotypes <- function(species_name = "homo_sapiens", # Only keep those responses that responded successfully, i.e. with status == "OK". responses_ok <- - purrr::keep( - responses, - ~ identical(.x$status, "OK") && - !rlang::is_empty(.x$content) - ) - if (rlang::is_empty(responses_ok)) { + purrr::keep(responses, + ~ identical(.x$status, 'OK') && + !rlang::is_empty(.x$content)) + if (rlang::is_empty(responses_ok)) return( tibble::tibble( species_name = character(), @@ -73,13 +69,11 @@ get_karyotypes <- function(species_name = "homo_sapiens", length = integer() ) ) - } return(purrr::imap_dfr( .x = responses_ok, - .f = ~ parse_karyotypes( - species_name = species_name[.y], - lst = .x$content - ) + .f = ~ parse_karyotypes(species_name = species_name[.y], + lst = .x$content) )) + } diff --git a/R/get_toplevel_sequence_info.R b/R/get_toplevel_sequence_info.R index 9de9e2b..35ee3b6 100644 --- a/R/get_toplevel_sequence_info.R +++ b/R/get_toplevel_sequence_info.R @@ -6,29 +6,26 @@ toplevel_sequence_info_tbl <- function(species_name = character(), is_circular = logical(), assembly_name = character(), length = double()) { - tibble::tibble( - species_name = species_name, - toplevel_sequence = 
toplevel_sequence, - is_chromosome = is_chromosome, - coordinate_system = coordinate_system, - assembly_exception_type = assembly_exception_type, - is_circular = is_circular, - assembly_name = assembly_name, - length = length - ) + tibble::tibble(species_name = species_name, + toplevel_sequence = toplevel_sequence, + is_chromosome = is_chromosome, + coordinate_system = coordinate_system, + assembly_exception_type = assembly_exception_type, + is_circular = is_circular, + assembly_name = assembly_name, + length = length) } parse_toplevel_sequence_info <- function(species_name, toplevel_sequence, lst) { - toplevel_sequence_info_tbl( - species_name = species_name, - toplevel_sequence = toplevel_sequence, - is_chromosome = as.logical(lst$is_chromosome), - coordinate_system = lst$coordinate_system, - assembly_exception_type = lst$assembly_exception_type, - is_circular = as.logical(lst$is_circular), - assembly_name = lst$assembly_name, - length = lst$length - ) + + toplevel_sequence_info_tbl(species_name = species_name, + toplevel_sequence = toplevel_sequence, + is_chromosome = as.logical(lst$is_chromosome), + coordinate_system = lst$coordinate_system, + assembly_exception_type = lst$assembly_exception_type, + is_circular = as.logical(lst$is_circular), + assembly_name = lst$assembly_name, + length = lst$length) } #' Get toplevel sequences details @@ -76,47 +73,42 @@ parse_toplevel_sequence_info <- function(species_name, toplevel_sequence, lst) { #' # Get details about a scaffold #' # (To find available toplevel sequences to query use the function #' # `get_toplevel_sequences()`) -#' get_toplevel_sequence_info(species_name = "homo_sapiens", toplevel_sequence = "KI270757.1") +#' get_toplevel_sequence_info(species_name = 'homo_sapiens', toplevel_sequence = 'KI270757.1') #' #' @seealso [get_toplevel_sequences()] #' #' @export -get_toplevel_sequence_info <- function(species_name = "homo_sapiens", - toplevel_sequence = c(1:22, "X", "Y", "MT"), +get_toplevel_sequence_info <- 
function(species_name = 'homo_sapiens', + toplevel_sequence = c(1:22, 'X', 'Y', 'MT'), verbose = FALSE, warnings = TRUE, progress_bar = TRUE) { + error_msg <- glue::glue( - "All arguments must have consistent lengths, ", - "only values of length one are recycled:\n", - "* Length of `species_name`: {length(species_name)}\n", - "* Length of `toplevel_sequence`: {length(toplevel_sequence)}\n" + 'All arguments must have consistent lengths, ', + 'only values of length one are recycled:\n', + '* Length of `species_name`: {length(species_name)}\n', + '* Length of `toplevel_sequence`: {length(toplevel_sequence)}\n' ) - if (!are_vec_recyclable( - species_name, - toplevel_sequence - )) { + if (!are_vec_recyclable(species_name, + toplevel_sequence)) { rlang::abort(error_msg) } - recycled_args <- vctrs::vec_recycle_common( - species_name, - toplevel_sequence - ) + recycled_args <- vctrs::vec_recycle_common(species_name, + toplevel_sequence) # The order of names here should be same as passed to # vctrs::vec_recycle_common() names(recycled_args) <- c( - "species_name", - "toplevel_sequence" + 'species_name', + 'toplevel_sequence' ) - resource_urls <- glue::glue( - "/info/assembly/", - "{recycled_args$species_name}/{recycled_args$toplevel_sequence}" - ) + resource_urls <- glue::glue('/info/assembly/', + '{recycled_args$species_name}/{recycled_args$toplevel_sequence}') responses <- request_parallel( @@ -128,14 +120,11 @@ get_toplevel_sequence_info <- function(species_name = "homo_sapiens", # Only keep those responses that responded successfully, i.e. with status == "OK". 
responses_ok <- - purrr::keep( - responses, - ~ identical(.x$status, "OK") && - !rlang::is_empty(.x$content) - ) - if (rlang::is_empty(responses_ok)) { + purrr::keep(responses, + ~ identical(.x$status, 'OK') && + !rlang::is_empty(.x$content)) + if (rlang::is_empty(responses_ok)) return(toplevel_sequence_info_tbl()) - } return(purrr::imap_dfr( .x = responses_ok, @@ -145,4 +134,5 @@ get_toplevel_sequence_info <- function(species_name = "homo_sapiens", lst = .x$content ) )) + } diff --git a/R/get_toplevel_sequences.R b/R/get_toplevel_sequences.R index b875c6c..43897d6 100644 --- a/R/get_toplevel_sequences.R +++ b/R/get_toplevel_sequences.R @@ -1,10 +1,12 @@ #' @importFrom rlang .data parse_toplevel_sequences <- function(species_name, lst) { + lst$top_level_region %>% tibble::as_tibble() %>% tibble::add_column(species_name = species_name) %>% dplyr::rename(toplevel_sequence = .data$name) %>% - dplyr::relocate("species_name", "coord_system", "toplevel_sequence", "length") + dplyr::relocate('species_name', 'coord_system', 'toplevel_sequence', 'length') + } #' Get toplevel sequences by species @@ -39,18 +41,17 @@ parse_toplevel_sequences <- function(species_name, lst) { #' get_toplevel_sequences() #' #' # Get toplevel sequences for Caenorhabditis elegans -#' get_toplevel_sequences("caenorhabditis_elegans") +#' get_toplevel_sequences('caenorhabditis_elegans') #' #' @md #' @export -get_toplevel_sequences <- function(species_name = "homo_sapiens", +get_toplevel_sequences <- function(species_name = 'homo_sapiens', verbose = FALSE, warnings = TRUE, progress_bar = TRUE) { - resource_urls <- glue::glue( - "/info/assembly/", - "{species_name}?bands=0" - ) + + resource_urls <- glue::glue('/info/assembly/', + '{species_name}?bands=0') responses <- request_parallel( @@ -62,12 +63,10 @@ get_toplevel_sequences <- function(species_name = "homo_sapiens", # Only keep those responses that responded successfully, i.e. with status == "OK". 
responses_ok <- - purrr::keep( - responses, - ~ identical(.x$status, "OK") && - !rlang::is_empty(.x$content) - ) - if (rlang::is_empty(responses_ok)) { + purrr::keep(responses, + ~ identical(.x$status, 'OK') && + !rlang::is_empty(.x$content)) + if (rlang::is_empty(responses_ok)) return( tibble::tibble( species_name = character(), @@ -76,13 +75,11 @@ get_toplevel_sequences <- function(species_name = "homo_sapiens", length = integer() ) ) - } return(purrr::imap_dfr( .x = responses_ok, - .f = ~ parse_toplevel_sequences( - species_name = species_name[.y], - lst = .x$content - ) + .f = ~ parse_toplevel_sequences(species_name = species_name[.y], + lst = .x$content) )) + } diff --git a/R/http-headers.R b/R/http-headers.R new file mode 100644 index 0000000..0624dca --- /dev/null +++ b/R/http-headers.R @@ -0,0 +1,60 @@ +req_header_names <- function() { + c("Accept", "Accept-Encoding", "Content-Type", "Origin") +} + +res_header_names <- function() { + c( + "Access-Control-Allow-Origin", + "Content-Length", + "Content-Type", + "Retry-After", + "X-Runtime", + "X-RateLimit-Limit", + "X-RateLimit-Reset", + "X-RateLimit-Period", + "X-RateLimit-Remaining" + ) +} + +req_headers <- + function(accept = NULL, + accept_encoding = NULL, + content_type = "application/json", # as default + origin = NULL) { + + headers_lst <- + list( + Accept = accept, + `Accept-Encoding` = accept_encoding, + `Content-Type` = content_type, + Origin = origin + ) + + structure(headers_lst, class = "ensemblr_req_hdr") + } + +res_headers <- + function(access_control_allow_origin = "*", + content_length = NULL, + content_type = "application/json", + retry_after = NULL, + x_runtime = NULL, + x_rate_limit_limit = NULL, + x_rate_limit_reset = NULL, + x_rate_limit_period = NULL, + x_rate_limit_remaining = NULL) { + + headers_lst <- list( + `Access-Control-Allow-Origin` = access_control_allow_origin, + `Content-Length` = content_length, + `Content-Type` = content_type, + `Retry-After` = retry_after, + `X-Runtime` = 
x_runtime, + `X-RateLimit-Limit` = x_rate_limit_limit, + `X-RateLimit-Reset` = x_rate_limit_reset, + `X-RateLimit-Period` = x_rate_limit_period, + `X-RateLimit-Remaining` = x_rate_limit_remaining + ) + + structure(headers_lst, class = "ensemblr_res_hdr") + } diff --git a/R/individuals.R b/R/individuals.R index b9c5936..0bfd35f 100644 --- a/R/individuals.R +++ b/R/individuals.R @@ -1,8 +1,8 @@ individuals_tbl <- function(species_name = character(), - population = character(), - description = character(), - individual = character(), - gender = character()) { + population = character(), + description = character(), + individual = character(), + gender = character()) { tbl <- tibble::tibble( species_name = species_name, population = population, @@ -16,14 +16,14 @@ individuals_tbl <- function(species_name = character(), json_list_to_individuals_tbl <- function(species_name, json_list) { tbl <- individuals_tbl( species_name = species_name, - population = purrr::pluck(json_list, "name", .default = NA_character_), - description = purrr::pluck(json_list, "description", .default = NA_character_), - individual = purrr::pluck(json_list, "individuals", 1, "name", .default = NA_character_), - gender = purrr::pluck(json_list, "individuals", 1, "gender", .default = NA_character_) + population = purrr::pluck(json_list, 'name', .default = NA_character_), + description = purrr::pluck(json_list, 'description', .default = NA_character_), + individual = purrr::pluck(json_list, 'individuals', 1, 'name', .default = NA_character_), + gender = purrr::pluck(json_list, 'individuals', 1, 'gender', .default = NA_character_) ) # Drop rows if all columns except species_name are NA - return(tidyr::drop_na(tbl, -species_name)) + return(tidyr::drop_na(tbl,-species_name)) } #' Get individuals for a population @@ -68,14 +68,15 @@ json_list_to_individuals_tbl <- function(species_name, json_list) { #' get_individuals() #' #' # Get Finnish individuals ("1000GENOMES:phase_3:FIN") -#' 
get_individuals(population = "1000GENOMES:phase_3:FIN") +#' get_individuals(population = '1000GENOMES:phase_3:FIN') #' #' @export -get_individuals <- function(species_name = "homo_sapiens", - population = "1000GENOMES:phase_3:CEU", +get_individuals <- function(species_name = 'homo_sapiens', + population = '1000GENOMES:phase_3:CEU', verbose = FALSE, warnings = TRUE, progress_bar = TRUE) { + # Assert species_name argument. assert_species_name(species_name) # Assert verbose argument. @@ -86,34 +87,28 @@ get_individuals <- function(species_name = "homo_sapiens", assertthat::assert_that(assertthat::is.flag(progress_bar)) error_msg <- glue::glue( - "All arguments must have consistent lengths, ", - "only values of length one are recycled:\n", - "* Length of `species_name`: {length(species_name)}\n", - "* Length of `population`: {length(population)}\n" + 'All arguments must have consistent lengths, ', + 'only values of length one are recycled:\n', + '* Length of `species_name`: {length(species_name)}\n', + '* Length of `population`: {length(population)}\n' ) - if (!are_vec_recyclable( - species_name, - population - )) { + if (!are_vec_recyclable(species_name, + population)) rlang::abort(error_msg) - } - recycled_args <- vctrs::vec_recycle_common( - species_name, - population - ) + recycled_args <- vctrs::vec_recycle_common(species_name, + population) # The order of names here should be same as passed to # vctrs::vec_recycle_common() names(recycled_args) <- c( - "species_name", - "population" - ) + 'species_name', + 'population') resource_urls <- glue::glue( - "/info/variation/populations/", - "{recycled_args$species_name}/", - "{recycled_args$population}" + '/info/variation/populations/', + '{recycled_args$species_name}/', + '{recycled_args$population}' ) # Usually we'd use purrr::map here but we opted for plyr::llply @@ -134,12 +129,10 @@ get_individuals <- function(species_name = "homo_sapiens", ) # Only keep those responses that responded successfully, i.e. 
with status == "OK". - responses_ok <- purrr::keep(responses, ~ identical(.x$status, "OK")) + responses_ok <- purrr::keep(responses, ~ identical(.x$status, 'OK')) # If none of the responses were successful then return an empty individuals tibble. - if (rlang::is_empty(responses_ok)) { - return(individuals_tbl()) - } + if (rlang::is_empty(responses_ok)) return(individuals_tbl()) return( purrr::imap_dfr( diff --git a/R/linkage_disequilibrium.R b/R/linkage_disequilibrium.R index cc15019..808c84a 100644 --- a/R/linkage_disequilibrium.R +++ b/R/linkage_disequilibrium.R @@ -1,10 +1,11 @@ ld_tbl <- function( - species_name = character(), - population = character(), - variant_id1 = character(), - variant_id2 = character(), - r_squared = double(), - d_prime = double()) { + species_name = character(), + population = character(), + variant_id1 = character(), + variant_id2 = character(), + r_squared = double(), + d_prime = double() +) { tbl <- tibble::tibble( species_name = species_name, population = population, @@ -18,13 +19,14 @@ ld_tbl <- function( } json_list_to_ld_tbl <- function(species_name, json_list) { + tbl <- ld_tbl( species_name = species_name, - population = purrr::pluck(json_list, "population_name", .default = NA_character_), - variant_id1 = purrr::pluck(json_list, "variation1", .default = NA_character_), - variant_id2 = purrr::pluck(json_list, "variation2", .default = NA_character_), - r_squared = as.double(purrr::pluck(json_list, "r2", .default = NA_real_)), - d_prime = as.double(purrr::pluck(json_list, "d_prime", .default = NA_real_)) + population = purrr::pluck(json_list, 'population_name', .default = NA_character_), + variant_id1 = purrr::pluck(json_list, 'variation1', .default = NA_character_), + variant_id2 = purrr::pluck(json_list, 'variation2', .default = NA_character_), + r_squared = as.double(purrr::pluck(json_list, 'r2', .default = NA_real_)), + d_prime = as.double(purrr::pluck(json_list, 'd_prime', .default = NA_real_)) ) # Drop rows if all columns 
except species_name are NA @@ -108,31 +110,31 @@ json_list_to_ld_tbl <- function(species_name, json_list) { #' @examples #' # Retrieve variants in LD by a window size of 1kb: #' # 1kb: 500 bp upstream and 500 bp downstream of variant. -#' get_ld_variants_by_window("rs123", genomic_window_size = 1L) +#' get_ld_variants_by_window('rs123', genomic_window_size = 1L) #' #' # Retrieve LD measures for pairs of variants: #' get_ld_variants_by_pair( -#' variant_id1 = c("rs123", "rs35439278"), -#' variant_id2 = c("rs122", "rs35174522") +#' variant_id1 = c('rs123', 'rs35439278'), +#' variant_id2 = c('rs122', 'rs35174522') #' ) #' #' # Retrieve variants in LD within a genomic range -#' get_ld_variants_by_range("7:100000..100500") +#' get_ld_variants_by_range('7:100000..100500') #' #' # Retrieve all pair combinations of variants in LD -#' get_ld_variants_by_pair_combn(c("rs6978506", "rs12718102", "rs13307200")) +#' get_ld_variants_by_pair_combn(c('rs6978506', 'rs12718102', 'rs13307200')) #' #' @export #' @rdname get_ld_variants_by_window get_ld_variants_by_window <- function(variant_id, - genomic_window_size = 500L, - species_name = "homo_sapiens", - population = "1000GENOMES:phase_3:CEU", - d_prime = 0.0, - r_squared = 0.05, - verbose = FALSE, - warnings = TRUE, - progress_bar = TRUE) { + genomic_window_size = 500L, + species_name = 'homo_sapiens', + population = '1000GENOMES:phase_3:CEU', + d_prime = 0.0, + r_squared = 0.05, + verbose = FALSE, + warnings = TRUE, + progress_bar = TRUE) { # Assert variant_id argument. assert_variant_id(variant_id) # Assert genomic_window_size argument. 
@@ -153,54 +155,48 @@ get_ld_variants_by_window <- function(variant_id, assertthat::assert_that(assertthat::is.flag(progress_bar)) error_msg <- glue::glue( - "All arguments must have consistent lengths, ", - "only values of length one are recycled:\n", - "* Length of `variant_id`: {length(variant_id)}\n", - "* Length of `genomic_window_size`: {length(genomic_window_size)}\n", - "* Length of `species_name`: {length(species_name)}\n", - "* Length of `population`: {length(population)}\n", - "* Length of `d_prime`: {length(d_prime)}\n", - "* Length of `r_squared`: {length(r_squared)}" + 'All arguments must have consistent lengths, ', + 'only values of length one are recycled:\n', + '* Length of `variant_id`: {length(variant_id)}\n', + '* Length of `genomic_window_size`: {length(genomic_window_size)}\n', + '* Length of `species_name`: {length(species_name)}\n', + '* Length of `population`: {length(population)}\n', + '* Length of `d_prime`: {length(d_prime)}\n', + '* Length of `r_squared`: {length(r_squared)}' ) - if (!are_vec_recyclable( - variant_id, - genomic_window_size, - species_name, - population, - d_prime, - r_squared - )) { + if (!are_vec_recyclable(variant_id, + genomic_window_size, + species_name, + population, + d_prime, + r_squared)) rlang::abort(error_msg) - } - - recycled_args <- vctrs::vec_recycle_common( - variant_id, - genomic_window_size, - species_name, - population, - d_prime, - r_squared - ) + + recycled_args <- vctrs::vec_recycle_common(variant_id, + genomic_window_size, + species_name, + population, + d_prime, + r_squared) # The order of names here should be same as passed to # vctrs::vec_recycle_common() names(recycled_args) <- c( - "variant_id", - "genomic_window_size", - "species_name", - "population", - "d_prime", - "r_squared" - ) + 'variant_id', + 'genomic_window_size', + 'species_name', + 'population', + 'd_prime', + 'r_squared') resource_urls <- glue::glue( - "/ld/", - "{recycled_args$species_name}/", - "{recycled_args$variant_id}/", - 
"{recycled_args$population}?", - "window_size={recycled_args$genomic_window_size};", - "d_prime={recycled_args$d_prime};", - "r2={recycled_args$r_squared}" - ) + '/ld/', + '{recycled_args$species_name}/', + '{recycled_args$variant_id}/', + '{recycled_args$population}?', + 'window_size={recycled_args$genomic_window_size};', + 'd_prime={recycled_args$d_prime};', + 'r2={recycled_args$r_squared}' + ) # Usually we'd use purrr::map here but we opted for plyr::llply # for a no frills alternative with progress bar support. @@ -221,13 +217,11 @@ get_ld_variants_by_window <- function(variant_id, ) # Only keep those responses that responded successfully, i.e. with status == "OK". - responses_ok <- purrr::keep(responses, ~ identical(.x$status, "OK")) + responses_ok <- purrr::keep(responses, ~ identical(.x$status, 'OK')) # If none of the responses were successful then return an empty linkage # disequilibrium tibble. - if (rlang::is_empty(responses_ok)) { - return(ld_tbl()) - } + if (rlang::is_empty(responses_ok)) return(ld_tbl()) return( purrr::imap_dfr( @@ -238,19 +232,21 @@ get_ld_variants_by_window <- function(variant_id, ) ) ) + } #' @export #' @rdname get_ld_variants_by_window get_ld_variants_by_pair <- function(variant_id1, - variant_id2, - species_name = "homo_sapiens", - population = "1000GENOMES:phase_3:CEU", - d_prime = 0.0, - r_squared = 0.05, - verbose = FALSE, - warnings = TRUE, - progress_bar = TRUE) { + variant_id2, + species_name = 'homo_sapiens', + population = '1000GENOMES:phase_3:CEU', + d_prime = 0.0, + r_squared = 0.05, + verbose = FALSE, + warnings = TRUE, + progress_bar = TRUE) { + # Assert variant_id1 argument. assert_variant_id(variant_id1) # Assert variant_id2 argument. 
@@ -271,55 +267,49 @@ get_ld_variants_by_pair <- function(variant_id1, assertthat::assert_that(assertthat::is.flag(progress_bar)) error_msg <- glue::glue( - "All arguments must have consistent lengths, ", - "only values of length one are recycled:\n", - "* Length of `variant_id1`: {length(variant_id1)}\n", - "* Length of `variant_id2`: {length(variant_id2)}\n", - "* Length of `species_name`: {length(species_name)}\n", - "* Length of `population`: {length(population)}\n", - "* Length of `d_prime`: {length(d_prime)}\n", - "* Length of `r_squared`: {length(r_squared)}" + 'All arguments must have consistent lengths, ', + 'only values of length one are recycled:\n', + '* Length of `variant_id1`: {length(variant_id1)}\n', + '* Length of `variant_id2`: {length(variant_id2)}\n', + '* Length of `species_name`: {length(species_name)}\n', + '* Length of `population`: {length(population)}\n', + '* Length of `d_prime`: {length(d_prime)}\n', + '* Length of `r_squared`: {length(r_squared)}' ) - if (!are_vec_recyclable( - variant_id1, - variant_id2, - species_name, - population, - d_prime, - r_squared - )) { + if (!are_vec_recyclable(variant_id1, + variant_id2, + species_name, + population, + d_prime, + r_squared)) rlang::abort(error_msg) - } - - recycled_args <- vctrs::vec_recycle_common( - variant_id1, - variant_id2, - species_name, - population, - d_prime, - r_squared - ) + + recycled_args <- vctrs::vec_recycle_common(variant_id1, + variant_id2, + species_name, + population, + d_prime, + r_squared) # The order of names here should be same as passed to # vctrs::vec_recycle_common() names(recycled_args) <- c( - "variant_id1", - "variant_id2", - "species_name", - "population", - "d_prime", - "r_squared" - ) + 'variant_id1', + 'variant_id2', + 'species_name', + 'population', + 'd_prime', + 'r_squared') resource_urls <- glue::glue( - "/ld/", - "{recycled_args$species_name}/", - "pairwise/", - "{recycled_args$variant_id1}/", - "{recycled_args$variant_id2}?", - 
"population_name={recycled_args$population};", - "d_prime={recycled_args$d_prime};", - "r2={recycled_args$r_squared}" + '/ld/', + '{recycled_args$species_name}/', + 'pairwise/', + '{recycled_args$variant_id1}/', + '{recycled_args$variant_id2}?', + 'population_name={recycled_args$population};', + 'd_prime={recycled_args$d_prime};', + 'r2={recycled_args$r_squared}' ) # Usually we'd use purrr::map here but we opted for plyr::llply @@ -340,13 +330,11 @@ get_ld_variants_by_pair <- function(variant_id1, ) # Only keep those responses that responded successfully, i.e. with status == "OK". - responses_ok <- purrr::keep(responses, ~ identical(.x$status, "OK")) + responses_ok <- purrr::keep(responses, ~ identical(.x$status, 'OK')) # If none of the responses were successful then return an empty linkage # disequilibrium tibble. - if (rlang::is_empty(responses_ok)) { - return(ld_tbl()) - } + if (rlang::is_empty(responses_ok)) return(ld_tbl()) return( purrr::imap_dfr( @@ -357,18 +345,19 @@ get_ld_variants_by_pair <- function(variant_id1, ) ) ) + } #' @export #' @rdname get_ld_variants_by_window get_ld_variants_by_range <- function(genomic_range, - species_name = "homo_sapiens", - population = "1000GENOMES:phase_3:CEU", - d_prime = 0.0, - r_squared = 0.05, - verbose = FALSE, - warnings = TRUE, - progress_bar = TRUE) { + species_name = 'homo_sapiens', + population = '1000GENOMES:phase_3:CEU', + d_prime = 0.0, + r_squared = 0.05, + verbose = FALSE, + warnings = TRUE, + progress_bar = TRUE) { # Assert genomic_range argument. assert_genomic_range(genomic_range) # Assert species_name argument. 
@@ -387,50 +376,44 @@ get_ld_variants_by_range <- function(genomic_range, assertthat::assert_that(assertthat::is.flag(progress_bar)) error_msg <- glue::glue( - "All arguments must have consistent lengths, ", - "only values of length one are recycled:\n", - "* Length of `genomic_range`: {length(genomic_range)}\n", - "* Length of `species_name`: {length(species_name)}\n", - "* Length of `population`: {length(population)}\n", - "* Length of `d_prime`: {length(d_prime)}\n", - "* Length of `r_squared`: {length(r_squared)}" + 'All arguments must have consistent lengths, ', + 'only values of length one are recycled:\n', + '* Length of `genomic_range`: {length(genomic_range)}\n', + '* Length of `species_name`: {length(species_name)}\n', + '* Length of `population`: {length(population)}\n', + '* Length of `d_prime`: {length(d_prime)}\n', + '* Length of `r_squared`: {length(r_squared)}' ) - if (!are_vec_recyclable( - genomic_range, - species_name, - population, - d_prime, - r_squared - )) { + if (!are_vec_recyclable(genomic_range, + species_name, + population, + d_prime, + r_squared)) rlang::abort(error_msg) - } - - recycled_args <- vctrs::vec_recycle_common( - genomic_range, - species_name, - population, - d_prime, - r_squared - ) + + recycled_args <- vctrs::vec_recycle_common(genomic_range, + species_name, + population, + d_prime, + r_squared) # The order of names here should be same as passed to # vctrs::vec_recycle_common() names(recycled_args) <- c( - "genomic_range", - "species_name", - "population", - "d_prime", - "r_squared" - ) + 'genomic_range', + 'species_name', + 'population', + 'd_prime', + 'r_squared') resource_urls <- glue::glue( - "/ld/", - "{recycled_args$species_name}/", - "region/", - "{recycled_args$genomic_range}/", - "{recycled_args$population}?", - "d_prime={recycled_args$d_prime};", - "r2={recycled_args$r_squared}" + '/ld/', + '{recycled_args$species_name}/', + 'region/', + '{recycled_args$genomic_range}/', + '{recycled_args$population}?', + 
'd_prime={recycled_args$d_prime};', + 'r2={recycled_args$r_squared}' ) # Usually we'd use purrr::map here but we opted for plyr::llply @@ -451,13 +434,11 @@ get_ld_variants_by_range <- function(genomic_range, ) # Only keep those responses that responded successfully, i.e. with status == "OK". - responses_ok <- purrr::keep(responses, ~ identical(.x$status, "OK")) + responses_ok <- purrr::keep(responses, ~ identical(.x$status, 'OK')) # If none of the responses were successful then return an empty linkage # disequilibrium tibble. - if (rlang::is_empty(responses_ok)) { - return(ld_tbl()) - } + if (rlang::is_empty(responses_ok)) return(ld_tbl()) return( purrr::imap_dfr( @@ -468,18 +449,20 @@ get_ld_variants_by_range <- function(genomic_range, ) ) ) + } #' @export #' @rdname get_ld_variants_by_window get_ld_variants_by_pair_combn <- function(variant_id, - species_name = "homo_sapiens", - population = "1000GENOMES:phase_3:CEU", - d_prime = 0.0, - r_squared = 0.05, - verbose = FALSE, - warnings = TRUE, - progress_bar = TRUE) { + species_name = 'homo_sapiens', + population = '1000GENOMES:phase_3:CEU', + d_prime = 0.0, + r_squared = 0.05, + verbose = FALSE, + warnings = TRUE, + progress_bar = TRUE) { + # Assert species_name is scalar # (defer more specific assertions to get_ld_variants_by_pair()) assertthat::assert_that(assertthat::is.scalar(species_name)) diff --git a/R/lookup_id.R b/R/lookup_id.R deleted file mode 100644 index d3354d9..0000000 --- a/R/lookup_id.R +++ /dev/null @@ -1,80 +0,0 @@ -#' @keywords internal -lookup_id_ <- function(ensembl_id, - species_name = "homo_sapiens", - ensembl_db = "", - expand = FALSE, - format = "condensed", - utr = FALSE, - phenotypes = FALSE, - verbose = FALSE, - warnings = TRUE, - progress_bar = TRUE) { - expand <- dplyr::if_else(expand, "1", "0") - utr <- dplyr::if_else(utr, "1", "0") - phenotypes <- dplyr::if_else(phenotypes, "1", "0") - - error_msg <- glue::glue( - "All arguments must have consistent lengths, ", - "only values of 
length one are recycled:\n", - "* Length of `ensembl_id`: {length(ensembl_id)}\n", - "* Length of `species_name`: {length(species_name)}\n", - "* Length of `ensembl_db`: {length(ensembl_db)}\n", - "* Length of `expand`: {length(expand)}\n", - "* Length of `format`: {length(format)}\n", - "* Length of `utr`: {length(utr)}\n", - "* Length of `phenotypes`: {length(phenotypes)}" - ) - if (!are_vec_recyclable( - ensembl_id, - species_name, - ensembl_db, - expand, - format, - utr, - phenotypes - )) { - rlang::abort(error_msg) - } - - recycled_args <- vctrs::vec_recycle_common( - ensembl_id, - species_name, - ensembl_db, - expand, - format, - utr, - phenotypes - ) - # The order of names here should be same as passed to - # vctrs::vec_recycle_common() - names(recycled_args) <- c( - "ensembl_id", - "species_name", - "ensembl_db", - "expand", - "format", - "utr", - "phenotypes" - ) - - resource_urls <- glue::glue( - "/lookup/id/", - "{recycled_args$ensembl_id}?", - '{p("species", recycled_args$species_name)};', - '{p("db_type", recycled_args$ensembl_db)};', - '{p("expand", recycled_args$expand)};', - '{p("format", recycled_args$format)};', - '{p("utr", recycled_args$utr)};', - '{p("phenotypes", recycled_args$phenotypes)}' - ) - - responses <- - request_parallel( - resource_urls, - verbose = verbose, - warnings = warnings, - progress_bar = progress_bar - ) - - responses -} diff --git a/R/mapping.R b/R/mapping.R index 4fb7424..8d89ff4 100644 --- a/R/mapping.R +++ b/R/mapping.R @@ -13,24 +13,24 @@ #' @seealso \code{\link{code_to_strand}} #' @keywords internal strand_to_code <- function(strand, .default = NA_integer_, .missing = NA_integer_) { + # Is .default an integer scalar? assertthat::assert_that( rlang::is_scalar_integer(.default), - msg = "`.default` must be an integer scalar." - ) + msg = '`.default` must be an integer scalar.' + ) # Is .missing an integer scalar? assertthat::assert_that( rlang::is_scalar_integer(.missing), - msg = "`.missing` must be an integer scalar." 
+ msg = '`.missing` must be an integer scalar.' ) dplyr::recode(strand, - `forward` = 1L, - `reverse` = -1L, - .default = .default, - .missing = .missing - ) + `forward` = 1L, + `reverse` = -1L, + .default = .default, + .missing = .missing) } #' Convert strand integer codes to strand words @@ -49,52 +49,53 @@ strand_to_code <- function(strand, .default = NA_integer_, .missing = NA_integer #' @seealso \code{\link{strand_to_code}} #' @keywords internal code_to_strand <- function(code, .default = NA_character_, .missing = NA_character_) { + # Is strand an integer scalar? assertthat::assert_that( rlang::is_integer(code), - msg = "`code` must be an integer vector." + msg = '`code` must be an integer vector.' ) # Is .default a character scalar? assertthat::assert_that( rlang::is_scalar_character(.default), - msg = "`.default` must be an character scalar." + msg = '`.default` must be an character scalar.' ) # Is .missing an integer scalar? assertthat::assert_that( rlang::is_scalar_character(.missing), - msg = "`.missing` must be an character scalar." + msg = '`.missing` must be an character scalar.' 
) dplyr::recode(code, - `1` = "forward", - `-1` = "reverse", - .default = .default, - .missing = .missing - ) + `1` = 'forward', + `-1` = 'reverse', + .default = .default, + .missing = .missing) } mapping_tbl <- function( - species_name = character(), - assembly_0 = character(), - assembly_1 = character(), - assembly_2 = character(), - coordinate_system_0 = character(), - coordinate_system_1 = character(), - coordinate_system_2 = character(), - strand_0 = character(), - strand_1 = character(), - strand_2 = character(), - sequence_region_name_0 = character(), - sequence_region_name_1 = character(), - sequence_region_name_2 = character(), - start_0 = integer(), - start_1 = integer(), - start_2 = integer(), - end_0 = integer(), - end_1 = integer(), - end_2 = integer()) { + species_name = character(), + assembly_0 = character(), + assembly_1 = character(), + assembly_2 = character(), + coordinate_system_0 = character(), + coordinate_system_1 = character(), + coordinate_system_2 = character(), + strand_0 = character(), + strand_1 = character(), + strand_2 = character(), + sequence_region_name_0 = character(), + sequence_region_name_1 = character(), + sequence_region_name_2 = character(), + start_0 = integer(), + start_1 = integer(), + start_2 = integer(), + end_0 = integer(), + end_1 = integer(), + end_2 = integer() +) { tbl <- tibble::tibble( species_name = species_name, assembly_0 = assembly_0, @@ -123,7 +124,7 @@ mapping_tbl <- function( json_list_to_mapping_tbl <- function(species_name, assembly_0, strand_0, - coordinate_system_0, + coordinate_system_0 , sequence_region_name_0, start_0, end_0, @@ -131,77 +132,78 @@ json_list_to_mapping_tbl <- function(species_name, tbl <- mapping_tbl( species_name = species_name, assembly_0 = assembly_0, - assembly_1 = purrr::pluck(json_list, "original", "assembly", .default = NA_character_), - assembly_2 = purrr::pluck(json_list, "mapped", "assembly", .default = NA_character_), + assembly_1 = purrr::pluck(json_list, 'original', 
'assembly', .default = NA_character_), + assembly_2 = purrr::pluck(json_list, 'mapped', 'assembly', .default = NA_character_), coordinate_system_0 = coordinate_system_0, - coordinate_system_1 = purrr::pluck(json_list, "original", "coord_system", .default = NA_character_), - coordinate_system_2 = purrr::pluck(json_list, "mapped", "coord_system", .default = NA_character_), + coordinate_system_1 = purrr::pluck(json_list, 'original', 'coord_system', .default = NA_character_), + coordinate_system_2 = purrr::pluck(json_list, 'mapped', 'coord_system', .default = NA_character_), strand_0 = strand_0, strand_1 = code_to_strand( - as.integer(purrr::pluck(json_list, "original", "strand", .default = NA_integer_)) + as.integer(purrr::pluck(json_list, 'original', 'strand', .default = NA_integer_)) ), strand_2 = code_to_strand( - as.integer(purrr::pluck(json_list, "mapped", "strand", .default = NA_integer_)) + as.integer(purrr::pluck(json_list, 'mapped', 'strand', .default = NA_integer_)) ), sequence_region_name_0 = sequence_region_name_0, - sequence_region_name_1 = purrr::pluck(json_list, "original", "seq_region_name", .default = NA_character_), - sequence_region_name_2 = purrr::pluck(json_list, "mapped", "seq_region_name", .default = NA_character_), + sequence_region_name_1 = purrr::pluck(json_list, 'original', 'seq_region_name', .default = NA_character_), + sequence_region_name_2 = purrr::pluck(json_list, 'mapped', 'seq_region_name', .default = NA_character_), start_0 = start_0, - start_1 = purrr::pluck(json_list, "original", "start", .default = NA_integer_), - start_2 = purrr::pluck(json_list, "mapped", "start", .default = NA_integer_), + start_1 = purrr::pluck(json_list, 'original', 'start', .default = NA_integer_), + start_2 = purrr::pluck(json_list, 'mapped', 'start', .default = NA_integer_), end_0 = end_0, - end_1 = purrr::pluck(json_list, "original", "end", .default = NA_integer_), - end_2 = purrr::pluck(json_list, "mapped", "end", .default = NA_integer_) + end_1 = 
purrr::pluck(json_list, 'original', 'end', .default = NA_integer_), + end_2 = purrr::pluck(json_list, 'mapped', 'end', .default = NA_integer_) ) return(tbl) } remap_gdna_to_gdna <- function(genomic_range, - from = "GRCh37", - to = "GRCh38", - strand = "forward", - species_name = "homo_sapiens", - coord_system_from = "chromosome", - coord_system_to = "chromosome", - verbose = FALSE, - warnings = TRUE, - progress_bar = TRUE) { + from = 'GRCh37', + to = 'GRCh38', + strand = 'forward', + species_name = 'homo_sapiens', + coord_system_from = 'chromosome', + coord_system_to = 'chromosome', + verbose = FALSE, + warnings = TRUE, + progress_bar = TRUE) { + # Is `from` a character vector of non-NA values? assertthat::assert_that( rlang::is_character(from), !any(rlang::are_na(from)), - msg = "Argument `from` must be a character vector of non-NA values." + msg = 'Argument `from` must be a character vector of non-NA values.' ) # Is `to` a character vector of non-NA values? assertthat::assert_that( rlang::is_character(to), !any(rlang::are_na(to)), - msg = "Argument `to` must be a character vector of non-NA values." + msg = 'Argument `to` must be a character vector of non-NA values.' ) # Is `strand` a character vector of 'forward' or 'reverse' values only? assertthat::assert_that( rlang::is_character(strand), !any(rlang::are_na(strand)), - all(strand %in% c("forward", "reverse")), - msg = "Argument `to` must be a character vector of non-NA values." + all(strand %in% c('forward', 'reverse')), + msg = 'Argument `to` must be a character vector of non-NA values.' ) # Is `coord_system_from` a scalar character vector 'chromosome' values only? assertthat::assert_that( rlang::is_character(coord_system_from), !any(rlang::are_na(coord_system_from)), - all(coord_system_from %in% c("chromosome")), - msg = "For the moment argument `coord_system_from` must be a character vector of 'chromosome' values only." 
+ all(coord_system_from %in% c('chromosome')), + msg = 'For the moment argument `coord_system_from` must be a character vector of \'chromosome\' values only.' ) # Is `coord_system_to` a scalar character vector 'chromosome' values only? assertthat::assert_that( rlang::is_character(coord_system_to), !any(rlang::are_na(coord_system_to)), - all(coord_system_to %in% c("chromosome")), - msg = "For the moment argument `coord_system_to` must be a character vector of 'chromosome' values only." + all(coord_system_to %in% c('chromosome')), + msg = 'For the moment argument `coord_system_to` must be a character vector of \'chromosome\' values only.' ) # Assert genomic_range argument. @@ -217,57 +219,51 @@ remap_gdna_to_gdna <- function(genomic_range, assertthat::assert_that(assertthat::is.flag(progress_bar)) error_msg <- glue::glue( - "All arguments must have consistent lengths, ", - "only values of length one are recycled:\n", - "* Length of `genomic_range`: {length(genomic_range)}\n", - "* Length of `species_name`: {length(species_name)}\n", - "* Length of `from`: {length(from)}\n", - "* Length of `to`: {length(to)}\n", - "* Length of `strand`: {length(strand)}\n", - "* Length of `coord_system_from`: {length(coord_system_from)}\n", - "* Length of `coord_system_to`: {length(coord_system_to)}\n" + 'All arguments must have consistent lengths, ', + 'only values of length one are recycled:\n', + '* Length of `genomic_range`: {length(genomic_range)}\n', + '* Length of `species_name`: {length(species_name)}\n', + '* Length of `from`: {length(from)}\n', + '* Length of `to`: {length(to)}\n', + '* Length of `strand`: {length(strand)}\n', + '* Length of `coord_system_from`: {length(coord_system_from)}\n', + '* Length of `coord_system_to`: {length(coord_system_to)}\n' ) - if (!are_vec_recyclable( - genomic_range, - species_name, - from, - to, - strand, - coord_system_from, - coord_system_to - )) { + if (!are_vec_recyclable(genomic_range, + species_name, + from, + to, + strand, + 
coord_system_from, + coord_system_to)) rlang::abort(error_msg) - } - recycled_args <- vctrs::vec_recycle_common( - genomic_range, - species_name, - from, - to, - strand, - coord_system_from, - coord_system_to - ) + recycled_args <- vctrs::vec_recycle_common(genomic_range, + species_name, + from, + to, + strand, + coord_system_from, + coord_system_to) # The order of names here should be same as passed to # vctrs::vec_recycle_common() names(recycled_args) <- c( - "genomic_range", - "species_name", - "from", - "to", - "strand", - "coord_system_from", - "coord_system_to" - ) + 'genomic_range', + 'species_name', + 'from', + 'to', + 'strand', + 'coord_system_from', + 'coord_system_to') resource_urls <- glue::glue( - "/map/", - "{recycled_args$species_name}/", - "{recycled_args$from}/", - "{recycled_args$genomic_range}:{strand_to_code(recycled_args$strand)}/", - "{recycled_args$to}/?", - "coord_system={recycled_args$coord_system_from};", - "target_coord_system={recycled_args$coord_system_to}" + '/map/', + '{recycled_args$species_name}/', + '{recycled_args$from}/', + '{recycled_args$genomic_range}:{strand_to_code(recycled_args$strand)}/', + '{recycled_args$to}/?', + 'coord_system={recycled_args$coord_system_from};', + 'target_coord_system={recycled_args$coord_system_to}' ) responses <- @@ -279,13 +275,11 @@ remap_gdna_to_gdna <- function(genomic_range, ) # Only keep those responses that responded successfully, i.e. with status == "OK". - responses_ok <- purrr::keep(responses, ~ identical(.x$status, "OK")) + responses_ok <- purrr::keep(responses, ~ identical(.x$status, 'OK')) # If none of the responses were successful then return an empty linkage # disequilibrium tibble. 
- if (rlang::is_empty(responses_ok)) { - return(mapping_tbl()) - } + if (rlang::is_empty(responses_ok)) return(mapping_tbl()) return( purrr::imap_dfr( @@ -295,11 +289,12 @@ remap_gdna_to_gdna <- function(genomic_range, assembly_0 = recycled_args$from[.y], strand_0 = recycled_args$strand[.y], coordinate_system_0 = recycled_args$coord_system_from[.y], - sequence_region_name_0 = split_genomic_range(recycled_args$genomic_range[.y])[, "chromosome"], - start_0 = split_genomic_range(recycled_args$genomic_range[.y])[, "start"], - end_0 = split_genomic_range(recycled_args$genomic_range[.y])[, "end"], + sequence_region_name_0 = split_genomic_range(recycled_args$genomic_range[.y])[, 'chromosome'], + start_0 = split_genomic_range(recycled_args$genomic_range[.y])[, 'start'], + end_0 = split_genomic_range(recycled_args$genomic_range[.y])[, 'end'], json_list = .x$content$mappings ) ) ) + } diff --git a/R/populations.R b/R/populations.R index a74bcc7..2ccdf14 100644 --- a/R/populations.R +++ b/R/populations.R @@ -14,13 +14,13 @@ population_tbl <- function(species_name = character(), json_list_to_population_tbl <- function(species_name, json_list) { tbl <- population_tbl( species_name = species_name, - population = purrr::pluck(json_list, "name", .default = NA_character_), - description = purrr::pluck(json_list, "description", .default = NA_character_), - cohort_size = purrr::pluck(json_list, "size", .default = NA_integer_) + population = purrr::pluck(json_list, 'name', .default = NA_character_), + description = purrr::pluck(json_list, 'description', .default = NA_character_), + cohort_size = purrr::pluck(json_list, 'size', .default = NA_integer_) ) # Drop rows if all columns except species_name are NA - return(tidyr::drop_na(tbl, -species_name)) + return(tidyr::drop_na(tbl,-species_name)) } #' Get populations for a species @@ -61,18 +61,19 @@ json_list_to_population_tbl <- function(species_name, json_list) { #' #' @examples #' # Get all human populations with linkage 
disequilibrium data -#' get_populations(species_name = "homo_sapiens", ld_only = TRUE) +#' get_populations(species_name = 'homo_sapiens', ld_only = TRUE) #' #' # Get all human populations -#' get_populations(species_name = "homo_sapiens", ld_only = FALSE) +#' get_populations(species_name = 'homo_sapiens', ld_only = FALSE) #' #' @md #' @export -get_populations <- function(species_name = "homo_sapiens", +get_populations <- function(species_name = 'homo_sapiens', ld_only = TRUE, verbose = FALSE, warnings = TRUE, progress_bar = TRUE) { + # Assert species_name argument. assert_species_name(species_name) # Assert ld_only argument. @@ -85,35 +86,29 @@ get_populations <- function(species_name = "homo_sapiens", assertthat::assert_that(assertthat::is.flag(progress_bar)) error_msg <- glue::glue( - "All arguments must have consistent lengths, ", - "only values of length one are recycled:\n", - "* Length of `species_name`: {length(species_name)}\n", - "* Length of `ld_only`: {length(ld_only)}\n" - ) - if (!are_vec_recyclable( - species_name, - ld_only - )) { + 'All arguments must have consistent lengths, ', + 'only values of length one are recycled:\n', + '* Length of `species_name`: {length(species_name)}\n', + '* Length of `ld_only`: {length(ld_only)}\n' + ) + if (!are_vec_recyclable(species_name, + ld_only)) rlang::abort(error_msg) - } - recycled_args <- vctrs::vec_recycle_common( - species_name, - ld_only - ) + recycled_args <- vctrs::vec_recycle_common(species_name, + ld_only) # The order of names here should be same as passed to # vctrs::vec_recycle_common() names(recycled_args) <- c( - "species_name", - "ld_only" - ) + 'species_name', + 'ld_only') - filter_by_ld <- dplyr::if_else(recycled_args$ld_only, "?filter=LD", "") + filter_by_ld <- dplyr::if_else(recycled_args$ld_only, '?filter=LD', '') resource_urls <- glue::glue( - "/info/variation/populations/", - "{recycled_args$species_name}", - "{filter_by_ld}" + '/info/variation/populations/', + 
'{recycled_args$species_name}', + '{filter_by_ld}' ) responses <- @@ -125,12 +120,10 @@ get_populations <- function(species_name = "homo_sapiens", ) # Only keep those responses that responded successfully, i.e. with status == "OK". - responses_ok <- purrr::keep(responses, ~ .x$status == "OK") + responses_ok <- purrr::keep(responses, ~ .x$status == 'OK') # If none of the responses were successful then return an empty population tibble. - if (rlang::is_empty(responses_ok)) { - return(population_tbl()) - } + if (rlang::is_empty(responses_ok)) return(population_tbl()) return( purrr::imap_dfr( diff --git a/R/post.R b/R/post.R new file mode 100755 index 0000000..f2a1102 --- /dev/null +++ b/R/post.R @@ -0,0 +1,72 @@ +#' The function for POST method +#' +#' The [post()] function is a wrapper around the `reqs` function that +#' performs POST requests to the Ensembl API, handling rate limiting +#' automatically. +#' +#' @param res The resource (path) for the API request, can include variables +#' in curly braces `{}` that will be replaced with the corresponding +#' parameter. +#' @param ... Additional named parameters to be included in the request URL. +#' @param .body The body of the POST request, can be a string or raw vector. +#' @param .headers An S3 list with class `ensemblr_req_hdr`. Use the helper +#' [req_headers()] to create such an object. +#' @param rate The maximum number of requests per second to allow; it is +#' passed on to [httr2::req_throttle()]. Defaults to `15/60`, i.e. 15 +#' requests per minute. +#' +#' @return A list of responses, one for each request made. +#' +#' @keywords internal +post <- function(res, ..., .headers = req_headers(), .body, rate = 15/60) { # for post the body could be mandatory?
+ if (missing(.body)) stop("The '.body' parameter is required for POST requests.") + requests <- reqs(res, ..., .headers = .headers, .body = .body) + requests <- purrr::map(requests, httr2::req_throttle, rate = rate) + responses <- purrr::map(requests, function(req) { + req |> httr2::req_method("POST") |> + httr2::req_perform() + }) + httr2::throttle_status() + + for (i in seq_along(responses)) { + status_code <- httr2::resp_status(responses[[i]]) # I am not sure whether the error will arrive here + if (status_code == 429) { + #the `Retry-After` in the response_headers will only show up once you exceed the rate limit + retry_after <- as.numeric(httr2::resp_headers(responses[[i]])$`Retry-After`) + message(glue::glue("Rate limit reached, waiting {retry_after} seconds + before retrying...")) + Sys.sleep(retry_after) + responses[[i]] <- httr2::req_perform(requests[[i]]) + } else if (status_code != 200) { + warning(glue::glue("Request failed with status code {status_code}.")) + } + } + + responses +} + +#-------------------------------------------------------------------------------------- +# Retrieve the latest version for a set of Ensembl stable IDs, with a low level function +# https://rest.ensembl.org/documentation/info/archive_id_post +post_archive_ids <- function(ids, callback = NULL) { + if (missing(ids) || length(ids) == 0) { + stop("The 'ids' parameter is required and should contain at least one ID.") + } + + body <- jsonlite::toJSON(list(id = ids), auto_unbox = TRUE) + + response <- post( + res = "/archive/id", + callback = callback, # optional query parameter for JSONP + .headers = req_headers(content_type = "application/json"), + .body = body + ) +} + +# #example +# result <- post_archive_ids(ids = c("ENSG00000157764", "ENSG00000248378"), +# callback = "myCallbackFunction") +# print(result) + +# WE CAN THEN GO ON WITH OTHER ENDPOINTS WITH POST METHOD +# {the rest of the endpoints functions are in file `ensembl-endpoints.R`} diff --git a/R/req.R b/R/req.R new
file mode 100755 index 0000000..e1ca75f --- /dev/null +++ b/R/req.R @@ -0,0 +1,76 @@ +base_url <- function() "https://rest.ensembl.org" + +user_agent <- function() "ensemblr (https://www.pattern.institute/ensemblr)" + +#' Create a new HTTP request +#' +#' [req()] creates an HTTP request object. +#' +#' @param res A resource (res) URL as a string. This string supports embedding +#' of R variable names in curly braces whose values are looked up in parameter +#' names supplied in `...` and interpolated. +#' +#' @param ... Name value pairs specifying query components or parameters. +#' +#' @param .body A literal string or raw vector to send as body. +#' +#' @param .headers An S3 list with class `ensemblr_req_hdr`. Use the helper +#' [req_headers()] to create such an object. +#' +#' @inherit httr2::request return +#' +#' @keywords internal +req <- + function(res, + ..., + .body = NULL, + .headers = req_headers()) { + + # All parameters. + params <- list(...) + pnames <- names(params) + + # Required parameters. + req_pnames <- vars_in_braces(res) + req_params <- params[req_pnames] + + # Optional parameters. + opt_pnames <- setdiff(pnames, req_pnames) + opt_params <- params[opt_pnames] + + res <- glue::glue(res, .envir = as.environment(req_params)) + + req <- + httr2::request(base_url()) |> + httr2::req_url_path_append(res) |> + httr2::req_url_query(!!!opt_params) |> + httr2::req_headers(!!!.headers) |> + httr2::req_user_agent(user_agent()) + + if (!is.null(.body)) { + req <- httr2::req_body_raw(req, body = .body, type = .headers$content_type) + } + + req + } + +reqs <- function(res, + ..., + .body = NULL, + .headers = req_headers()) { + params <- list(...) + .body <- .body %||% list(.body) + + req_args <- + vctrs::vec_recycle_common( + res = res, + !!!params, + .body = .body + ) + reqs <- purrr::pmap(.l = req_args, function(res, ...) 
{ + req(res, ..., .headers = .headers) # because of errors while pass `.headers` into the recycling logic + + }) + + reqs +} diff --git a/R/request.R b/R/request.R old mode 100644 new mode 100755 index b5634b8..0acbabd --- a/R/request.R +++ b/R/request.R @@ -13,9 +13,8 @@ ensembl_server <- function() "https://rest.ensembl.org" #' #' @return An S3 \code{request} object as defined by the package \code{httr}. #' @keywords internal -user_agent <- function() { +user_agent_id <- function() httr::user_agent("ensemblr (https://www.pattern.institute/ensemblr)") -} #' Warn if response errored #' @@ -29,41 +28,45 @@ user_agent <- function() { #' called mostly for its side effect, i.e., the triggering of a warning. #' @keywords internal warn_when_request_errored <- function(response) { - code <- httr::status_code(response) + + ## code <- httr::status_code(response) #TO BE DEPRECATED/REMOVED + code <- response |> + httr2::resp_status() # If status code is 200 (sucessful) then there is nothing to be done in this # function. - if (identical(code, 200L)) { - return("OK") - } + if (identical(code, 200L)) return('OK') # Until here everything seem to be OK. + #------------------- I HAVE TO CHECK WELL THIS ERROR HANDLING --------------# url <- response$url - type <- httr::http_type(response) - content <- httr::content(response, "text", encoding = "UTF-8") + type <- response |> + httr2::resp_content_type() + content <- httr::content(response, "text", encoding = 'UTF-8') if (identical(type, "application/json")) { response_msg <- (jsonlite::fromJSON(content, flatten = TRUE))$error - } else { - content2 <- httr::content(response, as = "parsed", encoding = "UTF-8") + } + else { + content2 <- httr::content(response, as = 'parsed', encoding = 'UTF-8') if (identical(code, 503L)) { - msg1 <- rvest::html_text(rvest::html_nodes(content2, "body"), trim = TRUE) - msg2 <- stringr::str_replace(msg1, "\\n", "\\. 
") - response_msg <- glue::glue("{msg2}") + msg1 <- rvest::html_text(rvest::html_nodes(content2, 'body'), trim = TRUE) + msg2 <- stringr::str_replace(msg1, '\\n', '\\. ') + response_msg <- glue::glue('{msg2}') } else { - # NB rvest::html_nodes uses only one of the two arguments: css or xpath. - # That is why xpath being missing from the call below is not an issue. - msg1 <- rvest::html_text(rvest::html_nodes(content2, "h1"), trim = TRUE) - msg2 <- rvest::html_text(rvest::html_nodes(content2, "h2"), trim = TRUE) - response_msg <- glue::glue("{msg1}. {msg2}.") + # NB rvest::html_nodes uses only one of the two arguments: css or xpath. + # That is why xpath being missing from the call below is not an issue. + msg1 <- rvest::html_text(rvest::html_nodes(content2, 'h1'), trim = TRUE) + msg2 <- rvest::html_text(rvest::html_nodes(content2, 'h2'), trim = TRUE) + response_msg <- glue::glue('{msg1}. {msg2}.') } # TODO: handle 400 error, e.g. resource_url = '/info/variation/populations/homo_sapiens/little humans' } + #------------------- I HAVE TO CHECK WELL THIS ERROR HANDLING --------------# wrn_msg <- glue::glue( - "\n\n", - "* Status code: {code}\n", - "* Server message: {response_msg}\n", - "* Endpoint: {url}" - ) + '\n\n', + '* Status code: {code}\n', + '* Server message: {response_msg}\n', + '* Endpoint: {url}') warning(wrn_msg, immediate. = TRUE, call. 
= FALSE) return(wrn_msg) @@ -93,16 +96,28 @@ warn_when_request_errored <- function(response) { #' #' @keywords internal request <- function(resource_url, base_url = ensembl_server(), - verbose = FALSE, warnings = TRUE) { + verbose = FALSE, warnings = TRUE) { + if (verbose) message(glue::glue("Base URL: {base_url}.")) url <- stringr::str_c(base_url, resource_url) if (verbose) message(glue::glue("Requesting resource: {url}.")) if (verbose) message(glue::glue("Using the user agent: {user_agent_id()$options$useragent}.")) - response <- httr::GET(url, user_agent()) - - response_code <- httr::status_code(response) + # Start switching to httr2 + # Creating a 'request', I opted directly for `json` content from the response instead of text! + req <- httr2::request(base_url = url) |> + httr2::req_headers( + Accept = "application/json" # To avoid + ) + ## response <- httr::GET(url, user_agent_id()) #TO BE DEPRECATED/REMOVED + response <- req |> + httr2::req_user_agent("ensemblr: R Client for the Ensembl REST API") |> + httr2::req_perform() + + ## response_code <- httr::status_code(response) #TO BE DEPRECATED/REMOVED + response_code <- response |> + httr2::resp_status() # We can also obtain the response code by simply `response$status_code` if (verbose) message(glue::glue("Response code: {response_code}.")) # Response object (a list of four elements): @@ -112,12 +127,10 @@ request <- function(resource_url, base_url = ensembl_server(), # or OK if successful. # - response content: the content of the parsed JSON response, or NULL if # not successful. - obj <- list( - url = url, - response_code = response_code, - status = NA_character_, - content = NULL - ) + obj <- list(url = url, + response_code = response_code, + status = NA_character_, + content = NULL) # If response is not 200, i.e. if request did not complete successfully then # return an empty response object (NULL) and warn about the response code. 
@@ -125,10 +138,12 @@ request <- function(resource_url, base_url = ensembl_server(), wrg_msg <- warn_when_request_errored(response) obj$status <- wrg_msg return(obj) - } else { # Else response code is 200 and we move on to JSON parsing. + } else {# Else response code is 200 and we move on to JSON parsing. # Check if the content type of the response is JSON. - content_type <- httr::http_type(response) + ## content_type <- httr::http_type(response) #TO BE DEPRECATED/REMOVED + content_type <- response |> + httr2::resp_content_type() if (verbose) message(glue::glue("Response content type: {content_type}.")) if (!identical(content_type, "application/json")) { @@ -141,9 +156,12 @@ request <- function(resource_url, base_url = ensembl_server(), } # Parse JSON content - content <- jsonlite::fromJSON(httr::content(response, "text", encoding = "UTF-8"), flatten = FALSE) + ## content <- jsonlite::fromJSON(httr::content(response, "text", encoding = 'UTF-8'), flatten = FALSE) #TO BE DEPRECATED/REMOVED + content <- response |> + httr2::resp_body_json() |> + data.frame() # should we keep `data.frame` as it was before??? - obj$status <- "OK" + obj$status <- "OK" # Shouldn't we take the actual value provided by the response??? obj$content <- content return(obj) } @@ -177,16 +195,17 @@ request_parallel <- function(resource_urls, verbose = FALSE, warnings = TRUE, progress_bar = TRUE) { + + # Usually we'd use purrr::map here but we opted for plyr::llply # for a no frills alternative with progress bar support. 
- progress <- dplyr::if_else(progress_bar && interactive(), "text", "none") + progress <- dplyr::if_else(progress_bar && interactive(), 'text', 'none') responses <- plyr::llply( .data = resource_urls, .fun = request, verbose = verbose, warnings = warnings, - .progress = progress - ) + .progress = progress) return(responses) } diff --git a/R/species.R b/R/species.R index 21b0841..ce57b82 100644 --- a/R/species.R +++ b/R/species.R @@ -1,16 +1,17 @@ species_tbl <- function( - division = character(), - taxon_id = integer(), - species_name = character(), - species_display_name = character(), - species_common_name = character(), - release = integer(), - genome_assembly_name = character(), - genbank_assembly_accession = character(), - strain = character(), - strain_collection = character(), - species_aliases = list(), - groups = list()) { + division = character(), + taxon_id = integer(), + species_name = character(), + species_display_name = character(), + species_common_name = character(), + release = integer(), + genome_assembly_name = character(), + genbank_assembly_accession = character(), + strain = character(), + strain_collection = character(), + species_aliases = list(), + groups = list() +) { tbl <- tibble::tibble( division = division, taxon_id = taxon_id, @@ -29,19 +30,20 @@ species_tbl <- function( } json_list_to_species_tbl <- function(json_list) { + tbl <- species_tbl( - division = purrr::pluck(json_list, "division", .default = NA_character_), - taxon_id = as.integer(purrr::pluck(json_list, "taxon_id", .default = NA_integer_)), - species_name = purrr::pluck(json_list, "name", .default = NA_character_), - species_display_name = purrr::pluck(json_list, "display_name", .default = NA_character_), - species_common_name = purrr::pluck(json_list, "common_name", .default = NA_character_), - release = as.integer(purrr::pluck(json_list, "release", .default = NA_integer_)), - genome_assembly_name = purrr::pluck(json_list, "assembly", .default = NA_character_), - 
genbank_assembly_accession = purrr::pluck(json_list, "accession", .default = NA_character_), - strain = purrr::pluck(json_list, "strain", .default = NA_character_), - strain_collection = purrr::pluck(json_list, "strain_collection", .default = NA_character_), - species_aliases = purrr::pluck(json_list, "aliases", .default = list(character())), - groups = purrr::pluck(json_list, "groups", .default = list(character())) + division = purrr::pluck(json_list, 'division', .default = NA_character_), + taxon_id = as.integer(purrr::pluck(json_list, 'taxon_id', .default = NA_integer_)), + species_name = purrr::pluck(json_list, 'name', .default = NA_character_), + species_display_name = purrr::pluck(json_list, 'display_name', .default = NA_character_), + species_common_name = purrr::pluck(json_list, 'common_name', .default = NA_character_), + release = as.integer(purrr::pluck(json_list, 'release', .default = NA_integer_)), + genome_assembly_name = purrr::pluck(json_list, 'assembly', .default = NA_character_), + genbank_assembly_accession = purrr::pluck(json_list, 'accession', .default = NA_character_), + strain = purrr::pluck(json_list, 'strain', .default = NA_character_), + strain_collection = purrr::pluck(json_list, 'strain_collection', .default = NA_character_), + species_aliases = purrr::pluck(json_list, 'aliases', .default = list(character())), + groups = purrr::pluck(json_list, 'groups', .default = list(character())) ) # Sort species by division and species_name @@ -96,6 +98,7 @@ get_species <- function(division = get_divisions(), verbose = FALSE, warnings = TRUE, progress_bar = TRUE) { + # Assert division argument. assert_division(division) # Assert verbose argument. 
@@ -106,9 +109,9 @@ get_species <- function(division = get_divisions(), # e() is a short alias for function urltools::url_encode() e <- urltools::url_encode resource_urls <- glue::glue( - "/info/species?", - "hide_strain_info=0;", # We do not hide strain information - "division={e(division)}" + '/info/species?', + 'hide_strain_info=0;', # We do not hide strain information + 'division={e(division)}' ) responses <- @@ -120,13 +123,11 @@ get_species <- function(division = get_divisions(), ) # Only keep those responses that responded successfully, i.e. with status == "OK". - responses_ok <- purrr::keep(responses, ~ identical(.x$status, "OK")) + responses_ok <- purrr::keep(responses, ~ identical(.x$status, 'OK')) # If none of the responses were successful then return an empty linkage # disequilibrium tibble. - if (rlang::is_empty(responses_ok)) { - return(species_tbl()) - } + if (rlang::is_empty(responses_ok)) return(species_tbl()) return( purrr::map_dfr( @@ -135,3 +136,4 @@ get_species <- function(division = get_divisions(), ) ) } + diff --git a/R/utils.R b/R/utils.R index 32e57f8..aa4b0cd 100644 --- a/R/utils.R +++ b/R/utils.R @@ -6,9 +6,8 @@ #' #' @return A scalar logical: \code{TRUE} or \code{FALSE}. #' @keywords internal -are_vec_recyclable <- function(...) { +are_vec_recyclable <- function(...) !assertthat::is.error(try(vctrs::vec_recycle_common(...), silent = TRUE)) -} #' Pairwise combinations #' @@ -21,10 +20,11 @@ are_vec_recyclable <- function(...) { #' pairwise combination. #' @keywords internal pairwise_combn <- function(x) { + # Is x NULL? assertthat::assert_that( !rlang::is_null(x), - msg = "`x` cannot be NULL." + msg = '`x` cannot be NULL.' ) # Is x empty? @@ -33,7 +33,7 @@ pairwise_combn <- function(x) { # Is x a character vector? assertthat::assert_that( rlang::is_character(x), - msg = "`x` must be a character vector." + msg = '`x` must be a character vector.' ) # Does x contain NAs? 
@@ -45,13 +45,14 @@ pairwise_combn <- function(x) { return(tbl) } -p <- function(param_name, value, missing = "") { - params <- glue::glue("{param_name}={value}") - params[value == missing] <- "" +p <- function(param_name, value, missing = '') { + + params <- glue::glue('{param_name}={value}') + params[value == missing] <- '' return(params) } empty_strings_to_NA <- function(df) { - dplyr::mutate_if(df, is.character, list(~ dplyr::na_if(., ""))) + dplyr::mutate_if(df, is.character, list( ~ dplyr::na_if(., ""))) } diff --git a/R/variation_sources.R b/R/variation_sources.R index f46fb4e..6fd44d1 100644 --- a/R/variation_sources.R +++ b/R/variation_sources.R @@ -5,7 +5,9 @@ variation_source_tbl <- function(species_name = character(), somatic_status = character(), description = character(), url = character(), - data_types = list()) { + data_types = list() +) { + tbl <- tibble::tibble( species_name = species_name, db_name = db_name, @@ -20,15 +22,16 @@ variation_source_tbl <- function(species_name = character(), } json_list_variation_source_tbl <- function(species_name, json_list) { + tbl <- variation_source_tbl( species_name = species_name, - db_name = purrr::pluck(json_list, "name", .default = NA_character_), - type = purrr::pluck(json_list, "type", .default = NA_character_), - version = purrr::pluck(json_list, "version", .default = NA_character_), - somatic_status = purrr::pluck(json_list, "somatic_status", .default = NA_character_), - description = purrr::pluck(json_list, "description", .default = NA_character_), - url = purrr::pluck(json_list, "url", .default = NA_character_), - data_types = purrr::pluck(json_list, "data_types", .default = list(NA_character_)) + db_name = purrr::pluck(json_list, 'name', .default = NA_character_), + type = purrr::pluck(json_list, 'type', .default = NA_character_), + version = purrr::pluck(json_list, 'version', .default = NA_character_), + somatic_status = purrr::pluck(json_list, 'somatic_status', .default = NA_character_), + 
description = purrr::pluck(json_list, 'description', .default = NA_character_), + url = purrr::pluck(json_list, 'url', .default = NA_character_), + data_types = purrr::pluck(json_list, 'data_types', .default = list(NA_character_)) ) # Convert empty strings to NA_character_ @@ -78,13 +81,14 @@ json_list_variation_source_tbl <- function(species_name, json_list) { #' get_variation_sources() #' #' # Retrieve variant sources for mouse -#' get_variation_sources(species_name = "mus_musculus") +#' get_variation_sources(species_name = 'mus_musculus') #' #' @export -get_variation_sources <- function(species_name = "human", +get_variation_sources <- function(species_name = 'human', verbose = FALSE, warnings = TRUE, progress_bar = TRUE) { + # Assert species_name argument. assert_species_name(species_name) @@ -95,10 +99,8 @@ get_variation_sources <- function(species_name = "human", # Assert progress_bar argument. assertthat::assert_that(assertthat::is.flag(progress_bar)) - resource_urls <- glue::glue( - "/info/variation/", - "{species_name}/" - ) + resource_urls <- glue::glue('/info/variation/', + '{species_name}/') responses <- request_parallel( @@ -109,15 +111,13 @@ get_variation_sources <- function(species_name = "human", ) # Only keep those responses that responded successfully, i.e. with status == "OK". - responses_ok <- purrr::keep(responses, ~ identical(.x$status, "OK")) + responses_ok <- purrr::keep(responses, ~ identical(.x$status, 'OK')) - responses_ok_lgl <- purrr::map_lgl(responses, ~ identical(.x$status, "OK")) + responses_ok_lgl <- purrr::map_lgl(responses, ~ identical(.x$status, 'OK')) species_name2 <- species_name[responses_ok_lgl] # If none of the responses were successful then return an empty tibble. 
- if (rlang::is_empty(responses_ok)) { - return(variation_source_tbl()) - } + if (rlang::is_empty(responses_ok)) return(variation_source_tbl()) return( purrr::imap_dfr( diff --git a/R/vars-in-braces.R b/R/vars-in-braces.R new file mode 100644 index 0000000..c9e0be3 --- /dev/null +++ b/R/vars-in-braces.R @@ -0,0 +1,6 @@ +vars_in_braces <- function(x) { + matches <- stringr::str_extract_all(x, "\\{([^}]+)\\}") + vars <- gsub("[{}]", "", unlist(matches)) + + vars +} diff --git a/R/versioning.R b/R/versioning.R old mode 100644 new mode 100755 index 58120ff..4584e0b --- a/R/versioning.R +++ b/R/versioning.R @@ -1,107 +1,107 @@ -#' Retrieve the Perl API version -#' -#' @param verbose Whether to be chatty. -#' @param warnings Whether to print warnings. -#' @return A scalar integer vector with the Perl API version. -#' -#' @export -get_software_version <- function(verbose = FALSE, warnings = TRUE) { - response <- request( - resource_url = "/info/software?", - verbose = verbose, - warnings = warnings - ) - - return(purrr::pluck(response, - "content", - "release", - .default = NA_integer_ - )) -} - -#' Retrieve the current version of the Ensembl REST API -#' -#' @param verbose Whether to be chatty. -#' @param warnings Whether to print warnings. -#' @return A scalar character vector with Ensembl REST API version. -#' -#' @export -get_rest_version <- function(verbose = FALSE, warnings = TRUE) { - response <- request( - resource_url = "/info/rest?", - verbose = verbose, - warnings = warnings - ) - - return(purrr::pluck(response, - "content", - "release", - .default = NA_character_ - )) -} - -#' Retrieve the data release version(s) available on the Ensembl REST server. -#' -#' @param verbose Whether to be chatty. -#' @param warnings Whether to print warnings. -#' @return An integer vector of release version(s). 
-#' -#' @export -get_data_versions <- function(verbose = FALSE, warnings = TRUE) { - response <- request( - resource_url = "/info/data?", - verbose = verbose, - warnings = warnings - ) - - return(purrr::pluck(response, - "content", - "releases", - .default = NA_integer_ - )) -} - -#' Retrieve Ensembl REST versions -#' -#' This function gets the versions of the different entities involved in the -#' REST API requests. When accessing the Ensembl REST API, you are actually -#' accessing three interconnected entities: -#' \itemize{ -#' \item Ensembl databases (\code{data}). -#' \item Perl API (\code{software}). -#' \item REST API (\code{rest}). -#' } -#' \figure{ensembl_api_versioning_wo_fonts.svg} -#' -#' @param verbose Whether to be chatty. -#' @param warnings Whether to print warnings. -#' @return A named list of three elements: \code{data}, \code{software} and -#' \code{rest}. -#' -#' @examples -#' # Get the versions of the different entities involved in the REST API -#' # requests. -#' get_versioning() -#' -#' @export -get_versioning <- function(verbose = FALSE, warnings = TRUE) { - # Ensembl data release version(s) - data_version <- - get_data_versions(verbose = verbose, warnings = warnings) - - # Ensembl internal Perl API version - software_version <- - get_software_version(verbose = verbose, warnings = warnings) - - # Ensembl REST API version - rest_version <- - get_rest_version(verbose = verbose, warnings = warnings) - - api_versions <- list( - data = data_version, - software = software_version, - rest = rest_version - ) - - return(api_versions) -} +#' Retrieve the Perl API version +#' +#' @param verbose Whether to be chatty. +#' @param warnings Whether to print warnings. +#' @return A scalar integer vector with the Perl API version. 
+#' +#' @export +get_software_version <- function(verbose = FALSE, warnings = TRUE) { + + response <- request( + resource_url = '/info/software?', + verbose = verbose, + warnings = warnings) + + return(purrr::pluck(response, + 'content', + 'release', + .default = NA_integer_) + ) +} + +#' Retrieve the current version of the Ensembl REST API +#' +#' @param verbose Whether to be chatty. +#' @param warnings Whether to print warnings. +#' @return A scalar character vector with Ensembl REST API version. +#' +#' @export +get_rest_version <- function(verbose = FALSE, warnings = TRUE) { + + response <- request( + resource_url = '/info/rest?', + verbose = verbose, + warnings = warnings) + + return(purrr::pluck(response, + 'content', + 'release', + .default = NA_character_) + ) +} + +#' Retrieve the data release version(s) available on the Ensembl REST server. +#' +#' @param verbose Whether to be chatty. +#' @param warnings Whether to print warnings. +#' @return An integer vector of release version(s). +#' +#' @export +get_data_versions <- function(verbose = FALSE, warnings = TRUE) { + + response <- request( + resource_url = '/info/data?', + verbose = verbose, + warnings = warnings) + + return(purrr::pluck(response, + 'content', + 'releases', + .default = NA_integer_) + ) +} + +#' Retrieve Ensembl REST versions +#' +#' This function gets the versions of the different entities involved in the +#' REST API requests. When accessing the Ensembl REST API, you are actually +#' accessing three interconnected entities: +#' \itemize{ +#' \item Ensembl databases (\code{data}). +#' \item Perl API (\code{software}). +#' \item REST API (\code{rest}). +#' } +#' \figure{ensembl_api_versioning_wo_fonts.svg} +#' +#' @param verbose Whether to be chatty. +#' @param warnings Whether to print warnings. +#' @return A named list of three elements: \code{data}, \code{software} and +#' \code{rest}. 
+#' +#' @examples +#' # Get the versions of the different entities involved in the REST API +#' # requests. +#' get_versioning() +#' +#' @export +get_versioning <- function(verbose = FALSE, warnings = TRUE) { + + # Ensembl data release version(s) + data_version <- + get_data_versions(verbose = verbose, warnings = warnings) + + # Ensembl internal Perl API version + software_version <- + get_software_version(verbose = verbose, warnings = warnings) + + # Ensembl REST API version + rest_version <- + get_rest_version(verbose = verbose, warnings = warnings) + + api_versions <- list( + data = data_version, + software = software_version, + rest = rest_version) + + return(api_versions) +} diff --git a/data-raw/rest_api_endpoints.R b/data-raw/rest_api_endpoints.R index f381a33..822c3d1 100644 --- a/data-raw/rest_api_endpoints.R +++ b/data-raw/rest_api_endpoints.R @@ -25,7 +25,7 @@ library(usethis) library(readr) # URL of the page of interest. -url <- "https://rest.ensembl.org/" +url <- 'https://rest.ensembl.org/' # Get the html code for Ensembl REST API landing page # and create an XML document object. @@ -35,7 +35,7 @@ html <- xml2::read_html(url) # corresponding to the table. The path string # '/html/body/div/table' was found by manually # inspection with a browser. -nodeset <- rvest::html_nodes(x = html, xpath = "/html/body/div/table") +nodeset <- rvest::html_nodes(x = html, xpath = '/html/body/div/table') # Parse the html table and convert it to a data frame. 
my_table <- rvest::html_table(nodeset)[[1]] @@ -50,14 +50,14 @@ my_table <- rvest::html_table(nodeset)[[1]] # https://stackoverflow.com/questions/38511743/adding-missing-grouping-variables-message-in-dplyr-in-r my_table %>% dplyr::rename(endpoint = X1, description = X2) %>% - dplyr::mutate(section_id = dplyr::lead(cumsum(endpoint == "Resource"))) %>% + dplyr::mutate(section_id = dplyr::lead(cumsum(endpoint == 'Resource'))) %>% tidyr::fill(section_id) %>% dplyr::group_by(section_id) %>% dplyr::mutate(section = dplyr::first(endpoint)) %>% dplyr::group_by(section_id) %>% dplyr::slice(-(1:2)) %>% dplyr::ungroup() %>% - dplyr::select("section", "endpoint", "description") %>% + dplyr::select('section', 'endpoint', 'description') %>% dplyr::mutate(last_update_date = lubridate::date()) %>% dplyr::arrange(section) -> rest_api_endpoints diff --git a/data-raw/species.R b/data-raw/species.R index aa5e033..84093ee 100644 --- a/data-raw/species.R +++ b/data-raw/species.R @@ -38,3 +38,5 @@ usethis::use_data(species, internal = TRUE, compress = "xz", overwrite = TRUE, v # More about this here: # - https://stackoverflow.com/questions/48105239/using-datasets-in-an-r-package # - https://support.bioconductor.org/p/24756/ + + diff --git a/man/dot-_get_divisions.Rd b/man/dot-_get_divisions.Rd new file mode 100755 index 0000000..d98d4e1 --- /dev/null +++ b/man/dot-_get_divisions.Rd @@ -0,0 +1,25 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/ensembl-endpoints.R +\name{._get_divisions} +\alias{._get_divisions} +\title{Get list of all Ensembl divisions} +\usage{ +._get_divisions(callback = "randomlygeneratedname") +} +\arguments{ +\item{callback}{(Optional) A string representing the name of the callback subroutine for JSONP responses.} +} +\value{ +A parsed JSON response containing the list of Ensembl divisions. 
+ +See more about the implemented endpoint \code{\link[=._get_divisions]{._get_divisions()}} +on the following \href{https://rest.ensembl.org/documentation/info/info_divisions}{GET info/divisions} +from the official \href{https://rest.ensembl.org/}{Ensembl Rest API}. +} +\description{ +Retrieves a list of all available Ensembl divisions for which information is accessible. +} +\examples{ +._get_divisions() +._get_divisions(callback = "randomlygeneratedname") +} diff --git a/man/dot-_get_rest_version.Rd b/man/dot-_get_rest_version.Rd new file mode 100755 index 0000000..7045efc --- /dev/null +++ b/man/dot-_get_rest_version.Rd @@ -0,0 +1,26 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/ensembl-endpoints.R +\name{._get_rest_version} +\alias{._get_rest_version} +\title{Get REST API Version} +\usage{ +._get_rest_version(callback = "randomlygeneratedname") +} +\arguments{ +\item{callback}{(Optional) A string representing the name of the callback +subroutine for JSONP responses.} +} +\value{ +A parsed JSON response containing the REST API version information. + +See more about the implemented endpoint \code{\link[=._get_rest_version]{._get_rest_version()}} +on the following \href{https://rest.ensembl.org/documentation/info/rest}{GET info/rest} +from the official \href{https://rest.ensembl.org/}{Ensembl Rest API}. +} +\description{ +Retrieves the current version of the Ensembl REST API. 
+} +\examples{ +._get_rest_version() +._get_rest_version(callback = "randomlygeneratedname") +} diff --git a/man/dot-_get_software_version.Rd b/man/dot-_get_software_version.Rd new file mode 100755 index 0000000..6209c78 --- /dev/null +++ b/man/dot-_get_software_version.Rd @@ -0,0 +1,25 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/ensembl-endpoints.R +\name{._get_software_version} +\alias{._get_software_version} +\title{Get Software Version} +\usage{ +._get_software_version(callback = "randomlygeneratedname") +} +\arguments{ +\item{callback}{(Optional) A string representing the name of the callback subroutine for JSONP responses.} +} +\value{ +A parsed JSON response containing the Ensembl API version information. + +See more about the implemented endpoint \code{\link[=._get_software_version]{._get_software_version()}} +on the following \href{https://rest.ensembl.org/documentation/info/software}{GET info/software} +from the official \href{https://rest.ensembl.org/}{Ensembl Rest API}. +} +\description{ +Retrieves the current version of the Ensembl API used by the REST server. 
+} +\examples{ +._get_software_version() +._get_software_version(callback = "randomlygeneratedname") +} diff --git a/man/dot-_get_variation_sources.Rd b/man/dot-_get_variation_sources.Rd new file mode 100755 index 0000000..4b079a6 --- /dev/null +++ b/man/dot-_get_variation_sources.Rd @@ -0,0 +1,35 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/ensembl-endpoints.R +\name{._get_variation_sources} +\alias{._get_variation_sources} +\title{Get Variation Sources for a Species} +\usage{ +._get_variation_sources( + species, + callback = "randomlygeneratedname", + filter = NULL +) +} +\arguments{ +\item{species}{(Required) A string representing the species name or alias (e.g., "homo_sapiens").} + +\item{callback}{(Optional) A string representing the name of the callback subroutine for JSONP responses.} + +\item{filter}{(Optional) A string to restrict the variation source searches to a single source +(e.g., "dbSNP", "ClinVar", "OMIM", "UniProt", "HGMD").} +} +\value{ +A parsed JSON response containing the variation sources for the specified species. + +See more about the implemented endpoint \code{\link[=._get_variation_sources]{._get_variation_sources()}} +on the following \href{https://rest.ensembl.org/documentation/info/variation}{GET info/variation/:species} +from the official \href{https://rest.ensembl.org/}{Ensembl Rest API}. +} +\description{ +Retrieves the variation sources used in Ensembl for a given species. 
+} +\examples{ +._get_variation_sources("homo_sapiens") +._get_variation_sources("homo_sapiens", filter = "ClinVar") +._get_variation_sources("homo_sapiens", callback = "randomlygeneratedname") +} diff --git a/man/ensemblr-package.Rd b/man/ensemblr-package.Rd index 1f28765..1da7fa0 100644 --- a/man/ensemblr-package.Rd +++ b/man/ensemblr-package.Rd @@ -6,16 +6,14 @@ \alias{ensemblr-package} \title{ensemblr: R Client for the Ensembl REST API} \description{ -\if{html}{\figure{logo.png}{options: style='float: right' alt='logo' width='120'}} - R Client for the Ensembl REST API. } \seealso{ Useful links: \itemize{ - \item \url{https://github.com/ramiromagno/ensemblr} - \item \url{http://rmagno.eu/ensemblr/} - \item Report bugs at \url{https://github.com/ramiromagno/ensemblr/issues} + \item \url{https://github.com/patterninstitute/ensemblr} + \item \url{https://www.pattern.institute/ensemblr/} + \item Report bugs at \url{https://github.com/patterninstitute/ensemblr/issues} } } @@ -24,13 +22,14 @@ Useful links: Authors: \itemize{ + \item Dany Mukesha \email{dmukesha@pattern.institute} (\href{https://orcid.org/0009-0001-9514-751X}{ORCID}) \item Isabel Duarte \email{iduarte.scientist@gmail.com} (\href{https://orcid.org/0000-0003-0060-2936}{ORCID}) \item Ana-Teresa Maia \email{maia.anateresa@gmail.com} (\href{https://orcid.org/0000-0002-0454-9207}{ORCID}) } Other contributors: \itemize{ - \item CINTESIS [copyright holder, funder] + \item CINTESIS [funder] \item Pattern Institute [copyright holder, funder] } diff --git a/man/get.Rd b/man/get.Rd new file mode 100755 index 0000000..e48c593 --- /dev/null +++ b/man/get.Rd @@ -0,0 +1,30 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/get.R +\name{get} +\alias{get} +\title{The function for the GET method} +\usage{ +get(res, ..., .headers = req_headers(), rate = 15/60, verbose = FALSE) +} +\arguments{ +\item{res}{The resource (path) for the API request, can include variables +in curly braces 
\code{{}} that will be replaced with the corresponding parameter.} + +\item{...}{Additional named parameters to be included in the request URL.} + +\item{.headers}{An S3 list with class \code{ensemblr_req_hdr}. Use the helper +\code{\link[=req_headers]{req_headers()}} to create such an object.} + +\item{rate}{The maximum number of requests per second to allow. +Defaults to 15 per minute (15/60).} + +\item{verbose}{Logical, if TRUE, enables detailed logging of request and response details.} +} +\value{ +A list of responses, one for each request made. +} +\description{ +The \code{\link[=get]{get()}} function is a wrapper around the \code{reqs} function that performs +GET requests to the Ensembl API, handling rate limiting automatically. +} +\keyword{internal} diff --git a/man/get_alignment_by_region.Rd b/man/get_alignment_by_region.Rd new file mode 100755 index 0000000..dbbd35f --- /dev/null +++ b/man/get_alignment_by_region.Rd @@ -0,0 +1,27 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/ensembl-endpoints.R +\name{get_alignment_by_region} +\alias{get_alignment_by_region} +\title{Get alignment by region} +\usage{ +get_alignment_by_region(species, region) +} +\arguments{ +\item{species}{A string representing the species name (e.g., "homo_sapiens").} + +\item{region}{A string representing the genomic region (e.g., "3:1000-2000").} +} +\value{ +A list of parsed JSON responses containing the genomic alignments +for the provided species and region. + +See more about the implemented endpoint \code{\link[=get_alignment_by_region]{get_alignment_by_region()}} +on the following \href{https://rest.ensembl.org/documentation/info/genomic_alignment_region}{GET alignment/region/:species/:region} +from the official \href{https://rest.ensembl.org/}{Ensembl Rest API}. 
+} +\description{ +Retrieves genomic alignments as separate blocks based on a region and species +} +\examples{ +get_alignment_by_region("homo_sapiens", "3:1000-2000") +} diff --git a/man/get_analyses.Rd b/man/get_analyses.Rd index 27682b9..2b2f962 100644 --- a/man/get_analyses.Rd +++ b/man/get_analyses.Rd @@ -26,14 +26,14 @@ responses' status.} \value{ A \code{\link[tibble]{tibble}} of 3 variables: \describe{ - \item{\code{species_name}}{Ensembl species name: this is the name used - internally by Ensembl to uniquely identify a species by name. It is the - scientific name but formatted without capitalisation and spacing converted - with an underscore, e.g., \code{'homo_sapiens'}.} - \item{\code{database}}{Ensembl database. Typically one of \code{'core'}, - \code{'rnaseq'}, \code{'cdna'}, \code{'funcgen'} and - \code{'otherfeatures'}.} - \item{\code{analysis}}{Analysis.} +\item{\code{species_name}}{Ensembl species name: this is the name used +internally by Ensembl to uniquely identify a species by name. It is the +scientific name but formatted without capitalisation and spacing converted +with an underscore, e.g., \code{'homo_sapiens'}.} +\item{\code{database}}{Ensembl database. 
Typically one of \code{'core'}, +\code{'rnaseq'}, \code{'cdna'}, \code{'funcgen'} and +\code{'otherfeatures'}.} +\item{\code{analysis}}{Analysis.} } } \description{ diff --git a/man/get_analysis_info.Rd b/man/get_analysis_info.Rd new file mode 100755 index 0000000..a462546 --- /dev/null +++ b/man/get_analysis_info.Rd @@ -0,0 +1,30 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/ensembl-endpoints.R +\name{get_analysis_info} +\alias{get_analysis_info} +\title{Get the names of analyses involved in generating Ensembl data} +\usage{ +get_analysis_info(species, callback = "randomlygeneratedname") +} +\arguments{ +\item{species}{A string representing the species name or alias +(e.g., "homo_sapiens").} + +\item{callback}{(Optional) A string representing the name of the callback +subroutine for JSONP responses.} +} +\value{ +A list of analysis names related to the specified species. + +See more about the implemented endpoint \code{\link[=get_analysis_info]{get_analysis_info()}} +on the following \href{https://rest.ensembl.org/documentation/info/analysis}{GET info/analysis/:species} +from the official \href{https://rest.ensembl.org/}{Ensembl Rest API}. +} +\description{ +Retrieves a list of analysis names associated with generating Ensembl +data for a given species. 
+} +\examples{ +get_analysis_info("homo_sapiens") +get_analysis_info("homo_sapiens", callback = "randomlygeneratedname") +} diff --git a/man/get_assembly_info.Rd b/man/get_assembly_info.Rd new file mode 100755 index 0000000..9286aee --- /dev/null +++ b/man/get_assembly_info.Rd @@ -0,0 +1,45 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/ensembl-endpoints.R +\name{get_assembly_info} +\alias{get_assembly_info} +\title{Get the available assemblies for a species} +\usage{ +get_assembly_info( + species, + bands = 0, + synonyms = 0, + callback = "randomlygeneratedname" +) +} +\arguments{ +\item{species}{A string representing the species name or alias +(e.g., "homo_sapiens").} + +\item{bands}{(Optional) A boolean (0 or 1) indicating whether to +include karyotype band information. Default is 0.} + +\item{synonyms}{(Optional) A boolean (0 or 1) indicating whether to +include information about known synonyms. Default is 0.} + +\item{callback}{(Optional) A string representing the name of the callback +subroutine for JSONP responses.} +} +\value{ +A list of parsed JSON responses containing information about +the available assemblies for the specified species. + +See more about the implemented endpoint \code{\link[=get_assembly_info]{get_assembly_info()}} +on the following \href{https://rest.ensembl.org/documentation/info/assembly_info}{GET info/assembly/:species} +from the official \href{https://rest.ensembl.org/}{Ensembl Rest API}. +} +\description{ +Retrieves a list of available assemblies for a given species, +including toplevel sequences, chromosomes, and optionally cytogenetic +bands and synonyms. 
+} +\examples{ +get_assembly_info("homo_sapiens") +get_assembly_info("homo_sapiens", bands = 1) +get_assembly_info("homo_sapiens", synonyms = 1, + callback = "randomlygeneratedname") +} diff --git a/man/get_biotypes.Rd b/man/get_biotypes.Rd new file mode 100755 index 0000000..d178875 --- /dev/null +++ b/man/get_biotypes.Rd @@ -0,0 +1,30 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/ensembl-endpoints.R +\name{get_biotypes} +\alias{get_biotypes} +\title{Get the functional classifications of gene models for a species} +\usage{ +get_biotypes(species, callback = "randomlygeneratedname") +} +\arguments{ +\item{species}{A string representing the species name or alias (e.g., "homo_sapiens").} + +\item{callback}{(Optional) A string representing the name of the callback subroutine +for JSONP responses.} +} +\value{ +A list of parsed JSON responses containing the biotypes for the specified species. + +See more about the implemented endpoint \code{\link[=get_biotypes]{get_biotypes()}} +on the following \href{https://rest.ensembl.org/documentation/info/biotypes}{GET info/biotypes/:species} +from the official \href{https://rest.ensembl.org/}{Ensembl Rest API}. +} +\description{ +Retrieves the list of functional classifications (biotypes) of gene models +that Ensembl associates with a particular species. +Useful for restricting the type of genes/transcripts retrieved by other endpoints. 
+} +\examples{ +get_biotypes("homo_sapiens") +get_biotypes("homo_sapiens", callback = "randomlygeneratedname") +} diff --git a/man/get_biotypes_by_name.Rd b/man/get_biotypes_by_name.Rd new file mode 100755 index 0000000..9b11e88 --- /dev/null +++ b/man/get_biotypes_by_name.Rd @@ -0,0 +1,34 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/ensembl-endpoints.R +\name{get_biotypes_by_name} +\alias{get_biotypes_by_name} +\title{Get properties of biotypes by name} +\usage{ +get_biotypes_by_name( + name, + object_type = "", + callback = "randomlygeneratedname" +) +} +\arguments{ +\item{name}{A string representing the biotype name (e.g., "protein_coding").} + +\item{object_type}{(Optional) A string specifying the object type ("gene" or "transcript").} + +\item{callback}{(Optional) A string representing the name of the callback subroutine for JSONP responses.} +} +\value{ +A list of parsed JSON responses containing the properties of biotypes with the given name. + +See more about the implemented endpoint \code{\link[=get_biotypes_by_name]{get_biotypes_by_name()}} +on the following \href{https://rest.ensembl.org/documentation/info/biotypes_name}{GET info/biotypes/name/:name/:object_type} +from the official \href{https://rest.ensembl.org/}{Ensembl Rest API}. +} +\description{ +Retrieves the properties of biotypes with a given name. Optionally, the object type (gene or transcript) can be provided for filtering. 
+} +\examples{ +get_biotypes_by_name("protein_coding") +get_biotypes_by_name("protein_coding", object_type = "gene") +get_biotypes_by_name("protein_coding", object_type = "gene", callback = "randomlygeneratedname") +} diff --git a/man/get_biotypes_groups.Rd b/man/get_biotypes_groups.Rd new file mode 100755 index 0000000..b3253bc --- /dev/null +++ b/man/get_biotypes_groups.Rd @@ -0,0 +1,39 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/ensembl-endpoints.R +\name{get_biotypes_groups} +\alias{get_biotypes_groups} +\title{Get properties of biotypes within a group} +\usage{ +get_biotypes_groups( + group = "", + object_type = "", + callback = "randomlygeneratedname" +) +} +\arguments{ +\item{group}{(Optional) A string representing the biotype group (e.g., "coding"). +If not provided, the available biotype groups are returned.} + +\item{object_type}{(Optional) A string specifying the object type ("gene" or "transcript").} + +\item{callback}{(Optional) A string representing the name of the callback subroutine for JSONP responses.} +} +\value{ +A list of parsed JSON responses containing the properties of biotypes within +the specified group or all biotype groups if no group is provided. + +See more about the implemented endpoint \code{\link[=get_biotypes_groups]{get_biotypes_groups()}} +on the following \href{https://rest.ensembl.org/documentation/info/biotypes_groups}{GET info/biotypes/groups/:group/:object_type} +from the official \href{https://rest.ensembl.org/}{Ensembl Rest API}. +} +\description{ +Retrieves a list of available biotype groups or, if a group is specified, +the properties of biotypes within that group. +Optionally, the object type (gene or transcript) can be used to filter the results. 
+} +\examples{ +get_biotypes_groups() +get_biotypes_groups(group = "coding") +get_biotypes_groups(group = "coding", object_type = "gene") +get_biotypes_groups(group = "coding", object_type = "gene", callback = "randomlygeneratedname") +} diff --git a/man/get_cafe_genetree_by_id.Rd b/man/get_cafe_genetree_by_id.Rd new file mode 100755 index 0000000..8e67031 --- /dev/null +++ b/man/get_cafe_genetree_by_id.Rd @@ -0,0 +1,53 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/ensembl-endpoints.R +\name{get_cafe_genetree_by_id} +\alias{get_cafe_genetree_by_id} +\title{Get cafe gene tree by id} +\usage{ +get_cafe_genetree_by_id( + id, + callback = "myrandomfunctionname", + compara = "vertebrates", + nh_format = "simple" +) +} +\arguments{ +\item{id}{A string representing the gene tree stable identifier.} + +\item{callback}{String \emph{(optional)} Name of the callback subroutine +to be returned by the requested JSONP response. Required ONLY when using +JSONP as the serialisation method. Please +see also \href{http://github.com/Ensembl/ensembl-rest/wiki}{the user guide}.} + +\item{compara}{String \emph{(optional)} Name of the compara database to use. +Multiple comparas exist on a server for separate species divisions. +Default is "vertebrates".} + +\item{nh_format}{String \emph{(optional)} The format of a NH (New Hampshire) +request. Available only with the default setting to allow us to return +the cafe tree with Taxa names appended with number of members +and the p_value. Example: "homo_sapiens_3_0.123" where 3 is the number +of members and 0.123 is the p value.} +} +\value{ +A list of parsed JSON responses containing the cafe tree +for the provided gene tree stable identifier. 
+} +\description{ +Retrieves a cafe tree of the gene tree using the gene tree stable identifier +} +\note{ +See more about the implemented endpoint \code{\link[=get_cafe_genetree_by_id]{get_cafe_genetree_by_id()}} +on the following \href{https://rest.ensembl.org/documentation/info/cafe_tree}{GET cafe/genetree/id/:id} +from the official \href{https://rest.ensembl.org/}{Ensembl Rest API}. +} +\examples{ +get_cafe_genetree_by_id("ENSGT00390000003602") +get_cafe_genetree_by_id("ENSGT00390000003602", + callback = "randomlygeneratedname") +get_cafe_genetree_by_id("ENSGT00390000003602", + compara = "vertebrates") +get_cafe_genetree_by_id("ENSGT00390000003602", + nh_format = "homo_sapiens_3_0.123") + +} diff --git a/man/get_cafe_genetree_by_species_id.Rd b/man/get_cafe_genetree_by_species_id.Rd new file mode 100755 index 0000000..9a81de6 --- /dev/null +++ b/man/get_cafe_genetree_by_species_id.Rd @@ -0,0 +1,30 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/ensembl-endpoints.R +\name{get_cafe_genetree_by_species_id} +\alias{get_cafe_genetree_by_species_id} +\title{Get cafe gene tree by species id} +\usage{ +get_cafe_genetree_by_species_id(species, id) +} +\arguments{ +\item{species}{A string representing the species name (e.g., "homo_sapiens").} + +\item{id}{A string representing the gene, transcript, or translation +stable identifier.} +} +\value{ +A list of parsed JSON responses containing the cafe tree for +the provided species and stable identifier. +} +\description{ +Retrieves the cafe tree of the gene tree that contains the +gene/transcript/translation stable identifier in the given species +} +\note{ +See more about the implemented endpoint \code{\link[=get_cafe_genetree_by_species_id]{get_cafe_genetree_by_species_id()}} +on the following \href{https://rest.ensembl.org/documentation/info/cafe_tree_species_member_id}{GET cafe/genetree/member/id/:species/:id} +from the official \href{https://rest.ensembl.org/}{Ensembl Rest API}. 
+} +\examples{ +get_cafe_genetree_by_species_id("homo_sapiens", "ENST00000380152") +} diff --git a/man/get_cafe_genetree_by_symbol.Rd b/man/get_cafe_genetree_by_symbol.Rd new file mode 100755 index 0000000..7d7a7ad --- /dev/null +++ b/man/get_cafe_genetree_by_symbol.Rd @@ -0,0 +1,29 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/ensembl-endpoints.R +\name{get_cafe_genetree_by_symbol} +\alias{get_cafe_genetree_by_symbol} +\title{Get cafe gene tree by symbol} +\usage{ +get_cafe_genetree_by_symbol(species, symbol) +} +\arguments{ +\item{species}{A string representing the species name (e.g., "homo_sapiens").} + +\item{symbol}{A string representing the gene symbol (e.g., "BRCA2").} +} +\value{ +A list of parsed JSON responses containing the cafe tree +for the provided species and gene symbol. +} +\description{ +Retrieves the cafe tree of the gene tree that contains the gene identified +by a symbol +} +\note{ +See more about the implemented endpoint \code{\link[=get_cafe_genetree_by_symbol]{get_cafe_genetree_by_symbol()}} +on the following \href{https://rest.ensembl.org/documentation/info/cafe_tree_member_symbol}{GET cafe/genetree/member/symbol/:species/:symbol} +from the official \href{https://rest.ensembl.org/}{Ensembl Rest API}. 
+} +\examples{ +get_cafe_genetree_by_symbol("homo_sapiens", "BRCA2") +} diff --git a/man/get_compara_methods.Rd b/man/get_compara_methods.Rd new file mode 100755 index 0000000..3e22b65 --- /dev/null +++ b/man/get_compara_methods.Rd @@ -0,0 +1,38 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/ensembl-endpoints.R +\name{get_compara_methods} +\alias{get_compara_methods} +\title{Get list of compara methods} +\usage{ +get_compara_methods( + callback = "randomlygeneratedname", + class = NULL, + compara = NULL +) +} +\arguments{ +\item{callback}{(Optional) A string representing the name of the callback subroutine +for JSONP responses.} + +\item{class}{(Optional) A string specifying the class of the method to query for. +Regular expression patterns are supported (e.g., "GenomicAlign").} + +\item{compara}{(Optional) A string representing the name of the compara database +to use (e.g., "vertebrates").} +} +\value{ +A list of parsed JSON responses containing all available compara methods. + +See more about the implemented endpoint \code{\link[=get_compara_methods]{get_compara_methods()}} +on the following \href{https://rest.ensembl.org/documentation/info/compara_methods}{GET info/compara/methods} +from the official \href{https://rest.ensembl.org/}{Ensembl Rest API}. +} +\description{ +Retrieves a list of all compara analyses available (an analysis defines the type +of comparative data). Optional filtering by class or compara database can be applied. 
+} +\examples{ +get_compara_methods() +get_compara_methods(class = "GenomicAlign") +get_compara_methods(compara = "vertebrates", class = "GenomicAlign") +} diff --git a/man/get_compara_species_sets.Rd b/man/get_compara_species_sets.Rd new file mode 100755 index 0000000..778d75c --- /dev/null +++ b/man/get_compara_species_sets.Rd @@ -0,0 +1,34 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/ensembl-endpoints.R +\name{get_compara_species_sets} +\alias{get_compara_species_sets} +\title{Get collections of species analysed with a specified compara method} +\usage{ +get_compara_species_sets( + method, + callback = "randomlygeneratedname", + compara = NULL +) +} +\arguments{ +\item{method}{A string representing the compara method to filter by (e.g., "EPO").} + +\item{callback}{(Optional) A string representing the name of the callback subroutine for JSONP responses.} + +\item{compara}{(Optional) A string representing the name of the compara database to use (e.g., "vertebrates").} +} +\value{ +A list of parsed JSON responses containing all collections of species analysed with the specified compara method. + +See more about the implemented endpoint \code{\link[=get_compara_species_sets]{get_compara_species_sets()}} +on the following \href{https://rest.ensembl.org/documentation/info/compara_species_sets}{GET info/compara/species_sets/:method} +from the official \href{https://rest.ensembl.org/}{Ensembl Rest API}. +} +\description{ +Retrieves a list of all collections of species analysed with the specified compara method. +The compara method must be one of the methods returned by the \verb{/info/compara/methods} endpoint. 
+} +\examples{ +get_compara_species_sets("EPO") +get_compara_species_sets("EPO", compara = "vertebrates") +} diff --git a/man/get_comparas.Rd b/man/get_comparas.Rd new file mode 100755 index 0000000..13c0175 --- /dev/null +++ b/man/get_comparas.Rd @@ -0,0 +1,27 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/ensembl-endpoints.R +\name{get_comparas} +\alias{get_comparas} +\title{Get a list of all available comparative genomics databases and their data release} +\usage{ +get_comparas(callback = "randomlygeneratedname") +} +\arguments{ +\item{callback}{(Optional) A string representing the name of the callback subroutine +for JSONP responses.} +} +\value{ +A list of parsed JSON responses containing all available comparative genomics +databases and their data release. + +See more about the implemented endpoint \code{\link[=get_comparas]{get_comparas()}} +on the following \href{https://rest.ensembl.org/documentation/info/comparas}{GET info/comparas} +from the official \href{https://rest.ensembl.org/}{Ensembl Rest API}. +} +\description{ +Retrieves a list of all available comparative genomics databases and their data release. +This endpoint is deprecated, and users are advised to use the \verb{/info/genomes/division} endpoint instead. +} +\examples{ +get_comparas() +} diff --git a/man/get_consequence_types.Rd b/man/get_consequence_types.Rd new file mode 100755 index 0000000..49ff41b --- /dev/null +++ b/man/get_consequence_types.Rd @@ -0,0 +1,28 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/ensembl-endpoints.R +\name{get_consequence_types} +\alias{get_consequence_types} +\title{Get Variant Consequence Types} +\usage{ +get_consequence_types(callback = "randomlygeneratedname", rank = NULL) +} +\arguments{ +\item{callback}{(Optional) A string representing the name of the callback subroutine for JSONP responses.} + +\item{rank}{(Optional) A boolean (0 or 1) to include consequence ranking in the response. 
Default is 0.} +} +\value{ +A parsed JSON response containing the list of variant consequence types. + +See more about the implemented endpoint \code{\link[=get_consequence_types]{get_consequence_types()}} +on the following \href{https://rest.ensembl.org/documentation/info/variation_consequence_types}{GET info/variation/consequence_types} +from the official \href{https://rest.ensembl.org/}{Ensembl Rest API}. +} +\description{ +Retrieves a list of all variant consequence types available in Ensembl. +} +\examples{ +get_consequence_types() +get_consequence_types(rank = 1) +get_consequence_types(callback = "randomCallback") +} diff --git a/man/get_data.Rd b/man/get_data.Rd new file mode 100755 index 0000000..28de98c --- /dev/null +++ b/man/get_data.Rd @@ -0,0 +1,24 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/ensembl-endpoints.R +\name{get_data} +\alias{get_data} +\title{Get a list of available data releases on the Ensembl REST server} +\usage{ +get_data(callback = "randomlygeneratedname") +} +\arguments{ +\item{callback}{(Optional) A string representing the name of the callback subroutine for JSONP responses.} +} +\value{ +A list of parsed JSON responses containing the available data releases on the Ensembl REST server. + +See more about the implemented endpoint \code{\link[=get_data]{get_data()}} +on the following \href{https://rest.ensembl.org/documentation/info/data}{GET info/data} +from the official \href{https://rest.ensembl.org/}{Ensembl Rest API}. +} +\description{ +Retrieves a list of the data releases available on the Ensembl REST server. It may return more than one release if the server has an infrequent, non-standard configuration. 
+} +\examples{ +get_data() +} diff --git a/man/get_data_version2.Rd b/man/get_data_version2.Rd new file mode 100755 index 0000000..3024a9e --- /dev/null +++ b/man/get_data_version2.Rd @@ -0,0 +1,23 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/api-versioning.R +\name{get_data_version2} +\alias{get_data_version2} +\title{Get data version} +\source{ +\url{https://github.com/Ensembl/ensembl-rest/wiki/API-Versioning} +} +\usage{ +get_data_version2() +} +\value{ +A numeric value representing the data release version. +} +\description{ +Retrieve the version(s) of the Ensembl data that the REST API is accessing. +} +\examples{ +\dontrun{ +data_version <- get_data_version2() +print(data_version) +} +} diff --git a/man/get_divisions.Rd b/man/get_divisions.Rd old mode 100644 new mode 100755 diff --git a/man/get_eg_version.Rd b/man/get_eg_version.Rd new file mode 100755 index 0000000..66f671b --- /dev/null +++ b/man/get_eg_version.Rd @@ -0,0 +1,24 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/ensembl-endpoints.R +\name{get_eg_version} +\alias{get_eg_version} +\title{Get the Ensembl Genomes version of the databases backing the service} +\usage{ +get_eg_version(callback = "randomlygeneratedname") +} +\arguments{ +\item{callback}{(Optional) A string representing the name of the callback subroutine for JSONP responses.} +} +\value{ +A parsed JSON response containing the Ensembl Genomes version of the databases. + +See more about the implemented endpoint \code{\link[=get_eg_version]{get_eg_version()}} +on the following \href{https://rest.ensembl.org/documentation/info/eg_version}{GET info/eg_version} +from the official \href{https://rest.ensembl.org/}{Ensembl Rest API}. +} +\description{ +Retrieves the Ensembl Genomes version of the databases supporting the current service. 
+} +\examples{ +get_eg_version() +} diff --git a/man/get_external_dbs.Rd b/man/get_external_dbs.Rd new file mode 100755 index 0000000..49c2bcb --- /dev/null +++ b/man/get_external_dbs.Rd @@ -0,0 +1,40 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/ensembl-endpoints.R +\name{get_external_dbs} +\alias{get_external_dbs} +\title{Get external databases for a species} +\usage{ +get_external_dbs( + species, + callback = "randomlygeneratedname", + feature = NULL, + filter = NULL +) +} +\arguments{ +\item{species}{A string representing the species name or alias (e.g., "homo_sapiens").} + +\item{callback}{(Optional) A string representing the name of the callback subroutine +for JSONP responses.} + +\item{feature}{(Optional) A string representing the feature to filter external DB entries +(e.g., "dna_align_feature", "protein_align_feature", "unmapped_object", "xref", "seq_region_synonym").} + +\item{filter}{(Optional) A string to restrict external DB searches to a single source +or pattern (e.g., "HGNC", "GO\%").} +} +\value{ +A parsed JSON response containing a list of external sources associated +with the given species. + +See more about the implemented endpoint \code{\link[=get_external_dbs]{get_external_dbs()}} +on the following \href{https://rest.ensembl.org/documentation/info/external_dbs}{GET info/external_dbs/:species} +from the official \href{https://rest.ensembl.org/}{Ensembl Rest API}. +} +\description{ +Retrieves a list of all available external sources for a given species. 
+} +\examples{ +get_external_dbs("homo_sapiens") +get_external_dbs("homo_sapiens", feature = "xref", filter = "HGNC") +} diff --git a/man/get_genetree_by_id.Rd b/man/get_genetree_by_id.Rd new file mode 100755 index 0000000..56da720 --- /dev/null +++ b/man/get_genetree_by_id.Rd @@ -0,0 +1,26 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/ensembl-endpoints.R +\name{get_genetree_by_id} +\alias{get_genetree_by_id} +\title{Get gene tree by id} +\usage{ +get_genetree_by_id(id) +} +\arguments{ +\item{id}{A string representing the gene tree stable identifier.} +} +\value{ +A list of parsed JSON responses containing the gene tree for +the provided gene tree stable identifier. +} +\description{ +Retrieves a gene tree for a gene tree stable identifier +} +\note{ +See more about the implemented endpoint \code{\link[=get_genetree_by_id]{get_genetree_by_id()}} +on the following \href{https://rest.ensembl.org/documentation/info/genetree}{GET genetree/id/:id} +from the official \href{https://rest.ensembl.org/}{Ensembl Rest API}. +} +\examples{ +get_genetree_by_id("ENSGT00390000003602") +} diff --git a/man/get_genetree_by_species_id.Rd b/man/get_genetree_by_species_id.Rd new file mode 100755 index 0000000..5fc14ae --- /dev/null +++ b/man/get_genetree_by_species_id.Rd @@ -0,0 +1,30 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/ensembl-endpoints.R +\name{get_genetree_by_species_id} +\alias{get_genetree_by_species_id} +\title{Get gene tree by species id} +\usage{ +get_genetree_by_species_id(species, id) +} +\arguments{ +\item{species}{A string representing the species name (e.g., "homo_sapiens").} + +\item{id}{A string representing the gene, transcript, or translation stable +identifier.} +} +\value{ +A list of parsed JSON responses containing the gene tree +for the provided species and stable identifier. 
+} +\description{ +Retrieves the gene tree that contains the gene/transcript/translation +stable identifier in the given species +} +\note{ +See more about the implemented endpoint \code{\link[=get_genetree_by_species_id]{get_genetree_by_species_id()}} +on the following \href{https://rest.ensembl.org/documentation/info/genetree_species_member_id}{GET genetree/member/id/:species/:id} +from the official \href{https://rest.ensembl.org/}{Ensembl Rest API}. +} +\examples{ +get_genetree_by_species_id("homo_sapiens", "ENST00000380152") +} diff --git a/man/get_genetree_by_symbol.Rd b/man/get_genetree_by_symbol.Rd new file mode 100755 index 0000000..69f515c --- /dev/null +++ b/man/get_genetree_by_symbol.Rd @@ -0,0 +1,28 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/ensembl-endpoints.R +\name{get_genetree_by_symbol} +\alias{get_genetree_by_symbol} +\title{Get gene tree by symbol} +\usage{ +get_genetree_by_symbol(species, symbol) +} +\arguments{ +\item{species}{A string representing the species name (e.g., "homo_sapiens").} + +\item{symbol}{A string representing the gene symbol (e.g., "BRCA2").} +} +\value{ +A list of parsed JSON responses containing the gene tree +for the provided species and gene symbol. +} +\description{ +Retrieves the gene tree that contains the gene identified by a symbol +} +\note{ +See more about the implemented endpoint \code{\link[=get_genetree_by_symbol]{get_genetree_by_symbol()}} +on the following \href{https://rest.ensembl.org/documentation/info/genetree_member_symbol}{GET genetree/member/symbol/:species/:symbol} +from the official \href{https://rest.ensembl.org/}{Ensembl Rest API}. 
+} +\examples{ +get_genetree_by_symbol("homo_sapiens", "BRCA2") +} diff --git a/man/get_genome_info.Rd b/man/get_genome_info.Rd new file mode 100755 index 0000000..be4eed9 --- /dev/null +++ b/man/get_genome_info.Rd @@ -0,0 +1,34 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/ensembl-endpoints.R +\name{get_genome_info} +\alias{get_genome_info} +\title{Get genome information} +\usage{ +get_genome_info(name, callback = "randomlygeneratedname", expand = NULL) +} +\arguments{ +\item{name}{(Required) A string representing the production name of the genome +(e.g., "arabidopsis_thaliana").} + +\item{callback}{(Optional) A string representing the name of the callback subroutine +for JSONP responses.} + +\item{expand}{(Optional) A boolean value (0 or 1). If set to 1, expands +the information to include details of sequences (can be very large). +Default is NULL.} +} +\value{ +A parsed JSON response containing information about the specified genome. + +See more about the implemented endpoint \code{\link[=get_genome_info]{get_genome_info()}} +on the following \href{https://rest.ensembl.org/documentation/info/info_genome}{GET info/genomes/:genome_name} +from the official \href{https://rest.ensembl.org/}{Ensembl Rest API}. +} +\description{ +Retrieves detailed information about a given genome based on its production name. 
+} +\examples{ +get_genome_info(name = "arabidopsis_thaliana") +get_genome_info(name = "arabidopsis_thaliana", expand = 1) +get_genome_info(name = "arabidopsis_thaliana", callback = "randomlygeneratedname") +} diff --git a/man/get_genome_info_by_accession.Rd b/man/get_genome_info_by_accession.Rd new file mode 100755 index 0000000..2a0e293 --- /dev/null +++ b/man/get_genome_info_by_accession.Rd @@ -0,0 +1,34 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/ensembl-endpoints.R +\name{get_genome_info_by_accession} +\alias{get_genome_info_by_accession} +\title{Get genome information by INSDC accession} +\usage{ +get_genome_info_by_accession( + accession, + callback = "randomlygeneratedname", + expand = NULL +) +} +\arguments{ +\item{accession}{(Required) A string representing the INSDC sequence accession (optionally versioned), e.g., "U00096".} + +\item{callback}{(Optional) A string representing the name of the callback subroutine for JSONP responses.} + +\item{expand}{(Optional) A boolean value (0 or 1). If set to 1, expands the information to include details of sequences (can be very large).} +} +\value{ +A parsed JSON response containing information about genomes with the specified INSDC accession. + +See more about the implemented endpoint \code{\link[=get_genome_info_by_accession]{get_genome_info_by_accession()}} +on the following \href{https://rest.ensembl.org/documentation/info/info_genomes_accession}{GET info/genomes/accession/:accession} +from the official \href{https://rest.ensembl.org/}{Ensembl Rest API}. +} +\description{ +Retrieves detailed information about genomes containing a specified INSDC accession. 
+} +\examples{ +get_genome_info_by_accession(accession = "U00096") +get_genome_info_by_accession(accession = "U00096", expand = 1) +get_genome_info_by_accession(accession = "U00096", callback = "randomlygeneratedname") +} diff --git a/man/get_genome_info_by_assembly.Rd b/man/get_genome_info_by_assembly.Rd new file mode 100755 index 0000000..8ee8992 --- /dev/null +++ b/man/get_genome_info_by_assembly.Rd @@ -0,0 +1,36 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/ensembl-endpoints.R +\name{get_genome_info_by_assembly} +\alias{get_genome_info_by_assembly} +\title{Get genome information by assembly ID} +\usage{ +get_genome_info_by_assembly( + assembly_id, + callback = "randomlygeneratedname", + expand = NULL +) +} +\arguments{ +\item{assembly_id}{(Required) A string representing the INSDC assembly ID (optionally versioned, e.g., "GCA_902167145.1").} + +\item{callback}{(Optional) A string representing the name of the callback subroutine for JSONP responses.} + +\item{expand}{(Optional) A boolean value (0 or 1). If set to 1, expands the information +to include details of sequences (can be very large).} +} +\value{ +A parsed JSON response containing information about the genome associated +with the specified assembly ID. + +See more about the implemented endpoint \code{\link[=get_genome_info_by_assembly]{get_genome_info_by_assembly()}} +on the following \href{https://rest.ensembl.org/documentation/info/info_genomes_assembly}{GET info/genomes/assembly/:assembly_id} +from the official \href{https://rest.ensembl.org/}{Ensembl Rest API}. +} +\description{ +Retrieves information about a genome associated with a specified assembly ID. 
+} +\examples{ +get_genome_info_by_assembly(assembly_id = "GCA_902167145.1") +get_genome_info_by_assembly(assembly_id = "GCA_902167145.1", expand = 1) +get_genome_info_by_assembly(assembly_id = "GCA_902167145.1", callback = "randomlygeneratedname") +} diff --git a/man/get_genome_info_by_division.Rd b/man/get_genome_info_by_division.Rd new file mode 100755 index 0000000..a9a4cf6 --- /dev/null +++ b/man/get_genome_info_by_division.Rd @@ -0,0 +1,35 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/ensembl-endpoints.R +\name{get_genome_info_by_division} +\alias{get_genome_info_by_division} +\title{Get genome information for a specific division} +\usage{ +get_genome_info_by_division( + division_name, + callback = "randomlygeneratedname", + expand = NULL +) +} +\arguments{ +\item{division_name}{(Required) A string representing the name of the division (e.g., "EnsemblPlants").} + +\item{callback}{(Optional) A string representing the name of the callback subroutine for JSONP responses.} + +\item{expand}{(Optional) A boolean value (0 or 1). If set to 1, expands the information to include details of sequences (can be very large).} +} +\value{ +A parsed JSON response containing information about genomes in the specified division. + +See more about the implemented endpoint \code{\link[=get_genome_info_by_division]{get_genome_info_by_division()}} +on the following \href{https://rest.ensembl.org/documentation/info/info_genomes_division}{GET info/genomes/division/:division_name} +from the official \href{https://rest.ensembl.org/}{Ensembl Rest API}. +} +\description{ +Retrieves information about all genomes in a given division. +Note: The response may be very large for divisions like Ensembl Bacteria. 
+} +\examples{ +get_genome_info_by_division(division_name = "EnsemblPlants") +get_genome_info_by_division(division_name = "EnsemblPlants", expand = 1) +get_genome_info_by_division(division_name = "EnsemblPlants", callback = "randomlygeneratedname") +} diff --git a/man/get_genome_info_by_taxonomy.Rd b/man/get_genome_info_by_taxonomy.Rd new file mode 100755 index 0000000..cdff624 --- /dev/null +++ b/man/get_genome_info_by_taxonomy.Rd @@ -0,0 +1,35 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/ensembl-endpoints.R +\name{get_genome_info_by_taxonomy} +\alias{get_genome_info_by_taxonomy} +\title{Get genome information by taxonomy node} +\usage{ +get_genome_info_by_taxonomy( + taxon_name, + callback = "randomlygeneratedname", + expand = NULL +) +} +\arguments{ +\item{taxon_name}{(Required) A string representing the taxon name or NCBI taxonomy ID (e.g., "Homo sapiens").} + +\item{callback}{(Optional) A string representing the name of the callback subroutine for JSONP responses.} + +\item{expand}{(Optional) A boolean value (0 or 1). If set to 1, expands the information +to include details of sequences (can be very large).} +} +\value{ +A parsed JSON response containing information about genomes beneath the specified taxonomy node. + +See more about the implemented endpoint \code{\link[=get_genome_info_by_taxonomy]{get_genome_info_by_taxonomy()}} +on the following \href{https://rest.ensembl.org/documentation/info/info_genomes_taxonomy}{GET info/genomes/taxonomy/:taxon_name} +from the official \href{https://rest.ensembl.org/}{Ensembl Rest API}. +} +\description{ +Retrieves information about all genomes beneath a given node of the taxonomy. 
+} +\examples{ +get_genome_info_by_taxonomy(taxon_name = "Homo sapiens") +get_genome_info_by_taxonomy(taxon_name = "Homo sapiens", expand = 1) +get_genome_info_by_taxonomy(taxon_name = "Homo sapiens", callback = "randomlygeneratedname") +} diff --git a/man/get_homology_by_species_id.Rd b/man/get_homology_by_species_id.Rd new file mode 100755 index 0000000..903a36e --- /dev/null +++ b/man/get_homology_by_species_id.Rd @@ -0,0 +1,27 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/ensembl-endpoints.R +\name{get_homology_by_species_id} +\alias{get_homology_by_species_id} +\title{Get homologs by species id} +\usage{ +get_homology_by_species_id(species, id) +} +\arguments{ +\item{species}{A string representing the species name (e.g., "homo_sapiens").} + +\item{id}{A string representing the Ensembl gene ID.} +} +\value{ +A list of parsed JSON responses containing homology information +for the provided species and Ensembl gene ID. + +See more about the implemented endpoint \code{\link[=get_homology_by_species_id]{get_homology_by_species_id()}} +on the following \href{https://rest.ensembl.org/documentation/info/homology_species_gene_id}{GET homology/id/:species/:id} +from the official \href{https://rest.ensembl.org/}{Ensembl Rest API}. 
+} +\description{ +Retrieves homology information (orthologs) by species and Ensembl gene ID +} +\examples{ +get_homology_by_species_id("homo_sapiens", "ENSG00000157764") +} diff --git a/man/get_homology_by_symbol.Rd b/man/get_homology_by_symbol.Rd new file mode 100755 index 0000000..b13f9b2 --- /dev/null +++ b/man/get_homology_by_symbol.Rd @@ -0,0 +1,27 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/ensembl-endpoints.R +\name{get_homology_by_symbol} +\alias{get_homology_by_symbol} +\title{Get homologs by symbol} +\usage{ +get_homology_by_symbol(species, symbol) +} +\arguments{ +\item{species}{A string representing the species name (e.g., "homo_sapiens").} + +\item{symbol}{A string representing the gene symbol (e.g., "BRCA2").} +} +\value{ +A list of parsed JSON responses containing homology information +for the provided species and gene symbol. + +See more about the implemented endpoint \code{\link[=get_homology_by_symbol]{get_homology_by_symbol()}} +on the following \href{https://rest.ensembl.org/documentation/info/homology_symbol}{GET homology/symbol/:species/:symbol} +from the official \href{https://rest.ensembl.org/}{Ensembl Rest API}. 
+} +\description{ +Retrieves homology information (orthologs) by species and symbol +} +\examples{ +get_homology_by_symbol("homo_sapiens", "BRCA2") +} diff --git a/man/get_id.Rd b/man/get_id.Rd index 4829b89..9b256d8 100644 --- a/man/get_id.Rd +++ b/man/get_id.Rd @@ -23,17 +23,17 @@ responses' status.} \value{ A \code{\link[tibble]{tibble}} of 9 variables: \describe{ - \item{\code{id}}{Ensembl identifier.} - \item{\code{id_latest}}{Ensembl identifier including the version suffix.} - \item{\code{type}}{Entity type: gene (\code{'Gene'}), exon (\code{'Exon'}), - transcript (\code{'Transcript'}), and protein (\code{'Translation'}).} - \item{\code{id_version}}{Ensembl identifier version, indicates how many - times that entity has changed during its time in Ensembl.} - \item{\code{release}}{Ensembl release version.} - \item{\code{is_current}}{Is this the latest identifier for the represented entity.} - \item{\code{genome_assembly_name}}{Code name of the genome assembly.} - \item{\code{peptide}}{TODO} - \item{\code{possible_replacement}}{TODO} +\item{\code{id}}{Ensembl identifier.} +\item{\code{id_latest}}{Ensembl identifier including the version suffix.} +\item{\code{type}}{Entity type: gene (\code{'Gene'}), exon (\code{'Exon'}), +transcript (\code{'Transcript'}), and protein (\code{'Translation'}).} +\item{\code{id_version}}{Ensembl identifier version, indicates how many +times that entity has changed during its time in Ensembl.} +\item{\code{release}}{Ensembl release version.} +\item{\code{is_current}}{Is this the latest identifier for the represented entity.} +\item{\code{genome_assembly_name}}{Code name of the genome assembly.} +\item{\code{peptide}}{TODO} +\item{\code{possible_replacement}}{TODO} } } \description{ diff --git a/man/get_ld_by_variant.Rd b/man/get_ld_by_variant.Rd new file mode 100755 index 0000000..47fbfb7 --- /dev/null +++ b/man/get_ld_by_variant.Rd @@ -0,0 +1,70 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in 
R/ensembl-endpoints.R +\name{get_ld_by_variant} +\alias{get_ld_by_variant} +\title{Get Linkage Disequilibrium (LD) values} +\usage{ +get_ld_by_variant( + species, + id, + population_name, + attribs = NULL, + callback = "randomlygeneratedname", + d_prime = NULL, + r2 = NULL, + window_size = 500 +) +} +\arguments{ +\item{species}{A string representing the species name +or alias.} + +\item{id}{A string representing the variant ID +(e.g., rs56116432).} + +\item{population_name}{A string representing the population +for which to compute LD. +Use GET /info/variation/populations/:species?filter=LD to +retrieve a list of all populations with LD data. This endpoint is already +implemented. Please use \code{\link[=get_species_populations]{get_species_populations()}} with filter "LD" +to retrieve valid populations.} + +\item{attribs}{\emph{(Optional)} A boolean indicating whether +to add variation attributes such as chromosome, start, end, strand, +consequence type, and clinical significance. Default is NULL.} + +\item{callback}{\emph{(Optional)} A string representing the name of the +callback subroutine for JSONP responses.} + +\item{d_prime}{\emph{(Optional)} A float value (0-1) to filter results +by D' (linkage disequilibrium measure). Only returns pairs with D' ≥ +the specified value. Default is NULL.} + +\item{r2}{\emph{(Optional)} A float value (0-1) to filter results by +r² (correlation coefficient). Only returns pairs with r² ≥ the specified +value. Default is NULL.} + +\item{window_size}{\emph{(Optional)} An integer specifying the window size +in kb (max 500). Defaults to 500 kb.} +} +\value{ +A list of parsed JSON responses containing the LD values for +the provided variant. +} +\description{ +Computes and retrieves LD values between a given variant and all other +variants within a window of up to 500 kb in the specified population. 
+} +\note{ +See more about the implemented endpoint \code{\link[=get_ld_by_variant]{get_ld_by_variant()}} +on the following \href{https://rest.ensembl.org/documentation/info/ld}{GET ld/:species/:id/:population_name} +from the official \href{https://rest.ensembl.org/}{Ensembl Rest API}. +} +\examples{ +get_ld_by_variant("homo_sapiens", "rs56116432", "1000GENOMES:phase_3:KHV") +get_ld_by_variant("homo_sapiens", "rs56116432", "1000GENOMES:phase_3:KHV", + d_prime = 0.8, r2 = 0.85) +get_ld_by_variant("homo_sapiens", "rs56116432", "1000GENOMES:phase_3:KHV", + window_size = 250) + +} diff --git a/man/get_ld_values_by_region.Rd b/man/get_ld_values_by_region.Rd new file mode 100755 index 0000000..8b56604 --- /dev/null +++ b/man/get_ld_values_by_region.Rd @@ -0,0 +1,65 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/ensembl-endpoints.R +\name{get_ld_values_by_region} +\alias{get_ld_values_by_region} +\title{Get Linkage Disequilibrium (LD) Values for a Genomic Region} +\usage{ +get_ld_values_by_region( + species, + region, + population_name, + callback = "randomlygeneratedname", + d_prime = NULL, + r2 = NULL +) +} +\arguments{ +\item{species}{A string representing the species name or alias +(e.g., "homo_sapiens").} + +\item{region}{A string defining the genomic region in the format +"chr:start..end". +The maximum region size allowed is 500 kb. If the region overlaps +the MHC region, the maximum is 10 kb.} + +\item{population_name}{A string representing the population for which +LD should be computed. +Use \code{\link[=get_species_populations]{get_species_populations()}} with filter "LD" to retrieve valid +populations.} + +\item{callback}{\emph{(Optional)} A string representing the name of the +callback subroutine for JSONP responses.} + +\item{d_prime}{\emph{(Optional)} A float value (0-1) to filter results +by D' (linkage disequilibrium measure). 
+Only returns pairs with D' ≥ the specified value.} + +\item{r2}{\emph{(Optional)} A float value (0-1) to filter results by r² +(correlation coefficient). +Only returns pairs with r² ≥ the specified value.} +} +\value{ +A parsed JSON response containing the LD values for all variant +pairs within the specified region. + +See more about the implemented endpoint \code{\link[=get_ld_values_by_region]{get_ld_values_by_region()}} +on the following \href{https://rest.ensembl.org/documentation/info/ld_region}{GET ld/:species/region/:region/:population_name} +from the official \href{https://rest.ensembl.org/}{Ensembl Rest API}. +} +\description{ +Computes and retrieves LD values between all pairs of variants within +a defined genomic region. +} +\examples{ +get_ld_values_by_region(species = "homo_sapiens", + region = "6:25837556..25843455", + population_name = "1000GENOMES:phase_3:KHV") +get_ld_values_by_region(species = "homo_sapiens", + region = "6:25837556..25843455", + population_name = "1000GENOMES:phase_3:KHV", + r2 = 0.85) +get_ld_values_by_region(species = "homo_sapiens", + region = "6:25837556..25843455", + population_name = "1000GENOMES:phase_3:KHV", + d_prime = 1.0) +} diff --git a/man/get_ld_variants_by_window.Rd b/man/get_ld_variants_by_window.Rd index 2d53f63..4b3c7c6 100644 --- a/man/get_ld_variants_by_window.Rd +++ b/man/get_ld_variants_by_window.Rd @@ -87,7 +87,7 @@ populations for a species.} \item{r_squared}{\eqn{r^2} is a measure of linkage disequilibrium. \code{r_squared} defines a cut-off threshold: only variants whose \eqn{r^2 -\ge }\code{r_squared} are returned. The lower bound for \code{r_squared} is + \ge }\code{r_squared} are returned. 
The lower bound for \code{r_squared} is \code{0.05}, not \code{0}; the upper bound is \code{1}.} \item{verbose}{Whether to be verbose about the http requests and respective @@ -116,15 +116,15 @@ function \code{get_ld_variants_by_range()}.} \value{ A \code{\link[tibble]{tibble}} of 6 variables: \describe{ - \item{\code{species_name}}{Ensembl species name: this is the name used internally - by Ensembl to uniquely identify a species by name. It is the scientific - name but formatted without capitalisation and spacing converted with an - underscore, e.g., \code{'homo_sapiens'}.} - \item{\code{population}}{Population for which to compute linkage disequilibrium.} - \item{\code{variant_id1}}{First variant identifier.} - \item{\code{variant_id2}}{Second variant identifier.} - \item{\code{d_prime}}{\eqn{D'} between the two variants.} - \item{\code{r_squared}}{\eqn{r^2} between the two variants.} +\item{\code{species_name}}{Ensembl species name: this is the name used internally +by Ensembl to uniquely identify a species by name. 
It is the scientific +name but formatted without capitalisation and spacing converted with an +underscore, e.g., \code{'homo_sapiens'}.} +\item{\code{population}}{Population for which to compute linkage disequilibrium.} +\item{\code{variant_id1}}{First variant identifier.} +\item{\code{variant_id2}}{Second variant identifier.} +\item{\code{d_prime}}{\eqn{D'} between the two variants.} +\item{\code{r_squared}}{\eqn{r^2} between the two variants.} } } \description{ diff --git a/man/get_pairwise_ld_values.Rd b/man/get_pairwise_ld_values.Rd new file mode 100755 index 0000000..cdbb3d7 --- /dev/null +++ b/man/get_pairwise_ld_values.Rd @@ -0,0 +1,61 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/ensembl-endpoints.R +\name{get_pairwise_ld_values} +\alias{get_pairwise_ld_values} +\title{Get Pairwise Linkage Disequilibrium (LD) Values} +\usage{ +get_pairwise_ld_values( + species, + id1, + id2, + callback = "randomlygeneratedname", + d_prime = NULL, + r2 = NULL, + population_name = NULL +) +} +\arguments{ +\item{species}{A string representing the species name or +alias (e.g., "homo_sapiens").} + +\item{id1}{A string representing the first variant ID +(e.g., "rs6792369").} + +\item{id2}{A string representing the second variant ID +(e.g., "rs1042779").} + +\item{callback}{\emph{(Optional)} A string representing the name of +the callback +subroutine for JSONP responses.} + +\item{d_prime}{\emph{(Optional)} A float value (0-1) to filter results +by D' (linkage disequilibrium measure). +Only returns pairs with D' ≥ the specified value.} + +\item{r2}{\emph{(Optional)} A float value (0-1) to filter results by r² +(correlation coefficient). Only returns pairs with r² ≥ the specified value.} + +\item{population_name}{\emph{(Optional)} A string representing the population +for which to compute LD. 
Use \code{\link[=get_species_populations]{get_species_populations()}} with filter "LD" +to retrieve valid populations.} +} +\value{ +A parsed JSON response containing the LD values for the specified +variant pair. + +See more about the implemented endpoint \code{\link[=get_pairwise_ld_values]{get_pairwise_ld_values()}} +on the following \href{https://rest.ensembl.org/documentation/info/ld_pairwise}{GET ld/:species/pairwise/:id1/:id2} +from the official \href{https://rest.ensembl.org/}{Ensembl Rest API}. +} +\description{ +Computes and retrieves LD values between two given variants. +} +\examples{ +get_pairwise_ld_values(species = "homo_sapiens", id1 = "rs6792369", + id2 = "rs1042779") +get_pairwise_ld_values(species = "homo_sapiens", id1 = "rs6792369", + id2 = "rs1042779", r2 = 0.85) +get_pairwise_ld_values(species = "homo_sapiens", id1 = "rs6792369", + id2 = "rs1042779", d_prime = 1.0, + population_name = "1000GENOMES:phase_3:KHV") +} diff --git a/man/get_population_individuals.Rd b/man/get_population_individuals.Rd new file mode 100755 index 0000000..b75fc44 --- /dev/null +++ b/man/get_population_individuals.Rd @@ -0,0 +1,38 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/ensembl-endpoints.R +\name{get_population_individuals} +\alias{get_population_individuals} +\title{Get Population Individuals} +\usage{ +get_population_individuals( + species, + population_name, + callback = "randomlygeneratedname" +) +} +\arguments{ +\item{species}{(Required) A string representing the species name or alias (e.g., "human").} + +\item{population_name}{(Required) A string representing the name of the population (e.g., "1000GENOMES:phase_3:ASW").} + +\item{callback}{(Optional) A string representing the name of the callback subroutine for JSONP responses.} +} +\value{ +A parsed JSON response containing the list of individuals for the specified population. 
+ +See more about the implemented endpoint \code{\link[=get_population_individuals]{get_population_individuals()}} +on the following \href{https://rest.ensembl.org/documentation/info/variation_population_name}{GET info/variation/populations/:species/:population_name} +from the official \href{https://rest.ensembl.org/}{Ensembl Rest API}. +} +\description{ +Retrieves a list of all individuals for a specified population from a species in Ensembl. +} +\examples{ +get_population_individuals(species = "human", population_name = "1000GENOMES:phase_3:ASW") +get_population_individuals(species = "homo_sapiens", population_name = "1000GENOMES:phase_3:YRI") +get_population_individuals( + species = "human", + population_name = "1000GENOMES:phase_3:CEU", + callback = "randomCallback" +) +} diff --git a/man/get_region_info.Rd b/man/get_region_info.Rd new file mode 100755 index 0000000..ca8a0d3 --- /dev/null +++ b/man/get_region_info.Rd @@ -0,0 +1,45 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/ensembl-endpoints.R +\name{get_region_info} +\alias{get_region_info} +\title{Get information about a specific toplevel sequence region for a species} +\usage{ +get_region_info( + species, + region_name, + bands = 0, + synonyms = 0, + callback = "randomlygeneratedname" +) +} +\arguments{ +\item{species}{A string representing the species name or alias (e.g., "homo_sapiens").} + +\item{region_name}{A string representing the name of the toplevel sequence region (e.g., "X").} + +\item{bands}{(Optional) A boolean (0 or 1) indicating whether to include +karyotype band information. Default is 0.} + +\item{synonyms}{(Optional) A boolean (0 or 1) indicating whether to include +information about known synonyms. Default is 0.} + +\item{callback}{(Optional) A string representing the name of the callback +subroutine for JSONP responses.} +} +\value{ +A list of parsed JSON responses containing information about +the specified sequence region for the given species. 
+ +See more about the implemented endpoint \code{\link[=get_region_info]{get_region_info()}} +on the following \href{https://rest.ensembl.org/documentation/info/assembly_stats}{GET info/assembly/:species/:region_name} +from the official \href{https://rest.ensembl.org/}{Ensembl Rest API}. +} +\description{ +Retrieves information about the specified toplevel sequence region for +a given species, with optional details on karyotype bands and synonyms. +} +\examples{ +get_region_info("homo_sapiens", "X") +get_region_info("homo_sapiens", "X", bands = 1) +get_region_info("homo_sapiens", "X", synonyms = 1, callback = "randomlygeneratedname") +} diff --git a/man/get_rest_version.Rd b/man/get_rest_version.Rd old mode 100644 new mode 100755 diff --git a/man/get_rest_version2.Rd b/man/get_rest_version2.Rd new file mode 100755 index 0000000..d8e0224 --- /dev/null +++ b/man/get_rest_version2.Rd @@ -0,0 +1,25 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/api-versioning.R +\name{get_rest_version2} +\alias{get_rest_version2} +\title{Get REST API version} +\source{ +\url{https://github.com/Ensembl/ensembl-rest/wiki/API-Versioning} +} +\usage{ +get_rest_version2() +} +\value{ +A numeric value representing the REST API version components: +\code{major}, \code{minor}, and \code{point}. +} +\description{ +Retrieve the version of the Ensembl REST API currently in use. +The version format is \code{major.minor.point}. 
+} +\examples{ +\dontrun{ +rest_version <- get_rest_version2() +print(rest_version) +} +} diff --git a/man/get_software_version.Rd b/man/get_software_version.Rd old mode 100644 new mode 100755 diff --git a/man/get_software_version2.Rd b/man/get_software_version2.Rd new file mode 100755 index 0000000..ef746cc --- /dev/null +++ b/man/get_software_version2.Rd @@ -0,0 +1,23 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/api-versioning.R +\name{get_software_version2} +\alias{get_software_version2} +\title{Get API software version} +\source{ +\url{https://github.com/Ensembl/ensembl-rest/wiki/API-Versioning} +} +\usage{ +get_software_version2() +} +\value{ +A numeric value representing the software version. +} +\description{ +Retrieve the version of the Ensembl software the REST API is using. +} +\examples{ +\dontrun{ +software_version <- get_software_version2() +print(software_version) +} +} diff --git a/man/get_species.Rd b/man/get_species.Rd index ca33859..c0fde1a 100644 --- a/man/get_species.Rd +++ b/man/get_species.Rd @@ -27,26 +27,26 @@ responses' status.} \value{ A \code{\link[tibble]{tibble}} of 12 variables: \describe{ - \item{division}{Ensembl division: \code{"EnsemblVertebrates"}, - \code{"EnsemblMetazoa"}, \code{"EnsemblPlants"}, \code{"EnsemblProtists"}, - \code{"EnsemblFungi"} or \code{"EnsemblBacteria"}.} - \item{taxon_id}{NCBI taxon identifier.} - \item{species_name}{Ensembl species name: this is the name used internally - by Ensembl to uniquely identify a species by name. 
It is the scientific - name but formatted without capitalisation and spacing converted with an - underscore, e.g., \code{'homo_sapiens'}.} - \item{species_display_name}{Species display name: the name used for display - on Ensembl website.} - \item{species_common_name}{Species common name.} - \item{release}{Ensembl release version.} - \item{genome_assembly_name}{Code name of the genome assembly.} - \item{genbank_assembly_accession}{Genbank assembly accession identifier.} - \item{strain}{Species strain.} - \item{strain_collection}{Species strain collection.} - \item{species_aliases}{Other names or acronyms used to refer to the - species. Note that this column is of the list type.} - \item{groups}{Ensembl databases for which data exists for this species. - Note that this column is of the list type.} +\item{division}{Ensembl division: \code{"EnsemblVertebrates"}, +\code{"EnsemblMetazoa"}, \code{"EnsemblPlants"}, \code{"EnsemblProtists"}, +\code{"EnsemblFungi"} or \code{"EnsemblBacteria"}.} +\item{taxon_id}{NCBI taxon identifier.} +\item{species_name}{Ensembl species name: this is the name used internally +by Ensembl to uniquely identify a species by name. It is the scientific +name but formatted without capitalisation and spacing converted with an +underscore, e.g., \code{'homo_sapiens'}.} +\item{species_display_name}{Species display name: the name used for display +on Ensembl website.} +\item{species_common_name}{Species common name.} +\item{release}{Ensembl release version.} +\item{genome_assembly_name}{Code name of the genome assembly.} +\item{genbank_assembly_accession}{Genbank assembly accession identifier.} +\item{strain}{Species strain.} +\item{strain_collection}{Species strain collection.} +\item{species_aliases}{Other names or acronyms used to refer to the +species. Note that this column is of the list type.} +\item{groups}{Ensembl databases for which data exists for this species. 
+Note that this column is of the list type.} } } \description{ diff --git a/man/get_species_info.Rd b/man/get_species_info.Rd new file mode 100755 index 0000000..4e8cf6a --- /dev/null +++ b/man/get_species_info.Rd @@ -0,0 +1,39 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/ensembl-endpoints.R +\name{get_species_info} +\alias{get_species_info} +\title{Get Species Information} +\usage{ +get_species_info( + callback = "randomlygeneratedname", + division = "EnsemblVertebrates", + hide_strain_info = 0, + strain_collection = NULL +) +} +\arguments{ +\item{callback}{(Optional) A string representing the name of the callback subroutine for JSONP responses.} + +\item{division}{(Optional) A string to filter by Ensembl or Ensembl Genomes division (default is "EnsemblVertebrates").} + +\item{hide_strain_info}{(Optional) A boolean flag to show/hide strain and +strain_collection information (default is 0, which shows strain info).} + +\item{strain_collection}{(Optional) A string to filter by strain collection (e.g., "mouse").} +} +\value{ +A parsed JSON response containing species information. + +See more about the implemented endpoint \code{\link[=get_species_info]{get_species_info()}} +on the following \href{https://rest.ensembl.org/documentation/info/species}{GET info/species} +from the official \href{https://rest.ensembl.org/}{Ensembl Rest API}. +} +\description{ +Retrieves a list of all available species, their aliases, available adaptor groups, and data release. 
+} +\examples{ +get_species_info() +get_species_info(division = "EnsemblPlants") +get_species_info(hide_strain_info = 1) +get_species_info(strain_collection = "mouse") +} diff --git a/man/get_species_populations.Rd b/man/get_species_populations.Rd new file mode 100755 index 0000000..c565c2d --- /dev/null +++ b/man/get_species_populations.Rd @@ -0,0 +1,39 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/ensembl-endpoints.R +\name{get_species_populations} +\alias{get_species_populations} +\title{Get Populations for a Species} +\usage{ +get_species_populations( + species, + callback = "randomlygeneratedname", + filter = NULL +) +} +\arguments{ +\item{species}{(Required) A string representing the species name or alias (e.g., "homo_sapiens").} + +\item{callback}{(Optional) A string representing the name of the callback subroutine for JSONP responses.} + +\item{filter}{(Optional) A string to restrict populations returned +(e.g., "LD" to filter populations with linkage disequilibrium data).} +} +\value{ +A parsed JSON response containing the list of populations for the specified species. + +See more about the implemented endpoint \code{\link[=get_species_populations]{get_species_populations()}} +on the following \href{https://rest.ensembl.org/documentation/info/variation_populations}{GET info/variation/populations/:species} +from the official \href{https://rest.ensembl.org/}{Ensembl Rest API}. +} +\description{ +Retrieves a list of all populations for a specified species in Ensembl. 
+} +\examples{ +get_species_populations(species = "homo_sapiens") +get_species_populations(species = "human", filter = "LD") +get_species_populations( + species = "homo_sapiens", + callback = "randomlygeneratedname", + filter = "LD" +) +} diff --git a/man/get_variation_sources.Rd b/man/get_variation_sources.Rd old mode 100644 new mode 100755 diff --git a/man/get_versioning.Rd b/man/get_versioning.Rd index 5907d21..e9ef3ff 100644 --- a/man/get_versioning.Rd +++ b/man/get_versioning.Rd @@ -13,7 +13,7 @@ get_versioning(verbose = FALSE, warnings = TRUE) } \value{ A named list of three elements: \code{data}, \code{software} and - \code{rest}. +\code{rest}. } \description{ This function gets the versions of the different entities involved in the diff --git a/man/get_versioning2.Rd b/man/get_versioning2.Rd new file mode 100755 index 0000000..5da0871 --- /dev/null +++ b/man/get_versioning2.Rd @@ -0,0 +1,29 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/api-versioning.R +\name{get_versioning2} +\alias{get_versioning2} +\title{Get Ensembl REST versions} +\usage{ +get_versioning2() +} +\value{ +A named list of three elements: \code{data}, \code{software} and +\code{rest}. +} +\description{ +Retrieve the versions of the different entities involved in the +REST API requests. When accessing the Ensembl REST API, you are actually +accessing three interconnected entities: +\itemize{ +\item Ensembl databases (\code{data}). +\item Perl API (\code{software}). +\item REST API (\code{rest}). +} +\figure{ensembl_api_versioning_wo_fonts.svg} +} +\examples{ +# Get the versions of the different entities involved in the REST API +# requests. 
+get_versioning2() + +} diff --git a/man/get_xrefs_by_id.Rd b/man/get_xrefs_by_id.Rd new file mode 100755 index 0000000..cfb1285 --- /dev/null +++ b/man/get_xrefs_by_id.Rd @@ -0,0 +1,26 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/ensembl-endpoints.R +\name{get_xrefs_by_id} +\alias{get_xrefs_by_id} +\title{Get external linked references by id} +\usage{ +get_xrefs_by_id(id) +} +\arguments{ +\item{id}{A string representing the Ensembl Identifier (e.g., "ENSG00000157764").} +} +\value{ +A list of parsed JSON responses containing external references +for the provided Ensembl identifier. + +See more about the implemented endpoint \code{\link[=get_xrefs_by_id]{get_xrefs_by_id()}} +on the following \href{https://rest.ensembl.org/documentation/info/xref_id}{GET xrefs/id/:id} +from the official \href{https://rest.ensembl.org/}{Ensembl Rest API}. +} +\description{ +Performs lookups of Ensembl Identifiers and retrieves their external +references in other databases. +} +\examples{ +get_xrefs_by_id("ENSG00000157764") +} diff --git a/man/get_xrefs_by_name.Rd b/man/get_xrefs_by_name.Rd new file mode 100755 index 0000000..360e3e5 --- /dev/null +++ b/man/get_xrefs_by_name.Rd @@ -0,0 +1,29 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/ensembl-endpoints.R +\name{get_xrefs_by_name} +\alias{get_xrefs_by_name} +\title{Get external linked references by name} +\usage{ +get_xrefs_by_name(species, name) +} +\arguments{ +\item{species}{A string representing the species name (e.g., "homo_sapiens").} + +\item{name}{A string representing the primary accession or display label +of the external reference.} +} +\value{ +A list of parsed JSON responses containing information about +the provided external reference.
+ +See more about the implemented endpoint \code{\link[=get_xrefs_by_name]{get_xrefs_by_name()}} +on the following \href{https://rest.ensembl.org/documentation/info/xref_name}{GET xrefs/name/:species/:name} +from the official \href{https://rest.ensembl.org/}{Ensembl Rest API}. +} +\description{ +Performs a lookup based upon the primary accession or display label +of an external reference +} +\examples{ +get_xrefs_by_name("homo_sapiens", "P38398") +} diff --git a/man/get_xrefs_by_symbol.Rd b/man/get_xrefs_by_symbol.Rd new file mode 100755 index 0000000..82ef8ec --- /dev/null +++ b/man/get_xrefs_by_symbol.Rd @@ -0,0 +1,33 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/ensembl-endpoints.R +\name{get_xrefs_by_symbol} +\alias{get_xrefs_by_symbol} +\title{Get external linked references by symbol} +\usage{ +get_xrefs_by_symbol(species, symbol) +} +\arguments{ +\item{species}{A string representing the species name (e.g., "homo_sapiens").} + +\item{symbol}{A string representing the external symbol (e.g., "BRCA2").} +} +\value{ +A list of parsed JSON responses containing Ensembl objects linked +to the provided external symbol. + +See more about the implemented endpoint \code{\link[=get_xrefs_by_symbol]{get_xrefs_by_symbol()}} +on the following \href{https://rest.ensembl.org/documentation/info/xref_external}{GET xrefs/symbol/:species/:symbol} +from the official \href{https://rest.ensembl.org/}{Ensembl Rest API}. +} +\description{ +Looks up an external symbol and returns all Ensembl objects linked to it +} +\details{ +This can be a display name for a gene/transcript/translation, a synonym, +or an externally linked reference. +If a gene's transcript is linked to the supplied symbol, the service will +return both gene and transcript (it supports transient links). 
+} +\examples{ +get_xrefs_by_symbol("homo_sapiens", "BRCA2") +} diff --git a/man/is_ensembl_reachable.Rd b/man/is_ensembl_reachable.Rd index b919d9c..3b072a7 100644 --- a/man/is_ensembl_reachable.Rd +++ b/man/is_ensembl_reachable.Rd @@ -17,7 +17,7 @@ change this parameter.} } \value{ A logical value: \code{TRUE} if EBI server is reachable, \code{FALSE} - otherwise. +otherwise. } \description{ Check if the Ensembl server where REST API service is running is reachable. diff --git a/man/pairwise_combn.Rd b/man/pairwise_combn.Rd index 003bf3e..b2d15a1 100644 --- a/man/pairwise_combn.Rd +++ b/man/pairwise_combn.Rd @@ -11,7 +11,7 @@ pairwise_combn(x) } \value{ A \code{\link[tibble]{tibble}} of two columns where each row is a - pairwise combination. +pairwise combination. } \description{ Generates pairwise combinations from the supplied vector. Never returns the diff --git a/man/ping_service.Rd b/man/ping_service.Rd new file mode 100755 index 0000000..6136b87 --- /dev/null +++ b/man/ping_service.Rd @@ -0,0 +1,26 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/ensembl-endpoints.R +\name{ping_service} +\alias{ping_service} +\title{Check Service Status} +\usage{ +ping_service(callback = "randomlygeneratedname") +} +\arguments{ +\item{callback}{(Optional) A string representing the name of the callback +subroutine for JSONP responses.} +} +\value{ +A parsed JSON response indicating the status of the service. + +See more about the implemented endpoint \code{\link[=ping_service]{ping_service()}} +on the following \href{https://rest.ensembl.org/documentation/info/ping}{GET info/ping} +from the official \href{https://rest.ensembl.org/}{Ensembl Rest API}. +} +\description{ +Sends a ping request to the server to check if the service is alive. 
+} +\examples{ +ping_service() +ping_service(callback = "randomlygeneratedname") +} diff --git a/man/post.Rd b/man/post.Rd new file mode 100755 index 0000000..a8f7f91 --- /dev/null +++ b/man/post.Rd @@ -0,0 +1,33 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/post.R +\name{post} +\alias{post} +\title{The function for POST method} +\usage{ +post(res, ..., .headers = req_headers(), .body, rate = 15/60) +} +\arguments{ +\item{res}{The resource (path) for the API request, can include variables +in curly braces \code{{}} that will be replaced with the corresponding +parameter.} + +\item{...}{Additional named parameters to be included in the request URL.} + +\item{.headers}{An S3 list with class \code{ensemblr_req_hdr}. Use the helper +\code{\link[=req_headers]{req_headers()}} to create such an object.} + +\item{.body}{The body of the POST request, can be a string or raw vector.} + +\item{rate}{The maximum number of requests per second +to allow. +Defaults to 15 per minute (15/60).} +} +\value{ +A list of responses, one for each request made. +} +\description{ +The \code{\link[=post]{post()}} function is a wrapper around the \code{reqs} function that +performs POST requests to the Ensembl API, handling rate limiting +automatically. +} +\keyword{internal} diff --git a/man/req.Rd b/man/req.Rd new file mode 100644 index 0000000..e322420 --- /dev/null +++ b/man/req.Rd @@ -0,0 +1,27 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/req.R +\name{req} +\alias{req} +\title{Create a new HTTP request} +\usage{ +req(res, ..., .body = NULL, .headers = req_headers()) +} +\arguments{ +\item{res}{A resource (res) URL as a string.
This string supports embedding +of R variable names in curly braces whose values are looked up in parameter +names supplied in \code{...} and interpolated.} + +\item{...}{Name value pairs specifying query components or parameters.} + +\item{.body}{A literal string or raw vector to send as body.} + +\item{.headers}{An S3 list with class \code{ensemblr_req_hdr}. Use the helper +\code{\link[=req_headers]{req_headers()}} to create such an object.} +} +\value{ +An HTTP request: an S3 list with class \code{httr2_request}. +} +\description{ +\code{\link[=req]{req()}} creates an HTTP request object. +} +\keyword{internal} diff --git a/man/warn_when_request_errored.Rd b/man/warn_when_request_errored.Rd index 8980d65..a3af60e 100644 --- a/man/warn_when_request_errored.Rd +++ b/man/warn_when_request_errored.Rd @@ -11,8 +11,8 @@ warn_when_request_errored(response) } \value{ A scalar character vector with a warning message, or the string - \code{'OK'} if the response was successful, although this function is - called mostly for its side effect, i.e., the triggering of a warning. +\code{'OK'} if the response was successful, although this function is +called mostly for its side effect, i.e., the triggering of a warning. } \description{ Warn if an httr \code{\link[httr]{response}} errored. It also returns a tidy diff --git a/tests/testthat.R b/tests/testthat.R new file mode 100644 index 0000000..cb002db --- /dev/null +++ b/tests/testthat.R @@ -0,0 +1,12 @@ +# This file is part of the standard setup for testthat. +# It is recommended that you do not modify it. +# +# Where should you do additional test configuration? 
+# Learn more about the roles of various files in: +# * https://r-pkgs.org/testing-design.html#sec-tests-files-overview +# * https://testthat.r-lib.org/articles/special-files.html + +library(testthat) +library(ensemblr) + +test_check("ensemblr") diff --git a/tests/testthat/test-base-url.R b/tests/testthat/test-base-url.R new file mode 100755 index 0000000..942ae02 --- /dev/null +++ b/tests/testthat/test-base-url.R @@ -0,0 +1,4 @@ +test_that("base_url() returns correct URL", { + ensembl_url <- "https://rest.ensembl.org" + expect_equal(base_url(), ensembl_url) +}) diff --git a/tests/testthat/test-endpoints.R b/tests/testthat/test-endpoints.R new file mode 100755 index 0000000..6dabbe0 --- /dev/null +++ b/tests/testthat/test-endpoints.R @@ -0,0 +1,157 @@ +skip_on_cran() +skip_if_offline() + +test_that("Testing if the `Ensembl API` functions work correctly", { + + # -------------------------------------------------------- # + ## Comparative Genomics ==== + + test_that("`get_cafe_genetree_by_id` works", { + result <- get_cafe_genetree_by_id("ENSGT00390000003602") + expect_type(result, "list") + expect_equal(result[[1]]$status_code, 200) + expect_error(get_cafe_genetree_by_id(), "The 'id' parameter is required") + }) + + test_that("`get_cafe_genetree_by_symbol` works", { + result <- get_cafe_genetree_by_symbol("homo_sapiens", "BRCA2") + expect_type(result, "list") + expect_equal(result[[1]]$status_code, 200) + expect_error(get_cafe_genetree_by_symbol("homo_sapiens"), "Both 'species' and 'symbol' parameters are required") + }) + + test_that("`get_cafe_genetree_by_species_id` works", { + result <- get_cafe_genetree_by_species_id("homo_sapiens", "ENST00000380152") + expect_type(result, "list") + expect_equal(result[[1]]$status_code, 200) + expect_error(get_cafe_genetree_by_species_id("homo_sapiens"), "Both 'species' and 'id' parameters are required") + }) + + test_that("`get_genetree_by_id` works", { + result <- get_genetree_by_id("ENSGT00390000003602") + 
expect_type(result, "list") + expect_equal(result[[1]]$status_code, 200) + expect_error(get_genetree_by_id(), "The 'id' parameter is required") + }) + + test_that("`get_genetree_by_symbol` works", { + result <- get_genetree_by_symbol("homo_sapiens", "BRCA2") + expect_type(result, "list") + expect_equal(result[[1]]$status_code, 200) + expect_error(get_genetree_by_symbol("homo_sapiens"), "Both 'species' and 'symbol' parameters are required") + }) + + test_that("`get_genetree_by_species_id` works", { + result <- get_genetree_by_species_id("homo_sapiens", "ENST00000380152") + expect_type(result, "list") + expect_equal(result[[1]]$status_code, 200) + expect_error(get_genetree_by_species_id("homo_sapiens"), "Both 'species' and 'id' parameters are required") + }) + + test_that("`get_alignment_by_region` works", { + expect_warning(get_alignment_by_region("homo_sapiens", "3:1000-2000"), "This function is stil under-develop") + # expect_error(get_alignment_by_region("homo_sapiens"), "Both 'species' and 'region' parameters are required") + }) + + test_that("`get_homology_by_species_id` works", { + result <- get_homology_by_species_id("homo_sapiens", "ENSG00000157764") + expect_type(result, "list") + expect_equal(result[[1]]$status_code, 200) + expect_error(get_homology_by_species_id("homo_sapiens"), "Both 'species' and 'id' parameters are required") + }) + + test_that("`get_homology_by_symbol` works", { + result <- get_homology_by_symbol("homo_sapiens", "BRCA2") + expect_type(result, "list") + expect_equal(result[[1]]$status_code, 200) + expect_error(get_homology_by_symbol("homo_sapiens"), "Both 'species' and 'symbol' parameters are required") + }) + + # -------------------------------------------------------- # + ## Cross References ==== + + test_that("`get_xrefs_by_symbol` works", { + result <- get_xrefs_by_symbol("homo_sapiens", "BRCA2") + expect_type(result, "list") + expect_equal(result[[1]]$status_code, 200) + expect_error(get_xrefs_by_symbol("homo_sapiens"), "Both 
'species' and 'symbol' parameters are required") + }) + + test_that("`get_xrefs_by_id` works", { + result <- get_xrefs_by_id("ENSG00000157764") + expect_type(result, "list") + expect_equal(result[[1]]$status_code, 200) + expect_error(get_xrefs_by_id(), "The 'id' parameter is required") + }) + + test_that("`get_xrefs_by_name` works", { + result <- get_xrefs_by_name("homo_sapiens", "P38398") + expect_type(result, "list") + expect_equal(result[[1]]$status_code, 200) + expect_error(get_xrefs_by_name("homo_sapiens"), "Both 'species' and 'name' parameters are required") + }) + + # -------------------------------------------------------- # + ## Information ==== + ## TO DO + + # -------------------------------------------------------- # + ## Linkage Disequilibrium ==== + + test_that("get_ld_by_variant works", { + result <- get_ld_by_variant(species = "homo_sapiens", id = "rs56116432", + population_name = "1000GENOMES:phase_3:KHV") + + expect_type(result, "list") + expect_true(!is.null(result)) + expect_true(length(result) > 0) + }) + + test_that("get_ld_by_variant handles missing parameters", { + expect_error(get_ld_by_variant(id = "rs56116432", population_name = "1000GENOMES:phase_3:KHV"), + "'species', 'id', and 'population_name' parameters are all required.") + expect_error(get_ld_by_variant(species = "homo_sapiens",population_name = "1000GENOMES:phase_3:KHV"), + "'species', 'id', and 'population_name' parameters are all required.") + }) + + test_that("get_pairwise_ld_values works", { + result <- get_pairwise_ld_values(species = "homo_sapiens", + id1 = "rs6792369",id2 = "rs1042779") + + expect_type(result, "list") + expect_true(!is.null(result)) + expect_true(length(result) > 0) + }) + + test_that("get_pairwise_ld_values handles missing parameters", { + expect_error(get_pairwise_ld_values(id1 = "rs6792369", id2 = "rs1042779"), + "'species', 'id1', and 'id2' parameters are all required.") + expect_error(get_pairwise_ld_values(species = "homo_sapiens", id1 = "rs6792369"), 
+ "'species', 'id1', and 'id2' parameters are all required.") + }) + + test_that("get_ld_values_by_region works", { + result <- get_ld_values_by_region(species = "homo_sapiens", + region = "6:25837556..25843455", + population_name = "1000GENOMES:phase_3:KHV") + + expect_type(result, "list") + expect_true(!is.null(result)) + expect_true(length(result) > 0) + }) + + test_that("get_ld_values_by_region handles missing parameters", { + expect_error(get_ld_values_by_region(region = "6:25837556..25843455", population_name = "1000GENOMES:phase_3:KHV"), + "'species', 'region', and 'population_name' parameters are all required.") + expect_error(get_ld_values_by_region(species = "homo_sapiens", population_name = "1000GENOMES:phase_3:KHV"), + "'species', 'region', and 'population_name' parameters are all required.") + }) + + ## test `get` + id <- "ENSGT00390000003602" + response <- get(res = "/cafe/genetree/id/{id}", id = id, + .headers = req_headers(content_type = "application/json") + ) + expect_equal(response[[1]]$status_code, 200) + +}) diff --git a/tests/testthat/test-ensembl-server.R b/tests/testthat/test-ensembl-server.R new file mode 100644 index 0000000..9e4f0f2 --- /dev/null +++ b/tests/testthat/test-ensembl-server.R @@ -0,0 +1,3 @@ +test_that("ensembl_server() works", { + expect_equal(ensembl_server(), "https://rest.ensembl.org") +}) diff --git a/tests/testthat/test-http-headers.R b/tests/testthat/test-http-headers.R new file mode 100755 index 0000000..363e836 --- /dev/null +++ b/tests/testthat/test-http-headers.R @@ -0,0 +1,65 @@ +test_that("req_headers creates correct structure", { + headers <- req_headers(accept = "application/json", content_type = "text/plain") + + expect_s3_class(headers, "ensemblr_req_hdr") + expect_setequal(names(headers), req_header_names()) + expect_named(headers[!sapply(headers, is.null)], c("Accept", "Content-Type")) + expect_equal(headers$Accept, "application/json") + expect_equal(headers$`Content-Type`, "text/plain") +}) + 
+test_that("req_header_names returns the correct header names", { + expected_headers <- c("Accept", "Accept-Encoding", "Content-Type", "Origin") + expect_equal(req_header_names(), expected_headers) +}) + +test_that("res_header_names returns the correct response header names", { + expected_headers <- c( + "Access-Control-Allow-Origin", + "Content-Length", + "Content-Type", + "Retry-After", + "X-Runtime", + "X-RateLimit-Limit", + "X-RateLimit-Reset", + "X-RateLimit-Period", + "X-RateLimit-Remaining" + ) + expect_equal(res_header_names(), expected_headers) +}) + +test_that("req_headers creates a list with correct default values", { + headers <- req_headers() + expect_equal(headers$`Content-Type`, "application/json") + expect_null(headers$Accept) + expect_null(headers$`Accept-Encoding`) + expect_null(headers$Origin) + expect_s3_class(headers, "ensemblr_req_hdr") +}) + +test_that("req_headers assigns custom values correctly", { + headers <- req_headers(accept = "application/xml", origin = "https://example.com") + expect_equal(headers$Accept, "application/xml") + expect_equal(headers$Origin, "https://example.com") + expect_equal(headers$`Content-Type`, "application/json") +}) + +test_that("res_headers creates a list with correct default values", { + headers <- res_headers() + expect_equal(headers$`Access-Control-Allow-Origin`, "*") + expect_equal(headers$`Content-Type`, "application/json") + expect_null(headers$`Content-Length`) + expect_s3_class(headers, "ensemblr_res_hdr") +}) + +test_that("res_headers assigns custom values correctly", { + headers <- res_headers( + content_length = "123", + x_runtime = "0.123", + x_rate_limit_limit = "100" + ) + expect_equal(headers$`Content-Length`, "123") + expect_equal(headers$`X-Runtime`, "0.123") + expect_equal(headers$`X-RateLimit-Limit`, "100") +}) + diff --git a/tests/testthat/test-req.R b/tests/testthat/test-req.R new file mode 100755 index 0000000..12143c5 --- /dev/null +++ b/tests/testthat/test-req.R @@ -0,0 +1,123 @@ +# 
Tests that require a connection to Ensembl's REST API. +skip_on_cran() +skip_if_offline() + +## tests for `req()` function + +test_that("`req()` builds correct request", { + res <- "/cafe/genetree/member/symbol/{species}/{symbol}" + test_req <- req( + res, + species = "homo_sapiens", + symbol = "BRCA2", + version = NULL, + .headers = req_headers(accept = "application/json") + ) + + expected_endpoint <- "/cafe/genetree/member/symbol/homo_sapiens/BRCA2" + expect_identical(test_req$url, paste0(base_url(), expected_endpoint)) + expect_identical(test_req$headers$Accept, "application/json") + expect_identical(test_req$headers$`Accept-Encoding`, NULL) + expect_identical(test_req$headers$`Content-Type`, "application/json") + expect_identical(test_req$headers$Origin, NULL) + expect_identical(test_req$options$useragent, user_agent()) +}) + +test_that("`req()` function raises error for missing required parameters", { + + res <- "/cafe/genetree/member/symbol/{species}/{symbol}" + expect_error(req(res, species = "homo_sapiens"), "object 'symbol' not found") + expect_error(req(res, symbol = "BRCA2"), "object 'species' not found") + expect_no_error(req(res, species = "homo_sapiens", symbol = "BRCA2")) + +}) + +test_that("`req()` sets request body, if provided", { + res <- "/cafe/genetree/member/symbol/{species}/{symbol}" + test_req <- req( + res, + species = "homo_sapiens", + symbol = "BRCA2", + .body = "test body content", + .headers = req_headers(content_type = "text/plain") + ) + + expect_identical(test_req$body$data, "test body content") + expect_identical(test_req$headers$`Content-Type`, "text/plain") +}) + +# tests for optional parameters only +test_that("`req()` function handles optional parameters correctly", { + test_req <- req("/path/to/resource", + optional_param1 = "value1", + optional_param2 = "value2") + expect_identical( + test_req$url, + paste0( + base_url(), + "/path/to/resource", + "?optional_param1=", + "value1", + "&optional_param2=", + "value2" + ) + ) +}) + 
+test_that("`req()` function does not set body when `.body` is NULL", { + test_req <- req( + "/cafe/genetree/member/symbol/{species}/{symbol}", + species = "homo_sapiens", + symbol = "BRCA2", + .headers = req_headers(content_type = "application/json") + ) + expect_null(test_req$body) +}) + +test_that("req function handles invalid or null headers", { + test_req <- req("/path/to/resource", .headers = req_headers()) + expect_false(any(is.null(test_req$headers))) +}) + +test_that("req function correctly handles special characters in parameters", { + test_req <- req("/path", query_param = "special?chars&") + expected_url <- "https://rest.ensembl.org/path?query_param=special%3Fchars%26" + expect_identical(test_req$url, expected_url) +}) + +# in case of missing http headers argument (using defaults); +# content type defaults to application/json when not provided +test_that("req function handles default headers", { + test_req <- req("/path/to/resource") + expect_identical(test_req$headers$`Content-Type`, "application/json") + expect_null(test_req$headers$Accept) + expect_null(test_req$headers$Origin) +}) + +# ------------- GET Request Tests ------------- + +test_that("GET req sends a valid GET request with valid id", { + res <- req("/archive/id/{id}", id = "ENSG00000157764") |> + httr2::req_perform() + + expect_s3_class(res, "httr2_response") + expect_identical(res$url, "https://rest.ensembl.org/archive/id/ENSG00000157764") + expect_identical(res$method, "GET") + expect_true("Content-Type" %in% names(res$headers)) + expect_identical(res$headers$`Content-Type`, "application/json") +}) + +test_that("GET req sends correct callback parameter in URL", { + res <- req("/archive/id/{id}", id = "ENSG00000157764", callback = "randomlygeneratedname") + expected_url <- "https://rest.ensembl.org/archive/id/ENSG00000157764?callback=randomlygeneratedname" + expect_identical(res$url, expected_url) +}) + +test_that("GET req sets correct headers", { + res <- req("/archive/id/{id}", id = 
"ENSG00000157764") + expect_identical(res$headers$`Content-Type`, "application/json") +}) + +# ------------- POST Request Tests ------------- + +## to be done diff --git a/tests/testthat/test-reqs.R b/tests/testthat/test-reqs.R new file mode 100755 index 0000000..3fa0b28 --- /dev/null +++ b/tests/testthat/test-reqs.R @@ -0,0 +1,102 @@ +# Tests that require a connection to Ensembl's REST API. +skip_on_cran() +skip_if_offline() + +##test for only reqs() function +test_that("reqs function creates correct number of requests", { + res <- c("/endpoint1", "/endpoint2") + param1 <- c("value1", "value2") # for optional parameters + param2 <- c("a", "b") + + result <- reqs(res, param1 = param1, param2 = param2) + + expect_length(result, 2) + expect_s3_class(result[[1]], "httr2_request") #check the object type + expect_s3_class(result[[2]], "httr2_request") +}) + + +#test_that("reqs function correctly handles parameter recycling", { +# res <- c("/endpoint1", "/endpoint2", "/endpoint3") +# param1 <- c("value1", "value2") # for optional parameters +# +# result <- reqs(res, param1 = param1) +# +# expect_length(result, 3) +# expect_equal(httr2::req_url_query(result[[1]])$param1, "value1") +# expect_equal(httr2::req_url_query(result[[2]])$param1, "value2") +# expect_equal(httr2::req_url_query(result[[3]])$param1, "value1") +#}) + +test_that("reqs() correctly applies custom headers", { + res <- "/endpoint" + custom_headers <- req_headers(accept = "text/x-fasta") + + result <- reqs(res, .headers = custom_headers) + + expect_length(result, 1) + expect_equal(httr2::req_headers(result[[1]])$headers$Accept, "text/x-fasta") +}) + +#test_that("reqs() correctly interpolates variables in resource path", { +# res <- "/endpoint/{var1}/{var2}" +# var1 <- c("a", "b") +# var2 <- c("x", "y") +# +# result <- reqs(res, var1 = var1, var2 = var2) +# +# expect_length(result, 2) +# expect_match(httr2::req_url_path(result[[1]]), "/endpoint/a/x") +# expect_match(httr2::req_url_path(result[[2]]), 
"/endpoint/b/y") +#}) + +## more tests for `reqs()` function + +test_that("reqs() correctly vectorizes parameters", { + res_path <- "/example/resource/{param1}/{param2}" + req_list <- reqs( + res = res_path, + param1 = "value1", + param2 = c("valA", "valB"), + .headers = req_headers(content_type = "application/json") + ) + + expect_equal(length(req_list), 2) + + expect_equal(req_list[[1]]$url, + "https://rest.ensembl.org/example/resource/value1/valA") + expect_equal(req_list[[2]]$url, + "https://rest.ensembl.org/example/resource/value1/valB") + + req_list2 <- reqs( + res = res_path, + param1 = c("val1", "val2"), + param2 = c("A", "B"), + .headers = req_headers(content_type = "application/json") + ) + + expect_equal(length(req_list2), 2) + + expect_equal(req_list2[[1]]$url, + "https://rest.ensembl.org/example/resource/val1/A") + expect_equal(req_list2[[2]]$url, + "https://rest.ensembl.org/example/resource/val2/B") + + # vectorized parameters with different lengths (should throw an error) + expect_error(reqs( + res = res_path, + param1 = c("val1", "val2", "val3"), + param2 = c("A", "B"), + .headers = req_headers(content_type = "application/json") + ), "Can't recycle `res` \\(size 3\\) to match `param2` \\(size 2\\).") + + # no parameters passed (edge case) + req_list3 <- reqs( + res = "/example/resource", + .headers = req_headers(content_type = "application/json") + ) + + expect_equal(length(req_list3), 1) + expect_equal(req_list3[[1]]$url, "https://rest.ensembl.org/example/resource") +} +) diff --git a/tests/testthat/test-user-agent.R b/tests/testthat/test-user-agent.R new file mode 100755 index 0000000..48a3410 --- /dev/null +++ b/tests/testthat/test-user-agent.R @@ -0,0 +1,4 @@ +test_that("user_agent() works", { + user_agent_desc <- "ensemblr (https://www.pattern.institute/ensemblr)" + expect_equal(user_agent(), user_agent_desc) +}) diff --git a/tests/testthat/test-vars-in-braces.R b/tests/testthat/test-vars-in-braces.R new file mode 100644 index 0000000..3ab9808 
--- /dev/null +++ b/tests/testthat/test-vars-in-braces.R @@ -0,0 +1,7 @@ +# start with basic function +test_that("vars_in_braces extracts variables correctly", { + expect_equal(vars_in_braces("Hello {world}"), "world") + expect_equal(vars_in_braces("/{species}/{symbol}"), c("species", "symbol")) + expect_equal(vars_in_braces("No braces here"), character(0)) + expect_equal(vars_in_braces("{a} {b} {c}"), c("a", "b", "c")) +})