diff --git a/.Rbuildignore b/.Rbuildignore index ccdb2262..c8ed7b7e 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -15,6 +15,7 @@ ^\.dockerignore$ ^\.editorconfig$ ^\.gitattributes$ +^\.git$ ^\.github$ ^\.gitignore$ ^\.httr-oauth$ @@ -39,3 +40,4 @@ ^revdep$ CLAUDE.md AGENTS.md +^pr-233-code-review\.md$ diff --git a/DESCRIPTION b/DESCRIPTION index 48fc38ce..5bce76fd 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: tf Title: S3 Classes and Methods for Tidy Functional Data -Version: 0.4.1 +Version: 0.4.2 Authors@R: c( person("Fabian", "Scheipl", , "fabian.scheipl@googlemail.com", role = c("aut", "cre", "cph"), comment = c(ORCID = "0000-0001-8172-3603")), @@ -10,13 +10,7 @@ Authors@R: c( person("Julia", "Wrobel", role = "ctb", comment = c(ORCID = "0000-0001-6783-1421")), person("Sebastian", "Fischer", role = "ctb", - comment = c(ORCID = "0000-0002-9609-3197")), - person("Trevor", "Hastie", role = "ctb", - comment = "softImpute author"), - person("Rahul", "Mazumder", role = "ctb", - comment = "softImpute author"), - person("Chen", "Meng", role = "ctb", - comment = "mogsa author") + comment = c(ORCID = "0000-0002-9609-3197")) ) Description: Provides S3 vector types for functional data represented on grids, in spline bases, or via functional principal components. @@ -37,8 +31,6 @@ Imports: cli, methods, mgcv, - mvtnorm, - pracma, purrr (>= 1.0.0), rlang, stats, @@ -48,29 +40,40 @@ Suggests: covr, dplyr, fda, - fdasrvf, + fdasrvf (>= 2.4.0), pillar, refund, testthat (>= 3.0.0), + tibble, + tidyr, withr Config/testthat/edition: 3 Encoding: UTF-8 LazyData: true Roxygen: list(markdown = TRUE) -RoxygenNote: 7.3.3 Collate: + 'tfb-class.R' + 'tfd-class.R' + 'tfd-mv.R' + 'tfb-mv.R' + 'vctrs-mv.R' + 'accessors-mv.R' 'approx.R' 'assertions.R' 'bibentries.R' + 'brackets-mv.R' 'brackets.R' + 'calculus-mv.R' 'calculus.R' 'convert-construct-utils.R' + 'convert-mv.R' 'convert.R' 'data.R' 'depth.R' 'evaluate.R' 'fda-connectors.R' 'fwise.R' + 'geometry-mv.R' 'globals.R' 'graphics.R' 'interpolate.R' @@ -78,9 +81,14 @@ Collate: 'ops.R' 'math.R' 'methods.R' + 'mv-stubs.R' + 'ops-mv.R' + 'plot-mv.R' + 'print-format-mv.R' 'print-format.R' 'rebase.R' 'register-cc.R' + 'register-mv.R' 'register-utils.R' 'register.R' 'registration-class.R' @@ -92,10 +100,9 @@ Collate: 'tf-package.R' 'tfb-fpc.R' 'tfb-spline.R' - 'tfb-class.R' - 'tfd-class.R' 'tf-s4.R' 'tfb-fpc-utils.R' + 'tfb-mfpc.R' 'tfb-spline-utils.R' 'utils.R' 'vctrs-cast.R' @@ -103,3 +110,4 @@ Collate: 'where.R' 'zoom.R' 'zzz.R' +Config/roxygen2/version: 8.0.0 diff --git a/NAMESPACE b/NAMESPACE index a909eddd..2b214139 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,26 +1,41 @@ # Generated by roxygen2: do not edit by hand +S3method("!=",tf_mv) S3method("!=",tfb) S3method("!=",tfd) +S3method("$",tf_mv) +S3method("$<-",tf_mv) +S3method("==",tf_mv) S3method("==",tfb) S3method("==",tfd) S3method("[",tf) +S3method("[",tf_mv) S3method("[",tf_registration) +S3method("[",tf_shape_registration) S3method("[<-",tf) +S3method("[<-",tf_mv) +S3method("names<-",tf_mv) S3method("tf_arg<-",tfb) S3method("tf_arg<-",tfd_irreg) S3method("tf_arg<-",tfd_reg) +S3method(Math,tf_mv) S3method(Math,tfb) S3method(Math,tfd) S3method(Predict.matrix,fourier.smooth) S3method(Summary,tf) +S3method(Summary,tf_mv) S3method(as.data.frame,tf) +S3method(as.data.frame,tf_mv) S3method(as.function,tf) S3method(as.matrix,tf) +S3method(as.matrix,tf_mv) +S3method(as.tfb_mv,default) S3method(as.tfd,default) S3method(as.tfd_irreg,tfb) S3method(as.tfd_irreg,tfd_irreg) S3method(as.tfd_irreg,tfd_reg) +S3method(as.tfd_mv,default) +S3method(as.tfd_mv,tf_mv) S3method(coef,tfb) S3method(cummax,tfb) S3method(cummax,tfd) @@ -32,71 +47,125 @@ S3method(cumsum,tfb) S3method(cumsum,tfd) S3method(fivenum,default) S3method(fivenum,tf) +S3method(fivenum,tf_mv) S3method(format,tf) +S3method(format,tf_mv) S3method(fpc_wsvd,data.frame) S3method(fpc_wsvd,matrix) S3method(is.na,tf) +S3method(is.na,tf_mv) S3method(is.na,tfd_irreg) S3method(length,tf_registration) S3method(lines,tf) +S3method(lines,tf_mv) S3method(max,tf) S3method(mean,tf) +S3method(mean,tf_mv) S3method(median,tf) +S3method(median,tf_mv) S3method(min,tf) S3method(plot,tf) +S3method(plot,tf_mv) S3method(plot,tf_registration) S3method(points,tf) +S3method(points,tf_mv) S3method(print,summary.tf_registration) +S3method(print,summary.tf_shape_registration) S3method(print,tf) +S3method(print,tf_mv) S3method(print,tf_registration) +S3method(print,tf_shape_registration) S3method(print,tfb) S3method(print,tfd_irreg) S3method(print,tfd_reg) S3method(quantile,tf) +S3method(quantile,tf_mv) S3method(range,tf) S3method(rank,default) S3method(rank,tf) +S3method(rank,tf_mv) S3method(rev,tf) +S3method(rev,tf_mv) S3method(sd,default) S3method(sd,tf) +S3method(sd,tf_mv) S3method(smooth.construct,fourier.smooth.spec) S3method(sort,tf) +S3method(sort,tf_mv) S3method(summary,tf) +S3method(summary,tf_mv) S3method(summary,tf_registration) +S3method(summary,tf_shape_registration) +S3method(tf_align,tf_mv) S3method(tf_align,tfb) S3method(tf_align,tfd) +S3method(tf_anywhere,default) +S3method(tf_anywhere,tf_mv) +S3method(tf_arclength,default) +S3method(tf_arclength,tf_mv) S3method(tf_arg,default) +S3method(tf_arg,tf_mv) S3method(tf_arg,tfb) S3method(tf_arg,tfd_irreg) S3method(tf_arg,tfd_reg) S3method(tf_count,default) +S3method(tf_count,tf_mv) S3method(tf_count,tfd_irreg) S3method(tf_count,tfd_reg) +S3method(tf_crosscor,default) +S3method(tf_crosscor,tf_mv) +S3method(tf_crosscov,default) +S3method(tf_crosscov,tf_mv) S3method(tf_depth,matrix) S3method(tf_depth,tf) +S3method(tf_depth,tf_mv) S3method(tf_derive,default) S3method(tf_derive,matrix) +S3method(tf_derive,tf_mv) S3method(tf_derive,tfb_fpc) S3method(tf_derive,tfb_spline) S3method(tf_derive,tfd) S3method(tf_derive,tfd_irreg) +S3method(tf_estimate_warps,tf_mv) S3method(tf_estimate_warps,tfb) S3method(tf_estimate_warps,tfd_irreg) S3method(tf_estimate_warps,tfd_reg) S3method(tf_evaluate,default) +S3method(tf_evaluate,tf_mv) S3method(tf_evaluate,tfb) S3method(tf_evaluate,tfd) S3method(tf_evaluations,default) +S3method(tf_evaluations,tf_mv) S3method(tf_evaluations,tfb) S3method(tf_evaluations,tfd_irreg) S3method(tf_evaluations,tfd_reg) +S3method(tf_fmean,default) +S3method(tf_fmean,tf_mv) +S3method(tf_fsd,default) +S3method(tf_fsd,tf_mv) +S3method(tf_fvar,default) +S3method(tf_fvar,tf_mv) +S3method(tf_fwise,default) +S3method(tf_fwise,tf_mv) +S3method(tf_inner,default) +S3method(tf_inner,tf) +S3method(tf_inner,tf_mv) S3method(tf_integrate,default) +S3method(tf_integrate,tf_mv) S3method(tf_integrate,tfb) S3method(tf_integrate,tfd) +S3method(tf_interpolate,tf_mv) S3method(tf_interpolate,tfb) S3method(tf_interpolate,tfd) +S3method(tf_invert,tf_mv) S3method(tf_invert,tfb) S3method(tf_invert,tfd) +S3method(tf_jiggle,default) +S3method(tf_jiggle,tf_mv) +S3method(tf_norm,default) +S3method(tf_norm,tf) +S3method(tf_norm,tf_mv) +S3method(tf_rebase,tf_mv) S3method(tf_rebase,tfb) S3method(tf_rebase,tfb.tfb) S3method(tf_rebase,tfb.tfd) @@ -105,10 +174,20 @@ S3method(tf_rebase,tfd.tfb_fpc) S3method(tf_rebase,tfd.tfb_spline) S3method(tf_rebase,tfd.tfd) S3method(tf_smooth,default) +S3method(tf_smooth,tf_mv) S3method(tf_smooth,tfb) S3method(tf_smooth,tfd) +S3method(tf_sparsify,default) +S3method(tf_sparsify,tf_mv) +S3method(tf_tangent,default) +S3method(tf_tangent,tf) +S3method(tf_tangent,tf_mv) +S3method(tf_warp,tf_mv) S3method(tf_warp,tfb) S3method(tf_warp,tfd) +S3method(tf_where,default) +S3method(tf_where,tf_mv) +S3method(tf_zoom,tf_mv) S3method(tf_zoom,tfb) S3method(tf_zoom,tfb_fpc) S3method(tf_zoom,tfd) @@ -117,6 +196,12 @@ S3method(tfb_fpc,default) S3method(tfb_fpc,matrix) S3method(tfb_fpc,numeric) S3method(tfb_fpc,tf) +S3method(tfb_mfpc,default) +S3method(tfb_mfpc,list) +S3method(tfb_mfpc,tf_mv) +S3method(tfb_mv,default) +S3method(tfb_mv,list) +S3method(tfb_mv,tf_mv) S3method(tfb_spline,data.frame) S3method(tfb_spline,default) S3method(tfb_spline,fd) @@ -132,12 +217,24 @@ S3method(tfd,list) S3method(tfd,matrix) S3method(tfd,numeric) S3method(tfd,tf) +S3method(tfd_mv,array) +S3method(tfd_mv,data.frame) +S3method(tfd_mv,default) +S3method(tfd_mv,list) +S3method(tfd_mv,tf_mv) S3method(var,default) S3method(var,tf) +S3method(var,tf_mv) +S3method(vec_arith,tf_mv) S3method(vec_arith,tfb) S3method(vec_arith,tfd) +S3method(vec_arith.numeric,tf_mv) S3method(vec_arith.numeric,tfb) S3method(vec_arith.numeric,tfd) +S3method(vec_arith.tf_mv,MISSING) +S3method(vec_arith.tf_mv,default) +S3method(vec_arith.tf_mv,numeric) +S3method(vec_arith.tf_mv,tf_mv) S3method(vec_arith.tfb,MISSING) S3method(vec_arith.tfb,default) S3method(vec_arith.tfb,numeric) @@ -150,6 +247,8 @@ S3method(vec_cast,tfb_fpc.tfb_fpc) S3method(vec_cast,tfb_fpc.tfb_spline) S3method(vec_cast,tfb_fpc.tfd_irreg) S3method(vec_cast,tfb_fpc.tfd_reg) +S3method(vec_cast,tfb_mv.tfb_mv) +S3method(vec_cast,tfb_mv.tfd_mv) S3method(vec_cast,tfb_spline.tfb_fpc) S3method(vec_cast,tfb_spline.tfb_spline) S3method(vec_cast,tfb_spline.tfd_irreg) @@ -158,14 +257,19 @@ S3method(vec_cast,tfd_irreg.tfb_fpc) S3method(vec_cast,tfd_irreg.tfb_spline) S3method(vec_cast,tfd_irreg.tfd_irreg) S3method(vec_cast,tfd_irreg.tfd_reg) +S3method(vec_cast,tfd_mv.tfb_mv) +S3method(vec_cast,tfd_mv.tfd_mv) S3method(vec_cast,tfd_reg.tfb_fpc) S3method(vec_cast,tfd_reg.tfb_spline) S3method(vec_cast,tfd_reg.tfd_irreg) S3method(vec_cast,tfd_reg.tfd_reg) +S3method(vec_proxy,tf_mv) S3method(vec_ptype2,tfb_fpc.tfb_fpc) S3method(vec_ptype2,tfb_fpc.tfb_spline) S3method(vec_ptype2,tfb_fpc.tfd_irreg) S3method(vec_ptype2,tfb_fpc.tfd_reg) +S3method(vec_ptype2,tfb_mv.tfb_mv) +S3method(vec_ptype2,tfb_mv.tfd_mv) S3method(vec_ptype2,tfb_spline.tfb_fpc) S3method(vec_ptype2,tfb_spline.tfb_spline) S3method(vec_ptype2,tfb_spline.tfd_irreg) @@ -174,36 +278,54 @@ S3method(vec_ptype2,tfd_irreg.tfb_fpc) S3method(vec_ptype2,tfd_irreg.tfb_spline) S3method(vec_ptype2,tfd_irreg.tfd_irreg) S3method(vec_ptype2,tfd_irreg.tfd_reg) +S3method(vec_ptype2,tfd_mv.tfb_mv) +S3method(vec_ptype2,tfd_mv.tfd_mv) S3method(vec_ptype2,tfd_reg.tfb_fpc) S3method(vec_ptype2,tfd_reg.tfb_spline) S3method(vec_ptype2,tfd_reg.tfd_irreg) S3method(vec_ptype2,tfd_reg.tfd_reg) S3method(vec_ptype_abbr,tfb_fpc) +S3method(vec_ptype_abbr,tfb_mv) S3method(vec_ptype_abbr,tfb_spline) S3method(vec_ptype_abbr,tfd_irreg) +S3method(vec_ptype_abbr,tfd_mv) S3method(vec_ptype_abbr,tfd_reg) +S3method(vec_ptype_full,tfb_mv) +S3method(vec_ptype_full,tfd_mv) +S3method(vec_restore,tf_mv) S3method(xtfrm,tf) +S3method(xtfrm,tf_mv) export("%inr%") export("tf_arg<-") +export("tf_component<-") export("tf_domain<-") export("tf_evaluator<-") export(as.tfb) +export(as.tfb_mv) export(as.tfd) export(as.tfd_irreg) +export(as.tfd_mv) export(ensure_list) export(fivenum) +export(fpc_wsvd) export(in_range) export(is_irreg) export(is_reg) export(is_tf) +export(is_tf_1d) +export(is_tf_mv) export(is_tfb) export(is_tfb_fpc) +export(is_tfb_mfpc) +export(is_tfb_mv) export(is_tfb_spline) export(is_tfd) export(is_tfd_irreg) +export(is_tfd_mv) export(is_tfd_reg) export(prep_plotting_arg) export(rank) +export(savgol) export(sd) export(tf_align) export(tf_aligned) @@ -214,14 +336,18 @@ export(tf_approx_locf) export(tf_approx_nocb) export(tf_approx_none) export(tf_approx_spline) +export(tf_arclength) export(tf_arg) export(tf_basis) export(tf_combine) +export(tf_component) +export(tf_components) export(tf_count) export(tf_crosscor) export(tf_crosscov) export(tf_depth) export(tf_derive) +export(tf_distance) export(tf_domain) export(tf_estimate_warps) export(tf_evaluate) @@ -235,26 +361,40 @@ export(tf_frange) export(tf_fsd) export(tf_fvar) export(tf_fwise) +export(tf_inner) export(tf_integrate) export(tf_interpolate) export(tf_inv_warps) export(tf_invert) export(tf_jiggle) export(tf_landmarks_extrema) +export(tf_mfpc_efunctions) +export(tf_mfpc_scores) +export(tf_ncomp) +export(tf_norm) export(tf_rebase) export(tf_register) +export(tf_register_shape) +export(tf_reparam_arclength) export(tf_rgp) +export(tf_rotations) +export(tf_scales) export(tf_smooth) export(tf_sparsify) +export(tf_speed) export(tf_split) +export(tf_tangent) export(tf_template) export(tf_warp) export(tf_where) export(tf_zoom) export(tfb) export(tfb_fpc) +export(tfb_mfpc) +export(tfb_mv) export(tfb_spline) export(tfd) +export(tfd_mv) export(unique_id) export(var) import(purrr, except = c(flatten, flatten_lgl, flatten_int, flatten_dbl, flatten_chr, flatten_raw, splice, invoke, `%@%`)) @@ -268,6 +408,7 @@ importFrom(checkmate,assert_data_frame) importFrom(checkmate,assert_flag) importFrom(checkmate,assert_function) importFrom(checkmate,assert_int) +importFrom(checkmate,assert_integerish) importFrom(checkmate,assert_list) importFrom(checkmate,assert_logical) importFrom(checkmate,assert_matrix) @@ -281,11 +422,14 @@ importFrom(checkmate,check_numeric) importFrom(checkmate,makeAssertionFunction) importFrom(checkmate,vname) importFrom(grDevices,heat.colors) +importFrom(grDevices,n2mfrow) importFrom(grDevices,rgb) importFrom(graphics,axis) importFrom(graphics,image) +importFrom(graphics,lines) importFrom(graphics,matlines) importFrom(graphics,matplot) +importFrom(graphics,par) importFrom(methods,formalArgs) importFrom(methods,setClass) importFrom(methods,setOldClass) @@ -298,8 +442,6 @@ importFrom(mgcv,s) importFrom(mgcv,scat) importFrom(mgcv,smooth.construct) importFrom(mgcv,uniquecombs) -importFrom(mvtnorm,rmvnorm) -importFrom(pracma,savgol) importFrom(stats,approx) importFrom(stats,coef) importFrom(stats,complete.cases) @@ -309,6 +451,7 @@ importFrom(stats,median) importFrom(stats,na.omit) importFrom(stats,optim) importFrom(stats,quantile) +importFrom(stats,rnorm) importFrom(stats,runif) importFrom(stats,sd) importFrom(stats,setNames) diff --git a/NEWS.md b/NEWS.md index 959ecb60..ef01116f 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,71 @@ +# tf 0.4.2 + +## Internal + +* `tf_rgp()` no longer depends on `mvtnorm` or `pracma`. GP draws are now + generated by a direct eigen-based sampler (the symmetric square root + `V D^{1/2} V^T` of the covariance) that reproduces the previous + `mvtnorm::rmvnorm(method = "eigen")` draws bit-for-bit under `set.seed()` and + handles rank-deficient kernels (e.g. squared-exp with zero nugget) correctly. + `tf_smooth.tfd(method = "savgol")` now inlines a small Savitzky–Golay filter + in place of the `pracma::savgol()` dependency. + +## Bug fixes + +* `tf_evaluate()` no longer returns values at the wrong positions when the + requested arg contains duplicates (#236). +* `tf_integrate(f, definite = FALSE)` for irregular `tfd` with n > 1 no longer + crashes; the antiderivative's per-curve grids are kept as a list (#237). +* `Math.tfd()` / `Math.tfb()` now forward `...` to the underlying op so + `round(x, digits)`, `log(x, base)`, `signif(x, digits)`, etc. honor their + extra arguments instead of silently dropping them (#246). +* `tf_integrate()` on irregular `tfd` no longer silently returns `NA` under + default limits; for irregular input the defaults are now each curve's own + observed arg range (i.e., the range of its `tf_arg()` values). Pass explicit + `lower` / `upper` (or an extrapolating evaluator) to override (#253). + +## Vector-valued functional data + +This release introduces first-class support for vector-valued (multivariate) +functional data -- functions whose codomain is `R^d` -- alongside the existing +univariate `tfd`/`tfb` classes. + +* `tfd_mv()` and `tfb_mv()`: new `vctrs`-based S3 classes for vector-valued + functional data, holding several component functions per observation on a + shared domain. Constructors accept named lists of `tfd`/`tfb` vectors or + list-columns of matrices. +* `tfb_mfpc()` implements multivariate functional principal component analysis + (Happ & Greven, 2018) for `tf_mv` data: a single set of scalar scores per + curve shared across all components, with vector-valued eigenfunctions. + Component weighting is configurable (`"inverse_variance"` default, `"snr"`, + `"equal"`, or user-supplied). New data can be projected onto a fitted basis + via `tf_rebase()` / `vec_cast()`. Accessors `tf_mfpc_scores()`, + `tf_mfpc_efunctions()` and the predicate `is_tfb_mfpc()`. +* Multivariate registration: `tf_register()` gains `method = "srvf_mv"` for + jointly aligning the components of `tf_mv` curves via the multivariate SRVF + framework, and `tf_register_shape()` provides elastic shape registration + (rotation/translation/scale-invariant) via `fdasrvf`. +* New geometry verbs for `tf_mv` (and where meaningful for univariate `tf`): + `tf_norm()`, `tf_inner()`, `tf_tangent()`, `tf_arclength()`. +* Component accessors (`tf_ncomp()`, `tf_components()`, `tf_component()`), + `tf_split()` / `tf_combine()` extensions and `[`/`[[` methods for extracting, + replacing and recombining components. +* Function-wise scalar summaries `tf_fmean()`, `tf_fvar()` and `tf_fsd()` + return component-wise matrices for `tf_mv` objects. +* `tf_interpolate()`, `tf_fwise()`, `tf_sparsify()` and `tf_jiggle()` now have + component-wise `tf_mv` methods; `tf_sparsify()` and `tf_jiggle()` keep shared + component grids by default via `same_arg = TRUE`. +* `tf_where()` / `tf_anywhere()` work on `tf_mv` objects with *joint* + conditions across components, referenced by name (e.g. + `tf_where(f, x > 0 & y < 1)`); there is no `value` column for `tf_mv` input. + Components must share a common grid, or `arg` must be supplied explicitly. + +### Contract change + +* `is_tf()` now returns `TRUE` for `tf_mv` as well as univariate `tfd`/`tfb`. + Code that branched on `is_tf()` to mean "univariate `tf`" should switch to + the new predicate `is_tf_1d()`. + # tf 0.4.1 ## Bug fixes diff --git a/R/accessors-mv.R b/R/accessors-mv.R new file mode 100644 index 00000000..fa65cf8b --- /dev/null +++ b/R/accessors-mv.R @@ -0,0 +1,321 @@ +#' @include tfd-mv.R tfb-mv.R vctrs-mv.R +NULL + +# Accessors -------------------------------------------------------------------- + +#' Accessors and methods for vector-valued functional data +#' +#' Utilities for `tf_mv` objects (see [tfd_mv()] / [tfb_mv()]). `tf_ncomp()` +#' returns the number of output dimensions \eqn{d}, `tf_components()` the list +#' of the `d` underlying univariate `tf` vectors, and `tf_component()` extracts +#' or replaces a single one (also available via the `$` operator, e.g. `f$x`). +#' +#' @details +#' Most univariate `tf` verbs also work on `tf_mv` objects by acting on each +#' component: [tf_rebase()] (and hence `tfd_mv`/`tfb_mv` conversion), +#' [tf_derive()], [tf_integrate()] (definite integrals return an `n x d` +#' matrix), [tf_smooth()] and [tf_zoom()]. Registration +#' ([tf_register()] / [tf_estimate_warps()] / [tf_warp()] / [tf_align()]) +#' estimates a *single, shared* time-warp per curve and applies it jointly to +#' every component. The registration signal is, by default, the first +#' component; use `ref_component` to pick another component (by name/index), +#' `"norm"` for the pointwise Euclidean norm, or a function mapping the +#' `tf_mv` to a univariate `tf` vector. +#' +#' `is.na()` flags a curve as missing if **any** of its components is missing +#' (the union, not the intersection), which also drives the `na.rm` behaviour +#' of [mean()] / [median()] etc. +#' +#' @param f a `tf_mv` object. +#' @param which a component name or index. +#' @param value a univariate `tf` vector (replacement) of matching length and +#' domain. +#' @returns `tf_ncomp()`: an integer. `tf_components()`: a named list of `tf` +#' vectors. `tf_component()`: a single univariate `tf` vector. +#' @examples +#' f <- tfd_mv(list(x = tf_rgp(3), y = tf_rgp(3))) +#' tf_ncomp(f) +#' tf_components(f) +#' tf_component(f, "y") +#' f$y +#' @family tf_mv-class +#' @name tf_mv_methods +#' @rdname tf_mv_methods +#' @export +tf_ncomp <- function(f) length(attr(f, "components")) + +#' @rdname tf_mv_methods +#' @export +tf_components <- function(f) attr(f, "components") + +map_components <- function(x, .f, ...) { + comps <- map(tf_components(x), .f, ...) + names(comps) <- attr(x, "comp_names") + new_tf_mv(comps) +} + +map2_components <- function(x, y, .f, ...) { + check_compatible_mv(x, y) + comps <- map2(tf_components(x), tf_components(y), .f, ...) + names(comps) <- attr(x, "comp_names") + new_tf_mv(comps) +} + +#' @rdname tf_mv_methods +#' @export +tf_component <- function(f, which) { + assert_tf_mv(f) + comps <- tf_components(f) + which <- check_component_index(which, comps, arg = "which") + comps[[which]] +} + +# resolve a component selector (single name or single integer index) to a +# valid positional index into `comps`, with informative errors. +check_component_index <- function(which, comps, arg = "which") { + if (!length(comps)) { + cli::cli_abort( + "Cannot select component {.arg {arg}}: the object has no components." + ) + } + if (is.character(which)) { + assert_string(which, .var.name = arg) + loc <- match(which, names(comps)) + if (is.na(loc)) { + cli::cli_abort(c( + "Unknown component {.val {which}}.", + "i" = "Available component{?s}: {.val {names(comps)}}." + )) + } + return(loc) + } + if ( + !is.numeric(which) || + length(which) != 1L || + is.na(which) || + which != round(which) || + which < 1 || + which > length(comps) + ) { + cli::cli_abort(c( + "{.arg {arg}} must be a single component name or an integer index between 1 and {length(comps)}.", + "x" = "You supplied {.val {which}}." + )) + } + as.integer(which) +} + +#' @rdname tf_mv_methods +#' @export +`tf_component<-` <- function(f, which, value) { + assert_tf_mv(f) + assert_tf(value) + if (vec_size(value) != vec_size(f)) { + cli::cli_abort( + "Replacement component has length {vec_size(value)}, expected {vec_size(f)}." + ) + } + # Replacing a component invalidates the joint MFPC eigenbasis (the shared + # scores no longer correspond to the new component), so warn and demote + # before the value is swapped in. `tfb_mfpc_demote()` rebuilds the retained + # components as standalone full-rank `tfb_fpc` -- merely dropping the `mfpc` + # attribute would leave them with the abort-stub `scoring_function`, breaking + # subsequent arithmetic / `tf_rebase()`. The same applies to a `value` taken + # from this very fit (e.g. `mf2$x <- mf$x`), which is rebuilt as well. + if (is_tfb_mfpc(f)) { + warn_mfpc_demotion( + "Replacing a component invalidates the joint MFPC eigenbasis." + ) + value <- mfpc_demote_component_value( + value, + tf_components(f), + attr(f, "mfpc")$uni + ) + f <- tfb_mfpc_demote(f) + } + comps <- tf_components(f) + if (is.character(which)) { + # validate the scalar name *before* the vectorized `%in%` below, which + # would otherwise error ("length > 1 in coercion to logical") on a + # multi-element selector. + assert_string(which, min.chars = 1L, .var.name = "which") + if (which %in% names(comps)) { + comps[[match(which, names(comps))]] <- value + } else { + # allow adding a new component by name + comps[[which]] <- value + } + } else { + which <- check_component_index(which, comps, arg = "which") + comps[[which]] <- value + } + # new_tf_mv() validates that `value` is the same kind (tfd/tfb) as the other + # components and that its domain is compatible. The joint MFPC spec was + # dropped by tfb_mfpc_demote() above and is intentionally not forwarded. + new_tf_mv(comps, domain = tf_domain(f)) +} + +#' @export +`$.tf_mv` <- function(x, name) tf_component(x, name) + +#' @export +`$<-.tf_mv` <- function(x, name, value) { + `tf_component<-`(x, name, value) +} + +#------------------------------------------------------------------------------- + +# do all components share (numerically) identical arg grids? +mv_args_shared <- function(f) { + args <- map(tf_components(f), tf_arg) + length(args) <= 1L || + all(map_lgl(args[-1], \(a) isTRUE(all.equal(a, args[[1]])))) +} + +#' @export +tf_arg.tf_mv <- function(f) { + comps <- tf_components(f) + if (!length(comps)) return(numeric(0)) + args <- map(comps, tf_arg) + all_agree <- mv_args_shared(f) + if (any(map_lgl(comps, is_irreg))) { + # all-irregular + per-curve args shared across components (the typical + # movement-data case): collapse to a single per-curve list. + if (all(map_lgl(comps, is_irreg)) && all_agree) return(args[[1]]) + # otherwise return per-component (genuinely different args per dim) + return(args) + } + # all components are regular: collapse if they share the grid + if (all_agree) return(args[[1]]) + args +} + +# assemble per-component evaluation lists into a uniform list of per-curve +# data.frames with columns (arg, comp1, ..., compd). `grids` is either a single +# numeric vector (shared across curves) or a length-n list of per-curve grids. +# `comp_evals[[k]][[i]]` holds the k-th component's numeric evaluations at +# `grids[[i]]` (or `grids`); NA-fill where a component has no value at that arg. +assemble_mv_evals <- function(comp_evals, grids, comp_names, n) { + if (!n) return(list()) + d <- length(comp_evals) + shared_grid <- !is.list(grids) + map(seq_len(n), function(i) { + g <- if (shared_grid) grids else grids[[i]] + df <- data_frame0(arg = if (length(g)) g else numeric(0)) + if (!d) return(df) + for (k in seq_len(d)) { + v <- comp_evals[[k]][[i]] + df[[comp_names[k]]] <- if (is.null(v) || length(v) != length(g)) { + rep(NA_real_, length(g)) + } else { + v + } + } + df + }) +} + +# private companion: drop the leading `arg` column and return the component +# evaluations as a plain numeric matrix [n_arg, d]. Used by callers (e.g. +# arclength_polyline) that need the matrix shape internally. +evals_to_matrix <- function(df) { + if (!ncol(df) || ncol(df) == 1L) { + return(matrix(numeric(0), nrow = nrow(df), ncol = 0)) + } + mat <- as.matrix(df[, -1L, drop = FALSE]) + rownames(mat) <- NULL + mat +} + +tf_mv_curve_grids <- function(x) { + n <- vec_size(x) + arg_vals <- tf_arg(x) + if (is.numeric(arg_vals)) { + rep(list(arg_vals), n) + } else if ( + all(map_lgl(arg_vals, is.numeric)) && + !identical(names(arg_vals), attr(x, "comp_names")) + ) { + arg_vals + } else { + comps <- tf_components(x) + lapply(seq_len(n), function(i) { + sort(unique(unlist( + lapply(comps, function(comp) { + comp_arg <- tf_arg(comp) + if (is.list(comp_arg)) comp_arg[[i]] else comp_arg + }), + use.names = FALSE + ))) + }) + } +} + +#' @export +tf_evaluations.tf_mv <- function(f) { + if (!vec_size(f)) return(list()) + comps <- tf_components(f) + comp_names <- attr(f, "comp_names") + n <- vec_size(f) + # Evaluate every component on each curve's union grid so the per-curve + # data.frame has a single shared `arg` column with NA-fill where a component + # has no native observation. (For aligned-grid mv this is a no-op.) + grids <- tf_mv_curve_grids(f) + comp_evals <- map(comps, \(comp) { + map(comp[, grids, matrix = FALSE], `[[`, "value") + }) + assemble_mv_evals(comp_evals, grids, comp_names, n) +} + +#' @export +tf_count.tf_mv <- function(f) { + comps <- tf_components(f) + n <- vec_size(f) + if (!length(comps) || n == 0L) { + return(matrix( + integer(0), + nrow = n, + ncol = length(comps), + dimnames = list(names(f), attr(f, "comp_names")) + )) + } + if (length(comps) && all(map_lgl(comps, is_tfb))) { + cli::cli_abort( + "{.fn tf_count} is not defined for basis-represented ({.cls tfb_mv}) data." + ) + } + counts <- map(comps, tf_count) + mat <- do.call(cbind, counts) + if (!is.null(mat)) colnames(mat) <- attr(f, "comp_names") + mat +} + +#' @export +is.na.tf_mv <- function(x) { + comp_na <- map(tf_components(x), is.na) + if (!length(comp_na)) return(logical(0)) + Reduce(`|`, comp_na) +} + +mv_complete <- function(x, na.rm = FALSE, missing = is.na(x)) { + if (!length(missing) || !any(missing)) { + return(x) + } + if (na.rm) { + return(x[!missing]) + } + comps <- map(tf_components(x), function(comp) { + suppressWarnings(comp[missing] <- NA) + comp + }) + names(comps) <- attr(x, "comp_names") + new_tf_mv(comps, domain = tf_domain(x)) +} + +mv_missing <- function(...) { + mv_args <- list(...) + if (!length(mv_args)) { + return(logical(0)) + } + Reduce(`|`, map(mv_args, is.na)) +} diff --git a/R/approx.R b/R/approx.R index c9253742..4b32a17c 100644 --- a/R/approx.R +++ b/R/approx.R @@ -1,5 +1,4 @@ zoo_wrapper <- function(f, ...) { - #nocov start dots <- list(...) function(x, arg, evaluations) { x_arg <- sort_unique(c(x, arg)) @@ -9,7 +8,6 @@ zoo_wrapper <- function(f, ...) { ret <- do.call(f, dots) coredata(ret)[requested] } - #nocov end } #------------------------------------------------------------------------------- diff --git a/R/assertions.R b/R/assertions.R index d7455640..daff69e1 100644 --- a/R/assertions.R +++ b/R/assertions.R @@ -86,6 +86,10 @@ assert_tfb <- function(x, .var.name = vname(x)) { assert_class(x, "tfb", .var.name = .var.name) } +assert_tf_mv <- function(x, .var.name = vname(x)) { + assert_class(x, "tf_mv", .var.name = .var.name) +} + # "strict" does not allow stretching/compressing or truncation of domain # (i.e. strict allows only bijective time transformations) assert_warp <- function(warp, x, strict = FALSE) { @@ -120,6 +124,400 @@ assert_monotonic <- function(x, .var.name = vname(x)) { invisible(x) } +# Internal validator for tf and tf_mv objects. +# +# Cheap, deep, recursive integrity check intended for use in tests, not in +# production code. Returns `TRUE` invisibly on success; aborts on the first +# failed invariant with a `cli::cli_abort` message naming the broken contract. +# +# @param x A `tf` or `tf_mv` object. +# @return `TRUE` invisibly on success. +# @keywords internal +validate_tf <- function(x) { + if (inherits(x, "tf_mv")) { + return(validate_tf_mv(x)) + } + if (!inherits(x, "tf")) { + cli::cli_abort( + "{.arg x} is not a {.cls tf} object (class: {.val {class(x)}})." + ) + } + # ---- domain (all tf subclasses) ----------------------------------------- + domain <- attr(x, "domain") + # length-0 prototypes legitimately carry a degenerate domain — numeric(2) (= c(0, 0)) + # in the tfb constructors, or c(NA_real_, NA_real_) in the tfd constructors (matching + # the S4 prototype); only require the structural shape (length-2 numeric) for prototypes. + is_proto <- length(unclass(x)) == 0L + bad_domain <- !is.numeric(domain) || length(domain) != 2L || + (!is_proto && (anyNA(domain) || any(!is.finite(domain)) || + domain[1] > domain[2] || length(unique(domain)) != 2L)) + if (bad_domain) { + cli::cli_abort(paste0( + "Invalid {.field domain}: must be a finite, sorted length-2 numeric ", + "with distinct endpoints; got {.val {domain}}." + )) + } + # ---- element names ------------------------------------------------------ + nms <- names(x) + if (!is.null(nms)) { + if (length(nms) != length(unclass(x))) { + cli::cli_abort(paste0( + "{.field names(x)} length {.val {length(nms)}} does not match ", + "data length {.val {length(unclass(x))}}." + )) + } + if (anyNA(nms)) { + cli::cli_abort("{.field names(x)} contains {.code NA}.") + } + } + # ---- dispatch on subclass ---------------------------------------------- + if (inherits(x, "tfd_reg")) { + validate_tfd_reg(x) + } else if (inherits(x, "tfd_irreg")) { + validate_tfd_irreg(x) + } else if (inherits(x, "tfb_spline")) { + validate_tfb_spline(x) + } else if (inherits(x, "tfb_fpc")) { + validate_tfb_fpc(x) + } else { + cli::cli_abort( + "Unknown {.cls tf} subclass: {.val {class(x)}}." + ) + } + invisible(TRUE) +} + +# helper: assert arg vector is sorted, finite, in domain +validate_arg_vector <- function(arg, domain, where, check_unique = TRUE) { + if (!is.numeric(arg)) { + cli::cli_abort("{where}: {.field arg} must be numeric.") + } + if (length(arg) == 0L) return(invisible(TRUE)) + if (anyNA(arg) || any(!is.finite(arg))) { + cli::cli_abort("{where}: {.field arg} contains non-finite values.") + } + if (is.unsorted(arg)) { + cli::cli_abort("{where}: {.field arg} is not sorted.") + } + if (check_unique && anyDuplicated(arg)) { + cli::cli_abort("{where}: {.field arg} contains duplicates.") + } + if (min(arg) < domain[1] || max(arg) > domain[2]) { + cli::cli_abort(paste0( + "{where}: {.field arg} values outside {.field domain} ", + "[{domain[1]}, {domain[2]}]." + )) + } + invisible(TRUE) +} + +validate_tfd_reg <- function(x) { + # required attrs + required <- c("evaluator", "evaluator_name", "arg", "domain") + missing_attrs <- setdiff(required, names(attributes(x))) + if (length(missing_attrs)) { + cli::cli_abort( + "{.cls tfd_reg}: missing required attribute(s) {.val {missing_attrs}}." + ) + } + if (!is.function(attr(x, "evaluator"))) { + cli::cli_abort("{.cls tfd_reg}: {.field evaluator} must be a function.") + } + if (!is.character(attr(x, "evaluator_name"))) { + cli::cli_abort( + "{.cls tfd_reg}: {.field evaluator_name} must be character." + ) + } + arg_attr <- attr(x, "arg") + if (!is.list(arg_attr) || length(arg_attr) != 1L) { + cli::cli_abort( + "{.cls tfd_reg}: {.field arg} attribute must be a length-1 list." + ) + } + domain <- attr(x, "domain") + validate_arg_vector(arg_attr[[1]], domain, where = "tfd_reg") + # element lengths match shared arg length (NULL entries = NA functions OK) + data <- unclass(x) + expected_len <- length(arg_attr[[1]]) + for (i in seq_along(data)) { + el <- data[[i]] + if (is.null(el)) next + if (!is.numeric(el)) { + cli::cli_abort( + "{.cls tfd_reg}: element {.val {i}} is not numeric." + ) + } + if (length(el) != expected_len) { + cli::cli_abort(paste0( + "{.cls tfd_reg}: element {.val {i}} has length {.val {length(el)}}, ", + "expected {.val {expected_len}} (= length of shared arg)." + )) + } + } + invisible(TRUE) +} + +validate_tfd_irreg <- function(x) { + required <- c("evaluator", "evaluator_name", "domain") + missing_attrs <- setdiff(required, names(attributes(x))) + if (length(missing_attrs)) { + cli::cli_abort( + "{.cls tfd_irreg}: missing required attribute(s) {.val {missing_attrs}}." + ) + } + if (!is.function(attr(x, "evaluator"))) { + cli::cli_abort("{.cls tfd_irreg}: {.field evaluator} must be a function.") + } + if (!is.character(attr(x, "evaluator_name"))) { + cli::cli_abort( + "{.cls tfd_irreg}: {.field evaluator_name} must be character." + ) + } + domain <- attr(x, "domain") + data <- unclass(x) + for (i in seq_along(data)) { + el <- data[[i]] + if (is.null(el)) next + if (!is.list(el)) { + cli::cli_abort(paste0( + "{.cls tfd_irreg}: element {.val {i}} must be a list (got ", + "{.cls {class(el)}})." + )) + } + expected_fields <- c("arg", "value") + if (!all(expected_fields %in% names(el))) { + cli::cli_abort(paste0( + "{.cls tfd_irreg}: element {.val {i}} must be a list with fields ", + "{.val arg} and {.val value}; got {.val {names(el)}}." + )) + } + if (length(el$arg) != length(el$value)) { + cli::cli_abort(paste0( + "{.cls tfd_irreg}: element {.val {i}} has length(arg) = ", + "{.val {length(el$arg)}} != length(value) = {.val {length(el$value)}}." + )) + } + validate_arg_vector( + el$arg, domain, + where = paste0("tfd_irreg element ", i), + check_unique = TRUE + ) + } + invisible(TRUE) +} + +validate_tfb_spline <- function(x) { + # length-0 prototypes from new_tfb_spline(numeric(0)) legitimately carry + # only domain/arg/family, so basis attributes can't be required for them + # (the universal domain/names checks in validate_tf() still apply). + if (length(unclass(x)) == 0L) { + return(invisible(TRUE)) + } + required <- c( + "basis", "basis_matrix", "basis_label", "basis_args", + "arg", "family", "family_label", "domain" + ) + missing_attrs <- setdiff(required, names(attributes(x))) + if (length(missing_attrs)) { + cli::cli_abort(paste0( + "{.cls tfb_spline}: missing required attribute(s) ", + "{.val {missing_attrs}}." + )) + } + if (!is.function(attr(x, "basis"))) { + cli::cli_abort("{.cls tfb_spline}: {.field basis} must be a function.") + } + bmat <- attr(x, "basis_matrix") + arg <- attr(x, "arg") + domain <- attr(x, "domain") + # arg is a flat numeric vector for tfb_spline + if (!is.numeric(arg)) { + cli::cli_abort("{.cls tfb_spline}: {.field arg} must be numeric.") + } + validate_arg_vector(arg, domain, where = "tfb_spline") + if (!is.matrix(bmat) || !is.numeric(bmat)) { + cli::cli_abort( + "{.cls tfb_spline}: {.field basis_matrix} must be a numeric matrix." + ) + } + if (nrow(bmat) != length(arg)) { + cli::cli_abort(paste0( + "{.cls tfb_spline}: nrow(basis_matrix) = {.val {nrow(bmat)}} != ", + "length(arg) = {.val {length(arg)}}." + )) + } + data <- unclass(x) + expected_len <- ncol(bmat) + for (i in seq_along(data)) { + el <- data[[i]] + if (is.null(el)) next + if (!is.numeric(el)) { + cli::cli_abort( + "{.cls tfb_spline}: coefficient {.val {i}} is not numeric." + ) + } + if (length(el) != expected_len) { + cli::cli_abort(paste0( + "{.cls tfb_spline}: coefficient {.val {i}} has length ", + "{.val {length(el)}}, expected ncol(basis_matrix) = ", + "{.val {expected_len}}." + )) + } + } + invisible(TRUE) +} + +validate_tfb_fpc <- function(x) { + # length-0 prototypes from new_tfb_fpc(numeric(0)) legitimately carry only + # domain/arg/score_variance, so basis attributes can't be required for them + # (the universal domain/names checks in validate_tf() still apply). + if (length(unclass(x)) == 0L) { + return(invisible(TRUE)) + } + required <- c( + "basis", "basis_matrix", "basis_label", + "arg", "score_variance", "scoring_function", "domain" + ) + missing_attrs <- setdiff(required, names(attributes(x))) + if (length(missing_attrs)) { + cli::cli_abort( + "{.cls tfb_fpc}: missing required attribute(s) {.val {missing_attrs}}." + ) + } + if (!is.function(attr(x, "basis"))) { + cli::cli_abort("{.cls tfb_fpc}: {.field basis} must be a function.") + } + if (!is.function(attr(x, "scoring_function"))) { + cli::cli_abort( + "{.cls tfb_fpc}: {.field scoring_function} must be a function." + ) + } + bmat <- attr(x, "basis_matrix") + arg <- attr(x, "arg") + domain <- attr(x, "domain") + if (!is.numeric(arg)) { + cli::cli_abort("{.cls tfb_fpc}: {.field arg} must be numeric.") + } + validate_arg_vector(arg, domain, where = "tfb_fpc") + if (!is.matrix(bmat) || !is.numeric(bmat)) { + cli::cli_abort( + "{.cls tfb_fpc}: {.field basis_matrix} must be a numeric matrix." + ) + } + if (nrow(bmat) != length(arg)) { + cli::cli_abort(paste0( + "{.cls tfb_fpc}: nrow(basis_matrix) = {.val {nrow(bmat)}} != ", + "length(arg) = {.val {length(arg)}}." + )) + } + data <- unclass(x) + expected_len <- ncol(bmat) + for (i in seq_along(data)) { + el <- data[[i]] + if (is.null(el)) next + if (!is.numeric(el)) { + cli::cli_abort( + "{.cls tfb_fpc}: coefficient {.val {i}} is not numeric." + ) + } + if (length(el) != expected_len) { + cli::cli_abort(paste0( + "{.cls tfb_fpc}: coefficient {.val {i}} has length ", + "{.val {length(el)}}, expected ncol(basis_matrix) = ", + "{.val {expected_len}}." + )) + } + } + # score_variance should be ncol(basis_matrix) - 1 (one column is the mean) + sv <- attr(x, "score_variance") + if (!is.numeric(sv)) { + cli::cli_abort("{.cls tfb_fpc}: {.field score_variance} must be numeric.") + } + expected_sv_len <- ncol(bmat) - 1L + if (length(sv) != expected_sv_len) { + cli::cli_abort(paste0( + "{.cls tfb_fpc}: {.field score_variance} has length ", + "{.val {length(sv)}}, expected ncol(basis_matrix) - 1 = ", + "{.val {expected_sv_len}}." + )) + } + invisible(TRUE) +} + +validate_tf_mv <- function(x) { + if (!inherits(x, "tf_mv")) { + cli::cli_abort("{.arg x} is not a {.cls tf_mv} object.") + } + comps <- attr(x, "components") + if (!is.list(comps)) { + cli::cli_abort("{.cls tf_mv}: {.field components} must be a list.") + } + n <- length(unclass(x)) + # dummy payload length must match component count of curves (each component + # has length n; payload is also length n). Spec phrased this as + # "length(unclass(x)) == nrow(attr(x, 'components'))"; structurally the + # invariant is `length(unclass(x)) == vec_size(components[[i]])` for each i, + # which the constructor enforces. + domain <- attr(x, "domain") + is_proto <- length(unclass(x)) == 0L + bad_domain <- !is.numeric(domain) || length(domain) != 2L || + any(!is.finite(domain)) || domain[1] > domain[2] || + (!is_proto && length(unique(domain)) != 2L) + if (bad_domain) { + cli::cli_abort(paste0( + "{.cls tf_mv}: invalid {.field domain}: must be a finite sorted ", + "length-2 numeric with distinct endpoints; got {.val {domain}}." + )) + } + comp_names <- attr(x, "comp_names") + if (length(comps)) { + lens <- vapply(comps, length, integer(1)) + if (any(lens != n)) { + cli::cli_abort(paste0( + "{.cls tf_mv}: payload length {.val {n}} does not match component ", + "lengths {.val {lens}}." + )) + } + # all components valid tf, recursively + for (i in seq_along(comps)) { + if (!inherits(comps[[i]], "tf")) { + cli::cli_abort( + "{.cls tf_mv}: component {.val {i}} is not a {.cls tf} object." + ) + } + validate_tf(comps[[i]]) + } + # all components share the same domain (the constructor enforces this + # by widening, so verify the invariant holds) + comp_domains <- lapply(comps, tf_domain) + first_d <- comp_domains[[1]] + for (i in seq_along(comp_domains)[-1]) { + if (!isTRUE(all.equal(comp_domains[[i]], first_d))) { + cli::cli_abort(paste0( + "{.cls tf_mv}: component {.val {i}} domain {.val ", + "{comp_domains[[i]]}} differs from component 1 domain ", + "{.val {first_d}}." + )) + } + } + # also: tf_mv domain must agree with components' shared domain + if (!isTRUE(all.equal(first_d, domain))) { + cli::cli_abort(paste0( + "{.cls tf_mv}: {.field domain} {.val {domain}} differs from ", + "components' shared domain {.val {first_d}}." + )) + } + # comp_names matches names(components) + if (!identical(comp_names, names(comps))) { + cli::cli_abort(paste0( + "{.cls tf_mv}: {.field comp_names} {.val {comp_names}} does not ", + "match names(components) {.val {names(comps)}}." + )) + } + } + invisible(TRUE) +} + check_limit <- function(x, f) { domain <- tf_domain(f) res <- check_numeric( diff --git a/R/bibentries.R b/R/bibentries.R index 9b772fc1..9186db9f 100644 --- a/R/bibentries.R +++ b/R/bibentries.R @@ -1,4 +1,15 @@ bibentries <- c( + happ2018 = bibentry( + "article", + title = "Multivariate functional principal component analysis for data observed on different (dimensional) domains", + author = "Happ, Clara and Greven, Sonja", + journal = "Journal of the American Statistical Association", + volume = "113", + number = "522", + pages = "649--659", + year = "2018", + publisher = "Taylor & Francis" + ), sun2012exact = bibentry( "article", title = "Exact fast computation of band depth for large functional datasets: How quickly can one million curves be ranked?", @@ -41,23 +52,6 @@ bibentries <- c( pages = "2287--2322", year = "2010" ), - softimpute = bibentry( - "manual", - title = "softImpute: Matrix Completion via Iterative Soft-Thresholded SVD", - author = "Trevor Hastie and Rahul Mazumder", - year = "2021", - note = "R package version 1.4-1", - url = "https://CRAN.R-project.org/package=softImpute", - doi = "10.32614/CRAN.package.softImpute" - ), - meng2023mogsa = bibentry( - "manual", - title = "mogsa: Multiple omics data integrative clustering and gene set analysis", - author = "Chen Meng", - year = "2023", - url = "https://bioconductor.org/packages/mogsa", - doi = "10.18129/B9.bioc.mogsa" - ), swihart2010lasagna = bibentry( "article", title = "Lasagna plots: a saucy alternative to spaghetti plots", diff --git a/R/brackets-mv.R b/R/brackets-mv.R new file mode 100644 index 00000000..7eddc303 --- /dev/null +++ b/R/brackets-mv.R @@ -0,0 +1,264 @@ +# Evaluation and bracket-indexing ---------------------------------------------- + +#' @export +tf_evaluate.tf_mv <- function(object, arg, ...) { + if (!vec_size(object)) return(list()) + comps <- tf_components(object) + comp_names <- attr(object, "comp_names") + n <- vec_size(object) + has_arg <- !missing(arg) && !is.null(arg) + # Build the per-curve evaluation grid: caller's `arg` (a numeric vector or + # per-curve list) if supplied, otherwise the per-curve union of the + # components' native grids. Then evaluate each component on that grid so + # every per-curve data.frame has a single `arg` column with NA-fill where + # a component has no value at that arg. + if (has_arg) { + grids <- if (is.list(arg)) arg else rep(list(arg), n) + } else { + grids <- tf_mv_curve_grids(object) + } + comp_evals <- map(comps, function(comp) { + tf_evaluate(comp, arg = grids, ...) + }) + assemble_mv_evals(comp_evals, grids, comp_names, n) +} + +#' @rdname tfbrackets +#' @param component for `tf_mv` objects only: optionally restrict evaluation / +#' extraction to a subset of the output dimensions, given by name or integer +#' index. A single name/index drops to the univariate component (a `tfd` or +#' `tfb`); a vector of length > 1 returns a sub-`tf_mv` containing just those +#' components. `NULL` (default) keeps all `d` components. +#' @export +`[.tf_mv` <- function( + x, + i, + j, + component = NULL, + interpolate = TRUE, + matrix = TRUE +) { + # multi-component subset: rewrap as a smaller tf_mv with the selected + # components, then fall through to the rest of the bracket logic with + # `component = NULL` (which now operates on the smaller tf_mv). + if (!is.null(component) && length(component) > 1L) { + x <- tf_mv_subset_components(x, component) + component <- NULL + } + if (!is.null(component)) { + comp <- tf_component(x, component) + if (missing(i)) i <- seq_along(comp) + if (missing(j)) { + if (missing(matrix)) { + return(comp[i, interpolate = interpolate]) + } + return(comp[i, interpolate = interpolate, matrix = matrix]) + } + if (missing(matrix)) { + return(comp[i, j, interpolate = interpolate]) + } + return(comp[i, j, interpolate = interpolate, matrix = matrix]) + } + comps <- tf_components(x) + comp_names <- attr(x, "comp_names") + + # `interpolate = FALSE` is only meaningful for tfd components; tfb components + # are always evaluated from their basis representation. Build a per-component + # `interpolate` vector so we honour the user's choice for tfd components in + # any (potentially mixed) tf_mv, and emit the "interpolate ignored" inform + # at most once even when several components are basis-represented (#252). + comp_is_tfb <- map_lgl(comps, is_tfb) + if (!interpolate && any(comp_is_tfb)) { + cli::cli_inform( + "{.arg interpolate} ignored for data in basis representation." + ) + } + interpolate_comp <- ifelse(comp_is_tfb, TRUE, interpolate) + + # matrix-index i: (function, arg) pairs -> (nrow(i) x d) matrix + if (!missing(i) && is.matrix(i)) { + cols <- map2( + comps, + interpolate_comp, + \(comp, intp) comp[i, interpolate = intp] + ) + ret <- do.call(cbind, cols) + colnames(ret) <- comp_names + return(ret) + } + + # Validate `i` the same way univariate `[.tf` does (no NA, no missing names, + # no out-of-bounds). TODO(#263): replace with a shared helper that also + # handles `j`-normalisation, to remove the duplication with `[.tf`. + if (missing(i)) { + i <- seq_along(x) + } else { + i <- vec_as_location( + i, + n = vec_size(x), + names = names(x), + missing = "error" + ) + } + xi <- vec_slice(x, i) + + if (missing(j) && missing(matrix)) { + return(xi) + } + if (missing(j) && !missing(matrix) && isFALSE(matrix)) { + j <- tf_mv_curve_grids(xi) + } + + comps_i <- tf_components(xi) + n_i <- vec_size(xi) + if (matrix) { + if (missing(j)) j <- sort_unique(tf_mv_curve_grids(xi), simplify = TRUE) + if (!length(comps_i) || n_i == 0L) { + return(array( + numeric(0), + dim = c(n_i, length(j), length(comps_i)), + dimnames = list(names(xi), as.character(j), comp_names) + )) + } + mats <- map2( + comps_i, + interpolate_comp, + \(comp, intp) comp[, j, interpolate = intp, matrix = TRUE] + ) + arr <- array( + unlist(mats, use.names = FALSE), + dim = c(nrow(mats[[1]]), ncol(mats[[1]]), length(comps_i)), + dimnames = list(rownames(mats[[1]]), colnames(mats[[1]]), comp_names) + ) + return(arr) + } + # matrix = FALSE: list of per-curve data.frames with arg + one col per comp + if (!length(comps_i) || n_i == 0L) { + return(setNames(vector("list", n_i), names(xi))) + } + dfs <- map2( + comps_i, + interpolate_comp, + \(comp, intp) comp[, j, interpolate = intp, matrix = FALSE] + ) + map(seq_len(n_i), function(k) { + base <- dfs[[1]][[k]] + out <- data_frame0(arg = base$arg) + for (cn in seq_along(comp_names)) { + out[[comp_names[cn]]] <- dfs[[cn]][[k]]$value + } + out + }) |> + setNames(names(xi)) +} + +#' @rdname tfbrackets +#' @export +`[<-.tf_mv` <- function(x, i, value) { + # Replace curves component-wise via the univariate `[<-.tf` (which handles + # NA assignment, length recycling and lossy casts per component), then + # rebuild. This is more robust than letting the default `[<-.tf` thread a + # `vec_slice<-` through the data-frame-of-components proxy. + if (missing(i)) i <- seq_along(x) + comps <- tf_components(x) + value_comps <- if (is_tf_mv(value)) { + check_compatible_mv(x, value) + tf_components(value) + } else { + # Only logical NA is broadcast across every component. Typed NAs + # (NA_real_, NA_integer_) and any non-NA value -- including a univariate + # tf -- are rejected: they would silently make every component identical + # or trigger a confusing downstream "Can't combine" error. + is_logical_na <- is.logical(value) && length(value) && all(is.na(value)) + if (!is_logical_na) { + cli::cli_abort( + "expected logical {.code NA} or another {.cls tf_mv} on the right-hand side." + ) + } + # Validate length upfront so we emit a clean message rather than the + # downstream vec_slice<- "Can't recycle" complaint. Use vctrs subassignment + # semantics so logical / negative / named indices give the correct number + # of replacement locations rather than `length(i)`. + n_loc <- length(vec_as_location( + i, + n = vec_size(x), + names = names(x), + missing = "error" + )) + if (length(value) > 1L && length(value) != n_loc) { + cli::cli_abort( + "length of {.arg value} ({length(value)}) must be 1 or match \\ + the number of locations in {.arg i} ({n_loc})." + ) + } + rep(list(value), length(comps)) + } + new_comps <- map2(comps, value_comps, function(comp, v) { + comp[i] <- v + comp + }) + names(new_comps) <- attr(x, "comp_names") + new_tf_mv(new_comps, domain = tf_domain(x)) +} + +#' @export +`names<-.tf_mv` <- function(x, value) { + # curve names live on the underlying components (that is what `vec_restore()` + # rebuilds from), so push them down to every component rather than only onto + # the outer vctr -- otherwise they are lost on the next subset / concatenation. + comps <- map(tf_components(x), function(comp) { + names(comp) <- value + comp + }) + names(comps) <- attr(x, "comp_names") + # Forward the joint MFPC spec, if any: setting curve names is a non-mutating + # rename and must not silently demote a `tfb_mfpc`. (`vec_c()` runs `names<-` + # post-restore, which would otherwise strip the spec set by `tf_mv_ptype2`.) + new_tf_mv(comps, domain = tf_domain(x), mfpc = attr(x, "mfpc")) +} + +tf_mv_subset_components <- function(x, component) { + comp_names <- attr(x, "comp_names") + loc <- tf_mv_component_locations(component, comp_names) + components <- tf_components(x)[loc] + + if (!is_tfb_mfpc(x)) { + return(new_tf_mv(components, domain = tf_domain(x))) + } + + selected_names <- comp_names[loc] + preserves_full_spec <- length(loc) == length(comp_names) && + !anyDuplicated(selected_names) && + setequal(selected_names, comp_names) + + if (preserves_full_spec) { + return(new_tf_mv( + components, + domain = tf_domain(x), + mfpc = mfpc_reorder_spec(attr(x, "mfpc"), selected_names) + )) + } + + warn_mfpc_demotion( + "Selecting a strict subset or repeated set of components invalidates the joint MFPC eigenbasis." + ) + x <- tfb_mfpc_demote(x) + new_tf_mv(tf_components(x)[loc], domain = tf_domain(x)) +} + +tf_mv_component_locations <- function(component, comp_names) { + if (is.character(component)) { + bad <- setdiff(component, comp_names) + if (length(bad)) { + cli::cli_abort("Unknown component{?s}: {.val {bad}}.") + } + return(match(component, comp_names)) + } + assert_integerish( + component, + any.missing = FALSE, + lower = 1L, + upper = length(comp_names) + ) + as.integer(component) +} diff --git a/R/calculus-mv.R b/R/calculus-mv.R new file mode 100644 index 00000000..b58f45b2 --- /dev/null +++ b/R/calculus-mv.R @@ -0,0 +1,93 @@ +# Re-representation, calculus, smoothing (component-wise) ---------------------- + +#' @export +tf_rebase.tf_mv <- function(object, basis_from, arg = NULL, ...) { + cn <- attr(object, "comp_names") + comps <- tf_components(object) + # re-express onto a multivariate FPCA basis -> joint re-scoring, not the + # independent component-wise rebase below. + if (is_tf_mv(basis_from) && is_tfb_mfpc(basis_from)) { + return(mfpc_rescore(object, basis_from, arg = arg)) + } + if (is_tf_mv(basis_from)) { + check_compatible_mv(object, basis_from) + bases <- tf_components(basis_from) + new_comps <- map2(comps, bases, function(o, b) { + if (is.null(arg)) tf_rebase(o, b, ...) else + tf_rebase(o, b, arg = arg, ...) + }) + } else { + new_comps <- map(comps, function(o) { + if (is.null(arg)) { + tf_rebase(o, basis_from, ...) + } else { + tf_rebase(o, basis_from, arg = arg, ...) + } + }) + } + names(new_comps) <- cn + new_tf_mv(new_comps) +} + +#' @export +tf_derive.tf_mv <- function(f, arg, order = 1, ...) { + has_arg <- !missing(arg) + map_components(f, function(comp) { + if (has_arg) { + tf_derive(comp, arg = arg, order = order, ...) + } else { + tf_derive(comp, order = order, ...) + } + }) +} + +#' @export +tf_integrate.tf_mv <- function(f, arg, lower, upper, definite = TRUE, ...) { + cn <- attr(f, "comp_names") %||% character(0) + has_arg <- !missing(arg) + has_lower <- !missing(lower) + has_upper <- !missing(upper) + # zero-component object: return a shape-appropriate empty result instead of + # crashing on results[[1]] below. + if (!length(cn)) { + n <- vec_size(f) + if (definite) { + return(matrix( + numeric(0), + nrow = n, + ncol = 0, + dimnames = list(names(f), NULL) + )) + } + return(new_tf_mv(list(), domain = tf_domain(f), class = "tfd_mv")) + } + results <- map(tf_components(f), function(comp) { + call_args <- list(comp, definite = definite, ...) + if (has_arg) call_args$arg <- arg + if (has_lower) call_args$lower <- lower + if (has_upper) call_args$upper <- upper + do.call(tf_integrate, call_args) + }) + if (is.numeric(results[[1]])) { + mat <- do.call(cbind, results) + colnames(mat) <- cn + return(mat) + } + names(results) <- cn + new_tf_mv(results) +} + +#' @export +tf_smooth.tf_mv <- function(x, ...) { + map_components(x, \(comp) tf_smooth(comp, ...)) +} + +#' @export +tf_zoom.tf_mv <- function( + f, + begin = tf_domain(f)[1], + end = tf_domain(f)[2], + ... +) { + map_components(f, \(comp) tf_zoom(comp, begin = begin, end = end, ...)) +} diff --git a/R/calculus.R b/R/calculus.R index 4ee77ac3..40d35f9f 100644 --- a/R/calculus.R +++ b/R/calculus.R @@ -95,8 +95,34 @@ tf_invert.tfb <- function(x, ...) { } -# reinsert NULL entries for previously missing functions while preserving tf attrs -restore_na_entries <- function(tf_non_na, na_entries, names_out) { +# Trapezoidal quadrature weights for a possibly-irregular grid. +# +# Contract: given a sorted numeric grid `arg` of length n, returns a length-n +# weight vector `w` such that for a function sampled on `arg` with values `v`, +# `sum(w * v)` is the trapezoidal-rule approximation of the integral over +# `[arg[1], arg[n]]`. Interior weights are the average of the two adjacent +# `diff(arg)` spacings; boundary weights are half of the single adjacent +# spacing. The result is invariant to a constant shift of `arg`. +trapezoid_weights <- function(arg) { + delta <- c(0, diff(arg)) + 0.5 * c(delta[-1] + head(delta, -1), tail(delta, 1)) +} + +# Reinsert NULL entries for previously missing functions while preserving tf attrs. +# +# Contract: `tf_non_na` is a (possibly zero-length) `tf` carrying the desired +# output attributes (e.g. a freshly rebased `tfb`); `na_entries` is the logical +# mask of the full-length result; `names_out` are the final names. When some +# entries are non-NA, those slots are filled from `unclass(tf_non_na)` in order. +# When every entry is NA, pass `ref_tfb` to supply the attributes (since +# `tf_non_na` would itself be empty and carry no useful attributes); by default +# `ref_tfb` falls back to `tf_non_na`. +restore_na_entries <- function( + tf_non_na, + na_entries, + names_out, + ref_tfb = tf_non_na +) { if (!any(na_entries)) { return(setNames(tf_non_na, names_out)) } @@ -105,7 +131,7 @@ restore_na_entries <- function(tf_non_na, na_entries, names_out) { ret[!na_entries] <- unclass(tf_non_na) } ret[na_entries] <- list(NULL) - attributes(ret) <- attributes(tf_non_na) + attributes(ret) <- attributes(ref_tfb) names(ret) <- names_out ret } @@ -307,6 +333,16 @@ tf_derive.tfb_fpc <- function(f, arg = tf_arg(f), order = 1, ...) { #' @returns For `definite = TRUE`, the definite integrals of the functions in #' `f`. For `definite = FALSE` and `tf`-inputs, a `tf` object containing their #' anti-derivatives +#' @details +#' When `f` is irregular **and** `lower` / `upper` are not supplied explicitly, +#' they default to each curve's own observed arg range (i.e., the range of its +#' `tf_arg()` values) rather than the (shared) domain endpoints; for regular `tfd` +#' the defaults remain the domain endpoints. +#' Without this per-curve default, curves that do not span the full domain +#' would silently NA-poison the trapezoidal sum, because the default linear +#' evaluator does not extrapolate. Pass explicit `lower` / `upper` to integrate +#' over a fixed sub-interval, or switch to an extrapolating evaluator +#' (e.g. [tf_approx_fill_extend()]) to integrate over the full domain. #' @examples #' arg <- seq(0, 1, length.out = 11) #' x <- tfd(rbind(arg, arg^2), arg = arg) @@ -335,12 +371,26 @@ tf_integrate.tfd <- function( ) { assert_arg(arg, f) arg <- ensure_list(arg) - # TODO: integrate is NA whenever arg does not cover entire domain! + default_lower <- missing(lower) + default_upper <- missing(upper) assert_limit(lower, f) assert_limit(upper, f) + if (is_irreg(f) && (default_lower || default_upper)) { + # For irregular data, the default linear evaluator does not extrapolate. + # Using global domain endpoints when curves don't reach them yields NA + # for the boundary evaluations and NA-poisoned integrals. Fall back to + # each curve's own arg range for any defaulted limit. + per_curve_range <- map(ensure_list(tf_arg(f)), range) + if (default_lower) { + lower <- map_dbl(per_curve_range, 1) + } + if (default_upper) { + upper <- map_dbl(per_curve_range, 2) + } + } limits <- cbind(lower, upper) if (nrow(limits) > 1) { - if (!definite) .NotYetImplemented() # needs vd-data + if (!definite && !is_irreg(f)) .NotYetImplemented() # needs vd-data limits <- limits |> split(seq_len(nrow(limits))) } arg <- map2( @@ -398,10 +448,21 @@ tf_integrate.tfd <- function( } else { data_list <- map(quads, cumsum) names(data_list) <- names(f) + # for irregular `f`, `arg` holds per-curve grids and must stay a list; + # flattening would yield an unsorted vector and fail `tfd.list`'s checks. + arg_out <- if (length(arg) == 1) arg[[1]] else arg + # with per-curve limits (irregular f, default args) `limits` is a list of + # 2-vectors; collapse to a single [min(lower), max(upper)] domain. + domain_out <- if (is.list(limits)) { + lims <- do.call(rbind, limits) + c(min(lims[, 1]), max(lims[, 2])) + } else { + as.numeric(limits) + } tfd( data = data_list, - arg = unlist(arg, use.names = FALSE), - domain = as.numeric(limits), + arg = arg_out, + domain = domain_out, evaluator = !!attr(f, "evaluator_name") ) } diff --git a/R/convert-construct-utils.R b/R/convert-construct-utils.R index 6fe512ca..2b8bc43b 100644 --- a/R/convert-construct-utils.R +++ b/R/convert-construct-utils.R @@ -4,6 +4,14 @@ # turn a tf object into a data.frame evaluated on arg with cols id-arg-value tf_2_df <- function(tf, arg, interpolate = TRUE, ...) { assert_tf(tf) + # tf_2_df is the univariate (id, arg, value) builder. For a tf_mv, hand off + # to the multivariate-aware as.data.frame.tf_mv() instead of running the + # hard-coded scalar-column path on a multi-component object. + if (!is_tf_1d(tf)) { + return(as.data.frame(tf, unnest = TRUE, + arg = if (missing(arg)) NULL else arg, + interpolate = interpolate, ...)) + } if (missing(arg)) { arg <- tf_arg(tf) } @@ -80,7 +88,10 @@ mat_2_df <- function(x, arg) { assert_true(length(arg) == ncol(x)) id <- unique_id(rownames(x)) %||% seq_len(nrow(x)) - id <- ordered(id, levels = unique(id)) + # use plain factor() rather than ordered() so id has the same class as the + # other coercion paths (tf_2_df also uses factor()); avoids silent + # ordered-factor surprises downstream. + id <- factor(id, levels = unique(id)) t_x <- t(x) df_2_df(data_frame0( # use t(x) here so that order of vector remains unchanged... diff --git a/R/convert-mv.R b/R/convert-mv.R new file mode 100644 index 00000000..39b49bc7 --- /dev/null +++ b/R/convert-mv.R @@ -0,0 +1,139 @@ +# Conversion / interop --------------------------------------------------------- + +#' Coerce a `tf_mv` to a matrix or data.frame +#' +#' `as.matrix.tf_mv` returns a **3-d** array `[curve, arg, component]` -- the +#' natural shape for a vector-valued evaluation. This is deliberately +#' different from [`as.matrix.tf`] (2-d, `[curve, arg]`); see `@seealso`. +#' +#' `as.data.frame.tf_mv` returns either a single-column wrapping data.frame +#' (`unnest = FALSE`, for storing a `tf_mv` in a tibble column) or an +#' evaluated long/wide data.frame (`unnest = TRUE`). +#' +#' @param x a `tf_mv` object. +#' @param arg optional evaluation grid (numeric vector or per-curve list). +#' When `NULL` (default for `as.data.frame.tf_mv`; equivalent to "missing" +#' for `as.matrix.tf_mv`), the per-curve union of all components' native +#' argument grids is used. +#' @param interpolate forwarded to the underlying `tf` evaluation. `tfb` +#' components are always interpolated. +#' @param unnest if `TRUE`, return an evaluated data.frame (see `long`); if +#' `FALSE` (default), a one-column data.frame wrapping `x`. +#' @param long when `unnest = TRUE`, controls the schema. `long = TRUE` +#' (default) returns a 4-column data.frame +#' `(id, arg, component, value)` -- the multivariate analogue of the +#' univariate `(id, arg, value)` contract, with `component` a `factor` over +#' `attr(x, "comp_names")`. `long = FALSE` returns the wide +#' `(id, arg, comp1, ..., compd)` schema. +#' @param row.names,optional standard `as.data.frame` plumbing. +#' @param ... passed through. +#' @returns a 3-d array (`as.matrix.tf_mv`) or a data.frame (`as.data.frame.tf_mv`). +#' @seealso [as.matrix.tf()] (2-d sibling), [as.data.frame.tf()] (univariate +#' contract), [tf_evaluate()]. +#' @family tidyfun converters +#' @name converters-mv +#' @examples +#' arg <- seq(0, 1, length.out = 11) +#' xf <- tfd(t(sapply(1:3, function(i) sin(2 * pi * arg + i))), arg = arg) +#' yf <- tfd(t(sapply(1:3, function(i) cos(2 * pi * arg + i))), arg = arg) +#' mv <- tfd_mv(list(x = xf, y = yf)) +#' dim(as.matrix(mv)) +#' head(as.data.frame(mv, unnest = TRUE)) +#' @export +as.matrix.tf_mv <- function(x, arg, interpolate = FALSE, ...) { + if (missing(arg) || is.null(arg)) { + x[,, interpolate = interpolate, matrix = TRUE] + } else { + x[, arg, interpolate = interpolate, matrix = TRUE] + } +} + +#' @rdname converters-mv +#' @export +as.data.frame.tf_mv <- function( + x, + row.names = NULL, + optional = FALSE, + unnest = FALSE, + long = TRUE, + arg = NULL, + interpolate = TRUE, + ... +) { + if (!unnest) { + out <- vctrs::new_data_frame(list(x), n = vec_size(x)) + names(out) <- "data" + return(out) + } + + comp_names <- attr(x, "comp_names") %||% character(0) + n <- vec_size(x) + ids <- unique_id(names(x)) %||% seq_len(n) + id_levels <- as.character(ids) + + empty_long <- function() { + data_frame0( + id = factor(character(0), levels = id_levels), + arg = numeric(0), + component = factor(character(0), levels = comp_names), + value = numeric(0) + ) + } + empty_wide <- function() { + do.call( + data_frame0, + c( + list(id = factor(character(0), levels = id_levels), arg = numeric(0)), + setNames(rep(list(numeric(0)), length(comp_names)), comp_names) + ) + ) + } + + if (!length(comp_names) || !n) { + return(if (long) empty_long() else empty_wide()) + } + + # `tf_evaluate.tf_mv` returns a length-n list of per-curve data.frames with + # columns (arg, comp1, ..., compd), NA-filled where a component has no value + # at that arg. This is exactly the union-grid + NA-fill coercion downstream + # consumers (e.g. tidyfun's ggplot layer) need; long/wide is just a pivot. + per_curve <- tf_evaluate(x, arg = arg, interpolate = interpolate, ...) + + id_factor <- factor(as.character(ids), levels = id_levels) + + if (long) { + parts <- map(seq_len(n), function(i) { + cdf <- per_curve[[i]] + if (!nrow(cdf)) return(empty_long()) + nr <- nrow(cdf) + do.call( + rbind, + lapply(comp_names, function(nm) { + data_frame0( + id = rep(id_factor[i], nr), + arg = cdf$arg, + component = factor(rep(nm, nr), levels = comp_names), + value = cdf[[nm]] + ) + }) + ) + }) + out <- do.call(rbind, parts) + if (!nrow(out)) return(empty_long()) + out <- out[order(out$id, out$arg, out$component), , drop = FALSE] + } else { + parts <- map(seq_len(n), function(i) { + cdf <- per_curve[[i]] + if (!nrow(cdf)) return(empty_wide()) + cbind( + data_frame0(id = rep(id_factor[i], nrow(cdf))), + cdf + ) + }) + out <- do.call(rbind, parts) + if (!nrow(out)) return(empty_wide()) + out <- out[order(out$id, out$arg), , drop = FALSE] + } + rownames(out) <- NULL + out +} diff --git a/R/data.R b/R/data.R index e6b83c7f..a29eba7b 100644 --- a/R/data.R +++ b/R/data.R @@ -4,11 +4,13 @@ #' recorded every 2 milliseconds over 300 milliseconds. The data captures the #' dynamics of finger pinch force applied during controlled motor tasks. #' -#' Data is also include in the \CRANpkg{fda} package in another format. +#' Data is also included in the \CRANpkg{fda} package in another format. #' #' @references #' `r format_bib("ramsay2009functional", "ramsay2005functional", "ramsay2002applied")` #' +#' @format A `tfd` vector of length 20 containing the pinch-force curves. +#' #' @examples #' pinch "pinch" @@ -19,15 +21,15 @@ #' of the Berkeley Growth Study. The data tracks physical development over #' time with measurements at 31 different ages that are not equally spaced. #' -#' Data is also include in the \CRANpkg{fda} package in another format. +#' Data is also included in the \CRANpkg{fda} package in another format. #' #' @references #' `r format_bib("ramsay2009functional", "ramsay2005functional", "ramsay2002applied", "tuddenham1954physical")` #' #' @format A data frame with 93 rows and 2 variables: #' \describe{ -#' \item{gender}{sex of the subject (boy/girl)} -#' \item{height}{height in centimeters} +#' \item{gender}{sex of the subject, a factor with levels `female`/`male`} +#' \item{height}{a `tfd` column of height-over-age curves (cm)} #' } #' @examples #' head(growth) @@ -36,20 +38,20 @@ #' Hip and knee angle while walking data #' #' Hip and knee angle measurements in degrees through a 20-point movement cycle -#' for 39 boys. The data represents the angular positions of hip and knee +#' for 39 children. The data represents the angular positions of hip and knee #' joints during normal walking gait, captured at evenly spaced time points #' throughout the gait cycle. #' +#' Data is also included in the \CRANpkg{fda} package in another format. +#' #' @references #' `r format_bib("olshen1989gait")` #' -#' Data is also include in the `datasets` package in another format. -#' #' @format A data frame with 39 rows and 3 variables: #' \describe{ #' \item{subject_id}{subject identifier} -#' \item{knee_angle}{knee joint angles in degrees} -#' \item{hip_angle}{hip joint angle in degrees} +#' \item{knee_angle}{a `tfd` column of knee-joint angles (degrees)} +#' \item{hip_angle}{a `tfd` column of hip-joint angles (degrees)} #' } #' @examples #' head(gait) diff --git a/R/depth.R b/R/depth.R index a13898d5..125f3600 100644 --- a/R/depth.R +++ b/R/depth.R @@ -309,7 +309,7 @@ validate_depth <- function(depth) { depth_data <- function(x, depth, na.rm = FALSE, ...) { validate_depth(depth) if (!na.rm && anyNA(x)) - return(list(x = 1 * NA * x[which(is.na(x))[1]], d = NULL)) + return(list(x = tf_na_like(x, which(is.na(x))[1]), d = NULL)) x <- x[!is.na(x)] if (length(x) == 0) return(list(x = x, d = NULL)) @@ -417,6 +417,17 @@ rank.tf <- function( #' @rdname tf_order #' @export xtfrm.tf <- function(x) { + cli::cli_warn( + c( + "Ordering {.cls tf} vectors via {.fn sort}/{.fn order}/{.fn rank} \\ + uses a depth-based total order ({.val MHI} by default), not a \\ + pointwise comparison.", + i = "See {.fn tf_order} for the underlying semantics and how to \\ + pick a different depth." + ), + .frequency = "once", + .frequency_id = "tf_xtfrm" + ) compute_depth(x, "MHI", na.rm = FALSE) } diff --git a/R/evaluate.R b/R/evaluate.R index 1f1146ea..a771cde7 100644 --- a/R/evaluate.R +++ b/R/evaluate.R @@ -73,8 +73,15 @@ evaluate_tfd_once <- function( seen <- !is.na(seen) ret <- rep(NA_real_, length(new_arg)) ret[seen] <- evaluations[seen_index] - ret[!seen] <- - evaluator(new_arg[!seen], arg = arg, evaluations = evaluations) + if (any(!seen)) { + # evaluator may sort/uniquify its input (see zoo_wrapper); evaluate on + # the unique sorted unseen args, then map back so duplicated `new_arg` + # values land at the correct positions. + unseen <- new_arg[!seen] + u <- sort_unique(unseen) + vals <- evaluator(u, arg = arg, evaluations = evaluations) + ret[!seen] <- vals[match(unseen, u)] + } ret } @@ -94,7 +101,7 @@ tf_evaluate.tfb <- function(object, arg, ...) { evals <- evaluate_tfb_once( x = arg, arg = tf_arg(object), - coefs = do.call(cbind, coef(object[!na_entries])), + coefs = do.call(cbind, unname(coef(object[!na_entries]))), basis = attr(object, "basis"), X = attr(object, "basis_matrix") ) @@ -111,7 +118,7 @@ tf_evaluate.tfb <- function(object, arg, ...) { } } else { ret <- pmap( - list(arg, ensure_list(tf_arg(object)), coef(object)), + list(arg, ensure_list(tf_arg(object)), unname(coef(object))), function(x, y, z) { if (!length(z) || anyNA(z)) { return(rep(NA_real_, length(x))) diff --git a/R/fwise.R b/R/fwise.R index f4541538..5971b5ea 100644 --- a/R/fwise.R +++ b/R/fwise.R @@ -44,6 +44,11 @@ NULL #' tf_crosscor(x, -x) #' tf_crosscov(x, x) == tf_fvar(x) tf_fwise <- function(x, .f, arg = tf_arg(x), ...) { + UseMethod("tf_fwise") +} + +#' @export +tf_fwise.default <- function(x, .f, arg = tf_arg(x), ...) { assert_tf(x) assert_arg(arg = arg, x = x) x_ <- x[, arg, matrix = FALSE] @@ -52,35 +57,46 @@ tf_fwise <- function(x, .f, arg = tf_arg(x), ...) { setNames(ret, names(x)) } +#' @export +tf_fwise.tf_mv <- function(x, .f, arg = tf_arg(x), ...) { + comp_names <- attr(x, "comp_names") + comp_results <- imap(tf_components(x), function(comp, nm) { + tf_fwise(comp, .f, arg = tf_mv_component_arg(arg, nm, comp_names), ...) + }) + ret <- map(seq_along(x), function(i) { + vals <- map(comp_results, \(res) res[[i]]) + names(vals) <- comp_names + vals + }) + setNames(ret, names(x)) +} + +# Factory for the function-wise scalar reductions tf_fmax / tf_fmin / +# tf_fmedian: reduce each function's values with `reduce_op`, unlist the +# per-function scalars and reattach names. +make_tf_freduce <- function(reduce_op) { + function(x, arg = tf_arg(x), na.rm = FALSE) { + x |> + tf_fwise(\(.x) reduce_op(.x$value, na.rm = na.rm), arg = arg) |> + unlist(use.names = FALSE) |> + setNames(names(x)) + } +} + #' @export #' @describeIn functionwise maximal value of each function #' @inheritParams base::min -tf_fmax <- function(x, arg = tf_arg(x), na.rm = FALSE) { - x |> - tf_fwise(\(.x) max(.x$value, na.rm = na.rm), arg = arg) |> - unlist(use.names = FALSE) |> - setNames(names(x)) -} +tf_fmax <- make_tf_freduce(max) #' @export #' @describeIn functionwise minimal value of each function #' @inheritParams base::min -tf_fmin <- function(x, arg = tf_arg(x), na.rm = FALSE) { - x |> - tf_fwise(\(.x) min(.x$value, na.rm = na.rm), arg = arg) |> - unlist(use.names = FALSE) |> - setNames(names(x)) -} +tf_fmin <- make_tf_freduce(min) #' @export #' @describeIn functionwise median value of each function #' @inheritParams base::min -tf_fmedian <- function(x, arg = tf_arg(x), na.rm = FALSE) { - x |> - tf_fwise(\(.x) median(.x$value, na.rm = na.rm), arg = arg) |> - unlist(use.names = FALSE) |> - setNames(names(x)) -} +tf_fmedian <- make_tf_freduce(stats::median) #' @export #' @describeIn functionwise range of values of each function @@ -93,6 +109,11 @@ tf_frange <- function(x, arg = tf_arg(x), na.rm = FALSE, finite = FALSE) { #' @describeIn functionwise mean of each function: #' \eqn{\tfrac{1}{|T|}\int_T x_i(t) dt} tf_fmean <- function(x, arg = tf_arg(x)) { + UseMethod("tf_fmean") +} + +#' @export +tf_fmean.default <- function(x, arg = tf_arg(x)) { assert_tf(x) assert_arg(arg = arg, x = x) x_ <- tf_interpolate(x, arg = arg) @@ -101,10 +122,21 @@ tf_fmean <- function(x, arg = tf_arg(x)) { tf_integrate(x_) / len } +#' @export +#' @describeIn functionwise component-wise means of each vector-valued function +tf_fmean.tf_mv <- function(x, arg = tf_arg(x)) { + tf_mv_fsummary_matrix(x, tf_fmean, arg = arg) +} + #' @export #' @describeIn functionwise variance of each function: #' \eqn{\tfrac{1}{|T|}\int_T (x_i(t) - \bar x(t))^2 dt} tf_fvar <- function(x, arg = tf_arg(x)) { + UseMethod("tf_fvar") +} + +#' @export +tf_fvar.default <- function(x, arg = tf_arg(x)) { assert_tf(x) assert_arg(arg = arg, x = x) arg <- ensure_list(arg) @@ -115,17 +147,72 @@ tf_fvar <- function(x, arg = tf_arg(x)) { tf_integrate(x_c^2) / len } +#' @export +#' @describeIn functionwise component-wise variances of each vector-valued function +tf_fvar.tf_mv <- function(x, arg = tf_arg(x)) { + tf_mv_fsummary_matrix(x, tf_fvar, arg = arg) +} + #' @export #' @describeIn functionwise standard deviation of each function: #' \eqn{\sqrt{\tfrac{1}{|T|}\int_T (x_i(t) - \bar x(t))^2 dt}} tf_fsd <- function(x, arg = tf_arg(x)) { + UseMethod("tf_fsd") +} + +#' @export +tf_fsd.default <- function(x, arg = tf_arg(x)) { tf_fvar(x, arg) |> sqrt() } +#' @export +#' @describeIn functionwise component-wise standard deviations of each vector-valued function +tf_fsd.tf_mv <- function(x, arg = tf_arg(x)) { + tf_mv_fsummary_matrix(x, tf_fsd, arg = arg) +} + +tf_mv_fsummary_matrix <- function(x, .f, arg = tf_arg(x)) { + comps <- tf_components(x) + comp_names <- attr(x, "comp_names") + if (!length(comps)) { + return(matrix( + numeric(0), + nrow = vec_size(x), + ncol = 0L, + dimnames = list(names(x), comp_names) + )) + } + vals <- imap(comps, function(comp, nm) { + .f(comp, arg = tf_mv_component_arg(arg, nm, comp_names)) + }) + ret <- do.call(cbind, vals) + dimnames(ret) <- list(names(x), comp_names) + ret +} + +tf_mv_component_arg <- function(arg, nm, comp_names) { + if ( + is.list(arg) && + length(arg) == length(comp_names) && + !is.null(names(arg)) && + !anyDuplicated(names(arg)) && + setequal(names(arg), comp_names) + ) { + arg[[nm]] + } else { + arg + } +} + #' @export #' @describeIn functionwise cross-covariances between two functional vectors: #' \eqn{\tfrac{1}{|T|}\int_T (x_i(t) - \bar x(t)) (y_i(t)-\bar y(t)) dt} tf_crosscov <- function(x, y, arg = tf_arg(x)) { + UseMethod("tf_crosscov") +} + +#' @export +tf_crosscov.default <- function(x, y, arg = tf_arg(x)) { # check same domain, arg assert_tf(x) assert_tf(y) @@ -153,5 +240,10 @@ tf_crosscov <- function(x, y, arg = tf_arg(x)) { #' @describeIn functionwise cross-correlation between two functional vectors: #' `tf_crosscov(x, y) / (tf_fsd(x) * tf_fsd(y))` tf_crosscor <- function(x, y, arg = tf_arg(x)) { + UseMethod("tf_crosscor") +} + +#' @export +tf_crosscor.default <- function(x, y, arg = tf_arg(x)) { tf_crosscov(x, y, arg) / sqrt(tf_fvar(x, arg) * tf_fvar(y, arg)) } diff --git a/R/geometry-mv.R b/R/geometry-mv.R new file mode 100644 index 00000000..f30f2c43 --- /dev/null +++ b/R/geometry-mv.R @@ -0,0 +1,445 @@ +# Geometric primitives for vector-valued curves -------------------------------- + +#' Pointwise norm and inner product for functional data +#' +#' Small geometric helpers defined by component-wise composition of the existing +#' univariate `Ops` / `Math` machinery: +#' - `tf_norm(f)` -- pointwise Euclidean norm \eqn{\lVert f(t) \rVert}; +#' - `tf_speed(f)` -- pointwise speed \eqn{\lVert f'(t) \rVert}; +#' - `tf_inner(f, g)` -- pointwise inner product \eqn{\langle f(t), g(t) \rangle}; +#' - `tf_distance(f, g)` -- pointwise Euclidean distance \eqn{\lVert f(t) - g(t) \rVert}; +#' - `tf_tangent(f)` -- unit tangent \eqn{f'(t) / \lVert f'(t) \rVert} +#' (undefined where the speed is zero -- callers get `NaN`s there); +#' - `tf_reparam_arclength(f)` -- re-parametrize the curve at constant speed +#' (i.e. by its normalized cumulative arc length). +#' +#' These also apply to *univariate* `tfd`/`tfb` (treated as scalar-valued curves +#' \eqn{f: T \to \mathbb{R}}), where they reduce to their one-dimensional +#' specializations: \eqn{\lVert f(t) \rVert = |f(t)|}, +#' \eqn{\langle f(t), g(t) \rangle = f(t)\,g(t)}, and the unit tangent +#' \eqn{f'(t) / |f'(t)| = \mathrm{sign}(f'(t))}. +#' +#' @param f,g `tf_mv` objects, or univariate `tf` (`tfd`/`tfb`) objects (with +#' identical `d` and component names where two `tf_mv` arguments are required). +#' @returns a univariate `tfd` for `tf_norm`/`tf_speed`/`tf_inner`/`tf_distance`; +#' `tf_tangent` returns a `tf_mv` (or a univariate `tf` for univariate input) +#' and `tf_reparam_arclength` a `tf_mv`. +#' @family tf_mv-class +#' @examples +#' set.seed(1) +#' f <- tfd_mv(list(x = tf_rgp(2), y = tf_rgp(2))) +#' tf_norm(f) +#' tf_speed(f) +#' tf_distance(f, tfd_mv(list(x = tf_rgp(2), y = tf_rgp(2)))) +#' # univariate: tf_norm reduces to the pointwise absolute value +#' u <- tf_rgp(2) +#' tf_norm(u) +#' tf_inner(u, tf_rgp(2)) +#' @name tf_geom +#' @rdname tf_geom +#' @export +tf_norm <- function(f) UseMethod("tf_norm") + +#' @rdname tf_geom +#' @export +tf_norm.default <- function(f) { + cli::cli_abort(c( + "{.fn tf_norm} is not defined for {.cls {class(f)}} objects.", + "i" = "Supply a univariate {.cls tf} or a vector-valued {.cls tf_mv} object." + )) +} + +#' @rdname tf_geom +#' @export +tf_norm.tf <- function(f) abs(f) + +# Reduce a tf_mv (or a pair of aligned tf_mvs) to a univariate `tfd` by applying +# a pointwise `reduce` to each curve's component-value matrix. `evals` is the +# per-curve list of `(arg, comp...)` data.frames from `tf_evaluate()`; the arg +# grids are read back from those. For the two-operand form, `evals2` holds the +# matching data.frames for the second object and `reduce(m1, m2)` is called. +# This is the shared tail of the tfd-valued geometry reductions (norm, inner). +mv_reduce_to_tfd <- function(evals, reduce, domain, nms, evals2 = NULL) { + vals <- if (is.null(evals2)) { + map(evals, \(cdf) if (nrow(cdf)) reduce(evals_to_matrix(cdf)) else numeric(0)) + } else { + map2(evals, evals2, \(a, b) { + if (nrow(a)) reduce(evals_to_matrix(a), evals_to_matrix(b)) else numeric(0) + }) + } + names(vals) <- nms + tfd(vals, arg = map(evals, `[[`, "arg"), domain = domain) +} + +#' @rdname tf_geom +#' @export +tf_norm.tf_mv <- function(f) { + if (!tf_ncomp(f) || !vec_size(f)) return(tfd(numeric(0), domain = tf_domain(f))) + # Components may live on different argument grids (the constructor allows + # this), so evaluate every component on each curve's union grid -- a plain + # `Reduce(`+`, comp^2)` would error or misalign, and on tfb would rebase + # `comp^2` lossily. `tf_evaluate()` does the union-grid + NA-fill. + mv_reduce_to_tfd(tf_evaluate(f), \(m) sqrt(rowSums(m^2)), tf_domain(f), names(f)) +} + +#' @rdname tf_geom +#' @export +tf_speed <- function(f) tf_norm(tf_derive(f)) + +#' @rdname tf_geom +#' @export +tf_inner <- function(f, g) UseMethod("tf_inner") + +#' @rdname tf_geom +#' @export +tf_inner.default <- function(f, g) { + cli::cli_abort(c( + "{.fn tf_inner} is not defined for {.cls {class(f)}} objects.", + "i" = "Supply univariate {.cls tf} or vector-valued {.cls tf_mv} objects." + )) +} + +#' @rdname tf_geom +#' @export +tf_inner.tf <- function(f, g) { + if (!inherits(g, "tf") || is_tf_mv(g)) { + cli::cli_abort(c( + "{.arg g} must be a univariate {.cls tf} object to match {.arg f}.", + "x" = "You supplied a {.cls {class(g)}} object." + )) + } + f * g +} + +#' @rdname tf_geom +#' @export +tf_inner.tf_mv <- function(f, g) { + if (!is_tf_mv(g)) { + cli::cli_abort(c( + "{.arg g} must be a vector-valued {.cls tf_mv} object to match {.arg f}.", + "x" = "You supplied a {.cls {class(g)}} object." + )) + } + check_compatible_mv(f, g) + assert_compatible_size("tf_inner", f, g) + # vctrs recycling: a size-0 operand makes the whole result size 0. + n <- if (min(vec_size(f), vec_size(g)) == 0L) { + 0L + } else { + max(vec_size(f), vec_size(g)) + } + if (!tf_ncomp(f) || !n) return(tfd(numeric(0), domain = tf_domain(f))) + # the inner product is only defined where both curves are -- the intersection + # of their domains. Evaluating on the union would step outside one operand's + # support (mirrors univariate `tfd * tfd`, which errors on non-overlapping + # domains and restricts to the common arguments otherwise). + dom <- c( + max(tf_domain(f)[1], tf_domain(g)[1]), + min(tf_domain(f)[2], tf_domain(g)[2]) + ) + if (dom[1] > dom[2]) { + cli::cli_abort( + "{.fn tf_inner} is not permitted for non-overlapping domains + ({.val {tf_domain(f)}} and {.val {tf_domain(g)}})." + ) + } + grids <- tf_mv_pair_grids(f, g, domain = dom) + nms <- if (vec_size(f) >= vec_size(g)) names(f) else names(g) + mv_reduce_to_tfd( + tf_mv_evaluate_on_grids(f, grids), + \(mf, mg) rowSums(mf * mg), + dom, nms, + evals2 = tf_mv_evaluate_on_grids(g, grids) + ) +} + +#' @rdname tf_geom +#' @export +tf_distance <- function(f, g) tf_norm(f - g) + +#' @rdname tf_geom +#' @export +tf_tangent <- function(f) UseMethod("tf_tangent") + +#' @rdname tf_geom +#' @export +tf_tangent.default <- function(f) { + cli::cli_abort(c( + "{.fn tf_tangent} is not defined for {.cls {class(f)}} objects.", + "i" = "Supply a univariate {.cls tf} or a vector-valued {.cls tf_mv} object." + )) +} + +#' @rdname tf_geom +#' @export +tf_tangent.tf <- function(f) { + df <- tf_derive(f) + df / tf_norm(df) +} + +#' @rdname tf_geom +#' @export +tf_tangent.tf_mv <- function(f) { + df <- tf_derive(f) + inv_speed <- 1 / tf_norm(df) + grids <- tf_mv_curve_grids(df) + df_evals <- tf_evaluate(df, arg = grids) + speed_evals <- tf_evaluate(inv_speed, arg = grids) + comp_names <- attr(df, "comp_names") + comps <- map(comp_names, function(nm) { + vals <- map2(df_evals, speed_evals, \(cdf, speed) cdf[[nm]] * speed) + names(vals) <- names(df) + tfd(vals, arg = grids, domain = tf_domain(df)) + }) + names(comps) <- comp_names + new_tf_mv(comps, domain = tf_domain(df)) +} + +tf_mv_pair_grids <- function(x, y, domain = NULL) { + sizes <- c(vec_size(x), vec_size(y)) + # vctrs recycling: a size-0 operand collapses the common size to 0. + n <- if (min(sizes) == 0L) 0L else max(sizes) + if (!n) return(list()) + x_grids <- tf_mv_curve_grids(x) + y_grids <- tf_mv_curve_grids(y) + grid_at <- function(grids, i) { + if (!length(grids)) return(numeric(0)) + grids[[if (length(grids) == 1L) 1L else i]] + } + map(seq_len(n), function(i) { + g <- sort(unique(c(grid_at(x_grids, i), grid_at(y_grids, i)))) + if (!is.null(domain)) g <- g[g >= domain[1] & g <= domain[2]] + g + }) +} + +tf_mv_evaluate_on_grids <- function(x, grids) { + if (vec_size(x) == length(grids)) { + return(tf_evaluate(x, arg = grids)) + } + tf_evaluate(x[rep(1L, length(grids))], arg = grids) +} + +#' @rdname tf_geom +#' @export +tf_reparam_arclength <- function(f) { + if (!vec_size(f)) return(f) + s <- tf_arclength(f, definite = FALSE) # cumulative s(t), one per curve + L <- tf_arclength(f) # total length per curve + dom <- tf_domain(f) + # curves that are constant in every component have zero (or undefined) arc + # length, so `s / L` would be 0/0 = NaN and produce an invalid (non-monotone) + # warp. Reparametrize only the well-defined curves; leave the rest unchanged. + degenerate <- !is.finite(L) | L == 0 + good <- which(!degenerate) + if (!length(good)) { + if (any(degenerate)) { + cli::cli_warn(c( + "!" = "{sum(degenerate)} curve{?s} with zero/undefined arc length left unchanged.", + "i" = "Arc-length reparametrization is undefined for curves that are constant in all components." + )) + } + return(f) + } + # u(t) maps the domain monotonically onto itself. `tf_warp(f, w)` computes + # `f o w^{-1}`, so passing `u` (not its inverse) gives the arc-length- + # parameterised curve `f o u^{-1}`. + u <- dom[1] + diff(dom) * (s[good] / L[good]) + warped <- tf_warp(f[good], u) + if (!any(degenerate)) { + return(warped) + } + # `warped` may have a more general per-component type than `f` (e.g. + # tfd_irreg from a tfd_reg input), so in-place `out[good] <- warped` + # can fail with a vctrs ptype mismatch. Build the output by ptype-aware + # concatenation of warped + untouched, then reorder back to input index. + cli::cli_warn(c( + "!" = "{sum(degenerate)} curve{?s} with zero/undefined arc length left unchanged.", + "i" = "Arc-length reparametrization is undefined for curves that are constant in all components." + )) + bad <- which(degenerate) + common <- vctrs::vec_ptype_common(warped, f[bad]) + out <- vctrs::vec_c( + vctrs::vec_cast(warped, common), + vctrs::vec_cast(f[bad], common) + ) + out <- out[order(c(good, bad))] + names(out) <- names(f) + out +} + +# Arc length ------------------------------------------------------------------- + +#' Arc length of vector-valued functional data +#' +#' For a vector-valued curve `f: [a, b] -> R^d`, the arc length is +#' \eqn{\int_a^b \lVert f'(t) \rVert\, dt} -- the length traced out by `f` in +#' `R^d`. +#' +#' Two methods are supported: +#' +#' * **`"polyline"`** (default): sum of the Euclidean lengths of the line +#' segments between consecutive sample points (in `R^d`). Each curve is +#' evaluated on the union of its components' argument grids (or a supplied +#' `arg`) and the segment-sum is computed in closed form. For raw `tfd_mv` +#' data this is more accurate than `"derive"` because it avoids the +#' compounding error of numerical differentiation followed by quadrature. +#' * **`"derive"`**: composes the existing verbs -- per-component +#' differentiation ([tf_derive()]), pointwise speed [tf_speed()], then +#' [tf_integrate()]. Best for `tfb_mv` (analytical derivatives) or when a +#' custom `tf_integrate(...)` argument is needed. +#' +#' @param f a `tf_mv` object. +#' @param arg,lower,upper optional evaluation/integration grid and limits. +#' @param definite `TRUE` (default) returns a numeric vector of total arc +#' lengths per curve; `FALSE` returns the cumulative arc length +#' \eqn{s(t) = \int_a^t \lVert f'(u) \rVert\, du} as a univariate `tfd`. +#' @param method `"polyline"` (default) or `"derive"`. +#' @param ... forwarded to [tf_integrate()] when `method = "derive"`. +#' @returns a numeric vector (definite) or a univariate `tfd` (indefinite). +#' @family tf_mv-class +#' @examples +#' # unit circle parameterised on [0, 1] -- arc length is 2*pi +#' t <- seq(0, 1, length.out = 401) +#' circ <- tfd_mv(list( +#' x = tfd(matrix(cos(2 * pi * t), nrow = 1), arg = t), +#' y = tfd(matrix(sin(2 * pi * t), nrow = 1), arg = t) +#' )) +#' tf_arclength(circ) +#' tf_arclength(circ, lower = 0, upper = 0.25) # quarter -> pi/2 +#' tf_arclength(circ, definite = FALSE) # cumulative s(t) +#' @export +tf_arclength <- function(f, ...) UseMethod("tf_arclength") + +#' @rdname tf_arclength +#' @export +tf_arclength.default <- function(f, ...) .NotYetImplemented() + +#' @rdname tf_arclength +#' @export +tf_arclength.tf_mv <- function( + f, + arg = NULL, + lower = tf_domain(f)[1], + upper = tf_domain(f)[2], + definite = TRUE, + method = c("polyline", "derive"), + ... +) { + method <- match.arg(method) + assert_number(lower, finite = TRUE) + assert_number(upper, finite = TRUE) + if (lower > upper) { + cli::cli_abort(c( + "{.arg lower} must not exceed {.arg upper}.", + "x" = "You supplied {.arg lower} = {.val {lower}} and {.arg upper} = {.val {upper}}." + )) + } + dom <- tf_domain(f) + if (lower < dom[1] || upper > dom[2]) { + cli::cli_abort(c( + "{.arg lower}/{.arg upper} must lie within the domain {.val {dom}}.", + "x" = "You supplied {.arg lower} = {.val {lower}} and {.arg upper} = {.val {upper}}." + )) + } + if (!definite && lower == upper) { + cli::cli_abort(c( + "Cumulative arc length ({.code definite = FALSE}) is undefined for a \\ + zero-width interval.", + "x" = "You supplied {.arg lower} == {.arg upper} == {.val {lower}}." + )) + } + if (method == "derive") { + speed <- tf_speed(f) + call_args <- list( + speed, + lower = lower, + upper = upper, + definite = definite, + ... + ) + if (!is.null(arg)) call_args$arg <- arg + return(do.call(tf_integrate, call_args)) + } + arclength_polyline(f, arg, lower, upper, definite) +} + +# Polyline arc length: evaluate the multivariate curve on each curve's +# argument grid (or a supplied common `arg`), then sum Euclidean distances +# between consecutive d-dimensional sample points. +arclength_polyline <- function(f, arg, lower, upper, definite) { + n <- vec_size(f) + if (!n) { + return(if (definite) numeric(0) else tfd(numeric(0))) + } + # per-curve evaluation grids + grids <- if (!is.null(arg)) { + rep(list(sort(unique(arg))), n) + } else { + tf_mv_curve_grids(f) + } + # clamp each curve's grid to the intersection of [lower, upper] with its own + # observed argument range. Without this, irregular curves that don't span the + # full global domain would be evaluated outside their support and yield NA. + grids <- lapply(grids, function(g) { + if (lower == upper) return(lower) + if (!length(g)) return(numeric(0)) + lo_i <- max(lower, min(g)) + up_i <- min(upper, max(g)) + if (lo_i > up_i) return(numeric(0)) + g <- g[g >= lo_i & g <= up_i] + sort(unique(c(lo_i, g, up_i))) + }) + empty <- vapply(grids, function(g) length(g) == 0L, logical(1)) + needs_eval <- vapply(grids, function(g) length(g) >= 2L, logical(1)) + paired_dfs <- vector("list", n) + if (any(needs_eval)) { + paired_dfs[needs_eval] <- tf_evaluate( + f[needs_eval], + arg = grids[needs_eval] + ) + } + # `tf_evaluate.tf_mv` now returns a uniform list of per-curve data.frames + # `(arg, comp1, ..., compd)`. Drop the arg column to get the value matrix + # required by the polyline segment-length computation. + incomplete <- map_lgl(paired_dfs, function(df) { + is.data.frame(df) && nrow(df) > 0L && anyNA(df[, -1L, drop = FALSE]) + }) + if (any(incomplete)) { + idx <- which(incomplete) + cli::cli_abort(c( + "Cannot compute polyline arc length with missing paired component evaluations.", + "i" = "Affected curve {cli::qty(length(idx))}index{?/es}: {.val {idx}}.", + "i" = "Set {.arg lower}/{.arg upper} to a common observed interval or use an evaluator that supplies all requested component values." + )) + } + per_curve_segs <- map(seq_len(n), function(i) { + if (empty[i]) return(NA_real_) + if (length(grids[[i]]) < 2L) return(numeric(0)) + df <- paired_dfs[[i]] + if (is.null(df) || !nrow(df)) return(NA_real_) + mat <- evals_to_matrix(df) + if (nrow(mat) < 2L) return(numeric(0)) + sqrt(rowSums(diff(mat)^2)) + }) + if (definite) { + setNames( + map_dbl(per_curve_segs, \(s) if (anyNA(s)) NA_real_ else sum(s)), + names(f) + ) + } else { + if (any(map_lgl(per_curve_segs, anyNA))) { + cli::cli_abort( + "Cannot compute cumulative arc length for missing vector-valued curves." + ) + } + cum_evals <- map(per_curve_segs, function(s) c(0, cumsum(s))) + same_grid <- length(unique(lengths(grids))) == 1L && + all(map_lgl(grids[-1], \(g) isTRUE(all.equal(g, grids[[1]])))) + if (same_grid) { + tfd(do.call(rbind, cum_evals), arg = grids[[1]]) + } else { + tfd(cum_evals, arg = grids) + } + } +} diff --git a/R/globals.R b/R/globals.R index 9eb5ba4a..a4210c70 100644 --- a/R/globals.R +++ b/R/globals.R @@ -1,20 +1,5 @@ # clean up globals for R CMD check utils::globalVariables(c( - ".", - "..fill", - "..label", "..order", - "..order_by_value", - "..row", - "..x", - "..y", - "V1", - "V2", - "id_num", - "tick_hi", - "tick_lo", - "tick_pos", - "ticks", - "value", - "xmax" + "value" )) diff --git a/R/graphics.R b/R/graphics.R index 779e250f..4b1ca146 100644 --- a/R/graphics.R +++ b/R/graphics.R @@ -34,9 +34,10 @@ prep_plotting_arg <- function(f, n_grid) { #' data for `tfd`-objects without inter-/extrapolation, use `n_grid < 1` or #' `n_grid = NA`. #' -#' @param x an `tf` object. -#' @param y (optional) numeric vector to be used as `arg` -#' (i.e., for the **x**-axis...!). +#' @param x a `tf` object. +#' @param y (optional) numeric vector of evaluation points to use as `arg` +#' for plotting (passed as the function's input grid, not the y-axis +#' values). #' @param n_grid minimal size of equidistant grid used for plotting, #' defaults to `50`. See details. #' @param points should the original evaluation points be marked by points? @@ -103,7 +104,7 @@ plot.tf <- function( points = TRUE, interpolate = FALSE, pch = 19, - ol = rgb(0, 0, 0, alpha) + col = rgb(0, 0, 0, alpha) ), list(...) ) diff --git a/R/interpolate.R b/R/interpolate.R index 5a89288f..cd22035a 100644 --- a/R/interpolate.R +++ b/R/interpolate.R @@ -67,6 +67,16 @@ tf_interpolate.tfd <- function(object, arg, ...) { tfd(object, arg = arg, ...) } +#' @export +#' @rdname tf_interpolate +tf_interpolate.tf_mv <- function(object, arg, ...) { + comp_names <- attr(object, "comp_names") + comps <- imap(tf_components(object), function(comp, nm) { + tf_interpolate(comp, arg = tf_mv_component_arg(arg, nm, comp_names), ...) + }) + new_tf_mv(comps, domain = tf_domain(object), mfpc = attr(object, "mfpc")) +} + # #' @export # #' @rdname tf_interpolate # tf_interpolate.tfd_irreg <- function(object, arg, force = FALSE, ...) { diff --git a/R/landmarks.R b/R/landmarks.R index a985ef2a..72bab6eb 100644 --- a/R/landmarks.R +++ b/R/landmarks.R @@ -24,7 +24,6 @@ #' each column). Contains `NA` where a curve is missing a landmark. #' @seealso [tf_register()] with `method = "landmark"` #' @export -#' @rdname landmarks #' @family registration functions #' @examples #' t <- seq(0, 1, length.out = 101) @@ -112,14 +111,14 @@ tf_landmarks_extrema <- function( # --- Landmark Detection Helpers ------------------------------------------------ -#' @details -#' - `detect_landmarks` detects local extrema and zero crossings per curve. +#' Detect local extrema and zero crossings per curve +#' #' @param x tf object (already smoothed if needed) #' @param arg_list list of numeric vectors: per-curve evaluation grids #' @param which character vector: subset of c("max", "min", "zero") -#' @returns `detect_landmarks`: list of n data.frames with columns (position, type) +#' @returns list of n data.frames with columns (position, type) #' @keywords internal -#' @rdname landmarks +#' @noRd detect_landmarks <- function(x, arg_list, which) { x_evals <- tf_evaluations(x) n <- length(x) @@ -183,17 +182,19 @@ detect_landmarks <- function(x, arg_list, which) { } -#' @details -#' - `cluster_landmarks` clusters within each feature type separately -#' (max with max, min with min, etc.) to avoid merging adjacent features of -#' different types. Then combines and sorts by position. +#' Cluster detected features across curves +#' +#' Clusters within each feature type separately (max with max, min with min, +#' etc.) to avoid merging adjacent features of different types. Then combines +#' and sorts by position. +#' #' @param features list of per-curve data.frames from detect_landmarks() #' @param n number of curves #' @param bandwidth merge distance for clustering #' @param threshold minimum proportion of curves for a cluster to be retained -#' @returns `cluster_landmarks`: data.frame with columns: center, type, count, proportion +#' @returns data.frame with columns: center, type, count, proportion #' @keywords internal -#' @rdname landmarks +#' @noRd cluster_landmarks <- function(features, n, bandwidth, threshold) { # Pool all features with curve ID all_f <- do.call( @@ -264,16 +265,17 @@ cluster_landmarks <- function(features, n, bandwidth, threshold) { result[order(result$center), , drop = FALSE] } -#' @details -#' - `build_landmark_matrix` creates a landmark matrix by matching per-curve -#' features to clusters. +#' Build the landmark matrix from per-curve features and clusters +#' +#' Creates a landmark matrix by matching per-curve features to clusters. +#' #' @param features list of per-curve data.frames #' @param clusters data.frame from cluster_landmarks() #' @param n number of curves #' @param bandwidth matching distance -#' @returns `build_landmark_matrix`: n x k matrix with feature_types attribute +#' @returns n x k matrix with feature_types attribute #' @keywords internal -#' @rdname landmarks +#' @noRd build_landmark_matrix <- function(features, clusters, n, bandwidth) { k <- nrow(clusters) lm_mat <- matrix(NA_real_, nrow = n, ncol = k) diff --git a/R/math.R b/R/math.R index ad7190f4..5ee07841 100644 --- a/R/math.R +++ b/R/math.R @@ -1,10 +1,11 @@ # utility function for linear operations that can be done on coefs or # evaluations directly. -fun_math <- function(x, op) { +fun_math <- function(x, op, ...) { attr_ret <- attributes(x) + dots <- list(...) ret <- map(tf_evaluations(x), \(x) { if (is.null(x)) return(NULL) - result <- do.call(op, list(x = x)) + result <- do.call(op, c(list(x = x), dots)) if (allMissing(result)) NULL else result }) if (is_irreg(x)) { @@ -25,7 +26,7 @@ NULL #' @export #' @family tidyfun compute functions Math.tfd <- function(x, ...) { - fun_math(x, .Generic) + fun_math(x, .Generic, ...) } #' @rdname tfgroupgenerics @@ -36,35 +37,21 @@ Math.tfb <- function(x, ...) { "Potentially lossy cast to {.cls tfd} and back in {.fn {genericname}}({.cls {vec_ptype_full(x)}})." ) basis_args <- attr(x, "basis_args") - eval <- fun_math(tfd(x), .Generic) + eval <- fun_math(tfd(x), .Generic, ...) na_entries <- is.na(eval) if (all(na_entries)) { - # all entries became NA -- return tfb with NULL entries - result <- vector("list", length(eval)) - result[] <- list(NULL) - result_names <- names(eval) - attributes(result) <- attributes(x) - names(result) <- result_names - return(result) + return(restore_na_entries( + eval[!na_entries], + na_entries, + names(eval), + ref_tfb = x + )) } - if (any(na_entries)) { - # refit only non-NA entries, then insert NULLs at NA positions - non_na <- do.call( - "tfb", - c(list(eval[!na_entries]), basis_args, penalized = FALSE, verbose = FALSE) - ) - result <- vector("list", length(eval)) - result[!na_entries] <- unclass(non_na) - result[na_entries] <- list(NULL) - result_names <- names(eval) - attributes(result) <- attributes(non_na) - names(result) <- result_names - return(result) - } - do.call( + non_na <- do.call( "tfb", - c(list(eval), basis_args, penalized = FALSE, verbose = FALSE) + c(list(eval[!na_entries]), basis_args, penalized = FALSE, verbose = FALSE) ) + restore_na_entries(non_na, na_entries, names(eval)) } #------------------------------------------------------------------------------- diff --git a/R/methods.R b/R/methods.R index 670e25b8..912aadcf 100644 --- a/R/methods.R +++ b/R/methods.R @@ -189,7 +189,12 @@ tf_basis <- function(f, as_tfd = FALSE) { #' @export `tf_arg<-.tfd_irreg` <- function(x, value) { assert_arg(value, x, check_unique = FALSE) - ret <- map2(tf_evaluations(x), value, \(x, y) list(arg = y, data = x)) + value <- ensure_list(value) + if (length(value) == 1) value <- rep(value, length(x)) + ret <- map2(tf_evaluations(x), value, \(v, y) { + if (is.null(v)) return(NULL) + list(arg = y, value = v) + }) attributes(ret) <- attributes(x) ret } @@ -222,12 +227,14 @@ tf_basis <- function(f, as_tfd = FALSE) { #------------------------------------------------------------------------------- #' @rdname tfmethods -#' @param object as usual -#' @param ... dots +#' @param object a `tfb` object. +#' @param ... not used. #' @export #' @importFrom stats coef coef.tfb <- function(object, ...) { + nms <- names(object) attributes(object) <- NULL + names(object) <- nms object } @@ -237,6 +244,12 @@ rev.tf <- function(x) { x[rev(seq_along(x))] } +#' @export +#' @rdname tfmethods +rev.tf_mv <- function(x) { + x[rev(seq_along(x))] +} + #------------------------------------------------------------------------------- #' @rdname tfmethods @@ -257,6 +270,14 @@ is.na.tfd_irreg <- function(x) { #' @export is_tf <- function(x) inherits(x, "tf") +#' @rdname tfmethods +#' @description `is_tf_1d()` distinguishes *univariate* `tf` vectors (length-`n` +#' samples of `f: R -> R`) from any `tf` vector. Returns `TRUE` for `tfd` / +#' `tfb` and `FALSE` for `tfd_mv` / `tfb_mv`. Useful as a dispatch / guard +#' predicate inside helpers that assume scalar per-arg evaluations. +#' @export +is_tf_1d <- function(x) inherits(x, "tf") && !inherits(x, "tf_mv") + #' @rdname tfmethods #' @export is_tfd <- function(x) inherits(x, "tfd") @@ -288,3 +309,15 @@ is_tfb_spline <- function(x) inherits(x, "tfb_spline") #' @rdname tfmethods #' @export is_tfb_fpc <- function(x) inherits(x, "tfb_fpc") + +#' @rdname tfmethods +#' @export +is_tf_mv <- function(x) inherits(x, "tf_mv") + +#' @rdname tfmethods +#' @export +is_tfd_mv <- function(x) inherits(x, "tfd_mv") + +#' @rdname tfmethods +#' @export +is_tfb_mv <- function(x) inherits(x, "tfb_mv") diff --git a/R/mv-stubs.R b/R/mv-stubs.R new file mode 100644 index 00000000..8594f0cb --- /dev/null +++ b/R/mv-stubs.R @@ -0,0 +1,88 @@ +#' @include tfd-mv.R tfb-mv.R accessors-mv.R +NULL + +# Fail-fast .tf_mv stubs ------------------------------------------------------ +# +# Many univariate-tf generics do not (yet) have a well-defined extension to +# vector-valued `tf_mv` objects. Without an explicit method, dispatch falls +# through to the .tf / .tfd / .tfb method (since tf_mv classes inherit from +# "tf"), which silently produces NA garbage, internal errors deep in the call +# stack, or component-wise behaviour with unclear semantics. The stubs below +# abort *fast* with a classed condition (`tf_mv_method_unimplemented`) so that +# callers can catch them uniformly while we design real semantics +# component-by-component -- see tidyfun/tf#255. + +mv_unimplemented <- function(fn) { + cli::cli_abort( + c( + "{.fn {fn}} is not (yet) defined for vector-valued {.cls tf_mv}.", + i = "See {.url https://github.com/tidyfun/tf/issues/255} for design discussion." + ), + class = "tf_mv_method_unimplemented", + call = NULL + ) +} + +# ---- summarize.R: summary / fivenum / quantile ------------------------------- + +#' @export +summary.tf_mv <- function(object, ...) mv_unimplemented("summary") + +#' @export +fivenum.tf_mv <- function(x, na.rm = FALSE, ...) mv_unimplemented("fivenum") + +#' @export +quantile.tf_mv <- function(x, ...) mv_unimplemented("quantile") + +# ---- depth.R: tf_depth, rank, xtfrm, sort ----------------------------------- +# (`min`/`max`/`range` are handled by the existing `Summary.tf_mv` group method; +# `cummax`/`cummin`/`cumprod`/`cumsum` by `Math.tf_mv`. They are not stubbed.) + +#' @export +tf_depth.tf_mv <- function(x, arg, ...) mv_unimplemented("tf_depth") + +#' @export +rank.tf_mv <- function(x, ...) mv_unimplemented("rank") + +#' @export +xtfrm.tf_mv <- function(x) mv_unimplemented("xtfrm") + +#' @export +sort.tf_mv <- function(x, decreasing = FALSE, ...) mv_unimplemented("sort") + +# ---- graphics.R: points ------------------------------------------------------ + +#' @export +points.tf_mv <- function(x, ...) mv_unimplemented("points") + +# ---- approx.R: tf_invert ----------------------------------------------------- + +#' @export +tf_invert.tf_mv <- function(x, ...) mv_unimplemented("tf_invert") + +# ---- fwise.R: tf_crosscov / tf_crosscor -------------------------------------- +# Converted to generics in fwise.R (default method retains the univariate body). + +#' @export +tf_crosscov.tf_mv <- function(x, y, ...) mv_unimplemented("tf_crosscov") + +#' @export +tf_crosscor.tf_mv <- function(x, y, ...) mv_unimplemented("tf_crosscor") + +#' Methods registered on vector-valued (`tf_mv`) classes +#' +#' `tf_mv` classes (`tfd_mv` / `tfb_mv`) inherit from `"tf"` so that +#' `tf_domain()`, the type predicates (`is_tf()`, `is_tf_mv()`, ...) and S4 +#' generic reuse continue to work. **Behaviour** on `tf_mv` objects, however, +#' is supplied *only* by explicitly registered `.tf_mv` methods: any generic +#' without one aborts with a classed `tf_mv_method_unimplemented` condition +#' (e.g. `tf_depth()`, `summary()`). This avoids silent +#' fall-through to the univariate method, which would otherwise produce +#' wrong-shape results or deep internal errors. +#' +#' Real component-wise semantics (joint vs. per-component, norm-based, ...) are +#' being designed verb-by-verb in . +#' +#' @name tf_mv_unimplemented +#' @keywords internal +NULL diff --git a/R/ops-mv.R b/R/ops-mv.R new file mode 100644 index 00000000..69323a4f --- /dev/null +++ b/R/ops-mv.R @@ -0,0 +1,143 @@ +# Arithmetic, math, summaries (all component-wise) ----------------------------- + +# Demote a `tfb_mfpc` operand before Math/Ops touch its `tfb_fpc` components. +# Scoring a single MFPC component is ill-defined (see `mfpc_component_scoring`), +# so any arithmetic / Math.Generic must drop the joint spec first. We warn once +# per operation and continue along the standard component-wise path. +mfpc_demote_for_op <- function(x, op, warn = TRUE) { + if (is_tfb_mfpc(x)) { + if (warn) { + warn_mfpc_demotion(paste0( + "Operation {.code ", op, "} on a {.cls tfb_mfpc} forces per-component arithmetic." + )) + } + return(tfb_mfpc_demote(x)) + } + x +} + +#' @export +#' @method vec_arith tf_mv +vec_arith.tf_mv <- function(op, x, y, ...) { + UseMethod("vec_arith.tf_mv", y) +} + +#' @export +#' @method vec_arith.tf_mv default +vec_arith.tf_mv.default <- function(op, x, y, ...) { + stop_incompatible_op(op, x, y) +} + +#' @export +#' @method vec_arith.tf_mv tf_mv +vec_arith.tf_mv.tf_mv <- function(op, x, y, ...) { + # if both operands are tfb_mfpc, warn only once for the single user-facing + # operation: the warning for `y` is suppressed when `x` already triggered it. + x_warns <- is_tfb_mfpc(x) + x <- mfpc_demote_for_op(x, op) + y <- mfpc_demote_for_op(y, op, warn = !x_warns) + map2_components(x, y, \(a, b) vec_arith(op, a, b)) +} + +#' @export +#' @method vec_arith.tf_mv numeric +vec_arith.tf_mv.numeric <- function(op, x, y, ...) { + x <- mfpc_demote_for_op(x, op) + map_components(x, \(a) vec_arith(op, a, y)) +} + +#' @export +#' @method vec_arith.numeric tf_mv +vec_arith.numeric.tf_mv <- function(op, x, y, ...) { + y <- mfpc_demote_for_op(y, op) + map_components(y, \(b) vec_arith(op, x, b)) +} + +#' @export +#' @method vec_arith.tf_mv MISSING +vec_arith.tf_mv.MISSING <- function(op, x, y, ...) { + x <- mfpc_demote_for_op(x, op) + map_components(x, \(a) vec_arith(op, a, MISSING())) +} + +#' @export +Math.tf_mv <- function(x, ...) { + generic <- .Generic + x <- mfpc_demote_for_op(x, generic) + map_components(x, \(a) do.call(generic, list(a, ...))) +} + +#' @export +Summary.tf_mv <- function(..., na.rm = FALSE) { + generic <- .Generic + dots <- list(...) + mv_args <- map_lgl(dots, is_tf_mv) + x <- dots[[which(mv_args)[1]]] + walk(dots[mv_args], \(arg) check_compatible_mv(x, arg)) + missing <- do.call(mv_missing, dots[mv_args]) + dots[mv_args] <- map( + dots[mv_args], + mv_complete, + missing = missing, + na.rm = na.rm + ) + x <- dots[[which(mv_args)[1]]] + comps <- imap(tf_components(x), function(comp, nm) { + comp_args <- map(dots, function(arg) { + if (is_tf_mv(arg)) tf_component(arg, nm) else arg + }) + do.call(generic, c(comp_args, list(na.rm = na.rm))) + }) + names(comps) <- attr(x, "comp_names") + new_tf_mv(comps) +} + +#' @export +`==.tf_mv` <- function(e1, e2) { + check_compatible_mv(e1, e2) + # a zero-component object has no values to compare: trivially equal (and + # `Reduce()` on an empty list would return `NULL` rather than `logical(0)`). + if (!tf_ncomp(e1)) return(rep(TRUE, vec_size(e1))) + eqs <- map2(tf_components(e1), tf_components(e2), \(a, b) a == b) + Reduce(`&`, eqs) +} + +#' @export +`!=.tf_mv` <- function(e1, e2) !(e1 == e2) + +#' @export +mean.tf_mv <- function(x, ..., na.rm = FALSE) { + x <- mv_complete(x, na.rm = na.rm) + map_components(x, \(a) mean(a, ..., na.rm = na.rm)) +} + +#' @export +median.tf_mv <- function(x, na.rm = FALSE, ...) { + x <- mv_complete(x, na.rm = na.rm) + map_components(x, \(a) median(a, na.rm = na.rm, ...)) +} + +#' @export +sd.tf_mv <- function(x, na.rm = FALSE) { + x <- mv_complete(x, na.rm = na.rm) + map_components(x, \(a) sd(a, na.rm = na.rm)) +} + +#' @export +var.tf_mv <- function(x, y = NULL, na.rm = FALSE, use) { + if (!is.null(y)) { + cli::cli_abort(c( + "{.fn var} on {.cls tf_mv} does not support a second argument {.arg y}.", + "i" = "Use {.fn tf_crosscov} or {.fn tf_crosscor} for cross-(co)variance." + )) + } + has_use <- !missing(use) + x <- mv_complete(x, na.rm = na.rm) + map_components(x, function(a) { + if (has_use) { + var(a, na.rm = na.rm, use = use) + } else { + var(a, na.rm = na.rm) + } + }) +} diff --git a/R/ops.R b/R/ops.R index 8407e439..160c56a7 100644 --- a/R/ops.R +++ b/R/ops.R @@ -63,14 +63,14 @@ NULL #' @export `!=.tfd` <- function(e1, e2) !(e1 == e2) -# need to copy instead of defining tf-method s.t. dispatch in Ops works +# Plain copy so S3 dispatch in Ops picks up a tfb-named method. #' @rdname tfgroupgenerics #' @export -`==.tfb` <- eval(`==.tfd`) +`==.tfb` <- `==.tfd` #' @rdname tfgroupgenerics #' @export -`!=.tfb` <- eval(`!=.tfd`) +`!=.tfb` <- `!=.tfd` #' @rdname tfgroupgenerics #' @export @@ -307,55 +307,43 @@ tfd_op_tfd <- function(op, x, y) { ) } -tfd_op_numeric <- function(op, x, y, ...) { +# Worker for {tfd, numeric} arithmetic that preserves operand order so +# non-commutative ops (e.g. `/`, `-`, `^`) are correct on either side. +# `tf_left` indicates which side carries the tfd. +tfd_numeric_op <- function(op, x, y, tf_left) { assert_compatible_size(op, x, y) - ret <- map2(tf_evaluations(x), y, \(x, y) { - if (is.null(x)) return(NULL) - result <- do.call(op, list(x, y)) - if (allMissing(result)) NULL else result - }) - if (is_irreg(x)) { - ret <- map2(tf_arg(x), ret, \(.arg, .ret) { + tf_side <- if (tf_left) x else y + num_side <- if (tf_left) y else x + ret <- map2( + if (tf_left) tf_evaluations(tf_side) else num_side, + if (tf_left) num_side else tf_evaluations(tf_side), + \(a, b) { + if (is.null(a) || is.null(b)) return(NULL) + result <- do.call(op, list(a, b)) + if (allMissing(result)) NULL else result + } + ) + if (is_irreg(tf_side)) { + ret <- map2(tf_arg(tf_side), ret, \(.arg, .ret) { if (is.null(.ret)) return(NULL) list(arg = .arg, value = .ret) }) } - attributes(ret) <- attributes(x) - if (vec_size(y) > 1) { + attributes(ret) <- attributes(tf_side) + if (vec_size(num_side) > 1) { names(ret) <- NULL } ret } -# some code-duplication here, this makes non-commutative ops work for tfd and numeric -numeric_op_tfd <- function(op, x, y) { - assert_compatible_size(op, x, y) - ret <- map2(x, tf_evaluations(y), \(x, y) { - if (is.null(y)) return(NULL) - result <- do.call(op, list(x, y)) - if (allMissing(result)) NULL else result - }) - if (is_irreg(y)) { - ret <- map2(tf_arg(y), ret, \(.arg, .ret) { - if (is.null(.ret)) return(NULL) - list(arg = .arg, value = .ret) - }) - } - attributes(ret) <- attributes(y) - if (vec_size(x) > 1) { - names(ret) <- NULL - } - ret +tfd_op_numeric <- function(op, x, y, ...) { + tfd_numeric_op(op, x, y, tf_left = TRUE) } -# construct a tfb shell with NULL entries, using ref_tfb for attributes -tfb_na_result <- function(eval, ref_tfb) { - result <- vector("list", length(eval)) - result[] <- list(NULL) - names(result) <- names(eval) - attributes(result) <- attributes(ref_tfb) - names(result) <- names(eval) - result +# Mirror of tfd_op_numeric so non-commutative ops work with the numeric on the +# left. +numeric_op_tfd <- function(op, x, y) { + tfd_numeric_op(op, x, y, tf_left = FALSE) } #------------------------------------------------------------------------------- @@ -376,69 +364,58 @@ tfb_multdiv_numeric <- function(op, x, y) { ret } +# Shared body for tfb {+,-,*,/,^,%%, %/%} {tfb, numeric}: cast to tfd, run the +# pre-computed numeric `eval`, then rebase to `rebase_target`. `ref_tfb` +# supplies attributes for the all-NA branch when `rebase_target` is a length-0 +# ptype. +#TODO: restore sp afterwards so all properties are preserved? +tfb_lossy_rebase <- function(eval, rebase_target, ref_tfb = rebase_target) { + na_entries <- is.na(eval) + if (all(na_entries)) { + return(restore_na_entries( + eval[!na_entries], + na_entries, + names(eval), + ref_tfb = ref_tfb + )) + } + rebase_subset <- if (vec_size(rebase_target) > 1) { + rebase_target[!na_entries] + } else { + rebase_target + } + rebased <- tf_rebase( + eval[!na_entries], + rebase_subset, + penalized = FALSE, + verbose = FALSE + ) + restore_na_entries(rebased, na_entries, names(eval)) +} + tfb_op_numeric <- function(op, x, y) { cli::cli_warn( "Potentially lossy cast to {.cls tfd} and back in {.cls {vec_ptype_full(x)}} {op} {.cls {vec_ptype_full(y)}}." ) - eval <- tfd_op_numeric(op, tfd(x), y) - na_entries <- is.na(eval) - if (all(na_entries)) return(tfb_na_result(eval, x)) - if (any(na_entries)) { - rebased <- tf_rebase( - eval[!na_entries], - x[!na_entries], - penalized = FALSE, - verbose = FALSE - ) - result <- tfb_na_result(eval, rebased) - result[!na_entries] <- unclass(rebased) - return(result) - } - tf_rebase(eval, x, penalized = FALSE, verbose = FALSE) - #TODO: restore sp afterwards so all properties are preserved? + tfb_lossy_rebase(tfd_op_numeric(op, tfd(x), y), rebase_target = x) } numeric_op_tfb <- function(op, x, y) { cli::cli_warn( "Potentially lossy cast to {.cls tfd} and back in {.cls {vec_ptype_full(x)}} {op} {.cls {vec_ptype_full(y)}}." ) - eval <- numeric_op_tfd(op, x, tfd(y)) - na_entries <- is.na(eval) - if (all(na_entries)) return(tfb_na_result(eval, y)) - if (any(na_entries)) { - rebased <- tf_rebase( - eval[!na_entries], - y[!na_entries], - penalized = FALSE, - verbose = FALSE - ) - result <- tfb_na_result(eval, rebased) - result[!na_entries] <- unclass(rebased) - return(result) - } - tf_rebase(eval, y, penalized = FALSE, verbose = FALSE) #TODO: see tfb_op_numeric + tfb_lossy_rebase(numeric_op_tfd(op, x, tfd(y)), rebase_target = y) } tfb_op_tfb <- function(op, x, y) { cli::cli_warn( "Potentially lossy casts to {.cls tfd} and back for {.cls {vec_ptype_full(x)}} {op} {.cls {vec_ptype_full(y)}}." ) - eval <- tfd_op_tfd(op, tfd(x), tfd(y)) ret_ptype <- if (vec_size(x) >= vec_size(y)) vec_ptype(x) else vec_ptype(y) - na_entries <- is.na(eval) - if (all(na_entries)) return(tfb_na_result(eval, ret_ptype)) - if (any(na_entries)) { - rebased <- tf_rebase( - eval[!na_entries], - ret_ptype, - penalized = FALSE, - verbose = FALSE - ) - result <- tfb_na_result(eval, rebased) - result[!na_entries] <- unclass(rebased) - return(result) - } - tf_rebase(eval, ret_ptype, penalized = FALSE, verbose = FALSE) #TODO: see tfb_op_numeric + tfb_lossy_rebase( + tfd_op_tfd(op, tfd(x), tfd(y)), + rebase_target = ret_ptype + ) } tfb_plusminus_tfb <- function(op, x, y) { diff --git a/R/plot-mv.R b/R/plot-mv.R new file mode 100644 index 00000000..d759b269 --- /dev/null +++ b/R/plot-mv.R @@ -0,0 +1,132 @@ +# Plotting (rudimentary) ------------------------------------------------------- + +# graphical parameters that should be recycled *per curve* in trajectory plots +traj_curve_par <- c("col", "lty", "lwd", "pch", "cex", "lend", "ljoin") + +# Extract paired (x(t), y(t)) matrices from a 2-component tf_mv for matlines. +# Delegates to the *exported* `as.matrix.tf_mv`, which already does the +# union-grid + NA-fill coercion. Kept as a 2-line internal so the plot +# methods read clearly; downstream consumers wanting the same semantics +# should use `as.matrix.tf_mv()` or `as.data.frame.tf_mv(long = TRUE)` +# directly rather than reach into this private helper. +mv_paired_xy <- function(x) { + arr <- as.matrix(x, interpolate = TRUE) # [n_curve, n_arg, 2] + list(x = matrix(arr[, , 1L], nrow = dim(arr)[1L], ncol = dim(arr)[2L], + dimnames = dimnames(arr)[1:2]), + y = matrix(arr[, , 2L], nrow = dim(arr)[1L], ncol = dim(arr)[2L], + dimnames = dimnames(arr)[1:2])) +} + +# Draw each curve (row of mx/my) as a column of a matrix so that matlines() +# recycles col/lty/lwd/... across curves -- matching univariate plot.tf(). +# A single lines() call per curve would only honour the first element of e.g. +# `col`, so passing `col = 1:n` would draw every curve in the same colour. +# `alpha` (if given) tints the line colour(s) -- consistent with plot.tf(). +draw_trajectory <- function(mx, my, dots) { + line_args <- modifyList( + list(col = 1, lty = 1), + dots[intersect(names(dots), traj_curve_par)] + ) + if (!is.null(dots$alpha)) { + line_args$col <- grDevices::adjustcolor(line_args$col, alpha.f = dots$alpha) + } + do.call(graphics::matlines, c(list(t(mx), t(my)), line_args)) +} + +# default display: "trajectory" for 2-d curves (the movement-data view), +# "facet" otherwise. +mv_plot_type <- function(type, comps) { + type <- type %||% if (length(comps) == 2L) "trajectory" else "facet" + match.arg(type, c("facet", "trajectory")) +} + +#' Plot vector-valued functional data +#' +#' Two simple display modes for `tf_mv` objects: `"facet"` draws one panel per +#' output dimension (delegating to the univariate [plot.tf()]); +#' `"trajectory"` (only for `d == 2`) draws the curves in the plane, i.e. +#' \eqn{y(t)} against \eqn{x(t)} -- the natural view for movement data. +#' +#' @details +#' In `"trajectory"` mode the two components must be paired at common argument +#' values to form \eqn{(x(t), y(t))} points. When the components are sampled on +#' different (or per-curve irregular) grids they are therefore evaluated on the +#' union of their argument grids with `interpolate = TRUE` (values outside a +#' component's observed range become `NA` and are skipped). For components that +#' already share a grid this is a no-op. +#' +#' @param x a `tf_mv` object. +#' @param y ignored. +#' @param type `"trajectory"` or `"facet"`. Defaults to `"trajectory"` for +#' two-component (`d == 2`) objects and to `"facet"` otherwise. +#' @param ... passed to the underlying plotting calls. Per-curve graphical +#' parameters (`col`, `lty`, `lwd`, ...) are recycled across curves. +#' @returns `x`, invisibly. +#' @family tf_mv-class +#' @examples +#' arg <- seq(0, 1, length.out = 31) +#' xf <- tfd(t(sapply(1:5, function(i) sin(2 * pi * arg + i / 5))), arg = arg) +#' yf <- tfd(t(sapply(1:5, function(i) cos(2 * pi * arg + i / 5))), arg = arg) +#' mv <- tfd_mv(list(x = xf, y = yf)) +#' plot(mv, type = "trajectory") +#' plot(mv, type = "facet") +#' @export +plot.tf_mv <- function(x, y, ..., type = NULL) { + comps <- tf_components(x) + type <- mv_plot_type(type, comps) + comp_names <- attr(x, "comp_names") + if (type == "trajectory") { + if (length(comps) != 2) { + cli::cli_abort( + "{.code type = \"trajectory\"} requires exactly 2 components." + ) + } + xy <- mv_paired_xy(x) + mx <- xy$x + my <- xy$y + dots <- list(...) + # set up the plotting region without per-curve params, then draw the curves + setup_dots <- dots[setdiff(names(dots), c(traj_curve_par, "alpha"))] + plot_args <- modifyList( + list(xlab = comp_names[1], ylab = comp_names[2]), + setup_dots + ) + do.call( + plot, + c( + list( + range(mx, na.rm = TRUE), + range(my, na.rm = TRUE), + type = "n" + ), + plot_args + ) + ) + draw_trajectory(mx, my, dots) + return(invisible(x)) + } + # Prefer a single row for up to 3 components (wider figures are typical); + # fall back to n2mfrow's roughly-square layout for larger d. + mfrow_layout <- if (length(comps) <= 3L) c(1L, length(comps)) else + grDevices::n2mfrow(length(comps)) + op <- graphics::par(mfrow = mfrow_layout) + on.exit(graphics::par(op)) + iwalk(comps, \(comp, nm) plot(comp, main = nm, ...)) + invisible(x) +} + +#' @rdname plot.tf_mv +#' @importFrom graphics par lines matlines +#' @importFrom grDevices n2mfrow +#' @export +lines.tf_mv <- function(x, ..., type = NULL) { + comps <- tf_components(x) + type <- mv_plot_type(type, comps) + if (type == "trajectory" && length(comps) == 2) { + xy <- mv_paired_xy(x) + draw_trajectory(xy$x, xy$y, list(...)) + return(invisible(x)) + } + walk(comps, \(comp) graphics::lines(comp, ...)) + invisible(x) +} diff --git a/R/print-format-mv.R b/R/print-format-mv.R new file mode 100644 index 00000000..59e04d49 --- /dev/null +++ b/R/print-format-mv.R @@ -0,0 +1,100 @@ +# Printing / formatting -------------------------------------------------------- + +#' @export +format.tf_mv <- function(x, ...) { + comps <- tf_components(x) + if (!length(comps)) return(character(0)) + per_comp <- map(comps, \(comp) format(comp, ...)) + n <- vec_size(x) + map_chr(seq_len(n), function(i) { + paste(map_chr(per_comp, \(p) p[i]), collapse = " | ") + }) +} + +# one-line description of a single component's representation, mirroring the +# "evaluations / interpolation / basis" lines of print.tfd / print.tfb. +mv_component_info <- function(comp) { + if (is_tfb(comp)) { + return(paste0( + "in basis representation: ", + trimws(paste(attr(comp, "basis_label"), attr(comp, "family_label"))) + )) + } + evaluator <- paste0("interpolation by ", attr(comp, "evaluator_name")) + if (is_irreg(comp)) { + n_evals <- tf_count(comp[!is.na(comp)]) + grid <- if (length(n_evals)) { + paste0( + "based on ", + min(n_evals), + " to ", + max(n_evals), + " evaluations each" + ) + } else { + "irregular" + } + return(paste0(grid, ", ", evaluator)) + } + paste0("based on ", length(tf_arg(comp)), " evaluations each, ", evaluator) +} + +#' @export +print.tf_mv <- function(x, n = 6, ...) { + comp_names <- attr(x, "comp_names") + d <- tf_ncomp(x) + domain <- tf_domain(x) |> map_chr(format) + if (d == 0L) { + range_str <- "R^0" + } else { + range_str <- map_chr(tf_components(x), function(comp) { + r <- suppressWarnings(safe_range_evals(comp)) |> map_chr(format) + paste0("[", r[1], ", ", r[2], "]") + }) |> + paste(collapse = " x ") + } + cat(paste0( + class(x)[1], + "[", + length(x), + "] (", + paste(comp_names, collapse = ", "), + "): [", + domain[1], + ", ", + domain[2], + "] -> ", + range_str, + "\n" + )) + if (d > 0L) { + info <- map_chr(tf_components(x), mv_component_info) + if (length(unique(info)) == 1L) { + # all components share the same grid / interpolator / basis + cat(paste0("components ", info[1], "\n")) + } else { + for (k in seq_along(info)) { + cat(paste0(" ", comp_names[k], ": ", info[k], "\n")) + } + } + } + len <- length(x) + if (len > 0) { + n_show <- min(n, len) + formatted <- format(x[seq_len(n_show)], ...) + paste0("[", seq_len(n_show), "]: ", formatted) |> + cat(sep = "\n") + cat("\n") + if (n < len) { + cat(paste0(" [....] (", len - n, " not shown)\n")) + } + } + invisible(x) +} + +# dynamically exported in zzz.R (pillar glimpse), mirrors format_glimpse.tf +format_glimpse.tf_mv <- function(x, ...) { + format.tf_mv(x, ...) +} diff --git a/R/rebase.R b/R/rebase.R index a8e17ea2..2fb60ba0 100644 --- a/R/rebase.R +++ b/R/rebase.R @@ -85,27 +85,51 @@ tf_rebase.tfd.tfb_spline <- function( ... ) { assert_same_domains(object, basis_from) - # extract evals from object - data <- as.data.frame(object, unnest = TRUE) + assert_arg(arg, basis_from) + arg <- if (is.list(arg)) { + if (length(arg) != 1L) { + cli::cli_abort( + "{.arg arg} must be a single evaluation grid for {.cls tfb_spline}." + ) + } + arg[[1L]] + } else { + arg + } dots <- list(...) - dots$penalized <- dots$penalized %||% - !(is.na(attr(basis_from, "basis_args")$sp)) basis_args <- attr(basis_from, "basis_args") - basis_args <- basis_args[names(basis_args) != "sp"] - do.call( + dots$penalized <- dots$penalized %||% !is.na(basis_args$sp) + + # Fit on object's NATIVE arg values, reusing basis_from's mgcv spec — no + # pre-interpolation (that would compound interpolation error on top of basis + # approximation error). spec_override skips mgcv's unique-args-vs-k check + # so the under-determined case (n_obs <= k) succeeds via the penalty. + fit <- do.call( new_tfb_spline, c( list( - data = data, + data = as.data.frame(object, unnest = TRUE), domain = tf_domain(basis_from), - arg = arg, - sp = attr(basis_from, "basis_args")$sp, - family = attr(basis_from, "family") + sp = basis_args$sp, + family = attr(basis_from, "family"), + spec_override = environment(attr(basis_from, "basis"))$spec ), - basis_args, dots ) ) + + # Re-home onto the requested arg plus basis_from's closure / labels: the + # coefficients ARE the spline function in basis-coordinate space; the stored + # basis_matrix is just cached evaluation at the stored arg. With default arg, + # this still makes `same_basis(result, basis_from)` TRUE so downstream + # arithmetic stays warning-free; custom arg gets a correctly matched cache. + attr(fit, "arg") <- arg + attr(fit, "basis") <- attr(basis_from, "basis") + attr(fit, "basis_matrix") <- attr(basis_from, "basis")(arg) + attr(fit, "basis_args") <- attr(basis_from, "basis_args") + attr(fit, "basis_label") <- attr(basis_from, "basis_label") + attr(fit, "family_label") <- attr(basis_from, "family_label") + fit } #' @export @@ -148,7 +172,7 @@ tf_rebase.tfb.tfd <- function( evaluator = attr(basis_from, "evaluator_name") ) tfd_args <- modifyList(tfd_args, list(...)) - do.call(tfd, append(tfd_args, list(data = object, arg = arg, ...))) + do.call(tfd, append(tfd_args, list(data = object, arg = arg))) } #' @export diff --git a/R/register-mv.R b/R/register-mv.R new file mode 100644 index 00000000..a334e70f --- /dev/null +++ b/R/register-mv.R @@ -0,0 +1,538 @@ +# Registration: one shared time-warp per curve, applied to all components ------ + +# univariate signal used to estimate the (joint) warp for a multivariate curve +mv_registration_signal <- function(x, ref_component = 1L) { + assert_tf_mv(x) + if (is.function(ref_component)) { + return(ref_component(x)) + } + if (identical(ref_component, "norm")) { + return(tf_norm(x)) + } + # otherwise a component name or index -- tf_component() validates it. + tf_component(x, ref_component) +} + +#' @export +tf_warp.tf_mv <- function(x, warp, ...) { + map_components(x, \(comp) tf_warp(comp, warp, ...)) +} + +#' @export +tf_align.tf_mv <- function(x, warp, ...) { + map_components(x, \(comp) tf_align(comp, warp, ...)) +} + +#' @export +tf_estimate_warps.tf_mv <- function( + x, + ..., + template = NULL, + method = c("srvf", "srvf_mv", "cc", "affine", "landmark"), + max_iter = 3L, + tol = 1e-2, + ref_component = 1L +) { + method <- match.arg(method) + assert_count(max_iter, positive = TRUE) + assert_number(tol, lower = 0, finite = TRUE) + if (method == "srvf_mv") { + return(tf_register_srvf_mv( + x = x, + template = template, + max_iter = max_iter, + ... + )) + } + signal <- mv_registration_signal(x, ref_component) + tmpl <- if (is_tf_mv(template)) { + mv_registration_signal(template, ref_component) + } else { + template + } + warps <- tf_estimate_warps( + signal, + ..., + template = tmpl, + method = method, + max_iter = max_iter, + tol = tol + ) + # drop the (univariate) template attribute so tf_register() derives a + # multivariate template via mean() of the aligned components instead. + attr(warps, "template") <- NULL + warps +} + +# True multivariate SRVF time registration ------------------------------------- + +srvf_mv_check_dots <- function(dots, arg = "method = \"srvf_mv\"") { + forbidden <- c( + "alignment", + "rotation", + "rotated", + "scale", + "mode", + "isclosed", + "maxit" + ) + supplied <- intersect(names(dots), forbidden) + if (length(supplied)) { + cli::cli_abort(c( + "{arg} keeps rotation = FALSE, scale = FALSE, and mode = \"O\".", + "x" = "Unsupported argument{?s}: {.arg {supplied}}.", + "i" = "Use {.fn tf_register_shape} when curves may be rotated or rescaled." + )) + } +} + +srvf_mv_validate_regular <- function(x, arg = "x") { + assert_tf_mv(x, .var.name = arg) + if (!is_tfd_mv(x)) { + cli::cli_abort(c( + "{.arg {arg}} must be a regular shared-grid {.cls tfd_mv} object.", + "i" = "Convert basis or irregular data to {.cls tfd_mv} on a common grid first." + )) + } + comps <- tf_components(x) + if (!length(comps)) { + cli::cli_abort("{.arg {arg}} must have at least one component.") + } + if (!all(map_lgl(comps, is_reg))) { + cli::cli_abort(c( + "{.arg {arg}} must use regular component grids.", + "i" = "Irregular {.cls tf_mv} inputs are not supported for SRVF multivariate registration yet." + )) + } + grid <- tf_arg(x) + if (!is.numeric(grid)) { + cli::cli_abort(c( + "{.arg {arg}} must have one shared argument grid across all components.", + "i" = "Re-evaluate all components on the same grid before registration." + )) + } + if (length(grid) < 2L) { + cli::cli_abort("{.arg {arg}} must contain at least two argument values.") + } + values <- as.matrix(x) + if (anyNA(values) || any(!is.finite(values))) { + cli::cli_abort( + "{.arg {arg}} must not contain missing or non-finite evaluations." + ) + } + invisible(x) +} + +srvf_mv_validate_template <- function(template, x) { + if (is.null(template)) { + return(NULL) + } + srvf_mv_validate_regular(template, arg = "template") + check_compatible_mv(x, template) + if (length(template) != 1L && length(template) != length(x)) { + cli::cli_abort( + "{.arg template} must be of length 1 or the same length as {.arg x}." + ) + } + if (!all(tf_domain(x) == tf_domain(template))) { + cli::cli_abort("{.arg x} and {.arg template} must have the same domain.") + } + if (!isTRUE(all.equal(tf_arg(x), tf_arg(template)))) { + cli::cli_abort("{.arg x} and {.arg template} must have the same grid.") + } + invisible(template) +} + +srvf_mv_to_array <- function(x) { + aperm(as.matrix(x), c(3, 2, 1)) +} + +srvf_mv_array_to_tfd_mv <- function( + beta, + arg, + comp_names, + domain, + curve_names = NULL +) { + dims <- dim(beta) + if (length(dims) == 2L) { + components <- map(seq_len(dims[1]), \(k) { + mat <- matrix(beta[k, ], nrow = 1L) + if (!is.null(curve_names)) { + rownames(mat) <- curve_names + } + mat + }) + } else { + components <- map(seq_len(dims[1]), \(k) { + mat <- t(beta[k, , ]) + if (!is.null(curve_names)) { + rownames(mat) <- curve_names + } + mat + }) + } + names(components) <- comp_names + tfd_mv(components, arg = arg, domain = domain) +} + +srvf_mv_gamma_to_warps <- function(gamma, arg, domain, curve_names = NULL) { + gamma <- as.matrix(gamma) + # fdasrvf returns `gamma` normalized to the observed time grid; rescale by + # `range(arg)` -- NOT `domain`, which may be wider (#242). Endpoints get + # pinned to arg[1]/arg[k], so a domain-based rescale would yield + # non-monotone warps whenever `domain != range(arg)`. + lwr <- arg[1] + upr <- arg[length(arg)] + warp <- lwr + (upr - lwr) * t(gamma) + warp[, 1] <- arg[1] + warp[, ncol(warp)] <- arg[length(arg)] + if (!is.null(curve_names)) { + rownames(warp) <- curve_names + } + tfd(warp, arg = arg, domain = domain) +} + +# Work around an fdasrvf scaling quirk in scale-quotient shape registration. +# +# `fdasrvf::multiple_align_multivariate(..., scale = TRUE)` is meant to return +# every aligned curve (`betan`) in a common, scale-normalised frame, but it does +# not: the returned curves come out with size *inversely* proportional to each +# input curve's length. Concretely, for curves that are identical up to a +# similarity transform, `betan` fails to collapse, and the residual sizes invert +# the order seen with `scale = FALSE` -- a curve drawn smaller comes back larger +# (verified against fdasrvf directly; `betan` for `scale = FALSE` is fine, sizes +# are preserved as expected). This looks like an upstream bug rather than a +# convention, so we correct it on our side here. +# +# We renormalise every returned aligned curve to a shared (mean) arc length, so +# congruent shapes overlay as they should. The per-curve scale factors that were +# removed are reported separately and left untouched in `tf_scales()`. +# `beta` is `[component, arg, curve]`. +srvf_mv_equalize_scale <- function(beta) { + arclen <- apply(beta, 3, function(m) { + d <- m[, -1, drop = FALSE] - m[, -ncol(m), drop = FALSE] + sum(sqrt(colSums(d^2))) + }) + target <- mean(arclen) + for (k in seq_len(dim(beta)[3])) { + if (arclen[k] > 0) { + beta[,, k] <- beta[,, k] * (target / arclen[k]) + } + } + beta +} + +tf_register_srvf_mv <- function( + x, + template, + max_iter, + lambda = 0, + ... +) { + rlang::check_installed( + "fdasrvf", + version = "2.4.0", + reason = "for multivariate SRVF registration" + ) + dots <- list(...) + srvf_mv_check_dots(dots) + srvf_mv_validate_regular(x) + srvf_mv_validate_template(template, x) + assert_count(max_iter, positive = TRUE) + assert_number(lambda, lower = 0, finite = TRUE) + + arg <- tf_arg(x) + domain <- tf_domain(x) + comp_names <- attr(x, "comp_names") + curve_names <- names(x) + beta <- srvf_mv_to_array(x) + + if (is.null(template)) { + if (max_iter < 2L) { + cli::cli_abort( + "{.arg max_iter} must be at least 2 for {.val srvf_mv} with {.arg template = NULL}." + ) + } + ret <- suppressMessages(do.call( + fdasrvf::multivariate_karcher_mean, + c( + list( + beta = beta, + mode = "O", + alignment = TRUE, + rotation = FALSE, + scale = FALSE, + lambda = lambda, + maxit = max_iter + ), + dots + ) + )) + warps <- srvf_mv_gamma_to_warps(ret$gamma, arg, domain, curve_names) + attr(warps, "template") <- srvf_mv_array_to_tfd_mv( + ret$betamean, + arg = arg, + comp_names = comp_names, + domain = domain + ) + return(warps) + } + + template_beta <- srvf_mv_to_array(template) + is_single_template <- length(template) == 1L + gamma <- matrix(NA_real_, nrow = length(arg), ncol = length(x)) + for (i in seq_along(x)) { + beta1 <- if (is_single_template) { + template_beta[,, 1] + } else { + template_beta[,, i] + } + ret <- suppressMessages(do.call( + fdasrvf::reparam_curve, + c( + list( + beta1 = beta1, + beta2 = beta[,, i], + lambda = lambda, + rotated = FALSE, + mode = "O" + ), + dots + ) + )) + gamma[, i] <- ret$gam + } + warps <- srvf_mv_gamma_to_warps(gamma, arg, domain, curve_names) + attr(warps, "template") <- template + warps +} + +# Shape registration ----------------------------------------------------------- + +#' Register vector-valued curves in elastic shape space +#' +#' `tf_register_shape()` aligns vector-valued `tf_mv` curves with SRVF curve +#' alignment, optionally allowing rotations and rescaling in addition to time +#' warping. Unlike [tf_register()], this is a shape-registration interface: the +#' aligned curves live in centered shape space and the result stores rotations +#' and scale factors. +#' +#' @details +#' The per-curve scale factors returned by [tf_scales()] are expressed +#' *relative to the template*: each is the ratio of the template's SRVF norm to +#' the curve's own SRVF norm, so a value `> 1` means the curve was scaled up to +#' match the template and `< 1` means it was scaled down. With `template = NULL` +#' the returned [tf_template()] is the empirical mean of the aligned shape-space +#' curves rather than any single input curve. +#' +#' Only open curves (`mode = "O"`) are supported. Closed curves (`mode = "C"`) +#' additionally optimise over a circular seed shift that the returned warping +#' functions do not represent, which would make the stored warps inconsistent +#' with the aligned curves; `mode = "C"` is therefore rejected for now. +#' +#' @param x a regular shared-grid `tfd_mv` object. +#' @param ... additional arguments passed to fdasrvf alignment routines, such as +#' `ncores` or `verbose`. +#' @param template optional length-one `tf_mv` template. If `NULL`, a template +#' is refined from the first curve by iterative group alignment. +#' @param max_iter integer: maximum template refinement iterations when +#' `template = NULL`. Default `3L`. +#' @param tol numeric: relative template convergence tolerance. +#' @param rotation logical: allow rotations? Default `TRUE`. +#' @param scale logical: allow scale changes? Default `TRUE`. +#' @param mode character: curve mode for fdasrvf. Only open curves (`"O"`, +#' the default) are currently supported; `"C"` (closed curves) is reserved +#' for a future release (see Details). +#' @param lambda numeric: non-negative elastic penalty passed to the fdasrvf +#' alignment routines. Larger values penalise warping. Default `0`. +#' @param store_x logical: store original data in the result object? +#' @returns A `tf_shape_registration` object. Access aligned curves with +#' [tf_aligned()], inverse warps with [tf_inv_warps()], the template with +#' [tf_template()], rotations with [tf_rotations()], and scales with +#' [tf_scales()]. +#' @examplesIf rlang::is_installed("fdasrvf") +#' t <- seq(0, 1, length.out = 51) +#' base <- rbind(t, t^2) +#' beta <- array(NA_real_, dim = c(3, length(t), 2)) +#' for (i in 1:3) { +#' beta[i,, 1] <- base[1, ] +#' beta[i,, 2] <- base[2, ] +#' } +#' curves <- tfd_mv(beta, arg = t) +#' reg <- tf_register_shape(curves, max_iter = 1) +#' tf_rotations(reg) +#' tf_scales(reg) +#' @export +#' @family registration functions +tf_register_shape <- function( + x, + ..., + template = NULL, + max_iter = 3L, + tol = 1e-2, + rotation = TRUE, + scale = TRUE, + mode = c("O", "C"), + lambda = 0, + store_x = TRUE +) { + cl <- match.call() + rlang::check_installed( + "fdasrvf", + version = "2.4.0", + reason = "for multivariate SRVF shape registration" + ) + srvf_mv_validate_regular(x) + if (!is.null(template)) { + srvf_mv_validate_regular(template, arg = "template") + check_compatible_mv(x, template) + if (length(template) != 1L) { + cli::cli_abort( + "{.arg template} must be a length-one {.cls tf_mv} object." + ) + } + if (!all(tf_domain(x) == tf_domain(template))) { + cli::cli_abort("{.arg x} and {.arg template} must have the same domain.") + } + if (!isTRUE(all.equal(tf_arg(x), tf_arg(template)))) { + cli::cli_abort("{.arg x} and {.arg template} must have the same grid.") + } + } + assert_count(max_iter, positive = TRUE) + assert_number(tol, lower = 0, finite = TRUE) + assert_flag(rotation) + assert_flag(scale) + assert_flag(store_x) + assert_number(lambda, lower = 0, finite = TRUE) + mode <- match.arg(mode) + if (mode == "C") { + cli::cli_abort(c( + "Closed-curve shape registration ({.code mode = \"C\"}) is not supported yet.", + "i" = "Closed curves optimise over a circular seed shift that the returned warps cannot represent.", + "i" = "Use {.code mode = \"O\"} for open curves." + )) + } + + shape <- tf_register_shape_srvf_mv( + x = x, + template = template, + max_iter = max_iter, + tol = tol, + rotation = rotation, + scale = scale, + mode = mode, + lambda = lambda, + ... + ) + inv_warps <- tf_invert(shape$warps) + names(inv_warps) <- names(shape$warps) + new_tf_shape_registration( + registered = shape$registered, + warps = shape$warps, + inv_warps = inv_warps, + template = shape$template, + x = if (store_x) x else NULL, + rotations = shape$rotations, + scales = shape$scales, + call = cl + ) +} + +tf_register_shape_srvf_mv <- function( + x, + template, + max_iter, + tol, + rotation, + scale, + mode, + lambda = 0, + ... +) { + dots <- list(...) + if ("maxit" %in% names(dots)) { + cli::cli_abort( + "Use {.arg max_iter}, not fdasrvf's {.arg maxit}, in {.fn tf_register_shape}." + ) + } + assert_number(lambda, lower = 0, finite = TRUE) + + arg <- tf_arg(x) + domain <- tf_domain(x) + comp_names <- attr(x, "comp_names") + curve_names <- names(x) + beta <- srvf_mv_to_array(x) + current_template <- if (is.null(template)) { + beta[,, 1] + } else { + srvf_mv_to_array(template)[,, 1] + } + best <- NULL + best_template <- current_template + iterations <- if (is.null(template)) max_iter else 1L + + for (iter in seq_len(iterations)) { + ret <- suppressMessages(do.call( + fdasrvf::multiple_align_multivariate, + c( + list( + beta = beta, + mu = current_template, + mode = mode, + alignment = TRUE, + rotation = rotation, + scale = scale, + lambda = lambda + ), + dots + ) + )) + new_template <- rowMeans(ret$betan, dims = 2) + best <- ret + best_template <- new_template + + # a supplied template fixes `iterations` to 1 (see above), so this also + # covers the "don't refine a user template" case. + if (iter == iterations) { + break + } + delta <- mean((new_template - current_template)^2) + norm_sq <- mean(current_template^2) + if (is.finite(delta) && delta / max(norm_sq, .Machine$double.eps) < tol^2) { + break + } + current_template <- new_template + } + + if (scale) { + # fdasrvf returns scale-aligned curves at the wrong (inverted) size; see + # srvf_mv_equalize_scale() above. Renormalise, then refresh the template. + best$betan <- srvf_mv_equalize_scale(best$betan) + best_template <- rowMeans(best$betan, dims = 2) + } + + warps <- srvf_mv_gamma_to_warps(best$gamma, arg, domain, curve_names) + scales <- if (scale) best$qmean_norm / best$len_q else rep(1, length(x)) + names(scales) <- curve_names + rotations <- best$R + dimnames(rotations) <- list(comp_names, comp_names, curve_names) + list( + registered = srvf_mv_array_to_tfd_mv( + best$betan, + arg = arg, + comp_names = comp_names, + domain = domain, + curve_names = curve_names + ), + warps = warps, + template = srvf_mv_array_to_tfd_mv( + best_template, + arg = arg, + comp_names = comp_names, + domain = domain + ), + rotations = rotations, + scales = scales + ) +} diff --git a/R/register-utils.R b/R/register-utils.R index 90a9b9f7..2fcbd780 100644 --- a/R/register-utils.R +++ b/R/register-utils.R @@ -226,14 +226,28 @@ validate_template_landmarks <- function( domain, n_landmarks ) { - if (is.null(template)) { - return(colMeans(landmarks, na.rm = TRUE)) + user_supplied <- !is.null(template) + if (!user_supplied) { + template <- colMeans(landmarks, na.rm = TRUE) } assert_numeric(template, len = n_landmarks, any.missing = FALSE) if (n_landmarks > 1 && !all(diff(template) > 0)) { - cli::cli_abort("Template landmarks must be strictly increasing.") + if (user_supplied) { + cli::cli_abort("Template landmarks must be strictly increasing.") + } + # Default template (column-wise mean) crosses because of NA patterns + # in `landmarks`. Identify which columns are not monotone and report + # the NA counts to help the user diagnose. (#243) + na_per_col <- colSums(is.na(landmarks)) + bad <- which(diff(template) <= 0) + cli::cli_abort(c( + "Default template landmarks (column means of {.arg landmarks}) are not strictly increasing.", + "i" = "Crossing between columns: {paste(bad, bad + 1L, sep = '-', collapse = ', ')}.", + "i" = "NA count per landmark column: {paste(na_per_col, collapse = ', ')}.", + "i" = "Supply a strictly increasing {.arg template_landmarks} vector, or impute/drop the NA-laden curves." + )) } if (any(template < domain[1]) || any(template > domain[2])) { cli::cli_abort( diff --git a/R/register.R b/R/register.R index 38d8327a..1d6be07a 100644 --- a/R/register.R +++ b/R/register.R @@ -204,6 +204,8 @@ tf_align.tfb <- function(x, warp, ...) { #' Not used for `method = "landmark"`. #' @param method the registration method to use: #' * `"srvf"`: Square Root Velocity Framework (elastic registration). +#' * `"srvf_mv"`: true multivariate SRVF time registration for `tf_mv` +#' objects on a regular shared grid. #' * `"cc"`: continuous-criterion registration via a tf-native dense-grid #' optimizer with monotone spline warps. #' * `"affine"`: affine (linear) registration. @@ -261,7 +263,7 @@ tf_register <- function( x, ..., template = NULL, - method = c("srvf", "cc", "affine", "landmark"), + method = c("srvf", "srvf_mv", "cc", "affine", "landmark"), max_iter = 3L, tol = 1e-2, store_x = TRUE @@ -361,6 +363,9 @@ outer_registration_objective <- function(method, aligned, template, arg, dots) { #' @param method the registration method to use: #' * `"srvf"`: Square Root Velocity Framework (elastic registration). #' For details, see [fdasrvf::time_warping()]. Default template is the Karcher mean. +#' * `"srvf_mv"`: true multivariate SRVF time registration for `tf_mv` +#' objects on a regular shared grid. This method estimates one shared warp +#' from all components jointly and does not rotate or rescale curves. #' * `"cc"`: continuous-criterion registration via a tf-native dense-grid #' optimizer with monotone spline warps. Default template is the arithmetic #' mean. @@ -455,7 +460,7 @@ tf_estimate_warps <- function( x, ..., template = NULL, - method = c("srvf", "cc", "affine", "landmark"), + method = c("srvf", "srvf_mv", "cc", "affine", "landmark"), max_iter = 3L, tol = 1e-2 ) { @@ -467,7 +472,7 @@ tf_estimate_warps.tfd_reg <- function( x, ..., template = NULL, - method = c("srvf", "cc", "affine", "landmark"), + method = c("srvf", "srvf_mv", "cc", "affine", "landmark"), max_iter = 3L, tol = 1e-2 ) { @@ -477,6 +482,13 @@ tf_estimate_warps.tfd_reg <- function( assert_count(max_iter, positive = TRUE) assert_number(tol, lower = 0) + if (method == "srvf_mv") { + cli::cli_abort(c( + "{.val srvf_mv} registration is only available for {.cls tf_mv} objects.", + "i" = "Use {.val srvf} for univariate functional data." + )) + } + # Landmark method doesn't use template, uses landmarks instead if (method == "landmark") { warps <- do.call(tf_register_landmark, c(list(x = x), dots)) @@ -632,7 +644,7 @@ tf_estimate_warps.tfb <- function( x, ..., template = NULL, - method = c("srvf", "cc", "affine", "landmark"), + method = c("srvf", "srvf_mv", "cc", "affine", "landmark"), max_iter = 3L, tol = 1e-2 ) { @@ -652,7 +664,7 @@ tf_estimate_warps.tfd_irreg <- function( x, ..., template = NULL, - method = c("srvf", "cc", "affine", "landmark"), + method = c("srvf", "srvf_mv", "cc", "affine", "landmark"), max_iter = 3L, tol = 1e-2 ) { @@ -661,6 +673,13 @@ tf_estimate_warps.tfd_irreg <- function( assert_count(max_iter, positive = TRUE) assert_number(tol, lower = 0) + if (method == "srvf_mv") { + cli::cli_abort(c( + "{.val srvf_mv} registration is only available for {.cls tf_mv} objects.", + "i" = "Use {.val srvf} for univariate functional data." + )) + } + if (method %in% c("srvf", "cc")) { cli::cli_abort( c( @@ -707,8 +726,6 @@ tf_register_srvf <- function(x, template, ...) { arg <- tf_arg(x) domain <- tf_domain(x) - lwr <- domain[1] - upr <- domain[2] # Karcher mean x_mat <- as.matrix(x) @@ -735,11 +752,17 @@ tf_register_srvf <- function(x, template, ...) { } tmpl <- template } + # fdasrvf returns `gamma` normalized to the observed time grid, so rescale + # by `range(arg)` -- NOT `domain`, which may be wider. Pinning endpoints to + # arg[1]/arg[k] after a domain-based rescale would yield non-monotone warps + # whenever `domain != range(arg)` (#242). + lwr <- arg[1] + upr <- arg[length(arg)] warp <- lwr + (upr - lwr) * warp # avoid numerical over/underflow issue: warp[, 1] <- arg[1] warp[, length(arg)] <- arg[length(arg)] - result <- tfd(warp, arg = arg) + result <- tfd(warp, arg = arg, domain = domain) attr(result, "template") <- tmpl result } @@ -768,6 +791,23 @@ tf_register_landmark <- function(x, landmarks, template_landmarks = NULL) { valid <- !is.na(landmark_row) t_arg <- c(domain[1], template_landmarks[valid], domain[2]) w_vals <- c(domain[1], landmark_row[valid], domain[2]) + # `approx()` silently sorts a non-monotone `t_arg` (and the corresponding + # `w_vals` go along for the ride), producing a non-monotone warp that + # would later trip `assert_monotonic()` with no useful message (#243). + if (any(diff(t_arg) <= 0)) { + cli::cli_abort(c( + "Template landmark grid (with boundary anchors) is not strictly increasing.", + "i" = "Got: {paste(round(t_arg, 4), collapse = ', ')}.", + "i" = "Check that {.arg template_landmarks} lie strictly inside the domain and are strictly increasing." + )) + } + if (any(diff(w_vals) <= 0)) { + cli::cli_abort(c( + "Curve landmark sequence (with boundary anchors) is not strictly increasing.", + "i" = "Got: {paste(round(w_vals, 4), collapse = ', ')}.", + "i" = "Check the corresponding row of {.arg landmarks}." + )) + } approx(t_arg, w_vals, xout = x_arg, rule = 2)$y } diff --git a/R/registration-class.R b/R/registration-class.R index 7b46e9f0..4cb9bd28 100644 --- a/R/registration-class.R +++ b/R/registration-class.R @@ -6,6 +6,9 @@ #' \eqn{h_i^{-1}} (observed \eqn{\to} aligned time), and the template used. #' Use accessors [tf_aligned()], [tf_inv_warps()], and [tf_template()] to extract #' components. +#' `tf_shape_registration` objects, returned by [tf_register_shape()], extend +#' this structure with shape-space rotations and scale factors. Use +#' [tf_rotations()] and [tf_scales()] to extract those components. #' #' @section Summary diagnostics: #' `summary()` computes per-curve diagnostics for assessing registration @@ -50,8 +53,14 @@ #' \eqn{h_i^{-1}(t)} that map observed time to aligned time (`tfd` vector). #' Use [tf_invert()] on the result to obtain forward warps if needed. #' - `tf_template(x)`: extract the template function (`tf` vector of length 1). +#' - `tf_rotations(x)`: extract the per-curve rotation matrices from a +#' `tf_shape_registration` object. +#' - `tf_scales(x)`: extract the per-curve scale factors from a +#' `tf_shape_registration` object. Each factor is relative to the template +#' (template SRVF norm divided by the curve's SRVF norm); see +#' [tf_register_shape()]. #' -#' @param x a `tf_registration` object +#' @param x a `tf_registration` or `tf_shape_registration` object #' @param i index for subsetting (integer, logical, or character) #' @param object a `tf_registration` object #' @param ... additional arguments (currently unused) @@ -60,7 +69,9 @@ #' (inverse warping functions aligning `x` to the template function), the #' `template` function, the original data `x` (if `store_x = TRUE` was used #' in [tf_register()]), and the `call` to [tf_register()] that created the -#' object. Accessors return the respective component. +#' object. `tf_shape_registration` objects additionally contain the forward +#' `warps`, `rotations`, and `scales`. Accessors return the respective +#' component. #' @examples #' reg <- tf_register(pinch[1:5], method = "affine", type = "shift_scale") #' reg @@ -85,6 +96,31 @@ new_tf_registration <- function(registered, inv_warps, template, x, call) { ) } +new_tf_shape_registration <- function( + registered, + warps, + inv_warps, + template, + x, + rotations, + scales, + call +) { + structure( + list( + registered = registered, + warps = warps, + inv_warps = inv_warps, + template = template, + x = x, + rotations = rotations, + scales = scales, + call = call + ), + class = c("tf_shape_registration", "tf_registration") + ) +} + #' @rdname tf_registration #' @export tf_aligned <- function(x) { @@ -108,24 +144,98 @@ tf_template <- function(x) { #' @rdname tf_registration #' @export -print.tf_registration <- function(x, ...) { +tf_rotations <- function(x) { + assert_class(x, "tf_shape_registration") + x$rotations +} + +#' @rdname tf_registration +#' @export +tf_scales <- function(x) { + assert_class(x, "tf_shape_registration") + x$scales +} + +# Shared body of the registration print methods: prints the class header, the +# call, a " curve(s)[ with component(s)] on [a, b]" line, and the list of +# present (non-NULL) slots. `count_line` is a cli/glue template evaluated here +# (so it may reference `x` and the local `domain`); `slots` is a named logical. +print_registration <- function(x, cls, count_line, slots) { domain <- tf_domain(x$registered) - cli::cli_text("{.cls tf_registration}") + cli::cli_text("{.cls {cls}}") cat("Call: ") print(x$call) + cli::cli_text(count_line, .envir = environment()) cli::cli_text( - "{length(x$registered)} curve{?s} on [{domain[1]}, {domain[2]}]" + "Components: {paste(names(slots)[slots], collapse = ', ')}" + ) + invisible(x) +} + +#' @rdname tf_registration +#' @export +print.tf_registration <- function(x, ...) { + print_registration( + x, "tf_registration", + "{length(x$registered)} curve{?s} on [{domain[1]}, {domain[2]}]", + c( + "aligned" = !is.null(x$registered), + "inv_warps" = !is.null(x$inv_warps), + "template" = !is.null(x$template), + "original data" = !is.null(x$x) + ) ) - components <- c( - "aligned" = !is.null(x$registered), - "inv_warps" = !is.null(x$inv_warps), - "template" = !is.null(x$template), - "original data" = !is.null(x$x) +} + +#' @rdname tf_registration +#' @export +print.tf_shape_registration <- function(x, ...) { + print_registration( + x, "tf_shape_registration", + "{length(x$registered)} curve{?s} with {tf_ncomp(x$registered)} component{?s} on [{domain[1]}, {domain[2]}]", + c( + "aligned" = !is.null(x$registered), + "inv_warps" = !is.null(x$inv_warps), + "template" = !is.null(x$template), + "rotations" = !is.null(x$rotations), + "scales" = !is.null(x$scales), + "original data" = !is.null(x$x) + ) ) - cli::cli_text( - "Components: {paste(names(components)[components], collapse = ', ')}" +} + +# Mean (over the domain) of the pointwise variance (across curves) of a +# tf-vector. For tf_mv inputs the per-component scalars are averaged, giving a +# single scalar comparable to the univariate case (#249). Returns NA_real_ on +# unexpected failures (e.g. degenerate inputs where `var()` or +# `tf_evaluations()` error) rather than letting `summary()` itself error. +mean_pointwise_variance <- function(x) { + if (is.null(x)) { + return(NA_real_) + } + vx <- tryCatch( + suppressWarnings(var(x)), + error = function(e) NULL ) - invisible(x) + if (is.null(vx)) { + return(NA_real_) + } + if (is_tf_mv(vx)) { + per_comp <- tryCatch( + vapply( + tf_components(vx), + \(comp) suppressWarnings(mean(tf_evaluations(comp)[[1]], na.rm = TRUE)), + numeric(1) + ), + error = function(e) NA_real_ + ) + return(suppressWarnings(mean(per_comp, na.rm = TRUE))) + } + ev <- tryCatch(tf_evaluations(vx), error = function(e) NULL) + if (!length(ev)) { + return(NA_real_) + } + suppressWarnings(mean(ev[[1]], na.rm = TRUE)) } #' @rdname tf_registration @@ -137,19 +247,12 @@ summary.tf_registration <- function(object, ...) { probs <- c(0, 0.1, 0.25, 0.5, 0.75, 0.9, 1) # Amplitude variance reduction: 1 - mean(pointwise_var(reg)) / mean(pointwise_var(orig)) + # `var(tf_mv)` is component-wise (returns a tf_mv), so handle multivariate + # input by averaging per-component scalar mean variances (#249). amp_var_reduction <- NA_real_ if (!is.null(object$x)) { - var_orig <- tryCatch( - mean(tf_evaluations(var(object$x))[[1]], na.rm = TRUE), - error = function(e) NA_real_ - ) - var_reg <- tryCatch( - suppressWarnings(mean( - tf_evaluations(var(object$registered))[[1]], - na.rm = TRUE - )), - error = function(e) NA_real_ - ) + var_orig <- mean_pointwise_variance(object$x) + var_reg <- mean_pointwise_variance(object$registered) if (is.finite(var_orig) && var_orig > 0 && is.finite(var_reg)) { amp_var_reduction <- 1 - var_reg / var_orig } @@ -167,8 +270,7 @@ summary.tf_registration <- function(object, ...) { \(i) { a <- args_list[[i]] dev <- abs(inv_warp_evals[[i]] - a) - dt <- diff(a) - sum((dev[-length(dev)] + dev[-1]) / 2 * dt) + sum(trapezoid_weights(a) * dev) }, numeric(1) ) / @@ -314,6 +416,85 @@ print.summary.tf_registration <- function(x, ...) { invisible(x) } +# Extract rotation angle (radians) from each per-curve rotation matrix in the +# shape registration. Uses `atan2(R[2,1], R[1,1])` in 2D (signed angle in +# [-pi, pi]) and `acos((tr(R) - 1) / 2)` in 3D (the standard SO(3) angle). +# For d > 3 there is no single rotation angle -- a rotation is characterised by +# `floor(d / 2)` independent angles (Jordan-block / SVD structure of R) -- so +# we return NA with a warning. Follow-up: a dimension-agnostic scalar summary +# could be `sqrt(sum(log_skew_matrix(R)^2) / 2)`, the Frobenius norm of the +# matrix logarithm, which reduces to the angle in 2D/3D. +shape_rotation_angles <- function(rotations) { + if (is.null(rotations)) { + return(numeric(0)) + } + d <- dim(rotations)[1L] + n <- dim(rotations)[3L] + if (!length(n) || n == 0L) { + return(numeric(0)) + } + if (d == 2L) { + return(vapply( + seq_len(n), + \(i) atan2(rotations[2, 1, i], rotations[1, 1, i]), + numeric(1) + )) + } + if (d == 3L) { + return(vapply( + seq_len(n), + \(i) { + tr <- sum(diag(rotations[,, i])) + acos(max(-1, min(1, (tr - 1) / 2))) + }, + numeric(1) + )) + } + cli::cli_warn(c( + "Rotation-angle summary is not implemented for {.code d = {d}} (d > 3).", + "i" = "Higher-dimensional rotations are characterised by {floor(d / 2)} + independent rotation-plane angles, not a single angle.", + "i" = "Consider reporting per-pair angles or the matrix-logarithm norm." + )) + rep(NA_real_, n) +} + +#' @rdname tf_registration +#' @export +summary.tf_shape_registration <- function(object, ...) { + base <- NextMethod() + probs <- c(0, 0.1, 0.25, 0.5, 0.75, 0.9, 1) + angles <- shape_rotation_angles(object$rotations) + scales <- object$scales %||% numeric(0) + base$rotation_angles_deg <- if (length(angles) && any(!is.na(angles))) { + stats::quantile(angles * 180 / pi, probs = probs, na.rm = TRUE) + } else { + NULL + } + base$scale_quantiles <- if (length(scales)) { + stats::quantile(scales, probs = probs) + } else { + NULL + } + class(base) <- c("summary.tf_shape_registration", class(base)) + base +} + +#' @rdname tf_registration +#' @export +print.summary.tf_shape_registration <- function(x, ...) { + NextMethod() + if (!is.null(x$rotation_angles_deg)) { + cat("\nRotation angles (degrees), per-curve deciles:\n") + print(round(x$rotation_angles_deg, 2)) + } + if (!is.null(x$scale_quantiles)) { + cat("\nScale factors (template / curve SRVF norm), per-curve deciles:\n") + print(round(x$scale_quantiles, 4)) + } + invisible(x) +} + #' @rdname tf_registration #' @export plot.tf_registration <- function(x, ...) { @@ -378,3 +559,18 @@ plot.tf_registration <- function(x, ...) { length.tf_registration <- function(x) { length(x$registered) } + +#' @rdname tf_registration +#' @export +`[.tf_shape_registration` <- function(x, i) { + new_tf_shape_registration( + registered = x$registered[i], + warps = x$warps[i], + inv_warps = x$inv_warps[i], + template = x$template, + x = if (!is.null(x$x)) x$x[i] else NULL, + rotations = x$rotations[,, i, drop = FALSE], + scales = x$scales[i], + call = x$call + ) +} diff --git a/R/rng.R b/R/rng.R index 0ae9d3d9..79385200 100644 --- a/R/rng.R +++ b/R/rng.R @@ -8,7 +8,7 @@ #' \delta_{t}(t')}. #' - *Wiener* process: \eqn{Cov(x(t), x(t')) = #' \min(t',t)/\phi + \sigma^2 \delta_{t}(t')}, -#' - [*Matèrn* process](https://en.wikipedia.org/wiki/Mat%C3%A9rn_covariance_function#Definition): +#' - [*Matérn* process](https://en.wikipedia.org/wiki/Mat%C3%A9rn_covariance_function#Definition): #' \eqn{Cov(x(t), x(t')) = #' \tfrac{2^{1-o}}{\Gamma(o)} (\tfrac{\sqrt{2o}|t-t'|}{\phi})^o \text{Bessel}_o(\tfrac{\sqrt{2o}|t-t'|}{s}) #' + \sigma^2 \delta_{t}(t')} @@ -29,14 +29,14 @@ #' effect* for pairs of inputs t and t'. #' @param nugget nugget effect for additional white noise / unstructured #' variability. Defaults to `scale/200` (so: very little white noise). -#' @param order order of the Matèrn covariance (if used, must be >0), defaults +#' @param order order of the Matérn covariance (if used, must be >0), defaults #' to 1.5. The higher, the smoother the process. Evaluation of the covariance #' function becomes numerically unstable for large (>20) `order`, use #' "squareexp". #' @param domain of the generated functions. If not provided, the range of the #' supplied `arg` values. #' @returns an `tfd`-vector of length `n`. -#' @importFrom mvtnorm rmvnorm +#' @importFrom stats rnorm #' @export #' @family tidyfun RNG functions #' @examples @@ -120,7 +120,18 @@ tf_rgp <- function( ret <- map(arg, \(.arg) { cov <- outer(.arg, .arg, f_cov) + diag(0 * .arg + nugget) - cbind(.arg, t(rmvnorm(1, mean = 0 * .arg, sigma = cov))) + # zero-mean multivariate normal draw via eigen-decomposition of the + # covariance. Matches `mvtnorm::rmvnorm(method = "eigen")` semantics + # exactly: rank-deficient kernels (e.g. squared-exp with zero nugget) are + # sampled in their effective subspace; tiny negative eigenvalues from + # numerical noise are clamped to zero. We use the symmetric square root + # `V D^{1/2} V^T` so draws match the previous `mvtnorm` implementation + # under `set.seed()`. + e <- eigen(cov, symmetric = TRUE) + d <- pmax(e$values, 0) + R <- e$vectors %*% (sqrt(d) * t(e$vectors)) + sample <- as.numeric(R %*% rnorm(nrow(cov))) + cbind(.arg, sample) }) |> tfd() names(ret) <- 1:n @@ -140,6 +151,9 @@ tf_rgp <- function( #' lie, at most (relative to original distance to neighboring grid points). #' Defaults to at most 40% (0.4) of the original grid distances. Must be lower #' than 0.5. +#' @param same_arg for `tf_mv` objects, should all components receive the same +#' random argument-grid changes? Defaults to `TRUE`; use `FALSE` to jitter or +#' sparsify each component independently. #' @param ... additional args for the returned `tfd` in `tf_jiggle`. #' @returns an (irregular) `tfd` object. #' @importFrom stats runif @@ -153,8 +167,16 @@ tf_rgp <- function( #' (x_sp <- tf_sparsify(x, dropout = 0.3)) #' c(is_irreg(x_jig), is_irreg(x_sp)) tf_jiggle <- function(f, amount = 0.4, ...) { + UseMethod("tf_jiggle") +} + +#' @export +tf_jiggle.default <- function(f, amount = 0.4, ...) { assert_tfd(f) assert_number(amount, lower = 0, upper = 0.5) + if (!vec_size(f)) { + return(as.tfd_irreg(f)) + } f <- as.tfd_irreg(f) new_args <- map(tf_arg(f), tf_jiggle_args, amount = amount) evaluator <- attr(f, "evaluator_name") @@ -167,6 +189,17 @@ tf_jiggle <- function(f, amount = 0.4, ...) { ret } +#' @export +#' @rdname tf_jiggle +tf_jiggle.tf_mv <- function(f, amount = 0.4, same_arg = TRUE, ...) { + assert_flag(same_arg) + if (!same_arg) { + return(map_components(f, \(comp) tf_jiggle(comp, amount = amount, ...))) + } + tf_mv_assert_shared_arg(f, op = "tf_jiggle") + tf_mv_map_same_rng(f, \(comp) tf_jiggle(comp, amount = amount, ...)) +} + tf_jiggle_args <- function(arg, amount) { diffs <- diff(arg) g <- length(arg) @@ -187,8 +220,17 @@ tf_jiggle_args <- function(arg, amount) { #' @rdname tf_jiggle #' @param dropout what proportion of values of `f` to drop, on average. Defaults to half. #' @export -tf_sparsify <- function(f, dropout = 0.5) { +tf_sparsify <- function(f, dropout = 0.5, ...) { + UseMethod("tf_sparsify") +} + +#' @export +tf_sparsify.default <- function(f, dropout = 0.5, ...) { + rlang::check_dots_empty() assert_tf(f) + if (!vec_size(f)) { + return(as.tfd_irreg(f)) + } nas <- map(tf_evaluations(f), \(x) runif(length(x)) < dropout) tf_evals <- map2(tf_evaluations(f), nas, \(x, y) x[!y]) tf_args <- ensure_list(tf_arg(f)) @@ -200,3 +242,59 @@ tf_sparsify <- function(f, dropout = 0.5) { } ret } + +#' @export +#' @rdname tf_jiggle +tf_sparsify.tf_mv <- function(f, dropout = 0.5, same_arg = TRUE, ...) { + assert_flag(same_arg) + if (!same_arg) { + return(map_components(f, \(comp) tf_sparsify(comp, dropout = dropout, ...))) + } + tf_mv_assert_shared_arg(f, op = "tf_sparsify") + tf_mv_map_same_rng(f, \(comp) tf_sparsify(comp, dropout = dropout, ...)) +} + +tf_component_arg_list <- function(comp, n = vec_size(comp)) { + args <- ensure_list(tf_arg(comp)) + if (length(args) == 1L && n != 1L) { + rep(args, n) + } else { + args + } +} + +tf_mv_assert_shared_arg <- function(f, op) { + comps <- tf_components(f) + n <- vec_size(f) + if (!length(comps)) { + return(invisible(TRUE)) + } + comp_args <- map(comps, tf_component_arg_list, n = n) + shared <- comp_args[[1]] + compatible <- map_lgl(comp_args[-1], function(args) { + length(args) == length(shared) && + all(map2_lgl(args, shared, \(x, y) isTRUE(all.equal(x, y)))) + }) + if (any(!compatible)) { + cli::cli_abort(c( + "{.fn {op}} with {.code same_arg = TRUE} requires all components to share argument values.", + i = "Use {.code same_arg = FALSE} to change argument grids independently per component." + )) + } + invisible(TRUE) +} + +tf_mv_map_same_rng <- function(f, .f) { + comps <- tf_components(f) + if (!length(comps)) return(f) + if (!exists(".Random.seed", envir = .GlobalEnv, inherits = FALSE)) { + runif(1) + } + seed <- get(".Random.seed", envir = .GlobalEnv, inherits = FALSE) + comps <- map(comps, function(comp) { + assign(".Random.seed", seed, envir = .GlobalEnv) + .f(comp) + }) + names(comps) <- attr(f, "comp_names") + new_tf_mv(comps, domain = tf_domain(f)) +} diff --git a/R/smooth.R b/R/smooth.R index 16158a71..78daee3f 100644 --- a/R/smooth.R +++ b/R/smooth.R @@ -19,7 +19,8 @@ #' #' @param x a `tf` object containing functional data. #' @param method one of `"lowess"` (see [stats::lowess()]), `"rollmean"`, -#' `"rollmedian"` (see [zoo::rollmean()]) or `"savgol"` (see [pracma::savgol()]). +#' `"rollmedian"` (see [zoo::rollmean()]) or `"savgol"` (a Savitzky-Golay +#' filter, see [savgol()]). #' @param verbose give lots of diagnostic messages? Defaults to `TRUE`. #' @param ... arguments for the respective `method`. See details. #' @returns a smoothed version of the input. For some methods/options, the @@ -43,12 +44,10 @@ tf_smooth.tfb <- function(x, verbose = TRUE, ...) { x } -#' @importFrom pracma savgol #' @rdname tf_smooth #' @export #' @examples #' library(zoo) -#' library(pracma) #' f <- tf_sparsify(tf_jiggle(tf_rgp(4, 201, nugget = 0.05))) #' f_lowess <- tf_smooth(f, "lowess") #' # these methods ignore the distances between arg-values: @@ -78,7 +77,6 @@ tf_smooth.tfd <- function( dots <- list(...) nas <- is.na(x) x_evals <- tf_evaluations(x)[!nas] - # nocov start if (method %in% c("savgol", "rollmean", "rollmedian")) { if (verbose && !is_equidist(x)) { cli::cli_inform(c( @@ -114,7 +112,6 @@ tf_smooth.tfd <- function( \(x) do.call(smoother, append(list(x), dots)) ) } - # nocov end if (method == "lowess") { if (is.null(dots$f)) { dots$f <- 0.15 @@ -140,3 +137,68 @@ tf_smooth.tfd <- function( #' @export tf_smooth.default <- function(x, ...) .NotYetImplemented() + +# Cache of Savitzky-Golay convolution coefficients keyed by (fl, forder, dorder) +# so repeated tf_smooth() calls with the same parameters avoid recomputing the +# pseudoinverse. +.savgol_coef_cache <- new.env(parent = emptyenv()) + +.savgol_coefs <- function(fl, forder, dorder) { + key <- paste(fl, forder, dorder, sep = "_") + if (!is.null(.savgol_coef_cache[[key]])) { + return(.savgol_coef_cache[[key]]) + } + fc <- (fl - 1) / 2 + # Vandermonde-style design matrix on the centered window (Savitzky & Golay 1964). + X <- outer(-fc:fc, 0:forder, FUN = "^") + # SVD pseudoinverse, matching pracma::pinv's behavior; row dorder + 1 + # gives the convolution coefficients for the derivative of order dorder. + s <- svd(X) + tol <- .Machine$double.eps^(2 / 3) + p <- s$d > max(tol * s$d[1], 0) + pinv_X <- s$v[, p, drop = FALSE] %*% (1 / s$d[p] * t(s$u[, p, drop = FALSE])) + coefs <- pinv_X[dorder + 1, ] + .savgol_coef_cache[[key]] <- coefs + coefs +} + +#' Savitzky-Golay smoothing filter +#' +#' Local polynomial least-squares smoother. Re-implementation of +#' `pracma::savgol()` to avoid the extra dependency; numerically equivalent +#' (Savitzky & Golay 1964). +#' +#' @param T a numeric vector to smooth. +#' @param fl filter window length (odd integer > 1, must be greater than +#' `forder`). +#' @param forder polynomial order of the local fit (non-negative integer, +#' default 4). +#' @param dorder derivative order (non-negative integer not greater than +#' `forder`, default 0). +#' @returns a smoothed numeric vector of the same length as `T`. +#' @keywords internal +#' @export +savgol <- function(T, fl, forder = 4, dorder = 0) { + assert_numeric(T) + assert_int(fl, lower = 3) + assert_count(forder) + assert_count(dorder) + if (fl %% 2 == 0) { + cli::cli_abort("Argument {.arg fl} must be an odd integer greater than 1.") + } + if (fl <= forder) { + cli::cli_abort("Argument {.arg fl} must be greater than {.arg forder}.") + } + if (dorder > forder) { + cli::cli_abort( + "Argument {.arg dorder} must not be greater than {.arg forder}." + ) + } + fc <- (fl - 1) / 2 + coefs <- .savgol_coefs(fl, forder, dorder) + # Match pracma's exact end-effect handling: open (linear) convolution, then + # trim fc samples from each end to recover length(T). + T2 <- stats::convolve(T, rev(coefs), type = "o") + T2 <- T2[(fc + 1):(length(T2) - fc)] + (-1)^dorder * T2 +} diff --git a/R/soft-impute-svd.R b/R/soft-impute-svd.R index 51eb74aa..bbbd8206 100644 --- a/R/soft-impute-svd.R +++ b/R/soft-impute-svd.R @@ -1,73 +1,88 @@ -# copied from softImpute::Frob (v 1.4-1) under GPL-2 -# original authors: Trevor Hastie and Rahul Mazumder -frob <- function(Uold, Dsqold, Vold, U, Dsq, V) { - denom <- sum(Dsqold^2) - utu <- Dsq * (t(U) %*% Uold) - vtv <- Dsqold * (t(Vold) %*% V) - uvprod <- sum(diag(utu %*% vtv)) - num <- denom + sum(Dsq^2) - 2 * uvprod - num / max(denom, 1e-09) +# Soft-impute SVD: iteratively SVD a matrix with NAs, replacing NAs by their +# rank-J reconstruction (with optionally soft-thresholded singular values) until +# the Frobenius-distance of consecutive rank-J factors converges. Used by +# `fpc_wsvd()` to do FPCA on partially missing data on a common grid. + +# Squared Frobenius distance between two rank-J factorizations +# (U1 diag(d1) V1') and (U2 diag(d2) V2'), normalized by ||U1 diag(d1) V1'||_F^2. +# Computed via the inner-product identity to avoid forming the full products. +.simpute_frob_ratio <- function(u1, d1, v1, u2, d2, v2) { + denom <- sum(d1^2) + # cross = tr( diag(d2) (u2' u1) diag(d1) (v1' v2) ), computed as sum(A * t(B)) + cross <- sum((d2 * crossprod(u2, u1)) * t(d1 * crossprod(v1, v2))) + (denom + sum(d2^2) - 2 * cross) / max(denom, 1e-9) } -# code slightly adapted and shortened from softImpute::simpute.svd.R (v 1.4-1) under GPL-2 -# original authors: Trevor Hastie and Rahul Mazumder -# adaptations: -# - no warm starts, no returned call, no attributes -# inputs: x matrix of weighted (w/ integration weights) & centered (!) function evaluations, with NAs -# output: (regularized) SVD of x -simpute_svd <- function( - x, - J = min(dim(x)) - 1, - thresh = 1e-05, - lambda = 0, - maxit = 100, - trace.it = FALSE, - ... -) { - n <- dim(x) - m <- n[2] - n <- n[1] - xnas <- is.na(x) +# Soft-impute SVD as used in fpc_wsvd() for matrices with NAs. +# +# inputs: +# x : numeric matrix of (weighted, centered) function evaluations, +# entries may be NA +# J : truncation rank used for the imputation reconstruction +# thresh : convergence threshold on the relative Frobenius change +# lambda : soft-threshold applied to singular values (0 = hard rank-J) +# maxit : iteration cap +# output: list(u, d, v) with non-zero singular values (and a final +1 buffer), +# analogous to base::svd() but operating on an NA-filled matrix. +simpute_svd <- function(x, + J = min(dim(x)) - 1, + thresh = 1e-5, + lambda = 0, + maxit = 100, + ...) { + # clamp J to the available SVD rank; guards against J > min(dim(x)) and + # against the degenerate default J = 0 for single-row/column inputs + J <- max(min(as.integer(J), min(dim(x))), 1L) + nas <- is.na(x) + if (!any(nas)) { + s <- svd(x) + keep <- seq_len(min(J, length(s$d))) + return(list(u = s$u[, keep, drop = FALSE], + d = pmax(s$d[keep] - lambda, 0), + v = s$v[, keep, drop = FALSE])) + } + + # Initial fill: zeros. Callers pass column-centered data, so zero is the + # column mean. This is a standard warm start for soft-impute. + filled <- x + filled[nas] <- 0 + + s_prev <- svd(filled) + idx <- seq_len(J) - nz <- m * n - sum(xnas) - xfill <- x - xfill[xnas] <- 0 + # so the convergence check below is defined even if maxit < 1 + ratio <- Inf + for (iter in seq_len(maxit)) { + d_thr <- pmax(s_prev$d - lambda, 0) + # rank-J reconstruction; impute the missing cells with it + xhat <- s_prev$u[, idx, drop = FALSE] %*% + (d_thr[idx] * t(s_prev$v[, idx, drop = FALSE])) + filled[nas] <- xhat[nas] - svd.xfill <- svd(xfill) - ratio <- 1 - iter <- 0 - while ((ratio > thresh) && (iter < maxit)) { - iter <- iter + 1 - svd.old <- svd.xfill - d <- svd.xfill$d - d <- pmax(d - lambda, 0) - xhat <- svd.xfill$u[, seq(J)] %*% (d[seq(J)] * t(svd.xfill$v[, seq(J)])) - xfill[xnas] <- xhat[xnas] - svd.xfill <- svd(xfill) - ratio <- frob( - svd.old$u[, seq(J)], - d[seq(J)], - svd.old$v[, seq(J)], - svd.xfill$u[, seq(J)], - pmax(svd.xfill$d - lambda, 0)[seq(J)], - svd.xfill$v[, seq(J)] + s_new <- svd(filled) + d_new_thr <- pmax(s_new$d - lambda, 0) + + ratio <- .simpute_frob_ratio( + s_prev$u[, idx, drop = FALSE], d_thr[idx], s_prev$v[, idx, drop = FALSE], + s_new$u[, idx, drop = FALSE], d_new_thr[idx], s_new$v[, idx, drop = FALSE] ) - if (trace.it) { - obj <- (0.5 * sum((xfill - xhat)[!xnas]^2) + lambda * sum(d)) / nz - cat(iter, ":", "obj", format(round(obj, 5)), "ratio", ratio, "\n") - } + + s_prev <- s_new + if (ratio < thresh) break } - d <- pmax(svd.xfill$d[seq(J)] - lambda, 0) - J <- min(sum(d > 0) + 1, J) - svd.xfill <- list( - u = svd.xfill$u[, seq(J)], - d = d[seq(J)], - v = svd.xfill$v[, seq(J)] - ) - if (iter == maxit) { + + # the loop above only breaks early once ratio < thresh, so this means + # the iteration budget was exhausted without convergence + if (ratio >= thresh) { cli::cli_warn( "Convergence not achieved in {maxit} iterations for incomplete-data SVD." ) } - svd.xfill + + d_final <- pmax(s_prev$d[idx] - lambda, 0) + keep <- min(sum(d_final > 0) + 1, J) + keep_idx <- seq_len(keep) + list(u = s_prev$u[, keep_idx, drop = FALSE], + d = d_final[keep_idx], + v = s_prev$v[, keep_idx, drop = FALSE]) } diff --git a/R/split-combine.R b/R/split-combine.R index dded2d32..174e641c 100644 --- a/R/split-combine.R +++ b/R/split-combine.R @@ -31,17 +31,20 @@ tf_split <- function(x, splits, include = c("both", "left", "right")) { include <- match.arg(include) resolution_x <- get_resolution(tf_arg(x)) # if user supplied domain limit(s), remove - if (splits[1] == tf_domain(x)[1]) { + if (length(splits) && splits[1] == tf_domain(x)[1]) { splits <- splits[-1] } - if (splits[length(splits)] == tf_domain(x)[2]) { + if (length(splits) && splits[length(splits)] == tf_domain(x)[2]) { splits <- splits[-length(splits)] } + if (length(splits) == 0) { + return(list(x)) + } start <- c(tf_domain(x)[1], splits) end <- c(splits, tf_domain(x)[2]) if (include == "left") { - end[1:(length(end) - 1)] <- head(end, -1) - resolution_x + end[seq_len(length(end) - 1)] <- head(end, -1) - resolution_x } if (include == "right") { start[-1] <- start[-1] + resolution_x @@ -74,7 +77,7 @@ tf_split <- function(x, splits, include = c("both", "left", "right")) { #' tfs2_sparse <- tf_sparsify(tfs[[2]]) #' tfs3_spline <- tfb(tfs[[3]]) #' tf_combine(tfs[[1]], tfs2_sparse, tfs3_spline) -#' # combine(.., strict = F) can be used to coalesce different measurements +#' # combine(.., strict = FALSE) can be used to coalesce different measurements #' # of the same process over different grids: #' x1 <- tfd(x, arg = tf_arg(x)[seq(1, 51, by = 2)]) #' x2 <- tfd(x, arg = tf_arg(x)[seq(2, 50, by = 2)]) @@ -134,8 +137,9 @@ tf_combine <- function(..., strict = FALSE) { "Can't combine functions with multiple values at the same argument." ) } - cli::cli_alert_warning( - "removing {length(duplicates)} duplicated points from input data." + cli::cli_warn( + "removing {length(duplicates)} duplicated points from input data.", + class = "tf_combine_duplicates" ) tfs_data <- tfs_data[-duplicates, ] } diff --git a/R/summarize.R b/R/summarize.R index b2ea72c5..30b4e47e 100644 --- a/R/summarize.R +++ b/R/summarize.R @@ -2,7 +2,11 @@ # op has to be a string! summarize_tf <- function(..., op = NULL, eval = FALSE, verbose = TRUE) { dots <- list(...) - funs <- map_lgl(dots, is_tf) + # only univariate tf vectors flow through this scalar-evaluations summary + # path; tf_mv inputs have their own .tf_mv group-generic in R/ops-mv.R and + # must not be silently absorbed into the as.matrix() + apply() reduction + # below (which assumes 2-d [curve, arg] not 3-d [curve, arg, component]). + funs <- map_lgl(dots, is_tf_1d) op_args <- dots[!funs] funs <- dots[funs] op_call <- function(x) do.call(op, c(list(x), op_args)) @@ -38,7 +42,7 @@ summarize_tf <- function(..., op = NULL, eval = FALSE, verbose = TRUE) { } return(unname(ret)) } - ret <- do.call( + do.call( tfb, c(args, penalized = FALSE, verbose = FALSE, attr(funs, "basis_args")) ) |> @@ -88,7 +92,7 @@ mean.tf <- function(x, ...) { #' @rdname tfsummaries median.tf <- function(x, na.rm = FALSE, depth = "MBD", ...) { if (!na.rm && anyNA(x)) { - return(1 * NA * x[1]) + return(tf_na_like(x)) } x <- x[!is.na(x)] if (is.character(depth) && length(depth) == 1 && depth == "pointwise") { @@ -136,6 +140,12 @@ var.default <- stats::var #' @export #' @rdname tfsummaries var.tf <- function(x, y = NULL, na.rm = FALSE, use) { + if (!is.null(y)) { + cli::cli_abort(c( + "{.fn var} on {.cls tf} does not support a second argument {.arg y}.", + "i" = "Use {.fn tf_crosscov} or {.fn tf_crosscor} for cross-(co)variance." + )) + } summarize_tf(x, na.rm = na.rm, op = "var", eval = is_tfd(x)) } @@ -202,7 +212,7 @@ fivenum.default <- function(x, na.rm = FALSE, ...) { #' @rdname fivenum fivenum.tf <- function(x, na.rm = FALSE, depth = "MHI", ...) { if (!na.rm && anyNA(x)) { - return(1 * NA * x[1]) + return(tf_na_like(x)) } prepared <- depth_data(x, depth, na.rm = na.rm, ...) if (is.null(prepared$d)) { @@ -235,7 +245,7 @@ fivenum.tf <- function(x, na.rm = FALSE, depth = "MHI", ...) { Summary.tf <- function(...) { not_defined <- switch(.Generic, all = , any = TRUE, FALSE) if (not_defined) { - cli::cli_abort("{.Generic} not defined for {.cls tf} objects.") + cli::cli_abort("{(.Generic)} not defined for {.cls tf} objects.") } # min, max, range have dedicated methods that accept a depth argument if (.Generic %in% c("min", "max", "range")) { @@ -244,51 +254,41 @@ Summary.tf <- function(...) { summarize_tf(..., op = .Generic, eval = is_tfd(list(...)[[1]])) } +# Single dispatcher for the four cum* Math-group operations across tfd / tfb. +# Each cum* S3 method below is a thin wrapper that forwards .Generic. +cum_tf <- function(op, ..., eval) { + summarize_tf(..., op = op, eval = eval) +} + #' @rdname tfgroupgenerics #' @export -cummax.tfd <- function(...) { - summarize_tf(..., op = "cummax", eval = TRUE) -} +cummax.tfd <- function(...) cum_tf("cummax", ..., eval = TRUE) #' @rdname tfgroupgenerics #' @export -cummin.tfd <- function(...) { - summarize_tf(..., op = "cummin", eval = TRUE) -} +cummin.tfd <- function(...) cum_tf("cummin", ..., eval = TRUE) #' @rdname tfgroupgenerics #' @export -cumsum.tfd <- function(...) { - summarize_tf(..., op = "cumsum", eval = TRUE) -} +cumsum.tfd <- function(...) cum_tf("cumsum", ..., eval = TRUE) #' @rdname tfgroupgenerics #' @export #' @family tidyfun compute -cumprod.tfd <- function(...) { - summarize_tf(..., op = "cumprod", eval = TRUE) -} +cumprod.tfd <- function(...) cum_tf("cumprod", ..., eval = TRUE) #' @rdname tfgroupgenerics #' @export -cummax.tfb <- function(...) { - summarize_tf(..., op = "cummax", eval = FALSE) -} +cummax.tfb <- function(...) cum_tf("cummax", ..., eval = FALSE) #' @rdname tfgroupgenerics #' @export -cummin.tfb <- function(...) { - summarize_tf(..., op = "cummin", eval = FALSE) -} +cummin.tfb <- function(...) cum_tf("cummin", ..., eval = FALSE) #' @rdname tfgroupgenerics #' @export -cumsum.tfb <- function(...) { - summarize_tf(..., op = "cumsum", eval = FALSE) -} +cumsum.tfb <- function(...) cum_tf("cumsum", ..., eval = FALSE) #' @rdname tfgroupgenerics #' @export -cumprod.tfb <- function(...) { - summarize_tf(..., op = "cumprod", eval = FALSE) -} +cumprod.tfb <- function(...) cum_tf("cumprod", ..., eval = FALSE) diff --git a/R/tf-package.R b/R/tf-package.R index d0270a54..60ce8975 100644 --- a/R/tf-package.R +++ b/R/tf-package.R @@ -22,7 +22,7 @@ #' @keywords internal #' @importFrom checkmate assert_choice assert_class assert_count #' @importFrom checkmate assert_data_frame assert_flag assert_function -#' @importFrom checkmate assert_int assert_list assert_logical assert_matrix +#' @importFrom checkmate assert_int assert_integerish assert_list assert_logical assert_matrix #' @importFrom checkmate assert_number assert_numeric assert_string #' @importFrom checkmate assert_set_equal assert_subset assert_true #' @importFrom checkmate allMissing check_numeric makeAssertionFunction vname diff --git a/R/tf-s4.R b/R/tf-s4.R index 4f8cdbe1..733ad5af 100644 --- a/R/tf-s4.R +++ b/R/tf-s4.R @@ -74,3 +74,10 @@ setClass( ) ) setOldClass("tfb", S4Class = "tfb") + +setClass("tf_mv", contains = "tf") +setOldClass(c("tf_mv", "tf"), S4Class = "tf_mv") +setClass("tfd_mv", contains = "tf_mv") +setOldClass(c("tfd_mv", "tf_mv", "tf"), S4Class = "tfd_mv") +setClass("tfb_mv", contains = "tf_mv") +setOldClass(c("tfb_mv", "tf_mv", "tf"), S4Class = "tfb_mv") diff --git a/R/tfb-fpc-utils.R b/R/tfb-fpc-utils.R index a008e825..a68bbdfb 100644 --- a/R/tfb-fpc-utils.R +++ b/R/tfb-fpc-utils.R @@ -11,14 +11,13 @@ #' given quadrature weights \eqn{\Delta_i}, not #' \eqn{\phi_j'\phi_j = \sum_i \phi_j(t_i)^2 = 1};\cr #' \eqn{\int_T \phi_j(t) \phi_k(t) dt = 0} not -#' \eqn{\phi_j'\phi_k = \sum_i \phi_j(t_i)\phi_k(t_i) = 0}, -#' c.f. `mogsa::wsvd()`.\cr -#' For incomplete data, this uses an adaptation of `softImpute::softImpute()`, -#' see references. Note that will not work well for data on a common grid if more -#' than a few percent of data points are missing, and it breaks down completely -#' for truly irregular data with no/few common timepoints, even if observed very -#' densely. For such data, either re-evaluate on a common grid first or use more -#' advanced FPCA approaches like `refund::fpca_sc()`, +#' \eqn{\phi_j'\phi_k = \sum_i \phi_j(t_i)\phi_k(t_i) = 0}.\cr +#' For incomplete data, this uses a soft-impute iterative-SVD scheme +#' (see references). Note that this will not work well for data on a common grid +#' if more than a few percent of data points are missing, and it breaks down +#' completely for truly irregular data with no/few common timepoints, even if +#' observed very densely. For such data, either re-evaluate on a common grid +#' first or use more advanced FPCA approaches like `refund::fpca_sc()`, #' see last example for [tfb_fpc()] #' #' @param data numeric matrix of function evaluations @@ -32,12 +31,18 @@ #' - `npc` how many FPCs were returned for the given `pve` (integer) #' - `scoring_function` a function that returns FPC scores for new data #' and given eigenfunctions, see `tf:::.fpc_wsvd_scores` for an example. -#' @author Trevor Hastie, Rahul Mazumder, Chen Meng, Fabian Scheipl -#' @references code adapted from / inspired by `mogsa::wsvd()` by Chen Meng -#' and `softImpute::softImpute()` by Trevor Hastie and Rahul Mazumder.\cr -#' `r format_bib("meng2023mogsa", "mazumder2010", "softimpute")` +#' @author Fabian Scheipl +#' @references the soft-impute SVD algorithm for incomplete data is described in +#' `r format_bib("mazumder2010")` #' @family tfb-class #' @family tfb_fpc-class +#' @examples +#' arg <- seq(0, 1, length.out = 41) +#' data <- t(sapply(seq(0, 2 * pi, length.out = 10), +#' function(p) sin(2 * pi * arg + p))) +#' fpc <- fpc_wsvd(data, arg = arg, pve = 0.99) +#' fpc$npc +#' @export fpc_wsvd <- function(data, arg, pve = 0.995) { UseMethod("fpc_wsvd") } @@ -49,9 +54,8 @@ fpc_wsvd.matrix <- function(data, arg, pve = 0.995) { assert_numeric(arg, any.missing = FALSE, sorted = TRUE, len = ncol(data)) assert_number(pve, lower = 0, upper = 1) - delta <- c(0, diff(arg)) # trapezoid integration weights: - weights <- 0.5 * c(delta[-1] + head(delta, -1), tail(delta, 1)) + weights <- trapezoid_weights(arg) mean <- colMeans(data, na.rm = TRUE) data_wc <- t((t(data) - mean) * sqrt(weights)) @@ -61,7 +65,7 @@ fpc_wsvd.matrix <- function(data, arg, pve = 0.995) { svd(data_wc, nu = 0, nv = min(dim(data))) } else { cli::cli_inform( - "Using softImpute SVD on {round(mean(nas) * 100, 1)}% missing data." + "Using soft-impute SVD on {round(mean(nas) * 100, 1)}% missing data." ) if (pve + mean(nas) > 1) { cli::cli_inform(c( diff --git a/R/tfb-fpc.R b/R/tfb-fpc.R index 54c79bd7..07ee9574 100644 --- a/R/tfb-fpc.R +++ b/R/tfb-fpc.R @@ -47,8 +47,7 @@ new_tfb_fpc <- function( scoring_function <- attr(basis_from, "scoring_function") # trapezoid integration weights: #TODO generally appropriate or just for wsvd? - delta <- c(0, diff(arg)) - weights <- 0.5 * c(delta[-1] + head(delta, -1), tail(delta, 1)) + weights <- trapezoid_weights(arg) scores <- scoring_function( df_2_mat(data), basis_matrix[, -1], @@ -241,6 +240,5 @@ tfb_fpc.default <- function( ) } - data <- data_frame0() - new_tfb_spline(data = data, arg = arg, method = method, domain = domain, ...) + new_tfb_fpc(data = numeric(0), method = method, domain = domain, ...) } diff --git a/R/tfb-mfpc.R b/R/tfb-mfpc.R new file mode 100644 index 00000000..e65920d1 --- /dev/null +++ b/R/tfb-mfpc.R @@ -0,0 +1,627 @@ +#' @include tfb-mv.R tfb-fpc.R tfd-mv.R +NULL + +# Multivariate FPCA (Happ & Greven, 2018) -------------------------------------- +# +# Strategy (see `attic/design/multivariate.md`): run the ordinary univariate +# FPCA on each of the `d` components, then combine the per-component scores into +# a single set of *shared* scalar scores per curve with vector-valued +# eigenfunctions. The result is a normal `tfb_mv` whose `d` components are +# ordinary `tfb_fpc` objects -- but every component stores the *same* score +# vector per curve, and `score_variance` holds the joint (multivariate) +# eigenvalues. Reconstruction, printing and plotting therefore work for free via +# the existing `tfb_mv` machinery; only *re-scoring new data* needs a joint +# code path (see `mfpc_rescore()`), wired into `tf_rebase()` / `vec_cast()`. + +#' Multivariate functional principal component analysis (`f: R -> R^d`) +#' +#' `tfb_mfpc()` computes a *multivariate* functional principal component +#' analysis (MFPCA) of vector-valued functional data in the sense of +#' Happ & Greven (2018): a single set of scalar scores per curve, shared across +#' all `d` components, together with vector-valued eigenfunctions +#' \eqn{\Psi_m: \mathcal{T} \to \mathbb{R}^d}, so that +#' \eqn{f_i(t) \approx \mu(t) + \sum_m s_{im}\,\Psi_m(t)}. +#' +#' This is qualitatively different from `tfb_mv(data, basis = "fpc")`, which +#' fits an *independent* FPCA per component (separate eigenfunctions **and** +#' separate scores) and so cannot capture joint variation across dimensions. +#' +#' The estimator first runs the univariate FPCA (see [tfb_fpc()] / [fpc_wsvd()]) +#' on each component to obtain univariate scores \eqn{\xi^{(j)}} and +#' eigenfunctions \eqn{\phi^{(j)}}, then eigendecomposes the joint covariance of +#' the (weighted) stacked scores. With component weights \eqn{w_j > 0} the +#' shared scores and multivariate eigenfunctions are +#' \deqn{s_{im} = \sum_j \sqrt{w_j} \sum_l [c_m]^{(j)}_l \xi^{(j)}_{il}, \qquad +#' \Psi_m^{(j)} = \frac{1}{\sqrt{w_j}} \sum_l [c_m]^{(j)}_l \phi^{(j)}_l,} +#' where \eqn{c_m} are the eigenvectors of the weighted joint score covariance. +#' +#' The returned object is a [tfb_mv()] whose components are [tfb_fpc()] objects +#' sharing identical per-curve scores; cast it back to evaluations with +#' [as.tfd_mv()] / `vec_cast()`, and project **new** `tfd_mv` data onto the +#' fitted basis with [tf_rebase()]. Like the univariate FPCA, the estimator +#' targets data observed on a common grid per component; re-scoring new data +#' evaluates it on each component's estimation grid, so new curves must be +#' observable there. +#' +#' @param data a [tfd_mv()] / `tfb_mv` object, a (named) `list` of univariate +#' `tf` vectors, or anything [tfd_mv()] accepts. +#' @param weights component weighting scheme for the joint analysis. Either a +#' string -- `"inverse_variance"` (default; \eqn{w_j = 1 / \sum_l +#' \lambda^{(j)}_l}, so each component contributes equal total variance), +#' `"snr"` (signal-to-noise: retained variance over the discarded-variance +#' tail of the univariate fit), or `"equal"` (\eqn{w_j = 1}) -- or a numeric +#' vector of `d` non-negative weights. Weights are rescaled to sum to `d` +#' (so `"equal"` gives all-ones). +#' @param pve proportion of variance explained used to truncate the +#' *multivariate* components (default `0.995`). Ignored if `npc` is given. +#' @param npc number of multivariate FPCs to retain (overrides `pve`). +#' @param uni_pve proportion of variance explained for the *univariate* FPCA of +#' each component (default `0.995`); forwarded as `pve` to the univariate +#' `method`. +#' @param method univariate FPCA method, see [tfb_fpc()]. Defaults to +#' [fpc_wsvd()]. +#' @param ... further arguments forwarded to the univariate `method`. As in +#' [tfb_mv()], a `...` argument given as a list named by the component names +#' is distributed per component. +#' @returns a `tfb_mv` object whose `d` components are [tfb_fpc()] objects with +#' shared per-curve scores; `is_tfb_mfpc()` is `TRUE` for it. Use +#' [tf_mfpc_scores()] and [tf_mfpc_efunctions()] to extract the shared scores +#' and the multivariate eigenfunctions. +#' @references `r format_bib("happ2018")` +#' @seealso [tfb_mv()] for independent per-component FPCA, [tfb_fpc()] / +#' [fpc_wsvd()] for the univariate machinery. +#' @family tf_mv-class +#' @family tfb_fpc-class +#' @examples +#' set.seed(1) +#' g <- tfd_mv(list(hip = tf_rgp(20), knee = tf_rgp(20))) +#' m <- tfb_mfpc(g, pve = 0.99) +#' m +#' dim(tf_mfpc_scores(m)) +#' tf_mfpc_efunctions(m) +#' # reconstruct and project new data: +#' plot(as.tfd_mv(m), type = "facet") +#' g_new <- tfd_mv(list(hip = tf_rgp(3), knee = tf_rgp(3))) +#' tf_rebase(g_new, m) +#' @rdname tfb_mfpc +#' @export +tfb_mfpc <- function(data, ...) UseMethod("tfb_mfpc") + +#' @rdname tfb_mfpc +#' @export +tfb_mfpc.tf_mv <- function( + data, + weights = c("inverse_variance", "snr", "equal"), + pve = 0.995, + npc = NULL, + uni_pve = 0.995, + method = fpc_wsvd, + ... +) { + if (!tf_ncomp(data)) { + cli::cli_abort("Can't compute MFPCA: {.arg data} has no components.") + } + fit <- mfpc_fit( + data, + weights = weights, + pve = pve, + npc = npc, + uni_pve = uni_pve, + method = method, + ... + ) + new_tf_mv( + fit$components, + domain = tf_domain(data), + mfpc = fit$mfpc + ) +} + +#' @rdname tfb_mfpc +#' @export +tfb_mfpc.list <- function(data, ...) { + tfb_mfpc(tfd_mv(data), ...) +} + +#' @rdname tfb_mfpc +#' @export +tfb_mfpc.default <- function(data, ...) { + if (missing(data) || vec_size(data) == 0) { + cli::cli_abort("Can't compute MFPCA on empty input.") + } + tfb_mfpc(tfd_mv(data), ...) +} + +# Core fitter ------------------------------------------------------------------ + +# Returns a list with `components` (named list of shared-score tfb_fpc) and +# `mfpc` (the joint spec needed to re-score new data). +mfpc_fit <- function( + data, + weights = c("inverse_variance", "snr", "equal"), + pve = 0.995, + npc = NULL, + uni_pve = 0.995, + method = fpc_wsvd, + ... +) { + comps <- tf_components(data) + comp_names <- attr(data, "comp_names") + d <- length(comps) + domain <- tf_domain(data) + n <- vec_size(data) + ids <- names(data) %||% as.character(seq_len(n)) + dots <- list(...) + + # 1. univariate FPCA per component (full spec incl. error_variance for SNR) + specs <- map2(comps, comp_names, function(comp, nm) { + pcd <- distribute_dots(dots, nm, comp_names) + mfpc_uni_fit(comp, method = method, uni_pve = uni_pve, pcd = pcd) + }) + + # 2. component weights (rescaled to sum to d) + w <- mfpc_weights(specs, weights, d = d, comp_names = comp_names) + + # 3. joint eigen-analysis of the weighted, stacked univariate scores + scores_uni <- map(specs, "scores") # each n x M_j + m_j <- map_int(scores_uni, ncol) + xi <- do.call(cbind, scores_uni) # n x M_+ + sqrt_w_cols <- rep(sqrt(w), m_j) + xi_w <- sweep(xi, 2, sqrt_w_cols, `*`) + # Happ & Greven combine the *uncentered* univariate scores: they are + # mean-zero by construction of the univariate FPCA (each time point is + # centered), so the eigenvectors and the raw `xi_w %*% loadings` scores below + # are mutually consistent and truncated reconstruction is the optimal rank-M + # approximation. Using the sum-of-squares (not /(n-1)) keeps the multivariate + # eigenvalues on the same scale as univariate tfb_fpc `score_variance`; + # scaling the matrix by a constant leaves the eigenvectors unchanged. + joint_ss <- crossprod(xi_w) + eig <- eigen(joint_ss, symmetric = TRUE) + evalues_all <- pmax(eig$values, 0) + loadings_all <- eig$vectors # M_+ x M_+, columns are c_m + m_keep <- mfpc_choose_npc(evalues_all, pve = pve, npc = npc) + loadings <- loadings_all[, seq_len(m_keep), drop = FALSE] + evalues <- evalues_all[seq_len(m_keep)] + # shared scores: raw (uncentered) projection -> exact reconstruction at full + # rank; mu absorbs the level, univariate scores are ~mean-zero. + scores_shared <- xi_w %*% loadings # n x M + + # 4. multivariate eigenfunctions per component -> shared-score tfb_fpc + block_end <- cumsum(m_j) + block_start <- block_end - m_j + 1L + basis_label <- paste0(m_keep, " MFPCs") + components <- map(seq_len(d), function(j) { + rows <- block_start[j]:block_end[j] + # Psi_j = (1 / sqrt(w_j)) * phi_j %*% c^(j) [n_arg_j x M] + psi_j <- (specs[[j]]$efunctions %*% loadings[rows, , drop = FALSE]) / + sqrt(w[j]) + basis_matrix <- cbind(specs[[j]]$mu, psi_j) + new_tfb_fpc_shared( + basis_matrix = basis_matrix, + scores = scores_shared, + arg = specs[[j]]$arg, + domain = domain, + evalues = evalues, + ids = ids, + scoring_function = mfpc_component_scoring, + basis_label = basis_label + ) + }) + names(components) <- comp_names + + # 5. compact spec for joint re-scoring of new data + mfpc <- list( + weights = w, + weight_scheme = if (is.character(weights)) weights[1] else "user", + loadings = loadings, # M_+ x M + block_sizes = m_j, + evalues = evalues, + npc = m_keep, + comp_names = comp_names, + uni = map(specs, function(s) { + list( + efunctions = s$efunctions, + mu = s$mu, + arg = s$arg, + scoring_function = s$scoring_function, + evalues = s$evalues + ) + }) + ) + list(components = components, mfpc = mfpc) +} + +# univariate FPCA of one component, returning the full method spec plus `arg`. +mfpc_uni_fit <- function(comp, method, uni_pve, pcd) { + comp_df <- tf_2_df(comp, arg = tf_arg(comp)) + arg <- sort_unique(comp_df$arg) + margs <- get_args(c(list(pve = uni_pve), pcd), method) + spec <- do.call(method, c(list(data = comp_df, arg = arg), margs)) + if (is.null(spec$scores) || is.null(spec$efunctions) || is.null(spec$mu)) { + cli::cli_abort( + "FPCA {.arg method} must return {.field mu}, {.field efunctions} and {.field scores}." + ) + } + spec$scores <- as.matrix(spec$scores) + spec$efunctions <- as.matrix(spec$efunctions) + if ( + nrow(spec$efunctions) != length(arg) || + length(spec$mu) != length(arg) || + ncol(spec$scores) != ncol(spec$efunctions) || + nrow(spec$scores) != vec_size(comp) + ) { + cli::cli_abort( + "FPCA {.arg method} returned inconsistent dimensions for {.field mu} / {.field efunctions} / {.field scores}." + ) + } + spec$arg <- arg + spec +} + +# component weights, rescaled to sum to `d`. +mfpc_weights <- function(specs, weights, d, comp_names) { + if (is.numeric(weights)) { + assert_numeric( + weights, + len = d, + finite = TRUE, + any.missing = FALSE + ) + # weights enter as 1/sqrt(w_j) in the eigenfunctions, so zero/negative + # weights are not allowed (a zero would yield Inf basis functions). + if (any(weights <= 0)) { + cli::cli_abort("Numeric {.arg weights} must be strictly positive.") + } + w <- weights + } else { + weights <- match.arg(weights, c("inverse_variance", "snr", "equal")) + # per-component total signal variance from the univariate scores + total_var <- map_dbl(specs, function(s) { + sum(apply(s$scores, 2, var)) + }) + w <- switch( + weights, + equal = rep(1, d), + inverse_variance = { + if (any(total_var <= 0)) { + cli::cli_abort( + "Can't use {.val inverse_variance} weights: a component has zero variance." + ) + } + 1 / total_var + }, + snr = { + # signal-to-noise: total retained variance over discarded variance. + # `fpc_wsvd()` returns `error_variance` as the *cumulative* tail sums of + # the dropped eigenvalues, so its last element is the total discarded + # variance (the noise floor). + noise <- map_dbl(specs, function(s) { + ev <- s$error_variance + if (is.null(ev) || !length(ev)) NA_real_ else tail(ev, 1L) + }) + if (anyNA(noise) || any(noise <= 0)) { + cli::cli_abort(c( + "Can't compute {.val snr} weights.", + i = "The univariate fit left no discarded-variance tail (lower {.arg uni_pve}), or {.arg method} does not return {.field error_variance}." + )) + } + total_var / noise + } + ) + } + w <- w * d / sum(w) + names(w) <- comp_names + w +} + +# choose number of multivariate components from positive eigenvalues. +mfpc_choose_npc <- function(evalues, pve, npc) { + m_plus <- length(evalues) + if (!is.null(npc)) { + assert_count(npc, positive = TRUE) + if (npc > m_plus) { + cli::cli_warn( + "Requested {.arg npc} = {npc} exceeds available {m_plus} components; using {m_plus}." + ) + } + return(min(as.integer(npc), m_plus)) + } + assert_number(pve, lower = 0, upper = 1) + total <- sum(evalues) + if (total <= 0) { + return(1L) + } + # tolerance guards against pve = 1 rounding cumsum/total to just below 1 + # (which would make which() empty); fall back to all components. + keep <- which(cumsum(evalues) / total >= pve - 1e-12) + if (!length(keep)) m_plus else min(keep) +} + +# assemble a tfb_fpc from a precomputed (mean + eigenfunctions) basis and a +# score matrix -- mirrors the assembly in new_tfb_fpc() but takes the spec +# directly, so it can carry *shared* multivariate scores. +new_tfb_fpc_shared <- function( + basis_matrix, + scores, + arg, + domain, + evalues, + ids, + scoring_function, + basis_label +) { + fpc_basis <- tfd(t(basis_matrix), arg = arg, domain = domain) + fpc_constructor <- fpc_wrapper(fpc_basis) + coefs <- cbind(1, scores) + coef_list <- split(coefs, row(coefs)) + names(coef_list) <- ids + new_vctr( + coef_list, + domain = domain, + basis = fpc_constructor, + basis_label = basis_label, + basis_matrix = basis_matrix, + arg = arg, + score_variance = evalues, + scoring_function = scoring_function, + class = c("tfb_fpc", "tfb", "tf") + ) +} + + +# per-component scoring stub: scoring a *single* MFPC component is ill-defined +# (the eigenfunctions are orthonormal only in the joint weighted product), so +# direct use aborts with a pointer to the multivariate path. +mfpc_component_scoring <- function(...) { + cli::cli_abort(c( + "Can't score data onto a single component of a multivariate FPCA basis.", + i = "Project the full {.cls tfd_mv} with {.fn tf_rebase} (or {.fn vec_cast}) onto the {.fn tfb_mfpc} object instead." + )) +} + +# Demote a `tfb_mfpc` to a plain `tfb_mv` by stripping the joint spec. Used by +# Math/Ops and `$<-` interceptors that warn the user the joint MFPC +# representation is being dropped before continuing along the standard +# `tfb_mv` path. Each shared-score component carries a rank-deficient +# multivariate eigenfunction basis (`Psi_j` lives in an M-dim space but only +# spans `rank <= ncol(phi_j)` directions in the per-component grid), so simply +# swapping the abort-stub `scoring_function` for `.fpc_wsvd_scores` is not +# enough: downstream `tf_rebase()` would fit data via `qr.coef()` on a +# rank-deficient `Psi_j` and return `NA` scores. Instead, rebuild each +# component as a fresh `tfb_fpc` on the *univariate* basis (the full-rank +# orthonormal `phi^(j)` from the univariate FPCA stored in `mfpc$uni`), with +# the component's own univariate `scoring_function` (or `.fpc_wsvd_scores` as +# fallback), so Math/Ops dispatched on `tfb_fpc` can re-fit scores without +# exploding. +tfb_mfpc_demote <- function(x) { + mfpc <- attr(x, "mfpc") + attr(x, "mfpc") <- NULL + comps <- attr(x, "components") + if (is.null(mfpc) || is.null(mfpc$uni)) { + # nothing to rebuild against; leave components as-is. + return(x) + } + new_comps <- map2(comps, mfpc$uni, new_tfb_fpc_demoted) + names(new_comps) <- names(comps) + attr(x, "components") <- new_comps + x +} + +mfpc_reorder_spec <- function(spec, comp_names) { + if (is.null(spec) || identical(spec$comp_names, comp_names)) { + return(spec) + } + loc <- match(comp_names, spec$comp_names) + if (anyNA(loc)) { + cli::cli_abort("Cannot preserve MFPC spec for unknown component names.") + } + + block_sizes <- spec$block_sizes + block_end <- cumsum(block_sizes) + block_start <- block_end - block_sizes + 1L + row_idx <- unlist( + map(loc, \(i) seq.int(block_start[i], block_end[i])), + use.names = FALSE + ) + + spec$weights <- spec$weights[loc] + spec$block_sizes <- spec$block_sizes[loc] + spec$loadings <- spec$loadings[row_idx, , drop = FALSE] + spec$comp_names <- comp_names + spec$uni <- spec$uni[loc] + names(spec$uni) <- comp_names + spec +} + +# Rebuild a single shared-score MFPC component as a plain, full-rank `tfb_fpc` +# on the stored univariate eigenfunctions `phi^(j)`. The component's +# evaluations are reconstructed from the joint basis (`mu_j + Psi_j s^T`) and +# then re-scored onto `phi^(j)` using the univariate `scoring_function`. +new_tfb_fpc_demoted <- function(component, uni) { + arg <- uni$arg + mu_j <- uni$mu + phi_j <- uni$efunctions + scoring_function <- uni$scoring_function %||% .fpc_wsvd_scores + # reconstruct evaluations on the univariate grid from the joint basis. + joint_bm <- attr(component, "basis_matrix") # n_arg x (1 + M) + coefs_old <- do.call(rbind, unclass(component)) # n x (1 + M) + data_matrix <- coefs_old %*% t(joint_bm) # n x n_arg + quad_w <- trapezoid_weights(arg) + scores <- as.matrix(scoring_function(data_matrix, phi_j, mu_j, quad_w)) + basis_matrix <- unname(cbind(mu_j, phi_j)) + domain <- attr(component, "domain") + fpc_basis <- suppressMessages(tfd( + t(basis_matrix), + arg = arg, + domain = domain + )) + fpc_constructor <- fpc_wrapper(fpc_basis) + coefs <- cbind(1, scores) + coef_list <- split(coefs, row(coefs)) + names(coef_list) <- names(component) + basis_label <- paste0(ncol(phi_j), " FPCs") + new_vctr( + coef_list, + domain = domain, + basis = fpc_constructor, + basis_label = basis_label, + basis_matrix = basis_matrix, + arg = arg, + score_variance = uni$evalues %||% rep(NA_real_, ncol(phi_j)), + scoring_function = scoring_function, + class = c("tfb_fpc", "tfb", "tf") + ) +} + +# Rebuild a single shared-score MFPC component that is assigned as a +# standalone vector (e.g. the `value` in `mf2$x <- mf$x`), so it does not +# retain the abort-stub `scoring_function`. The matching univariate FPCA fit +# is identified by the component's joint basis matrix among the parent fit's +# original components `comps`; components from a foreign fit (no match) are +# returned unchanged. +mfpc_demote_component_value <- function(value, comps, uni) { + if ( + !is_tfb_fpc(value) || + !identical(attr(value, "scoring_function"), mfpc_component_scoring) + ) { + return(value) + } + match_k <- which(map_lgl( + comps, + \(co) identical(attr(co, "basis_matrix"), attr(value, "basis_matrix")) + )) + if (!length(match_k)) { + return(value) + } + new_tfb_fpc_demoted(value, uni[[match_k[1]]]) +} + +# Internal warning shown when an operation forces a `tfb_mfpc` back to a plain +# per-component `tfb_fpc` (`tfb_mv`) representation. Centralised so the message +# stays consistent across Math/Ops, `$<-` and `vec_c()`. Uses class +# `tf_mfpc_demotion` so callers can intercept via `withCallingHandlers()`. +warn_mfpc_demotion <- function(reason) { + cli::cli_warn( + c( + "Demoting to per-component {.cls tfb_fpc} representation; the joint MFPC spec is dropped.", + i = reason, + i = "Re-score with {.fn tf_rebase} for joint MFPC arithmetic." + ), + class = "tf_mfpc_demotion" + ) +} + +# Joint re-scoring of new data onto a fitted MFPC basis ------------------------ + +# `newdata`: a tf_mv (tfd_mv/tfb_mv) compatible with `mfpc_obj`. +mfpc_rescore <- function(newdata, mfpc_obj, arg = NULL) { + spec <- attr(mfpc_obj, "mfpc") + if (is.null(spec)) { + cli::cli_abort("{.arg mfpc_obj} is not a multivariate FPCA basis.") + } + if (!is.null(arg)) { + cli::cli_abort(c( + "Can't re-score onto a multivariate FPCA basis at a custom {.arg arg}.", + i = "The basis is tied to its estimation grid; omit {.arg arg}." + )) + } + check_compatible_mv(newdata, mfpc_obj) + if (!isTRUE(all.equal(tf_domain(newdata), tf_domain(mfpc_obj)))) { + cli::cli_abort( + "{.arg newdata} domain {.val {tf_domain(newdata)}} does not match the MFPCA basis domain {.val {tf_domain(mfpc_obj)}}." + ) + } + new_comps <- tf_components(newdata) + n_new <- vec_size(newdata) + ids <- names(newdata) %||% as.character(seq_len(n_new)) + + # univariate scores of the new data on each component's eigenfunctions + xi_new <- map2(new_comps, spec$uni, function(comp, u) { + if (is.null(u$scoring_function)) { + cli::cli_abort( + "The univariate FPCA {.arg method} did not provide a scoring function; can't re-score new data." + ) + } + comp_df <- tf_2_df(comp, arg = u$arg) + mat <- df_2_mat(comp_df) + # align columns to the stored eigenfunction grid + idx <- match(u$arg, attr(mat, "arg")) + if (anyNA(idx)) { + cli::cli_abort(c( + "New data does not cover the training argument grid of every component.", + i = "Re-scoring requires each new curve to be observable on the component's eigenfunction grid." + )) + } + mat <- mat[, idx, drop = FALSE] + quad_w <- trapezoid_weights(u$arg) + as.matrix(u$scoring_function(mat, u$efunctions, u$mu, quad_w)) + }) + xi_new <- do.call(cbind, xi_new) + sqrt_w_cols <- rep(sqrt(spec$weights), spec$block_sizes) + xi_w <- sweep(xi_new, 2, sqrt_w_cols, `*`) + scores_new <- xi_w %*% spec$loadings # n_new x M + + # rebuild each component with the stored Psi basis + the new shared scores + base_comps <- tf_components(mfpc_obj) + out <- map(seq_along(base_comps), function(j) { + b <- base_comps[[j]] + new_tfb_fpc_shared( + basis_matrix = attr(b, "basis_matrix"), + scores = scores_new, + arg = attr(b, "arg"), + domain = attr(b, "domain"), + evalues = attr(b, "score_variance"), + ids = ids, + scoring_function = mfpc_component_scoring, + basis_label = attr(b, "basis_label") + ) + }) + names(out) <- spec$comp_names + new_tf_mv(out, domain = tf_domain(mfpc_obj), mfpc = spec) +} + +# Predicate and accessors ------------------------------------------------------ + +#' @rdname tfb_mfpc +#' @param x a `tfb_mv` object, ideally one returned by [tfb_mfpc()]. +#' @returns `is_tfb_mfpc()`: a logical flag. +#' @export +is_tfb_mfpc <- function(x) is_tfb_mv(x) && !is.null(attr(x, "mfpc")) + +#' @rdname tfb_mfpc +#' @returns `tf_mfpc_scores()`: an `n x M` matrix of shared multivariate FPC +#' scores (rows = curves, columns = components). +#' @export +tf_mfpc_scores <- function(x) { + if (!is_tfb_mfpc(x)) { + cli::cli_abort( + "{.arg x} is not a multivariate FPCA basis (see {.fn tfb_mfpc})." + ) + } + coefs <- unclass(tf_component(x, 1L)) + scores <- do.call(rbind, lapply(coefs, function(co) co[-1L])) + rownames(scores) <- names(x) + colnames(scores) <- paste0("mfpc", seq_len(ncol(scores))) + scores +} + +#' @rdname tfb_mfpc +#' @returns `tf_mfpc_efunctions()`: a `tfd_mv` of length `M` holding the +#' multivariate eigenfunctions \eqn{\Psi_m} (one "curve" per component). +#' @export +tf_mfpc_efunctions <- function(x) { + if (!is_tfb_mfpc(x)) { + cli::cli_abort( + "{.arg x} is not a multivariate FPCA basis (see {.fn tfb_mfpc})." + ) + } + comps <- tf_components(x) + ef_comps <- map(comps, function(b) { + psi <- attr(b, "basis_matrix")[, -1L, drop = FALSE] + tfd(t(psi), arg = attr(b, "arg"), domain = attr(b, "domain")) + }) + names(ef_comps) <- attr(x, "comp_names") + ef <- new_tf_mv(ef_comps, domain = tf_domain(x)) + names(ef) <- paste0("mfpc", seq_len(attr(x, "mfpc")$npc)) + ef +} diff --git a/R/tfb-mv.R b/R/tfb-mv.R new file mode 100644 index 00000000..4d0f4faf --- /dev/null +++ b/R/tfb-mv.R @@ -0,0 +1,112 @@ +#' @include tfd-mv.R +NULL + +#' Vector-valued functional data in basis representation (`f: R -> R^d`) +#' +#' `tfb_mv` is the basis-representation analogue of [tfd_mv()]: it bundles `d` +#' univariate [tfb()] vectors (one per output dimension / component) into a +#' single vctrs vector of vector-valued functions \eqn{f: \mathbb{R} \to +#' \mathbb{R}^d}. Each component is fitted independently with the usual +#' univariate [tfb()] machinery (spline or FPC basis), so all of its arguments +#' (`k`, `bs`, `penalized`, `basis`, ...) apply per component. +#' +#' @param data a [tfd_mv()] / `tfb_mv` object, a (named) `list` of univariate +#' `tf` vectors, or anything [tfd_mv()] accepts (it is converted to `tfd_mv` +#' first and then each component is expanded into a basis). +#' @param basis spline (default) or fpc basis, see [tfb()]. +#' @param ... forwarded to the univariate [tfb()] constructor. +#' @returns a `tfb_mv` object. +#' @family tf_mv-class +#' @examples +#' traj <- tfd_mv(list(x = tf_rgp(5), y = tf_rgp(5))) +#' tb <- tfb_mv(traj, k = 7, verbose = FALSE) +#' tb +#' tf_ncomp(tb) +#' @rdname tfb_mv +#' @export +tfb_mv <- function(data, ...) UseMethod("tfb_mv") + +#' @rdname tfb_mv +#' @details +#' By default a single `...` is shared across all components (every component +#' gets the same `k`, `bs`, `sp`, etc.). To pass *different* basis arguments +#' to different components, give the argument as a list named by component +#' names -- e.g. `tfb_mv(f, k = list(x = 5, y = 12))` fits component `x` with +#' `k = 5` and component `y` with `k = 12`. Any list-valued `...` whose names +#' do **not** match the component names is treated as a shared argument value. +#' (Already-`tfb` components passed via `tfb_mv.list()` are kept as-is, which +#' is the most permissive way to mix entirely different basis kinds across +#' components.) +#' @export +tfb_mv.tf_mv <- function(data, basis = c("spline", "fpc"), ...) { + basis <- match.arg(basis) + dots <- list(...) + if (!tf_ncomp(data)) { + return(new_tf_mv(list(), domain = tf_domain(data), class = "tfb_mv")) + } + if (is_tfb_mv(data) && !length(dots)) { + return(data) + } + comp_names <- attr(data, "comp_names") + components <- map2(tf_components(data), comp_names, function(comp, nm) { + per_comp_dots <- distribute_dots(dots, nm, comp_names) + do.call(tfb, c(list(comp), list(basis = basis), per_comp_dots)) + }) + new_tf_mv(components, domain = tf_domain(data)) +} + +# Distribute a `...` collection across one component `nm`: a `...` argument that +# is a list named by *all* component names is treated as per-component (return +# its `nm` entry); any other argument is shared (returned as-is). Used by both +# tfb_mv() and tfb_mfpc() to allow e.g. `k = list(x = 5, y = 12)`. +distribute_dots <- function(dots, nm, comp_names) { + map(dots, function(arg) { + if ( + is.list(arg) && + !is.null(names(arg)) && + length(arg) == length(comp_names) && + !anyDuplicated(names(arg)) && + setequal(names(arg), comp_names) + ) { + arg[[nm]] + } else { + arg + } + }) +} + +#' @rdname tfb_mv +#' @export +tfb_mv.list <- function(data, basis = c("spline", "fpc"), ...) { + basis <- match.arg(basis) + if (!length(data)) { + return(new_tf_mv(list(), class = "tfb_mv")) + } + if (all(map_lgl(data, is_tf))) { + mv <- new_tf_mv(data) + if (all(map_lgl(data, is_tfb)) && !length(list(...))) { + return(mv) + } + return(tfb_mv(mv, basis = basis, ...)) + } + tfb_mv(tfd_mv(data, ...), basis = basis, ...) +} + +#' @rdname tfb_mv +#' @export +tfb_mv.default <- function(data, basis = c("spline", "fpc"), ...) { + if (missing(data) || vec_size(data) == 0) { + return(new_tf_mv(list(), class = "tfb_mv")) + } + tfb_mv(tfd_mv(data), basis = match.arg(basis), ...) +} + +#------------------------------------------------------------------------------ + +#' @rdname tfb_mv +#' @export +as.tfb_mv <- function(data, ...) UseMethod("as.tfb_mv") + +#' @rdname tfb_mv +#' @export +as.tfb_mv.default <- function(data, ...) tfb_mv(data, ...) diff --git a/R/tfb-spline.R b/R/tfb-spline.R index 4808c2c2..2d95c3a5 100644 --- a/R/tfb-spline.R +++ b/R/tfb-spline.R @@ -1,11 +1,11 @@ new_tfb_spline <- function( data, # data.frame with id, arg, value domain = NULL, - arg = NULL, penalized = TRUE, global = FALSE, verbose = FALSE, - ... + ..., + spec_override = NULL ) { if (vec_size(data) == 0) { ret <- new_vctr( @@ -38,16 +38,6 @@ new_tfb_spline <- function( data$id <- factor(data$id, unique(as.character(data$id))) dots <- list(...) - s_args <- dots[names(dots) %in% formalArgs(s)] - if (!has_name(s_args, "bs")) s_args$bs <- "cr" - if (s_args$bs == "ad") { - cli::cli_warn(c( - x = "Adaptive smooths with ({.code bs = 'ad'}) not implemented yet.", - i = "Return value uses {.code bs = 'cr'} instead." - )) - s_args$bs <- "cr" - } - if (!has_name(s_args, "k")) s_args$k <- min(25, nrow(arg_u)) gam_args <- dots[names(dots) %in% c(formalArgs(gam), formalArgs(bam))] if (!has_name(gam_args, "sp")) gam_args$sp <- -1 @@ -55,14 +45,38 @@ new_tfb_spline <- function( arg_list <- split(data$arg, data$id) regular <- all(duplicated(arg_list)[-1]) - s_call <- as.call(c(quote(s), quote(arg), s_args)) - s_spec <- eval(s_call) - spec_object <- smooth.construct( - s_spec, - data = data_frame0(arg = arg_u$x), - knots = NULL - ) - spec_object$call <- s_call + if (is.null(spec_override)) { + s_args <- dots[names(dots) %in% formalArgs(s)] + if (!has_name(s_args, "bs")) s_args$bs <- "cr" + if (s_args$bs == "ad") { + cli::cli_warn(c( + x = "Adaptive smooths with ({.code bs = 'ad'}) not implemented yet.", + i = "Return value uses {.code bs = 'cr'} instead." + )) + s_args$bs <- "cr" + } + if (!has_name(s_args, "k")) s_args$k <- min(25, nrow(arg_u)) + s_call <- as.call(c(quote(s), quote(arg), s_args)) + s_spec <- eval(s_call) + spec_object <- smooth.construct( + s_spec, + data = data_frame0(arg = arg_u$x), + knots = NULL + ) + spec_object$call <- s_call + } else { + # Reuse a pre-built mgcv spec (used by tf_rebase.tfd.tfb_spline to project + # onto an existing basis without re-constructing it). Skips mgcv's + # unique-args-vs-k check, which is exactly what we want when fewer + # unique args than knots are available and the penalty handles the rest. + spec_object <- spec_override + spec_object$X <- PredictMat( + spec_object, + data = data_frame0(arg = arg_u$x) + ) + s_args <- list(bs = spec_object$bs.dim, k = spec_object$bs.dim) + s_call <- spec_object$call %||% as.call(c(quote(s), quote(arg), s_args)) + } if (is.null(gam_args$family)) { gam_args$family <- stats::gaussian() @@ -545,7 +559,6 @@ tfb_spline.tfb <- function( new_tfb_spline( data, - arg = arg, domain = domain, penalized = penalized, global = global, @@ -584,9 +597,8 @@ tfb_spline.default <- function( "Input {.arg data} not from a recognized class; returning prototype of length 0." ) - data <- data_frame0() new_tfb_spline( - data, + numeric(0), domain = domain, penalized = penalized, global = global, diff --git a/R/tfd-class.R b/R/tfd-class.R index 8f71fa40..c736917c 100644 --- a/R/tfd-class.R +++ b/R/tfd-class.R @@ -36,14 +36,20 @@ new_tfd <- function( evaluator_f <- get(evaluator, mode = "function", envir = parent.frame()) } - if ( - vec_size(datalist) == 0 || allMissing(unlist(datalist, use.names = FALSE)) - ) { + # Truly empty input -> length-0 prototype. The sentinel domain + # `c(NA_real_, NA_real_)` matches the S4 prototype in R/tf-s4.R and avoids + # the `unique = TRUE` violation that `numeric(2)` (= c(0, 0)) would cause + # if a downstream caller ran `assert_arg` against it. "Truly empty" means + # either size 0 or every entry is a length-0 *numeric* vector (no NULLs -- + # NULL is the in-vector representation of an NA function and must survive). + truly_empty <- vec_size(datalist) == 0 || ( + !any(map_lgl(datalist, is.null)) && all(lengths(datalist) == 0L) + ) + if (truly_empty) { arg <- arg %||% list(numeric()) - domain <- domain %||% numeric(2) + domain <- domain %||% c(NA_real_, NA_real_) subclass <- if (regular) "tfd_reg" else "tfd_irreg" datalist <- list() - # message("empty or missing input `data`; returning prototype of length 0") ret <- new_vctr( datalist, arg = arg, @@ -55,9 +61,67 @@ new_tfd <- function( return(ret) } + # All-NA input with size > 0: produce a length-n vector of NA functions + # rather than silently collapsing to length 0. + if (allMissing(unlist(datalist, use.names = FALSE))) { + n <- vec_size(datalist) + arg_list <- if (is.null(arg)) { + list(seq_len(max(lengths(datalist), 1L))) + } else { + ensure_list(arg) + } + domain <- domain %||% range(unlist(arg_list, use.names = FALSE)) + if (!is.finite(domain[1]) || !is.finite(domain[2]) || domain[1] == domain[2]) { + domain <- c(NA_real_, NA_real_) + } + subclass <- if (regular) "tfd_reg" else "tfd_irreg" + # All entries are NA, so each becomes a NULL placeholder (tf's NA representation) + null_data <- vector("list", n) + names(null_data) <- names(datalist) + warn_na_entries_created(seq_len(n)) + arg_attr <- if (regular) list(arg_list[[1]]) else numeric(0) + ret <- new_vctr( + null_data, + arg = arg_attr, + domain = domain, + evaluator = evaluator_f, + evaluator_name = evaluator, + class = c(subclass, "tfd", "tf") + ) + return(ret) + } + assert_string(evaluator) assert_function(evaluator_f, args = c("x", "arg", "evaluations"), nargs = 3) + # arg/datalist length invariant: regular -> shared arg (list of length 1); + # irregular -> per-entry arg list (length 1 = shared, or length n). + # Every non-NA data entry must have length matching its arg vector. + arg_list <- ensure_list(arg) + if ( + length(arg_list) != 1 && length(arg_list) != length(datalist) + ) { + cli::cli_abort( + "Length of {.arg arg} list ({length(arg_list)}) must be 1 or match length of {.arg data} ({length(datalist)})." + ) + } + arg_lens <- lengths(arg_list) + data_lens <- lengths(datalist) + bad <- map_lgl(seq_along(datalist), \(i) { + x <- datalist[[i]] + if (is.null(x) || allMissing(x)) return(FALSE) + expected <- if (length(arg_list) == 1) arg_lens[1] else arg_lens[i] + length(x) != expected + }) + if (any(bad)) { + bad_idx <- which(bad) + cli::cli_abort(c( + "Lengths of {.arg arg} do not match lengths of {.arg data} entries.", + i = "Mismatched entries at indices: {.val {bad_idx}}.", + i = "Data lengths there: {.val {data_lens[bad]}}." + )) + } + # sort args and values by arg: arg_o <- map(arg, order) arg <- map2(arg, arg_o, \(x, y) x[y]) @@ -282,20 +346,38 @@ tfd.list <- function( where_na <- map(data, is.na) data <- map2(data, where_na, \(x, y) x[!y]) lens <- lengths(data) - empty <- lens != 0 - regular <- all(lens[!empty] == lens[!empty][1]) & - (is.numeric(arg) || all(duplicated(arg)[-1])) + nonempty <- lens != 0 + # arg-length probe: derive the shared arg-vector length (if any) we'd + # use in the regular path; if it doesn't match the (post-NA-strip) data + # entries, the data is in fact irregular and must take the per-entry path. + arg_shared_len <- if (is.numeric(arg)) { + length(arg) + } else if (is.list(arg) && length(arg) == 1) { + length(arg[[1]]) + } else { + NA_integer_ + } + arg_lens_ok <- is.na(arg_shared_len) || + all(lens[nonempty] == arg_shared_len) + regular <- all(lens[nonempty] == lens[nonempty][1]) & + (is.numeric(arg) || all(duplicated(arg)[-1])) & + arg_lens_ok # duplicated(NULL) == TRUE! if (!regular) { if (is.null(arg)) { cli::cli_abort("{.arg arg} cannot be NULL") } + # expand a shared arg-vector to a per-entry list before NA-stripping + if (is.numeric(arg) || (is.list(arg) && length(arg) == 1)) { + arg_vec <- if (is.numeric(arg)) arg else arg[[1]] + arg <- rep(list(arg_vec), length(data)) + } if (length(arg) != length(data)) { cli::cli_abort( "Length of {.arg arg} list does not match {.arg data} list." ) } - if (any(lengths(arg)[!empty] != lengths(where_na)[!empty])) { + if (any(lengths(arg)[nonempty] != lengths(where_na)[nonempty])) { cli::cli_abort( "Lengths of {.arg arg} vectors do not match lengths of {.arg data} list entries." ) diff --git a/R/tfd-mv.R b/R/tfd-mv.R new file mode 100644 index 00000000..fd1c513d --- /dev/null +++ b/R/tfd-mv.R @@ -0,0 +1,349 @@ +#' @include tfd-class.R tfb-class.R +NULL + +# Low-level constructor shared by tfd_mv and tfb_mv ---------------------------- + +# `components`: a (named) list of `d` *univariate* tf vectors, all of the same +# length `n`, the same kind (all tfd or all tfb) and the same domain. The +# multivariate object is a vctrs vector of length `n` (the number of curves) +# that simply bundles these `d` component-functions; (almost) all methods +# delegate to the univariate machinery by mapping over the components. See +# `design/multivariate.md` for the rationale. +new_tf_mv <- function( + components = list(), + domain = NULL, + class = NULL, + check_curve_names = TRUE, + mfpc = NULL +) { + assert_list(components) + if (length(components)) { + if (!all(map_lgl(components, is_tf))) { + cli::cli_abort("All components must be {.cls tf} objects.") + } + all_tfd <- all(map_lgl(components, is_tfd)) + all_tfb <- all(map_lgl(components, is_tfb)) + if (!(all_tfd || all_tfb)) { + cli::cli_abort( + "All components must be the same kind: either all {.cls tfd} or all {.cls tfb}." + ) + } + lens <- map_int(components, vec_size) + if (length(unique(lens)) > 1L) { + cli::cli_abort( + "All components must have the same length, but lengths are {.val {lens}}." + ) + } + domains <- map(components, tf_domain) + if (is.null(domain)) { + # union of component domains: a tf_mv lives on a single time axis, but + # individual components may have been observed only on a subset of it. + lows <- map_dbl(domains, 1) + highs <- map_dbl(domains, 2) + domain <- c(min(lows), max(highs)) + } else { + assert_numeric(domain, len = 2, finite = TRUE, sorted = TRUE) + # supplied domain must contain every component's domain + for (cd in domains) { + if (cd[1] < domain[1] || cd[2] > domain[2]) { + cli::cli_abort( + "Component domain {.val {cd}} not contained in supplied {.arg domain} {.val {domain}}." + ) + } + } + } + # widen each component's domain to the shared mv domain so all components + # agree on the time axis. (`tf_domain<-` warns about changing the domain; + # in this context the widening is intended, so we silence it.) + components <- map(components, function(comp) { + if (!isTRUE(all.equal(tf_domain(comp), domain))) { + suppressWarnings(tf_domain(comp) <- domain) + } + comp + }) + curve_names <- map(components, names) + has_curve_names <- map_lgl(curve_names, Negate(is.null)) + if (any(has_curve_names) && !all(has_curve_names)) { + cli::cli_abort( + "All components must either be unnamed or have identical curve names." + ) + } + if (all(has_curve_names)) { + first_names <- curve_names[[1]] + if (!all(map_lgl(curve_names[-1], identical, y = first_names))) { + if (check_curve_names) { + cli::cli_abort("All components must have identical curve names.") + } + curve_names <- NULL + } else { + curve_names <- first_names + } + } else { + curve_names <- NULL + } + subclass <- if (all_tfb) "tfb_mv" else "tfd_mv" + if (!is.null(class) && !identical(class, subclass)) { + cli::cli_abort( + "Components are {.cls {subclass}} but {.arg class} is {.val {class}}." + ) + } + n <- lens[1] + if (is.null(names(components))) { + names(components) <- paste0("v", seq_along(components)) + } + names(components) <- vec_as_names(names(components), repair = "unique") + # "arg" is the grid column in evaluation data.frames (`[.tf_mv` with + # matrix = FALSE, tf_evaluations(), tf_where()); a component of that name + # would silently overwrite it there. + if ("arg" %in% names(components)) { + cli::cli_abort( + "{.val arg} is reserved and cannot be used as a component name." + ) + } + } else { + domain <- domain %||% numeric(2) + subclass <- class %||% "tfd_mv" + n <- 0L + curve_names <- NULL + } + data <- seq_len(n) + names(data) <- curve_names + new_vctr( + data, + components = components, + comp_names = names(components), + domain = domain, + mfpc = mfpc, + class = c(subclass, "tf_mv", "tf") + ) +} + +# normalize the `evaluator`/`basis` argument forwarding via rlang injection so +# the univariate constructors' NSE-capture (`as_name(enexpr(evaluator))`) sees +# the original expression rather than the local variable. +build_components <- function(data, constructor, arg, domain, dots, extra) { + # `data` is a list whose elements are matrices/data.frames/numerics + nms <- names(data) %||% paste0("v", seq_along(data)) + components <- map(data, function(d) { + rlang::inject( + constructor(d, arg = arg, domain = domain, !!!extra, !!!dots) + ) + }) + setNames(components, nms) +} + +#------------------------------------------------------------------------------ + +#' Constructors for vector-valued functional data (`f: R -> R^d`) +#' +#' `tfd_mv` represents *vector-valued* functional data -- vectors of functions +#' \eqn{f: \mathcal{T} \subset \mathbb{R} \to \mathbb{R}^d}, such as movement +#' trajectories \eqn{(x(t), y(t))} or other multivariate-output curves. +#' +#' A `tfd_mv` object of length `n` bundles `d` *univariate* [tfd()] vectors +#' (one per output dimension / component), each of length `n`. All numeric work +#' (evaluation, arithmetic, smoothing, ...) is delegated to these components, so +#' regular and irregular sampling, the choice of `evaluator`, etc. all behave +#' exactly as in the univariate case -- and components may even live on +#' different argument grids. Use [tfb_mv()] for a basis representation. +#' +#' @section Inheritance contract: +#' `tf_mv` classes inherit from `"tf"` *only* for the purpose of `tf_domain()`, +#' type predicates (`is_tf()`, `is_tf_mv()`, ...) and S4 generic reuse. +#' **Behaviour** on `tf_mv` comes *only* from explicitly registered `.tf_mv` +#' methods: any generic without one aborts with a classed +#' `tf_mv_method_unimplemented` condition. The earlier promise of automatic +#' "right thing component-wise" dispatch via inheritance was incorrect -- +#' silent fall-through produced wrong-shape results or deep internal errors, +#' so it has been replaced with fail-fast stubs. The stubbed (i.e., *not* +#' implemented) verbs are listed in [tf_mv_unimplemented]; design of real +#' component-wise semantics is tracked at +#' . When you need to *distinguish* +#' univariate-only from any-`tf` inside a helper, use [is_tf_1d()]: it returns +#' `TRUE` for `tfd` / `tfb` and `FALSE` for `tfd_mv` / `tfb_mv`. +#' +#' @param data one of: a (named) `list` of univariate `tf` vectors (used +#' directly, one per component); a (named) `list` of numeric matrices / +#' data.frames (one *per component*, each turned into a [tfd()]); a 3-d +#' numeric `array` with dimensions `[curve, arg, component]`; or a long +#' `data.frame` with an `id` column, an `arg` column and one or more `value` +#' columns (one component per `value` column). +#' @param arg evaluation grid, see [tfd()]. +#' @param domain range of `arg`, see [tfd()]. +#' @param evaluator inter-/extrapolation function, see [tfd()]. +#' @param ... forwarded to the univariate [tfd()] constructor. +#' @returns a `tfd_mv` object (a vctrs vector of length `n`). +#' @seealso [tfb_mv()] for basis representation; [tf_components()], +#' [tf_ncomp()] and the `$` operator to access components. +#' @family tf_mv-class +#' @examples +#' # (a) from a (named) list of univariate tfd vectors -- one per component: +#' traj <- tfd_mv(list(x = tf_rgp(3), y = tf_rgp(3))) +#' traj +#' tf_ncomp(traj) +#' traj$x +#' +#' # (b) from a list of matrices (one [curve, arg] matrix per component): +#' t <- seq(0, 1, length.out = 50) +#' mx <- matrix(sin(2 * pi * outer(1:3, t)), nrow = 3) +#' my <- matrix(cos(2 * pi * outer(1:3, t)), nrow = 3) +#' tfd_mv(list(x = mx, y = my), arg = t) +#' +#' # (c) from a 3-d array with dimensions [curve, arg, component]: +#' arr <- array(c(mx, my), dim = c(3, 50, 2), +#' dimnames = list(NULL, NULL, c("x", "y"))) +#' tfd_mv(arr, arg = t) +#' +#' # (d) from a long data.frame (id, arg, one value column per component): +#' df <- data.frame( +#' id = rep(1:3, each = 50), +#' arg = rep(t, times = 3), +#' x = as.vector(t(mx)), +#' y = as.vector(t(my)) +#' ) +#' tfd_mv(df, id = "id", arg = "arg", value = c("x", "y")) +#' @rdname tfd_mv +#' @export +tfd_mv <- function(data, ...) UseMethod("tfd_mv") + +#' @rdname tfd_mv +#' @export +tfd_mv.list <- function( + data, + arg = NULL, + domain = NULL, + evaluator = tf_approx_linear, + ... +) { + if (!length(data)) { + return(new_tf_mv(list(), domain = domain, class = "tfd_mv")) + } + if (all(map_lgl(data, is_tf))) { + return(new_tf_mv(data, domain = domain)) + } + evaluator <- enexpr(evaluator) + components <- build_components( + data, + constructor = tfd, + arg = arg, + domain = domain, + dots = list(...), + extra = list(evaluator = evaluator) + ) + new_tf_mv(components, domain = domain) +} + +#' @rdname tfd_mv +#' @export +tfd_mv.array <- function( + data, + arg = NULL, + domain = NULL, + evaluator = tf_approx_linear, + ... +) { + d <- dim(data) + if (length(d) != 3) { + cli::cli_abort( + "{.arg data} array must be 3-dimensional ([curve, arg, component])." + ) + } + comp_names <- dimnames(data)[[3]] %||% paste0("v", seq_len(d[3])) + slices <- map(seq_len(d[3]), \(k) data[,, k, drop = TRUE]) |> + setNames(comp_names) + evaluator <- enexpr(evaluator) + components <- build_components( + slices, + constructor = tfd, + arg = arg, + domain = domain, + dots = list(...), + extra = list(evaluator = evaluator) + ) + new_tf_mv(components, domain = domain) +} + +#' @rdname tfd_mv +#' @param id,value for the `data.frame` method: the column defining function +#' `id`, the column defining the `arg` grid, and the (possibly several) +#' columns containing component evaluations (one component per `value` +#' column). +#' @export +tfd_mv.data.frame <- function( + data, + id = 1, + arg = 2, + value = 3, + domain = NULL, + evaluator = tf_approx_linear, + ... +) { + evaluator <- enexpr(evaluator) + value_names <- if (is.character(value)) value else names(data)[value] + components <- map(value, function(v) { + rlang::inject( + tfd( + data[, c( + if (is.character(id)) id else names(data)[id], + if (is.character(arg)) arg else names(data)[arg], + if (is.character(v)) v else names(data)[v] + )], + domain = domain, + evaluator = !!evaluator, + ... + ) + ) + }) |> + setNames(value_names) + new_tf_mv(components, domain = domain) +} + +#' @rdname tfd_mv +#' @export +tfd_mv.tf_mv <- function( + data, + arg = NULL, + domain = NULL, + evaluator = NULL, + ... +) { + evaluator <- enexpr(evaluator) + components <- map(tf_components(data), function(comp) { + rlang::inject(tfd( + comp, + arg = arg, + domain = domain, + evaluator = !!evaluator, + ... + )) + }) + new_tf_mv(components, domain = domain) +} + +#' @rdname tfd_mv +#' @export +tfd_mv.default <- function(data, arg = NULL, domain = NULL, ...) { + if (!missing(data)) { + cli::cli_warn( + "Input {.arg data} not a recognized class; returning prototype of length 0." + ) + } + new_tf_mv(list(), domain = domain, class = "tfd_mv") +} + +#------------------------------------------------------------------------------ + +#' @rdname tfd_mv +#' @export +as.tfd_mv <- function(data, ...) UseMethod("as.tfd_mv") + +#' @rdname tfd_mv +#' @export +as.tfd_mv.default <- function(data, ...) tfd_mv(data, ...) + +#' @rdname tfd_mv +#' @export +as.tfd_mv.tf_mv <- function(data, ...) { + components <- map(tf_components(data), \(comp) as.tfd(comp, ...)) + new_tf_mv(components, domain = tf_domain(data)) +} diff --git a/R/utils.R b/R/utils.R index 7b928359..99573b44 100644 --- a/R/utils.R +++ b/R/utils.R @@ -35,6 +35,13 @@ find_arg <- function(data, arg) { list(arg) } +# A length-1 NA-valued tf with the same attributes / domain / arg as `x[i]`. +# Uses the `1 * NA * ` arithmetic dispatch so attributes propagate correctly +# for both tfd and tfb. +tf_na_like <- function(x, i = 1L) { + 1 * NA * x[i] +} + domains_overlap <- function(x, y) { dom_x <- tf_domain(x) dom_y <- tf_domain(y) @@ -190,37 +197,44 @@ get_args <- function(args, f) { args[names(args) %in% formalArgs(f)] } -#' Turns any object into a list +#' Wrap a non-list object in a list +#' +#' Returns `x` unchanged if it is already a list, otherwise wraps it in +#' a one-element list. Used internally to normalize `arg` inputs that may +#' be either a single numeric vector or a list of per-curve vectors. #' -#' See above. #' @param x any input. -#' @returns `x` turned into a list. +#' @returns `x` if it is a list, otherwise `list(x)`. #' @examples #' ensure_list(1:3) -#' ensure_list(list(1, 2)) +#' ensure_list(list(1:3, 4:6)) +#' @family tidyfun utility functions #' @export -#' @family tidyfun developer tools ensure_list <- function(x) { if (is.list(x)) x else list(x) } #' Make syntactically valid unique names #' -#' See above. -#' @param x any input. -#' @returns `x` turned into a list. +#' Coerces `x` to character and returns syntactically valid, unique +#' identifiers. Empty strings are replaced with `"NA"` before +#' deduplication. If `x` already has no duplicates it is returned +#' unchanged. +#' +#' @param x any input that can be coerced to character. +#' @returns A character vector of unique, syntactically valid names of +#' the same length as `x`. #' @examples -#' unique_id(c("a", "b", "a")) +#' unique_id(c("a", "a", "b")) +#' unique_id(c(1, 1, 2)) +#' @family tidyfun utility functions #' @export -#' @family tidyfun developer tools unique_id <- function(x) { if (anyDuplicated(x) == 0) { return(x) } - if (is.character(x)) { - x <- sub("^$", "NA", x) - } - x <- make.names(as.character(x), unique = TRUE) + x <- sub("^$", "NA", as.character(x)) + x <- make.names(x, unique = TRUE) x } diff --git a/R/vctrs-cast.R b/R/vctrs-cast.R index 1aad3aa7..e2a37ee2 100644 --- a/R/vctrs-cast.R +++ b/R/vctrs-cast.R @@ -110,9 +110,13 @@ vec_cast_tfb_tfb <- function(x, to, ...) { attr(x, "basis_matrix"), check.attributes = FALSE )) - if (same_basis) return(x) + # cast invariant: result attributes (including `arg`) must match `to`. The + # basis-matrix check above does not catch cases where x and to share a basis + # spec but were stored on different arg grids -- only short-circuit when arg + # also matches; otherwise fall through to tf_rebase. (#269 copilot comment) + if (same_basis && identical(tf_arg(x), tf_arg(to))) return(x) maybe_lossy_cast( - tf_rebase(x, to, arg = tf_arg(x)), + tf_rebase(x, to, arg = tf_arg(to)), x, to, lossy = TRUE, diff --git a/R/vctrs-mv.R b/R/vctrs-mv.R new file mode 100644 index 00000000..21fca40a --- /dev/null +++ b/R/vctrs-mv.R @@ -0,0 +1,158 @@ +#' @include tfd-mv.R tfb-mv.R +NULL + +# vctrs integration for vector-valued functional data -------------------------- +# +# The proxy of a `tf_mv` is a data.frame with `n` rows and `d` columns, one +# per component (each column is itself a univariate `tf` vector). This is the +# only piece of genuinely new vctrs machinery: it makes `vec_slice()`, +# `vec_c()`, casting and tibble-column behaviour all fall out of the existing +# *univariate* vctrs methods applied column-/component-wise. `vec_restore()` +# rebuilds the multivariate wrapper from the (sliced / concatenated) proxy. + +#' @export +vec_proxy.tf_mv <- function(x, ...) { + components <- attr(x, "components") + # NB: must NOT call vec_size(x) here -- that would recurse through vec_proxy. + if (!length(components)) { + return(vctrs::new_data_frame(n = length(unclass(x)))) + } + vctrs::new_data_frame(unclass(components), n = vec_size(components[[1]])) +} + +#' @export +vec_restore.tf_mv <- function(x, to, ...) { + components <- as.list(x) + # An MFPC fit carries a curve-independent joint spec. Slicing reuses the + # original object as `to`, so the spec (and the ability to re-score) is + # preserved. Concatenation (`vec_c`/`c`) builds `to` via `tf_mv_ptype2()`, + # which forwards an `identical()`-matching spec (same fit, e.g. `c(mf[1:4], + # mf[5:8])`) and otherwise demotes with a warning -- so reading the spec off + # `to` here is correct for both paths. + mfpc <- attr(to, "mfpc") + if (!length(components)) { + return(new_tf_mv( + list(), + domain = attr(to, "domain"), + class = class(to)[1], + mfpc = mfpc + )) + } + new_tf_mv(components, check_curve_names = FALSE, mfpc = mfpc) +} + +#------------------------------------------------------------------------------- + +check_compatible_mv <- function(x, y) { + if (tf_ncomp(x) != tf_ncomp(y)) { + stop_incompatible_type( + x, + y, + x_arg = "", + y_arg = "", + details = "different number of components" + ) + } + if (!identical(attr(x, "comp_names"), attr(y, "comp_names"))) { + stop_incompatible_type( + x, + y, + x_arg = "", + y_arg = "", + details = "different component names" + ) + } + invisible(TRUE) +} + +tf_mv_ptype2 <- function(x, y, ...) { + check_compatible_mv(x, y) + comps <- map2(tf_components(x), tf_components(y), \(a, b) vec_ptype2(a, b)) + names(comps) <- attr(x, "comp_names") + # Carry the joint MFPC spec through `vec_c()` when *all* inputs are the same + # fit, i.e. carry an `identical()` `mfpc` attribute. `vec_ptype2()` is called + # pairwise so an identical match here -- combined with the matching pairwise + # checks performed by `vec_c()` -- implies all inputs share the spec. + # The result of `vec_ptype2` becomes `to` in `vec_restore.tf_mv`, where the + # spec is then re-stamped onto the concatenation. When specs differ (or one + # side is a plain `tfb_mv` / `tfd_mv` with no spec) we warn and demote -- the + # downgrade matters because a `dplyr::bind_rows` round-trip otherwise + # silently strips the spec. + mfpc_x <- attr(x, "mfpc") + mfpc_y <- attr(y, "mfpc") + proto <- new_tf_mv(comps) + same_spec <- identical(mfpc_x, mfpc_y) && !is.null(mfpc_x) + if (same_spec && is_tfb_mv(proto)) { + attr(proto, "mfpc") <- mfpc_x + } else if (!is.null(mfpc_x) || !is.null(mfpc_y)) { + # warn only when at least one side actually had a spec to lose. The proto + # may have demoted further to `tfd_mv` (when component bases differ) -- the + # extra demote_warning is still useful: the per-component + # `vec_ptype2.tfb_fpc.tfb_fpc` cast-warning does not mention MFPC. + warn_mfpc_demotion( + "Combining MFPC fits with different (or missing) joint specs." + ) + } + proto +} + +tf_mv_cast <- function(x, to, ...) { + # casting *onto* a multivariate FPCA basis means jointly re-scoring the new + # data (per-component casting would give wrong, component-local scores). + # Reconstruction (`to` a plain tfd_mv) keeps the component-wise path below. + if (is_tfb_mfpc(to)) { + return(mfpc_rescore(x, to)) + } + check_compatible_mv(x, to) + comps <- map2(tf_components(x), tf_components(to), \(a, b) vec_cast(a, b)) + names(comps) <- attr(x, "comp_names") + new_tf_mv(comps) +} + +#' @rdname vctrs +#' @name vctrs +#' @export +vec_ptype2.tfd_mv.tfd_mv <- function(x, y, ...) tf_mv_ptype2(x, y) +#' @rdname vctrs +#' @export +vec_ptype2.tfb_mv.tfb_mv <- function(x, y, ...) tf_mv_ptype2(x, y) +# mixing a tfd_mv with a tfb_mv combines component-wise via the univariate +# vec_ptype2(tfd, tfb), which resolves to tfd -- so the common type of a +# tfd_mv and a tfb_mv is a tfd_mv (same demotion as in the univariate case). +#' @rdname vctrs +#' @export +vec_ptype2.tfd_mv.tfb_mv <- function(x, y, ...) tf_mv_ptype2(x, y) +#' @rdname vctrs +#' @export +vec_ptype2.tfb_mv.tfd_mv <- function(x, y, ...) tf_mv_ptype2(x, y) + +#' @rdname vctrs +#' @export +vec_cast.tfd_mv.tfd_mv <- function(x, to, ...) tf_mv_cast(x, to) +#' @rdname vctrs +#' @export +vec_cast.tfb_mv.tfb_mv <- function(x, to, ...) tf_mv_cast(x, to) +#' @rdname vctrs +#' @export +vec_cast.tfd_mv.tfb_mv <- function(x, to, ...) tf_mv_cast(x, to) +#' @rdname vctrs +#' @export +vec_cast.tfb_mv.tfd_mv <- function(x, to, ...) tf_mv_cast(x, to) + +#------------------------------------------------------------------------------- + +#' @export +vec_ptype_abbr.tfd_mv <- function(x, ...) "tfd_mv" + +#' @export +vec_ptype_abbr.tfb_mv <- function(x, ...) "tfb_mv" + +#' @export +vec_ptype_full.tfd_mv <- function(x, ...) { + paste0("tfd_mv") +} + +#' @export +vec_ptype_full.tfb_mv <- function(x, ...) { + paste0("tfb_mv") +} diff --git a/R/where.R b/R/where.R index b7127c9c..02134148 100644 --- a/R/where.R +++ b/R/where.R @@ -12,6 +12,16 @@ #' of the usual `dplyr` tricks are available as well, see examples.\cr #' Any `cond`ition evaluates to `NA` on `NA`-entries in `f`. #' +#' For vector-valued (`tf_mv`) input, the `data.frame` on which `cond` is +#' evaluated has one column per component, named like the components, instead +#' of a single `value` column: conditions are *joint* conditions across +#' components, e.g. `tf_where(f, x > 0 & y < 1)` or +#' `tf_where(f, sqrt(x^2 + y^2) > 1)` for components `"x"` and `"y"`. To apply +#' a condition to a single component, extract it first: +#' `tf_where(f[, component = "x"], value > 0)`. All components have to be +#' observed on a common grid -- use [tf_interpolate()] to align them first, or +#' supply `arg` explicitly. +#' #' @param f a `tf` object. #' @param cond a logical expression about `value` (and/or `arg`) that defines a #' condition about the functions, see examples and details. @@ -57,6 +67,12 @@ #' tf_where(f, arg > 0.5 & value > 0) #' # does the function ever exceed? #' tf_anywhere(f, value > 1) +#' +#' # vector-valued input: conditions refer to components by name +#' fm <- tfd_mv(list(x = tf_rgp(3, 11L), y = tf_rgp(3, 11L))) +#' tf_where(fm, x > 0 & y < 0) +#' tf_where(fm, sqrt(x^2 + y^2) > 1, "first") +#' tf_anywhere(fm, x > y) #' @family tidyfun query-functions #' @export tf_where <- function( @@ -64,13 +80,99 @@ tf_where <- function( cond, return = c("all", "first", "last", "range", "any"), arg = tf_arg(f) +) { + UseMethod("tf_where") +} + +#' @export +tf_where.default <- function( + f, + cond, + return = c("all", "first", "last", "range", "any"), + arg = tf_arg(f) ) { assert_arg(arg, f) + return <- match.arg(return) + tf_where_impl(f, enquo(cond), return, arg) +} + +#' @export +tf_where.tf_mv <- function( + f, + cond, + return = c("all", "first", "last", "range", "any"), + arg = tf_arg(f) +) { return <- match.arg(return) cond_quo <- enquo(cond) + comp_names <- attr(f, "comp_names") + if ( + "value" %in% all.vars(quo_get_expr(cond_quo)) && !("value" %in% comp_names) + ) { + cli::cli_abort( + c( + "{.var value} is undefined for vector-valued {.cls tf_mv} input.", + i = "Refer to the components by name instead: {.val {comp_names}}.", + i = paste( + "If you meant a variable {.var value} from the calling environment,", + "inject it with {.code !!value}." + ) + ), + class = "tf_mv_where_value", + call = NULL + ) + } + if (missing(arg) && !mv_args_shared(f)) { + cli::cli_abort( + c( + "{.fn tf_where} needs all components of {.arg f} on a common grid.", + i = paste( + "Use {.fn tf_interpolate} to align the components first,", + "or supply a single numeric {.arg arg} vector explicitly." + ) + ), + class = "tf_mv_incommensurate_args", + call = NULL + ) + } + if (!missing(arg) && is.list(arg) && identical(names(arg), comp_names)) { + # tf_arg(f) returns such a per-component list exactly when the component + # grids differ; the evaluation machinery would misread it as per-curve + # grids and silently evaluate each curve on a different component's grid. + cli::cli_abort( + c( + paste( + "{.arg arg} is a per-component list, as returned by", + "{.code tf_arg(f)} for components on different grids." + ), + i = paste( + "Supply a single numeric vector of evaluation points instead,", + "or a per-curve list of such vectors." + ) + ), + class = "tf_mv_incommensurate_args", + call = NULL + ) + } + assert_arg(arg, f) + tf_where_impl(f, cond_quo, return, arg) +} + +tf_where_impl <- function(f, cond_quo, return, arg) { + # no subset(): subset.data.frame() evaluates its condition *inside* the + # data.frame, so for tf_mv input a component named like any symbol used + # here (e.g. "x") would capture it. Logical indexing with NAs set to FALSE + # reproduces subset()'s semantics, including recycling of short conditions + # (e.g. a scalar `mean(value) > 0` applies to all args). where_at <- map( f[, arg, matrix = FALSE], - \(x) subset(x, eval_tidy(cond_quo, x))[["arg"]] + \(d) { + keep <- eval_tidy(cond_quo, d) + if (!is.logical(keep)) { + cli::cli_abort("{.arg cond} must evaluate to a logical vector.") + } + d[["arg"]][keep & !is.na(keep)] + } ) where_at[is.na(f)] <- NA @@ -80,6 +182,9 @@ tf_where <- function( where_at[lengths(where_at) == 0] <- NA if (return == "range") { + if (!length(where_at)) { + return(data.frame(begin = numeric(0), end = numeric(0))) + } where_at <- map(where_at, range) where_at <- do.call(rbind, where_at) |> as.data.frame() |> @@ -98,5 +203,21 @@ tf_where <- function( #' @rdname tf_where #' @export tf_anywhere <- function(f, cond, arg = tf_arg(f)) { + UseMethod("tf_anywhere") +} + +#' @export +tf_anywhere.default <- function(f, cond, arg = tf_arg(f)) { tf_where(f = f, cond = {{ cond }}, return = "any", arg = arg) } + +#' @export +tf_anywhere.tf_mv <- function(f, cond, arg = tf_arg(f)) { + # forward arg only if supplied, so tf_where.tf_mv can tell apart a + # deliberate grid choice from the default (which requires shared grids) + if (missing(arg)) { + tf_where(f = f, cond = {{ cond }}, return = "any") + } else { + tf_where(f = f, cond = {{ cond }}, return = "any", arg = arg) + } +} diff --git a/R/zzz.R b/R/zzz.R index 74aade14..3db6cf97 100644 --- a/R/zzz.R +++ b/R/zzz.R @@ -3,6 +3,7 @@ # nocov start .onLoad <- function(libname, pkgname) { vctrs::s3_register("pillar::format_glimpse", "tf") + vctrs::s3_register("pillar::format_glimpse", "tf_mv") invisible() } diff --git a/README.Rmd b/README.Rmd index 575ee469..d5ddac0f 100755 --- a/README.Rmd +++ b/README.Rmd @@ -23,7 +23,7 @@ knitr::opts_chunk$set( [![R-universe version](https://tidyfun.r-universe.dev/tf/badges/version)](https://tidyfun.r-universe.dev/tf) -The **`tf`** package provides necessary infrastructure for [**`tidyfun`**](https://tidyfun.github.io/tidyfun/) with minimal dependencies -- specifically: no `tidyverse`-dependencies. +The **`tf`** package provides necessary infrastructure for [**`tidyfun`**](https://tidyfun.github.io/tidyfun/) with minimal dependencies -- `vctrs`/`rlang`/`cli`/`purrr` from the r-lib/tidyverse ecosystem, otherwise just `mgcv`, `zoo`, and `checkmate`. The goal of **`tidyfun`**, in turn, is to provide accessible and well-documented software that **makes functional data analysis in `R` easy** -- specifically data wrangling and exploratory analysis. @@ -82,7 +82,18 @@ are defined for the vector classes defined in **`tf`** ([more](https://tidyfun.g The `tf` objects are just glorified lists, so they work well as columns in data frames. That makes it a lot easier to keep your other data and functional measurements together in one object for preprocessing, exploratory analysis and description. At the same time, these objects actually behave like vectors of *functions* to some extent, i.e., they can be evaluated on any point in their domain, they can be integrated or differentiated, etc. -[See here](https://tidyfun.github.io/tidyfun/articles/x01_tf_Vectors.html) for more information on the operations defined for `tf` vectors. +[See here](https://tidyfun.github.io/tidyfun/articles/x01_tf_Vectors.html) for more information on the operations defined for `tf` vectors. + +#### Vector-valued (multivariate) functional data + +`tf` also provides `tfd_mv()` and `tfb_mv()` for vector-valued functional data -- i.e., functions whose codomain is `R^d`, sharing a single domain across components. These behave like the univariate `tf` classes (subsetting, arithmetic, plotting), with component-wise accessors and the geometry verbs `tf_norm()`, `tf_inner()`, `tf_tangent()`, `tf_arclength()`: + +```{r, eval = FALSE} +fm <- tfd_mv(list(x = tf_rgp(5), y = tf_rgp(5))) +plot(fm, type = "facet") +``` + +Multivariate FPCA is available via `tfb_mfpc()` (Happ & Greven, 2018), and `tf_register()` supports joint multivariate alignment (`method = "srvf_mv"`) as well as elastic shape registration (`tf_register_shape()`). Note that `is_tf()` is `TRUE` for `tf_mv` as well; use the new `is_tf_1d()` to test specifically for univariate `tf`. #### Methods for converting existing data to `tf` and back diff --git a/README.md b/README.md index cee0ae13..f20159a5 100644 --- a/README.md +++ b/README.md @@ -17,7 +17,8 @@ version](https://tidyfun.r-universe.dev/tf/badges/version)](https://tidyfun.r-un The **`tf`** package provides necessary infrastructure for [**`tidyfun`**](https://tidyfun.github.io/tidyfun/) with minimal -dependencies – specifically: no `tidyverse`-dependencies. +dependencies – `vctrs`/`rlang`/`cli`/`purrr` from the r-lib/tidyverse +ecosystem, otherwise just `mgcv`, `zoo`, and `checkmate`. The goal of **`tidyfun`**, in turn, is to provide accessible and well-documented software that **makes functional data analysis in `R` @@ -105,6 +106,26 @@ differentiated, etc. here](https://tidyfun.github.io/tidyfun/articles/x01_tf_Vectors.html) for more information on the operations defined for `tf` vectors. +#### Vector-valued (multivariate) functional data + +`tf` also provides `tfd_mv()` and `tfb_mv()` for vector-valued +functional data – i.e., functions whose codomain is `R^d`, sharing a +single domain across components. These behave like the univariate `tf` +classes (subsetting, arithmetic, plotting), with component-wise +accessors and the geometry verbs `tf_norm()`, `tf_inner()`, +`tf_tangent()`, `tf_arclength()`: + +``` r +fm <- tfd_mv(list(x = tf_rgp(5), y = tf_rgp(5))) +plot(fm, type = "facet") +``` + +Multivariate FPCA is available via `tfb_mfpc()` (Happ & Greven, 2018), +and `tf_register()` supports joint multivariate alignment +(`method = "srvf_mv"`) as well as elastic shape registration +(`tf_register_shape()`). Note that `is_tf()` is `TRUE` for `tf_mv` as +well; use the new `is_tf_1d()` to test specifically for univariate `tf`. + #### Methods for converting existing data to `tf` and back **`tf`** includes functions `tfd` and `tfb` for converting matrices, diff --git a/_pkgdown.yml b/_pkgdown.yml index ea8a4e3c..a9f592cf 100644 --- a/_pkgdown.yml +++ b/_pkgdown.yml @@ -18,6 +18,17 @@ reference: - tfb_fpc - fpc_wsvd - tf_rebase +- title: 'Vector-valued functional data (f: R -> R^d)' + desc: Multivariate-output curves, e.g. movement trajectories (see issues #18, #27) + contents: + - tfd_mv + - tfb_mv + - tfb_mfpc + - tf_mv_methods + - tf_geom + - tf_arclength + - plot.tf_mv + - converters-mv - title: Evaluating, indexing & re-arranging desc: Accessing, appending, evaluating, splitting & combining functional data objects contents: @@ -45,6 +56,7 @@ reference: desc: Functions for registering, and warping functional data contents: - tf_register + - tf_register_shape - tf_estimate_warps - tf_registration - tf_align diff --git a/attic/design/multivariate.md b/attic/design/multivariate.md new file mode 100644 index 00000000..98fdfda2 --- /dev/null +++ b/attic/design/multivariate.md @@ -0,0 +1,361 @@ +# Design notes: vector-valued functional data `f: R -> R^d` + +This document records the design of `tfd_mv` / `tfb_mv`, the `tf` classes for +*vector-valued* functional data — vectors of functions +\(f: \mathcal{T}\subset\mathbb{R} \to \mathbb{R}^d\). The motivating use cases +are GitHub issues [#18](https://github.com/tidyfun/tf/issues/18) ("multivariate +evaluations (curves)") and [#27](https://github.com/tidyfun/tf/issues/27) +(movement / tracking data, à la Joo et al. 2019), e.g. an animal's position +\((x(t), y(t))\) or \((x(t), y(t), z(t))\) over time. + +**Scope.** This covers multivariate *output* (curves into `R^d`). Multivariate +*input* (surfaces `f: R^p -> R`, the other branch of #18) is **out of scope** +here; see "Surfaces (future work)" below. + +## The problem + +`tf` already represents univariate functional data `f: R -> R` with four leaf +classes, all built on `vctrs::new_vctr()`: + +| class | parents | per-curve data | +|--------------|-------------|---------------------------------| +| `tfd_reg` | `tfd`, `tf` | numeric value vector (shared `arg`) | +| `tfd_irreg` | `tfd`, `tf` | `list(arg=, value=)` | +| `tfb_spline` | `tfb`, `tf` | spline coefficients | +| `tfb_fpc` | `tfb`, `tf` | FPC scores | + +We want `f: R -> R^d` while (a) keeping both the raw (`tfd`) and basis (`tfb`) +representations, (b) supporting regular *and* irregular sampling, (c) full +`vctrs` compatibility (subset, `c()`, casting, tibble columns), (d) an elegant +UI, and — emphatically — (e) **maximal reuse** of the existing univariate +machinery, with no new numeric kernels. + +## Approaches considered + +### (A) Composition — bundle `d` univariate `tf` vectors *(chosen)* + +A `tf_mv` object of length `n` stores, as an attribute, a named list of `d` +univariate `tf` vectors (the components / output dimensions), each of length +`n`. Almost every method is obtained by mapping the existing univariate code +over the `d` components. + +* **+** Massive reuse: evaluation, arithmetic, `Math`/`Summary`, smoothing, + basis fitting, casting — all delegate to univariate methods via `map`/`map2`. +* **+** `tfd` **and** `tfb`, regular **and** irregular, all "for free" because a + component is itself any univariate `tf`. Components may even live on + *different* argument grids (natural for gappy movement data). +* **+** Intuitive UI: `f$x`, `tf_component(f, "x")`, `tf_components(f)`. +* **−** Needs one genuinely new piece of `vctrs` machinery: a custom + `vec_proxy`/`vec_restore` pair (the package otherwise uses `new_vctr` + defaults). This is a small, well-understood, localized addition. + +### (B) Matrix-valued evaluations — one class, each curve's value is an `n_arg × d` matrix + +Extend the `tfd` internals so each curve stores an `(n_arg × d)` matrix and the +evaluator returns a matrix; `d` is an attribute. + +* **+** Keeps the default `vctrs` proxy (no custom restore). +* **−** Large blast radius: every consumer of an evaluation vector + (`evaluate.R`, `summarize.R`, `ops.R`, `print-format.R`, the spline fitter, + plotting helpers, ...) must branch matrix-vs-vector. That is exactly the code + bloat/duplication we were asked to avoid. +* **−** `tfb` is awkward: fitting a basis to matrix-valued evaluations either + reduces to fitting per column (i.e. approach A in disguise) or needs a + genuinely multivariate basis (much harder, out of scope). +* **−** Forces a single shared `arg` across dimensions within a curve. + +### (C) Long / stacked encoding — a `d·n`-length univariate object + dimension index + +Stack the `d` components into one univariate vector of length `n·d` plus a +component-index attribute, reshaping on access. + +* **+** Reuses univariate storage verbatim. +* **−** Breaks `vctrs` semantics: `length()`, `[`, `c()`, recycling and tibble + row counts would all operate on `n·d`, not `n`. Recovering correct + per-observation semantics needs a fragile grouped proxy — i.e. all of (A)'s + custom-proxy work plus brittle index arithmetic. Irregular + per-dimension + grids become a bookkeeping nightmare, and the conceptual model leaks into the + UI. + +### Decision + +**(A) Composition.** It is the only option that satisfies *every* hard +requirement, and it wins decisively on reuse: the entire numeric surface of the +package is inherited by delegation. The single cost — a custom +`vec_proxy`/`vec_restore` — is paid once, in `R/mv-vctrs.R`. + +## Chosen design in detail + +### Classes + +``` +tfd_mv = c("tfd_mv", "tf_mv", "tf") # raw-evaluation components +tfb_mv = c("tfb_mv", "tf_mv", "tf") # basis components +``` + +`tf_mv` is the abstract multivariate parent (convention only, like `tf`). The +classes inherit `tf` (so `is_tf()`, `tf_domain()`, S4 `setOldClass()` apply) but +**deliberately not** `tfd`/`tfb`: univariate methods such as `Math.tfd`, +`as.matrix.tf` or `[.tf` assume *scalar* evaluations and would misbehave on a +bundle. `is_tfd()`/`is_tfb()` therefore return `FALSE`; use `is_tf_mv()`, +`is_tfd_mv()`, `is_tfb_mv()`. + +### Internal layout (`R/tfd-mv.R`, `new_tf_mv()`) + +Built with `vctrs::new_vctr()`: + +* `.data = seq_len(n)` — an integer placeholder of length `n` (number of curves). +* attribute `components` — a named list of the `d` univariate `tf` vectors. +* attribute `comp_names` — the component names (`c("x","y",...)`). +* attribute `domain` — the shared `tf_mv` domain. + +All per-curve metadata (`arg`, `evaluator`, `basis`, `basis_matrix`, ...) lives +*inside* the component objects, so none of it is duplicated. **Domain +handling**: a `tf_mv` lives on a single time axis. If components arrive with +differing domain attributes (typical when each was built from independent +irregular sampling and the auto-derived range differs in the last digit), +`new_tf_mv()` takes the union as the mv domain and widens each component to +match. Users can supply an explicit `domain` to `tfd_mv()` as long as it +contains every component's observed range; a `domain` that does not is an +error. + +### vctrs integration (`R/mv-vctrs.R`) + +The proxy is a **data frame with `n` rows and `d` columns**, one column per +component: + +```r +vec_proxy.tf_mv(x) # -> data.frame(x = , y = , ...) (n rows) +vec_restore.tf_mv(x, to) # rebuild the bundle from the (sliced/combined) proxy +``` + +This is the key idea: because the proxy columns *are* univariate `tf` vectors, +`vec_slice()`, `vec_c()` and casting all fall out of the existing **univariate** +`vctrs` methods applied column-wise. For example, `c(reg_mv, irreg_mv)` combines +each component with the univariate `vec_ptype2.tfd_reg.tfd_irreg`, yielding an +irregular result — no extra code. (Note: `vec_proxy.tf_mv` must take the size +from the components, never from `vec_size(x)`, which would recurse.) + +`vec_ptype2`/`vec_cast` are defined for all four `{tfd_mv,tfb_mv}²` pairs and +computed component-wise (so `tfd_mv <-> tfb_mv` reuses `tf_rebase`); they require +equal `d` and equal `comp_names`. `vec_ptype_abbr` → `"tfd_mv"`; +`vec_ptype_full` → `"tfd_mv"`. (Like the existing leaf-class methods, the +ptype methods are registered on the *leaf* classes `tfd_mv`/`tfb_mv`, not on the +`tf_mv` parent — vctrs picks them up there.) + +### UI and methods (`R/mv-methods.R`) + +* Constructors: `tfd_mv()` / `tfb_mv()` accept a list of `tf` vectors, a list of + matrices (one per component), a 3-d array `[curve, arg, component]`, or a long + data frame with several `value` columns. +* Accessors: `tf_ncomp()`, `tf_components()`, `tf_component()` / `<-`, and `$` + sugar (`f$x`). `tf_arg()` adapts to the irregularity structure (see + "Irregularity cases" below); `tf_evaluations()` returns a list of `n` + `(n_arg × d)` matrices when components share the per-curve grid, otherwise a + per-curve named list; `tf_count()` an `n × d` matrix. +* `[`: `f[i]` subsets curves; `f[i, j]` evaluates, returning a 3-d array + `[curve, arg, component]` (this is issue #18's "array-valued `j`"), or a list + of per-curve data frames when `matrix = FALSE`; `component=` drops to the + univariate result; a 2-column matrix index returns one row per + `(curve, arg)` pair × `d` columns. +* Arithmetic / `Math` / `Summary` / `mean`/`median`/`sd`/`var` / `==` / `!=`: + all component-wise, delegating to the univariate operators. +* Display: `print`/`format` join the per-component sparklines with `" | "`; + pillar `format_glimpse` is registered for tibble columns. +* `plot()`: `"facet"` (one panel per component) or `"trajectory"` (for `d = 2`, + `y(t)` vs `x(t)` — the movement view). +* Interop: `as.matrix()` → `[curve, arg, component]` array; `as.data.frame(., + unnest = TRUE)` → long format with one column per component. + +### Irregularity cases + +Because each output dimension is an independent univariate `tf` vector, the +composition design naturally accommodates four qualitatively different +"shapes" of irregularity. `tf_arg()` and `tf_evaluations()` adapt their return +type so callers can tell which shape they have: + +| Case | Components | `tf_arg(f)` | `tf_evaluations(f)[[i]]` | +|------|------------|-------------|--------------------------| +| 0. fully regular (shared grid across components *and* curves) | all `tfd_reg` / `tfb`, identical `arg` | numeric vector | `(n_arg × d)` matrix | +| 1. **per-curve grid shared across components**, varies across curves (e.g. movement data sampled at irregular but synchronized timestamps) | all `tfd_irreg`, identical per-curve arg lists | **list of `n` numeric vectors** (collapsed — not nested) | `(n_arg_i × d)` matrix | +| 2. per-component regular grid, same for all curves (e.g. position on a finer grid than velocity) | all `tfd_reg`, different `arg` vectors | list of `d` numeric vectors | per-curve named list (lengths differ across components) | +| 3. per-(curve, component) grids | all `tfd_irreg`, independent per-curve arg lists | list of `d` lists (one per component) | per-curve named list | + +The independence-of-components design comes with one cost: in case 1 the +shared per-curve arg vector is stored once *per component*, so `d`-fold +redundant. That is the price of the composition principle — components must +be self-contained univariate `tf` vectors so the existing numeric machinery +reuses verbatim. The redundancy is purely in storage; the public accessors +(`tf_arg`, `tf_evaluations`) collapse to the non-redundant shape. + +Mixed cases (some components regular, some irregular; some on shared grids, +some not) also work — they fall through to the "list of `d` per-component" +shape. + +## Worked example + +```r +library(tf) +# a 2-d trajectory from two GP draws +traj <- tfd_mv(list(x = tf_rgp(5), y = tf_rgp(5))) +traj +tf_ncomp(traj) # 2 +traj$x # the x-component as a univariate tfd +traj[1:2] # subset curves +traj[1, c(.2, .5, .8)] # array [curve, arg, component] +traj + traj # component-wise arithmetic +mean(traj) # component-wise pointwise mean (length-1 tfd_mv) + +# basis representation, fitted per component: +tb <- tfb_mv(traj, k = 15, verbose = FALSE) +as.tfd_mv(tb) # back to raw (lossy) + +# trajectory plot (movement view): +plot(traj, type = "trajectory") + +# works as a tibble column: +tibble::tibble(id = 1:5, path = traj) +``` + +## Delegated verbs (calculus, smoothing, re-basing, registration) + +Because a `tf_mv` is just a bundle of univariate `tf` vectors, the standard +verbs work by mapping over the components: + +* `tf_rebase()` — re-expresses each component in the target basis/grid, hence + also drives `tfd_mv <-> tfb_mv` conversion. +* `tf_derive()`, `tf_smooth()`, `tf_zoom()` — component-wise, returning a + `tf_mv`. +* `tf_integrate()` — definite integrals return an `n × d` matrix (one + integral per curve per component); indefinite integrals return a `tf_mv`. + +**Registration** is the one verb that is *not* a naive component-wise map: a +vector-valued curve has a single time axis, so all components must share one +warp or they desynchronize. `tf_estimate_warps.tf_mv()` therefore estimates a +**single warp per curve** from a univariate *registration signal* and +`tf_warp.tf_mv()` / `tf_align.tf_mv()` apply that one warp to every component. +`tf_register()` (a plain function) then composes correctly with no changes, +producing a `tf_registration` whose `registered`/`template` are `tf_mv` and +whose warps are univariate. + +The registration signal defaults to the **first component** — predictable and +never degenerate. `ref_component` can select another component (by name/index), +`"norm"` for the pointwise Euclidean norm \(\lVert f(t)\rVert\) +(rotation-invariant, but degenerate for constant-modulus signals such as +`(sin, cos)`), or a custom `function(tf_mv) -> tf`. Fully-joint multivariate +elastic registration (e.g. a multivariate SRVF criterion summed across +components inside the optimizer) is left as future work. + +## TODO / future work + +* **Convenience functions to regularize irregular `tf_mv` args.** Two + orthogonal "regularization" axes are useful and currently leave the user + threading raw `tfd()` / `tf_interpolate()` calls per component: + * *across components within each entry* — when components arrive on + independent per-curve grids (case 3 above), force every component of + curve `i` onto a single shared per-curve grid (collapsing case 3 → + case 1). Natural default: the union, intersection, or a user-supplied + grid per curve. + * *across entries within each component* — collapse irregular components + to a regular shared grid (case 1 → case 0 for that component). Standard + `tf_interpolate()`-style operation, but a single `tf_mv`-aware verb + avoids per-component bookkeeping. +* **`tfb_mv` with a *shared* basis across components.** The current + `tfb_mv()` fits each component independently, so the `d` basis function + systems can differ (and the per-component `basis_matrix` attributes are + stored separately). A shared-basis variant would fit one basis on the + stacked components and store a single `basis_matrix` + `d` coefficient + vectors per curve — smaller storage, stronger statistical coupling, and + cleaner mathematics (e.g. derivatives, integrals, inner products done + uniformly). +* **`tfb_mv` from MFPCA (multivariate functional principal components).** + Multivariate FPCA produces vector-valued eigenfunctions + \(\Psi_k: \mathcal{T} \to \mathbb{R}^d\) with shared *scalar* loadings + per curve: \(f_i(t) \approx \mu(t) + \sum_k s_{ik}\,\Psi_k(t)\). This is + qualitatively different from "FPC per component": it captures joint + variation across dimensions in a single coordinate system. Would require + a new `tfb_mv` subclass storing the multivariate eigenfunctions plus an + `n × K` score matrix, an MFPCA fitter (or a wrapper around + `MFPCA::MFPCA()`), and a `vec_cast` path back to `tfd_mv`. +* **Disperse `R/mv-methods.R` into the existing per-topic files.** While + the feature is in flux it is convenient to keep almost everything mv- + specific in one file so the diff is easy to read and review. Once the + design stabilizes, the individual `*.tf_mv` methods should move next to + their univariate siblings so the topical organization that the rest of + `R/` already follows is preserved: + * `[.tf_mv`, `[<-.tf_mv` → `R/brackets.R` + * `tf_warp.tf_mv`, `tf_align.tf_mv`, `tf_estimate_warps.tf_mv`, + `mv_registration_signal` → `R/register.R` + * `tf_rebase.tf_mv` → `R/rebase.R` + * `tf_derive.tf_mv`, `tf_integrate.tf_mv` → `R/calculus.R` + * `tf_smooth.tf_mv` → `R/smooth.R` + * `tf_zoom.tf_mv` → `R/zoom.R` + * `vec_arith.tf_mv*`, `==.tf_mv`, `!=.tf_mv` → `R/ops.R` + * `Math.tf_mv` → `R/math.R` + * `Summary.tf_mv`, `mean.tf_mv`, `median.tf_mv`, `sd.tf_mv`, + `var.tf_mv` → `R/summarize.R` + * `format.tf_mv`, `print.tf_mv`, `format_glimpse.tf_mv` → + `R/print-format.R` + * `plot.tf_mv`, `lines.tf_mv` → `R/graphics.R` + * `as.matrix.tf_mv`, `as.data.frame.tf_mv` → `R/convert.R` + * accessors (`tf_ncomp`, `tf_components`, `tf_component(<-)`, + `$.tf_mv`, `tf_arg.tf_mv`, `tf_evaluations.tf_mv`, `tf_count.tf_mv`, + `is.na.tf_mv`) → `R/methods.R` + + What can reasonably stay together: the core constructors and shared mv + helpers (`new_tf_mv`, `check_compatible_mv`, `assemble_mv_evals`, + `mv_registration_signal`) in `R/tfd-mv.R` / `R/tfb-mv.R`, and the + custom `vec_proxy`/`vec_restore`/`vec_ptype2`/`vec_cast`/`vec_ptype_*` + methods in `R/mv-vctrs.R` (or folded into `R/vctrs-cast.R` / + `R/vctrs-ptype2.R`). +* **More geometric / mv-specific verbs (tier 2 and tier 3).** A + rough roadmap of useful additions beyond the tier-1 primitives + (`tf_norm` / `tf_speed` / `tf_inner` / `tf_distance` / `tf_tangent` / + `tf_reparam_arclength` / `tf_arclength`) already in: + * *Tier 2 -- compositional, modest math*: `tf_curvature(f)` (2-d + signed \eqn{\kappa = (x'y'' - y'x'')/(x'^2 + y'^2)^{3/2}}; 3-d + magnitude \eqn{\lVert f' \times f'' \rVert / \lVert f' \rVert^3}), + `tf_frenet(f)` (3-d orthonormal tangent/normal/binormal frame), + `tf_rotate(f, R)` / `tf_translate(f, v)` / `tf_affine(f, A, b)` + (constant `R^d -> R^d` maps applied to every curve), + `tf_project(f, axes)` (drop to a sub-`tf_mv` by selecting + components), `tf_is_closed(f, tol)` (does `f(a) ~= f(b)` per + curve? -- a simple `tf_distance(f[, a], f[, b]) < tol` check + returned as a logical vector). + * *Tier 3 -- substantive*: `tf_self_intersection(f)` (for 2-d: + where -- and at which parameter pairs `(t_1, t_2)` -- does a + trajectory cross itself; standard polyline segment-intersection + problem, O(n^2) sweep with a kd-tree refinement later), + `tf_align_rigid(f, template)` (Procrustes-style alignment via + rotation + translation per curve), `tf_landmarks_extrema.tf_mv` + (multivariate extension of the existing univariate landmark + detector -- which "extremum" do we want on a vector-valued curve? + Component-wise, or geometric speed extrema, or user-supplied + objective). Multivariate FPCA and shared-basis `tfb_mv` are + already in the TODO list above. +* **A proper vignette with real-data case studies.** This design doc is + not a substitute for narrative documentation. A `vignettes/multivariate.Rmd` + walking through (1) construction from the shipped `gait` data (a real + 2-d example: knee + hip angle), (2) bracket/`$`/array semantics, (3) + `tfb_mv` round-trip, (4) registration on synchronized vs desynchronized + components, and (5) a movement-data case study would exercise the + feature end-to-end and surface remaining rough edges. Adds `knitr` / + `rmarkdown` to `Suggests`. + +## Surfaces (future work) + +Multivariate *input* (`f: R^p -> R`, e.g. images/surfaces) is a different axis: +there `arg` becomes a multi-column grid / list of grids, which the composition +design does not address. The `tf_mv` name is reserved for multivariate *output*; +a future multivariate-input class (e.g. `tfd_surface`) would slot under `tf` +alongside `tf_mv`. No surface code is shipped yet, to avoid dead scaffolding. + +## Files + +* `R/tfd-mv.R` — `new_tf_mv()`, `tfd_mv()` constructors, `as.tfd_mv()`. +* `R/tfb-mv.R` — `tfb_mv()` constructors, `as.tfb_mv()`. +* `R/mv-vctrs.R` — proxy/restore, ptype2/cast, ptype abbr/full. +* `R/mv-methods.R` — accessors, `$`, `[`, evaluate, ops/math/summary, + print/format, plot, converters. +* Tests: `tests/testthat/test-tfd-mv.R`, `test-tfb-mv.R`, `test-mv-vctrs.R`, + `test-mv-methods.R`, `test-mv-verbs.R`. diff --git a/attic/mv-playground.R b/attic/mv-playground.R new file mode 100644 index 00000000..feb1e336 --- /dev/null +++ b/attic/mv-playground.R @@ -0,0 +1,284 @@ +#!/usr/bin/env Rscript +# ============================================================================ +# tfd_mv / tfb_mv playground (PR #233 -- vector-valued functional data) +# ---------------------------------------------------------------------------- +# A guided tour of the new f: R -> R^d functionality. Source the whole file +# (`Rscript mv-playground.R`) for a non-stop run, or step through it line by +# line in an interactive session to poke at each piece. Plots only pop up in +# interactive sessions; under Rscript they are silently skipped. +# +# Sections: +# 1. Construction (list-of-tf, matrices, 3-d array, long data.frame, basis) +# 2. Accessors and component (re)assignment +# 3. Subsetting & bracket-evaluation ([ , component=, matrix-index) +# 4. Evaluation / interop (tf_arg, tf_evaluations, as.matrix, as.data.frame) +# 5. Arithmetic, Math, Summary, mean/sd/var +# 6. Geometry (norm, speed, inner, distance, tangent, arclength reparam) +# 7. Arc length (polyline vs derive; definite vs cumulative) +# 8. Calculus & verbs (derive, integrate, smooth, zoom, rebase) +# 9. Registration (one shared warp/curve; affine + srvf; ref_component) +# 10. Plotting (facet + trajectory) +# 11. tibble / dplyr integration +# 12. Edge cases (irregular, mixed grids, NA, empty) +# ============================================================================ + +devtools::load_all( + "/home/fabians/fda/tidyfun-pkgs/tf/.worktrees/pr-233", + quiet = TRUE +) +set.seed(1) + +# tiny helper: only plot when a screen device is available +show <- function(expr) if (interactive()) print(expr) +plt <- function(code) if (interactive()) eval.parent(substitute(code)) +hr <- function(title) cat("\n========== ", title, " ==========\n", sep = "") + +# ============================================================================ +hr("1. CONSTRUCTION") +# ============================================================================ + +## (a) from a named list of univariate tf vectors -- the canonical way +traj <- tfd_mv(list(x = tf_rgp(4), y = tf_rgp(4))) +traj # note the header + per-component value ranges +tf_ncomp(traj) # 2 output dimensions + +## (b) from a list of matrices (one matrix per component, [curve, arg]) +t <- seq(0, 1, length.out = 50) +mx <- matrix(sin(2 * pi * outer(1:3, t)), nrow = 3) +my <- matrix(cos(2 * pi * outer(1:3, t)), nrow = 3) +traj_mat <- tfd_mv(list(x = mx, y = my), arg = t) +traj_mat + +## (c) from a 3-d array [curve, arg, component] +arr <- array( + rnorm(3 * 50 * 2), + dim = c(3, 50, 2), + dimnames = list(NULL, NULL, c("x", "y")) +) +traj_arr <- tfd_mv(arr, arg = t) +traj_arr + +## (d) from a long data.frame (id, arg, one value column per component) +df <- data.frame( + id = rep(1:3, each = 50), + arg = rep(t, times = 3), + x = as.vector(t(mx)), + y = as.vector(t(my)) +) +traj_df <- tfd_mv(df, id = "id", arg = "arg", value = c("x", "y")) +traj_df + +## (e) basis representation: fit a spline basis per component +tb <- tfb_mv(traj_mat, k = 31, verbose = FALSE) +tb +## per-component basis specs via component-named lists: +tb2 <- tfb_mv(traj_mat, k = list(x = 7, y = 15), verbose = TRUE) +tb2 + +# ============================================================================ +hr("2. ACCESSORS") +# ============================================================================ + +tf_ncomp(traj) # number of output dimensions d +names(tf_components(traj)) # component names +traj$x # extract a component (univariate tfd) -- via $ +tf_component(traj, "y") # ... or via tf_component() +tf_component(traj, 2) # ... by index + +## replace / add a component +traj$x <- traj$x * 2 # replace an existing component +traj$z <- tf_rgp(4) # add a third dimension by name +tf_ncomp(traj) # now 3 +traj <- tfd_mv(list(x = traj$x, y = traj$y)) # rebuild as 2-d for what follows + +# ============================================================================ +hr("3. SUBSETTING & BRACKET-EVALUATION") +# ============================================================================ + +traj[1:2] # subset curves -> tfd_mv of length 2 +length(traj[1:2]) + +## evaluate on a grid -> array [curve, arg, component] +ev <- traj[1, c(0.25, 0.5, 0.75)] +dim(ev) +dimnames(ev)[[3]] + +## restrict to one component -> univariate result +traj[, c(0.25, 0.5, 0.75), component = "x"] + +## matrix index: (curve, arg) pairs -> one row per pair, one col per component +idx <- cbind(curve = c(1, 2), arg = c(0.3, 0.6)) +traj[idx] + +# ============================================================================ +hr("4. EVALUATION / INTEROP") +# ============================================================================ + +tf_arg(traj_mat) # shared regular grid -> plain numeric vector +str(tf_evaluations(traj_mat)[[1]]) # per-curve [arg x d] matrix +A <- as.matrix(traj_mat) # [curve, arg, component] array +dim(A) +head(as.data.frame(traj_mat, unnest = TRUE)) # long (id, arg, x, y) + +# ============================================================================ +hr("5. ARITHMETIC / MATH / SUMMARY") +# ============================================================================ + +(2 * traj_mat)$x # scalar * mv (component-wise) +(traj_mat + traj_mat) # mv + mv +(-traj_mat) # unary minus +abs(traj_mat) # Math group generic, component-wise +show(mean(traj_mat)) # pointwise mean curve (length-1 tfd_mv) +sd(traj_mat) +var(traj_mat) +traj_mat == traj_mat # all TRUE +identical(traj_mat != traj_mat, rep(FALSE, 3)) + +# ============================================================================ +hr("6. GEOMETRY") +# ============================================================================ + +## unit circle on [0,1]: norm == 1 everywhere, speed == 2*pi +tt <- seq(0, 1, length.out = 401) +circ <- tfd_mv(list( + x = tfd(matrix(cos(2 * pi * tt), nrow = 1), arg = tt), + y = tfd(matrix(sin(2 * pi * tt), nrow = 1), arg = tt) +)) +range(as.matrix(tf_norm(circ))) # ~ c(1, 1) +mean(as.matrix(tf_speed(circ))) # ~ 2*pi = 6.283 +tf_inner(circ, circ) # == tf_norm(circ)^2 == 1 +tf_distance(traj_mat, traj_mat) # 0 everywhere +tf_tangent(circ) # unit tangent, a tfd_mv +tf_reparam_arclength(circ) # constant-speed reparam (domain [0,1]!) + +# ============================================================================ +hr("7. ARC LENGTH") +# ============================================================================ + +tf_arclength(circ) # total length ~ 2*pi +tf_arclength(circ, lower = 0, upper = 0.25) # quarter circle ~ pi/2 +tf_arclength(circ, definite = FALSE) # cumulative s(t), a univariate tfd +tf_arclength(circ, method = "derive") # via tf_derive + tf_speed + integrate + +# ============================================================================ +hr("8. CALCULUS & VERBS") +# ============================================================================ + +tf_derive(traj_mat) # component-wise derivative +tf_integrate(traj_mat) # definite integral -> n x d matrix +tf_integrate(traj_mat, definite = FALSE) # indefinite -> tfd_mv +tf_smooth(traj_mat, verbose = FALSE) +tf_zoom(traj_mat, 0.2, 0.8) # restrict the time window +tf_rebase(traj_mat, tfb(traj_mat$x, verbose = FALSE)) # to a basis +as.tfd_mv(tb) # tfb_mv -> tfd_mv round-trip + +# ============================================================================ +hr("9. REGISTRATION (single shared warp per curve, applied to all dims)") +# ============================================================================ + +## build curves with genuine per-curve phase variation shared across x and y +n <- 8 +tg <- seq(0, 1, length.out = 80) +wps <- lapply(1:n, function(i) tg^runif(1, 0.7, 1.4)) +mk <- function(f) tfd(do.call(rbind, lapply(wps, f)), arg = tg) +fmv <- tfd_mv(list( + x = mk(function(s) sin(2 * pi * s)), + y = mk(function(s) cos(2 * pi * s)) +)) + +## affine registration (no extra deps) +reg_aff <- tf_register(fmv, method = "affine", type = "shift_scale") +tf_aligned(reg_aff) # a tfd_mv +tf_template(reg_aff) # length-1 tfd_mv template + +## elastic SRVF registration (needs the {fdasrvf} Suggests) +if (requireNamespace("fdasrvf", quietly = TRUE)) { + reg_srvf <- tf_register(fmv, method = "srvf") + cat( + "srvf: phase var x", + round(mean(apply(as.matrix(fmv$x), 2, var)), 4), + "->", + round(mean(apply(as.matrix(tf_aligned(reg_srvf)$x), 2, var)), 4), + "\n" + ) + ## the warp is a SINGLE univariate function per curve, shared by all components: + w <- tf_estimate_warps(fmv, method = "srvf") + cat("warps are univariate:", !is_tf_mv(w), " length:", length(w), "\n") + ## choose a different registration signal: + tf_estimate_warps(fmv, method = "srvf", ref_component = "y") # by name + tf_estimate_warps(fmv, method = "srvf", ref_component = "norm") # pointwise ||f|| + tf_estimate_warps( + fmv, + method = "srvf", + ref_component = function(z) tf_norm(z) + ) # custom signal +} else { + cat("(skipping srvf -- install 'fdasrvf' to try elastic registration)\n") +} + +# ============================================================================ +hr("10. PLOTTING") +# ============================================================================ + +plt({ + # for d == 2, the default is "trajectory" (y(t) vs x(t), the movement view); + # per-curve graphical params (col, lty, lwd, ...) are recycled across curves + plot(fmv, col = 1:length(fmv)) + lines(circ, col = "red", lwd = 2) # add another trajectory on top + plot(fmv, type = "facet", col = 1:length(fmv)) # one panel per component +}) + +# ============================================================================ +hr("11. tibble / dplyr INTEGRATION") +# ============================================================================ + +if ( + requireNamespace("tibble", quietly = TRUE) && + requireNamespace("dplyr", quietly = TRUE) +) { + tbl <- tibble::tibble( + id = 1:n, + grp = rep(c("A", "B"), length.out = n), + path = fmv + ) + print(tbl) + tbl |> + dplyr::mutate(len = tf_arclength(path), speed = tf_speed(path)) |> + dplyr::group_by(grp) |> + dplyr::summarize(mean_path = mean(path), mean_len = mean(len)) |> + print() +} else { + cat("(skipping tibble/dplyr demo -- install 'tibble' and 'dplyr')\n") +} + +# ============================================================================ +hr("12. EDGE CASES") +# ============================================================================ + +## components on DIFFERENT argument grids (gappy movement data) +mixed <- tfd_mv(list( + x = tfd(matrix(rnorm(2 * 10), 2), arg = seq(0, 1, length.out = 10)), + y = tfd(matrix(rnorm(2 * 25), 2), arg = seq(0, 1, length.out = 25)) +)) +mixed +tf_arclength(mixed) # polyline copes with per-component grids +str(tf_arg(mixed)) # returns per-component args when they differ + +## irregular components +irr <- tfd_mv(list( + x = tfd(list(c(0, 0.5, 1)), list(c(1, 2, 3))), + y = tfd(list(c(0, 0.5, 1)), list(c(4, 5, 6))) +)) +irr + +## NA propagation: an NA in any component marks the curve NA +withna <- traj_mat +withna$x <- withna$x * c(1, NA_real_, 1) # blank out the 2nd curve of comp x +is.na(withna) # -> FALSE TRUE FALSE (whole curve NA) + +## empty / prototype +empty <- tfd_mv(list()) +length(empty) +tf_ncomp(empty) + +hr("DONE -- happy exploring") diff --git a/attic/references.bib b/attic/references.bib new file mode 100644 index 00000000..89777f37 --- /dev/null +++ b/attic/references.bib @@ -0,0 +1,93 @@ +@book{ramsay2005functional, + title = {Functional Data Analysis}, + author = {Ramsay, James O. and Silverman, Bernard W.}, + series = {Springer Series in Statistics}, + edition = {2nd}, + year = {2005}, + publisher = {Springer}, + address = {New York}, + isbn = {978-0-387-40080-8} +} + +@article{olshen1989gait, + title = {Gait Analysis and the Bootstrap}, + author = {Olshen, Richard A. and Biden, Edmund N. and Wyatt, Marilynn P. and Sutherland, David H.}, + journal = {The Annals of Statistics}, + volume = {17}, + number = {4}, + pages = {1419--1440}, + year = {1989}, + doi = {10.1214/aos/1176347373} +} + +@article{marron2015, + title = {Functional Data Analysis of Amplitude and Phase Variation}, + author = {Marron, J. S. and Ramsay, James O. and Sangalli, Laura M. and Srivastava, Anuj}, + journal = {Statistical Science}, + volume = {30}, + number = {4}, + pages = {468--484}, + year = {2015}, + doi = {10.1214/15-STS524} +} + +@article{srivastava2011registration, + title = {Registration of Functional Data Using Fisher-Rao Metric}, + author = {Srivastava, Anuj and Wu, Wei and Kurtek, Sebastian and Klassen, Eric and Marron, J. S.}, + journal = {arXiv preprint arXiv:1103.3817}, + year = {2011}, + url = {https://arxiv.org/abs/1103.3817} +} + +@article{srivastava2011shape, + title = {Shape Analysis of Elastic Curves in Euclidean Spaces}, + author = {Srivastava, Anuj and Klassen, Eric and Joshi, Shantanu H. and Jermyn, Ian H.}, + journal = {IEEE Transactions on Pattern Analysis and Machine Intelligence}, + volume = {33}, + number = {7}, + pages = {1415--1428}, + year = {2011}, + doi = {10.1109/TPAMI.2010.184} +} + +@book{srivastava2016, + title = {Functional and Shape Data Analysis}, + author = {Srivastava, Anuj and Klassen, Eric P.}, + series = {Springer Series in Statistics}, + year = {2016}, + publisher = {Springer}, + address = {New York}, + doi = {10.1007/978-1-4939-4020-2} +} + +@article{tucker2013generative, + title = {Generative Models for Functional Data Using Phase and Amplitude Separation}, + author = {Tucker, J. Derek and Wu, Wei and Srivastava, Anuj}, + journal = {Computational Statistics \& Data Analysis}, + volume = {61}, + pages = {50--66}, + year = {2013}, + doi = {10.1016/j.csda.2012.12.001} +} + +@article{happ2018, + title = {Multivariate Functional Principal Component Analysis for Data Observed on Different (Dimensional) Domains}, + author = {Happ, Clara and Greven, Sonja}, + journal = {Journal of the American Statistical Association}, + volume = {113}, + number = {522}, + pages = {649--659}, + year = {2018}, + doi = {10.1080/01621459.2016.1273115} +} + +@article{joo2019, + title = {Navigating through the R Packages for Movement}, + author = {Joo, Roc\'{i}o and Boone, Matthew E. and Clay, Thomas A. and Patrick, Samantha C. and Clusella-Trullas, Susana and Basille, Mathieu}, + journal = {Journal of Animal Ecology}, + volume = {89}, + number = {1}, + pages = {248--267}, + year = {2019}, + doi = {10.1111/1365-2656.13116} +} diff --git a/attic/vector-valued-functions.Rmd b/attic/vector-valued-functions.Rmd new file mode 100644 index 00000000..f4d19128 --- /dev/null +++ b/attic/vector-valued-functions.Rmd @@ -0,0 +1,1072 @@ +--- +title: "Vector-valued functional data: gait cycles and hurricane tracks" +output: + rmarkdown::html_vignette: + code_folding: show +bibliography: references.bib +link-citations: true +vignette: > + %\VignetteIndexEntry{Vector-valued functional data: gait cycles and hurricane tracks} + %\VignetteEngine{knitr::rmarkdown} + %\VignetteEncoding{UTF-8} +--- + +```{r setup, include = FALSE} +knitr::opts_chunk$set( + collapse = TRUE, + comment = "#>", + fig.width = 7, + fig.height = 4.2, + dpi = 96, + out.width = "100%" +) +set.seed(1) +``` + +A `tf` vector represents a sample of functions $f: \mathbb{R} \to \mathbb{R}$. +Many real measurement processes, though, produce several coupled signals that +share a common argument: the `(hip, knee)` joint-angle pair sampled across a +gait cycle, the `(longitude, latitude)` position of a hurricane sampled in +time, the `(x, y, z)` body coordinates of a moving animal. The natural object +is a **vector-valued function** $f: \mathbb{R} \to \mathbb{R}^d$, and `tf` +represents a sample of those with the `tf_mv` family: `tfd_mv` for raw +evaluations and `tfb_mv` for basis representations. Internally a `tf_mv` is a +bundle of $d$ ordinary `tf` vectors, so the multivariate methods shown below +reuse the existing univariate numerical kernels by mapping over components. + +This article uses two real datasets to put the API through its paces: + +* **`tf::gait`** -- regularly sampled hip and knee angles for 39 boys across + one gait cycle. *Question: how variable is "normal" gait, where in the + cycle does that variation concentrate, and what are the dominant modes of + variation?* +* **`dplyr::storms`** -- irregularly sampled position and intensity of every + Atlantic tropical storm or hurricane from `r min(dplyr::storms$year)` to + `r max(dplyr::storms$year)`. *Question: do stronger storms travel further + and move faster than weak ones?* + +```{r packages, message = FALSE} +library(tf) +library(dplyr) +``` + +# 1. A quick tour of vector-valued functional data + +Before the case studies, this part introduces the objects: what a vector-valued +function *is*, the two classes that represent samples of them, how to construct +them, and the handful of geometric operations the case studies lean on. + +## What is a vector-valued function? + +A `tf` vector stores a sample of scalar functions $f: [a, b] \to \mathbb{R}$. A +**vector-valued** function bundles $d$ of them on a *shared* argument, + +$$ f(t) = \big(f_1(t), \dots, f_d(t)\big), \qquad t \in [a, b], $$ + +so that as $t$ runs over the domain the value $f(t)$ traces a *path* through +$\mathbb{R}^d$. `tf` represents a sample of such paths with the `tf_mv` family. +Internally a `tf_mv` *is* a bundle of `d` ordinary `tf` vectors sharing one +argument, so every univariate verb -- evaluation, arithmetic, derivatives, +integration, smoothing, basis fitting -- extends component-wise for free. + +Two kinds of variation run through everything below: **amplitude** (how large +the values get) and **phase** (*where in* $t$ the features happen). Much of the +work in functional data analysis [@ramsay2005functional] is telling the two +apart [@marron2015], and Section 2 is largely about exactly that. + +## The `tf_mv` classes: `tfd_mv` and `tfb_mv` + +Like their univariate cousins `tfd`/`tfb`, vector-valued functions come in two +flavours: + +* `tfd_mv` -- *raw evaluations*: each component is a `tfd`, i.e. values on a grid + plus an interpolation rule. +* `tfb_mv` -- *basis representation*: each component is a `tfb` (spline or FPC), + i.e. basis coefficients. + +Components are named, and a few accessors reach into the bundle: `tf_ncomp()` +(number of components), `tf_components()` (the list of univariate `tf` vectors), +`tf_component(x, k)` (one component, by name or index), and `names()`. + +## Constructing `tf_mv` objects + +`tfd_mv()` accepts three interchangeable input layouts. We use the built-in +`gait` data [@olshen1989gait; @ramsay2005functional] throughout the gait case +study -- hip and knee angles, in degrees, for 39 boys, each sampled at 20 +points across one gait cycle. The most direct +construction is a **named list** of univariate `tf` vectors: + +```{r tour-list} +data(gait) +g <- tfd_mv(list(hip = gait$hip_angle, knee = gait$knee_angle)) +g +``` + +The print-out shows `d = 2` components on a shared grid, with a sparkline pair +per subject. It behaves like a vector of 39 bivariate curves; component +accessors and bracket extraction expose the two levels separately -- curves on +rows, argument values on columns, components on the third array dimension: + +```{r gait-anatomy} +tf_ncomp(g) +names(tf_components(g)) +dim(g[1:3, c(0.1, 0.5, 0.9)]) +class(g[, , component = "hip"]) +``` + +The *same* object can be built from a **3-d array** +`[curve, arg, component]` -- the layout `as.matrix()` returns -- + +```{r tour-array} +arr <- as.matrix(g) # [curve, arg, component] +dim(arr) +g_arr <- tfd_mv(arr, arg = tf_arg(g)) +identical(as.matrix(g_arr), arr) +``` + +or from a **wide data frame** with one column per component (the long +`(id, arg, component, value)` schema is what `as.data.frame(unnest = TRUE)` +emits; `long = FALSE` gives the wide schema the constructor consumes): + +```{r tour-df} +df <- as.data.frame(g, unnest = TRUE, long = FALSE) # id, arg, hip, knee +head(df, 2) +g_df <- tfd_mv(df, id = "id", arg = "arg", value = c("hip", "knee")) +``` + +Each route also takes an optional `domain` and a per-component `evaluator` +(interpolation rule, default `tf_approx_linear`). A `tfd_mv` becomes a basis +object with `tfb_mv()`, choosing a `"spline"` or `"fpc"` basis; basis arguments +apply globally or per component when passed as a named list: + +```{r tour-basis} +g_spline <- tfb_mv(g, basis = "spline", k = list(hip = 5, knee = 12), + verbose = FALSE) +g_spline +``` + +## Geometry on the bundle: speed, arc length, reparametrization + +Because a `tf_mv` traces a path, the usual curve-geometry quantities are +available, and they return *univariate* summaries of the multivariate object: + +* `tf_speed(f)` $= \lVert f'(t)\rVert$ -- pointwise speed, a `tfd`, +* `tf_arclength(f)` -- total path length, one number per curve, +* `tf_reparam_arclength(f)` -- the same path traversed at *unit speed*. + +```{r tour-geom, fig.height = 3.6} +head(tf_arclength(g)) +plot(tf_speed(g), alpha = 0.3, main = expression("pointwise speed " * "||" * f*"'"*(t) * "||")) +``` + +## Aligning vector-valued curves: a ladder + +When curves differ mainly in *timing*, we usually want to factor that out -- +but "factor out" can mean several different things. There is a ladder of +increasingly aggressive operations, distinguished by *what each treats as a +nuisance* [@marron2015]: + +| rung | operation | removes | keeps | `tf` entry point | +|---|---|---|---|---| +| 1 | arc-length reparametrization | the clock (parametrization only) | shape **and** size | `tf_reparam_arclength()` | +| 2 | warp from a 1-d reference signal | phase, onto a chosen reference | amplitude, size, orientation | `tf_register(., method = "cc")` | +| 3 | joint multivariate warp | phase, from all components at once | amplitude, size, orientation | `tf_register(., method = "srvf_mv")` | +| 4 | full elastic shape registration | phase **+** rotation **+** scale | shape only | `tf_register_shape()` | + +Rung 1 re-labels time within each curve without picking a template; rungs 2 and +3 warp every curve onto a common template (from one reference signal, or from all +components jointly) but leave the values untouched; rung 4 *additionally* rotates +and rescales each curve, landing in a **shape space** where only the geometric +form remains. Section 2 walks all four rungs on the gait data -- and shows where +the top rung is, and is not, the right tool. + +# 2. How variable is a "normal" gait cycle? + +We built the two-component object `g` above. The two natural views of these 39 +curves are the **time-series** +(`type = "facet"`, one panel per component) and the **trajectory in phase +space** (`type = "trajectory"`, the default for two-component objects). + +```{r gait-facet, fig.height = 3.6} +plot(g, type = "facet", alpha = 0.4) +``` + +```{r gait-traj, fig.height = 4.2} +plot(g, alpha = 0.4) # type = "trajectory" by default for d = 2 +``` + +The facet view shows that both joints flex twice per cycle; the trajectory +view collapses time and reveals the characteristic "butterfly" loop traced +out as the leg cycles through stance and swing. Both views suggest the +between-subject spread is far from uniform along the cycle. + +## Pointwise mean and standard deviation + +`mean()` and `sd()` are vctrs group generics and dispatch component-wise: +they each return a length-1 `tfd_mv`. Plot the raw curves with `plot.tf`, +then overlay the mean +/- 2 sd envelope with `lines.tf` -- all arithmetic +on the components is component-wise on `tf_mv`, so `mu + 2 * s` is itself +a `tfd_mv` and `tf_component(...)` gives the per-axis envelope: + +```{r gait-mean-sd, fig.height = 3.8} +mu <- mean(g) +s <- sd(g) + +op <- par(mfrow = c(1, 2), mar = c(4, 4, 2, 1)) +for (k in seq_len(tf_ncomp(g))) { + nm <- names(tf_components(g))[k] + plot(tf_component(g, k), alpha = 0.25, + ylab = nm, main = nm) + lines(tf_component(mu, k), col = "red", lwd = 2) + lines(tf_component(mu + 2 * s, k), col = "red", lwd = 1.2, lty = 2) + lines(tf_component(mu - 2 * s, k), col = "red", lwd = 1.2, lty = 2) +} +par(op) +``` + +The pointwise sd peaks roughly where the angle itself is changing fastest -- +around the stance/swing transitions -- so most of the between-subject +variability lives in *timing* of those transitions, not in the angles +attained at rest. + +## Most- and least-varied subjects + +`tf_arclength()` measures the path length traced out in `(hip, knee)`-space. +A short orbit = a subject whose `(hip, knee)` excursions are small or who +moves the two joints in lock-step; a long orbit = a subject with +large-amplitude, less synchronised excursions. + +```{r gait-arc, fig.height = 4.2} +arc <- tf_arclength(g) +extreme <- c(which.min(arc), which.max(arc)) +extreme + +plot(g, alpha = 0.15) +lines(g[extreme[1]], col = "steelblue", lwd = 2.2) +lines(g[extreme[2]], col = "firebrick", lwd = 2.2) +legend("bottomright", bty = "n", + legend = c(sprintf("min arc length (%.0f deg)", arc[extreme[1]]), + sprintf("max arc length (%.0f deg)", arc[extreme[2]])), + col = c("steelblue", "firebrick"), lwd = 2) +``` + +## A ladder of registrations + +The pointwise sd above conflates two kinds of between-subject variability: +*amplitude* (how far the knee swings) and *phase* (at what fraction of the cycle +heel-strike happens). Removing phase -- "registration" -- can mean progressively +more, depending on what we are willing to treat as a nuisance. Below are four +rungs, applied in turn to the gait sample, each followed by a facet plot of the +result; we quantify them together at the end. + +### Rung 1 -- arc-length reparametrization + +The mildest operation re-labels time *within each curve* and uses no template at +all. `tf_reparam_arclength()` traverses every curve at approximately constant +speed in its value space: the `(hip, knee)` path is unchanged -- the same set of +points -- but equal time intervals now cover equal arc length. + +```{r gait-rung1, fig.width = 8.2, fig.height = 3.6} +g_unit <- tf_reparam_arclength(g) +plot(g_unit, type = "facet", alpha = 0.3) +``` + +`tf_speed()` makes the effect explicit. The raw speeds swing wildly (fast +through swing, near-still in stance); after reparametrization they are nearly +flat -- each curve traversed at constant speed: + +```{r gait-rung1-speed, fig.height = 4.0} +sp_raw <- tf_speed(g) +sp_unit <- tf_speed(g_unit) +yl <- c(0, max(c(unlist(tf_evaluations(sp_raw)), + unlist(tf_evaluations(sp_unit))), na.rm = TRUE)) + +op <- par(mfrow = c(1, 2), mar = c(4, 4, 2, 1)) +plot(sp_raw, alpha = 0.4, ylim = yl, main = "raw parameterization", + ylab = "speed (deg / cycle)") +lines(mean(sp_raw), col = "firebrick", lwd = 2) +plot(sp_unit, alpha = 0.4, ylim = yl, main = "arc-length parameterization", + ylab = "speed (deg / cycle)") +lines(mean(sp_unit), col = "firebrick", lwd = 2) +par(op) +``` + +No curve is moved toward any other: this removes the *parametrization* only, +leaving shape and size untouched and aligning nothing to a common reference. + +### Rung 2 -- alignment to a 1-d reference signal + +The next rung aligns curves *to each other* by estimating one shared time-warp +per curve and applying it to both components. The warp is driven by a single +univariate **reference signal** -- a component, or any derived quantity. The +choice matters: the knee carries the cycle's sharpest event (heel-strike), so it +locks the alignment best. + +```{r gait-rung2, message = FALSE, warning = FALSE, fig.width = 8.2, fig.height = 3.6} +r_knee <- tf_register(g, method = "cc", ref_component = "knee") +plot(tf_aligned(r_knee), type = "facet", alpha = 0.3) +``` + +The knee component visibly tightens; the hip, phase-coupled to it, tightens only +incidentally. Choosing `ref_component = "hip"` or `= tf_speed` would emphasise +different events -- a 1-d reference forces that modelling choice. The warps +themselves are univariate `tfd`s mapping raw to aligned phase, fanning out around +the swing phase where heel-strike timing varies between subjects: + +```{r gait-rung2-warps, fig.height = 4.0} +plot(tf_invert(tf_inv_warps(r_knee)), alpha = 0.5, + main = "reference (knee) warps", ylab = "aligned phase") +abline(0, 1, lty = 3) +``` + +### Rung 3 -- joint multivariate reparametrization (`srvf_mv`) + +`method = "srvf_mv"` removes the choice of reference: it estimates the single +time-warp that best aligns the joint `(hip, knee)` trajectories in the elastic +(square-root-velocity) sense, using all components at once +[@srivastava2011registration; @tucker2013generative]. It is still only a +re-timing -- values are never rotated or rescaled -- and the template is the +multivariate Karcher mean in `tf_template(reg_mv)`. + +```{r gait-rung3, message = FALSE, warning = FALSE, fig.width = 8.2, fig.height = 3.6} +reg_mv <- tf_register(g, method = "srvf_mv", max_iter = 2) +plot(tf_aligned(reg_mv), type = "facet", alpha = 0.3) +``` + +```{r gait-rung3-warps, fig.height = 4.0} +plot(tf_invert(tf_inv_warps(reg_mv)), alpha = 0.5, + main = "srvf_mv warps", ylab = "aligned phase") +abline(0, 1, lty = 3) +``` + +Because the warp must compromise between both components rather than chase one, +it need not shrink any single channel as hard as a reference-targeted warp -- but +it needs no reference choice and respects both axes symmetrically. + +### Rung 4 -- full elastic shape registration + +`tf_register_shape()` goes furthest: on top of the time-warp it also fits, per +curve, a **rotation** and a **scale**, and reports the aligned curves in a +centered, normalised **shape space** [@srivastava2011shape; @srivastava2016]. +Translation, rotation and size are all quotiented out. + +```{r gait-rung4, message = FALSE, warning = FALSE, fig.width = 8.2, fig.height = 3.6} +reg_shape <- tf_register_shape(g, max_iter = 2) +plot(tf_aligned(reg_shape), type = "facet", alpha = 0.3) # note the y-axis: shape space +``` + +The `(hip, knee)` trajectory view makes the collapse vivid -- the original +butterfly loops on the left, the shape-registered curves on the right reduced to +essentially one normalised shape (note the very different axis ranges): + +```{r gait-rung4-traj, message = FALSE, warning = FALSE, fig.width = 8.2, fig.height = 4.2} +op <- par(mfrow = c(1, 2), mar = c(4, 4, 2, 1)) +plot(g, type = "trajectory", alpha = 0.3, main = "original") +plot(tf_aligned(reg_shape), type = "trajectory", alpha = 0.3, + main = "shape-registered (shape space)") +par(op) +``` + +The estimated rotations sit near the identity and the scales near 1 (little +genuine rotation or scaling is present), yet the normalisation alone flattens +the real between-subject differences: + +```{r gait-rung4-diag, message = FALSE, warning = FALSE} +round(tf_rotations(reg_shape)[, , 1], 3) # rotation for subject 1 +summary(as.numeric(tf_scales(reg_shape))) # scales, relative to the template +``` + +**Gait's two axes are not interchangeable**: hip angle and knee angle are +distinct physical quantities in fixed units. Quotienting out rotation mixes them +into meaningless combinations, and quotienting out scale discards amplitude -- +which here *is* the signal of interest. Shape registration is the wrong tool for +a bundle of fixed-unit channels; we return to where it *is* right below. + +### Quantifying the alignment + +Two views across the rungs. First, a visual overview: the two components (rows +*hip*, *knee*) and the estimated time-warp (row *warp*) for each registration, +side by side -- *as-observed*, *arc-length* reparametrized, registered to a 1-d +*reference* (knee), registered jointly (*srvf_mv*), and fully *shape*-registered. +Raw data has no warp; arc-length has no template warp but does reparametrize, so +its warp panel shows the normalised cumulative arc length. + +```{r quant-warp-helper, class.source = "fold-hide"} +# warp implied by arc-length reparametrization: normalised cumulative arc length +# (raw cycle phase -> arc-length phase, same direction as the registration warps) +arclen_warp <- function(mv) { + sp <- tf_speed(mv) + arg <- as.numeric(tf_arg(sp)) + dom <- tf_domain(sp) + d <- diff(arg) + w <- sapply(tf_evaluations(sp), function(s) { + cum <- c(0, cumsum((head(s, -1) + tail(s, -1)) / 2 * d)) + dom[1] + diff(dom) * cum / cum[length(cum)] + }) + tfd(t(w), arg = arg, domain = dom) +} +``` + +```{r gait-grid, message = FALSE, warning = FALSE, fig.width = 9, fig.height = 6.6} +cols <- list( + list(nm = "as-observed", data = g, warp = NULL), + list(nm = "arc-length", data = g_unit, warp = arclen_warp(g)), + list(nm = "ref = knee", data = tf_aligned(r_knee), warp = tf_invert(tf_inv_warps(r_knee))), + list(nm = "srvf_mv", data = tf_aligned(reg_mv), warp = tf_invert(tf_inv_warps(reg_mv))), + list(nm = "shape", data = tf_aligned(reg_shape), warp = tf_invert(tf_inv_warps(reg_shape))) +) + +op <- par(mfrow = c(3, 5), mar = c(2.6, 3.2, 2, 0.8), mgp = c(1.9, 0.6, 0)) +for (j in seq_along(cols)) + plot(tf_component(cols[[j]]$data, "hip"), alpha = 0.3, + main = cols[[j]]$nm, ylab = if (j == 1) "hip" else "") +for (j in seq_along(cols)) + plot(tf_component(cols[[j]]$data, "knee"), alpha = 0.3, + main = "", ylab = if (j == 1) "knee" else "") +for (j in seq_along(cols)) { + w <- cols[[j]]$warp + if (is.null(w)) { + plot.new(); text(0.5, 0.5, "(identity)", col = "grey55") + } else { + plot(w, alpha = 0.4, main = "", ylab = if (j == 1) "warp" else "") + abline(0, 1, lty = 3) + } +} +par(op) +``` + +Across the warp row, the registration warps fan out around the swing phase where +heel-strike timing varies between subjects; the shape warp is comparable, since +it shares the same time-warp step. Down the `shape` column the components have +been rescaled into normalised shape space -- note the compressed vertical axis -- +which is why those curves look so much tighter than the others. + +Second, the numbers: the maximum pointwise sd of each component, before and +after. Rungs 1--3 keep the data in its original degree units and are directly +comparable; rung 4 (marked `*`) lives in normalised shape space, so its row is on +a different scale and only illustrates the collapse. + +```{r gait-register-table-helper, class.source = "fold-hide"} +peak_sd <- function(f, k) { + arg <- tf_arg(f) + max(unlist(tf_evaluate(tf_component(sd(f), k), arg))) +} +``` + +```{r gait-register-table} +rungs <- list( + "raw" = g, + "1 arc-length" = g_unit, + "2 reference (knee)" = tf_aligned(r_knee), + "3 srvf_mv" = tf_aligned(reg_mv), + "4 shape (*)" = tf_aligned(reg_shape) +) +data.frame( + registration = names(rungs), + sd_hip = sapply(rungs, peak_sd, "hip"), + sd_knee = sapply(rungs, peak_sd, "knee"), + row.names = NULL +) +``` + +In degree units the phase rungs chip away at the spread only modestly -- gait's +phase variation is real but subtle. The knee-reference warp shrinks the knee +component most (it targets that channel); `srvf_mv` spreads a gentler correction +across both; and arc-length reparametrization, which aligns nothing to a common +template, barely moves the pointwise sd at all. Rung 4's near-zero entries are +*not* a better alignment -- they are the signal being quotiented away. + +## Shape registration and its quotient spaces + +Shape registration earns its keep when the components really are *interchangeable +spatial coordinates* and position, orientation and size are all nuisances: +handwriting, gesture or movement paths, object outlines. Here is a synthetic +example -- a single base curve $(t,\, t^2)$, copied three times with each copy +rotated, rescaled and shifted in the plane: + +```{r shape-demo-build} +t <- seq(0, 1, length.out = 51) +base <- rbind(t, t^2) +scales <- c(1, 0.7, 1.3) +angles <- c(0, 0.4, -0.25) +offset <- rbind(c(0.2, -0.1), c(0.4, -0.2), c(0.6, -0.3)) +beta <- array(NA_real_, dim = c(3, length(t), 2), + dimnames = list(c("a", "b", "c"), NULL, c("x", "y"))) +for (i in 1:3) { + rot <- matrix(c(cos(angles[i]), sin(angles[i]), + -sin(angles[i]), cos(angles[i])), nrow = 2) + curve <- scales[i] * (rot %*% base) + matrix(offset[i, ], 2, length(t)) + beta[i, , 1] <- curve[1, ] + beta[i, , 2] <- curve[2, ] +} +shapes <- tfd_mv(beta, arg = t) +``` + +What counts as "the same shape" depends on which transformations we quotient +out, and `tf_register_shape()` lets us choose via the `rotation` and `scale` +flags (translation and the time-warp are always removed). The three quotients +differ in what survives: + +```{r shape-demo-quotients, message = FALSE, warning = FALSE, fig.width = 7.4, fig.height = 6.6} +reg_full <- tf_register_shape(shapes, max_iter = 2, rotation = TRUE, scale = TRUE) +reg_rot <- tf_register_shape(shapes, max_iter = 2, rotation = TRUE, scale = FALSE) +reg_scale <- tf_register_shape(shapes, max_iter = 2, rotation = FALSE, scale = TRUE) + +op <- par(mfrow = c(2, 2), mar = c(4, 4, 2, 1)) +plot(shapes, asp = 1, col = 1:3, lwd = 2, main = "input: 3 placements") +plot(tf_aligned(reg_rot), asp = 1, col = 1:3, lwd = 2, main = "rotation-only (keeps size)") +plot(tf_aligned(reg_scale), asp = 1, col = 1:3, lwd = 2, main = "scale-only (keeps orientation)") +plot(tf_aligned(reg_full), asp = 1, col = 1:3, lwd = 2, main = "rotation + scale (full)") +par(op) +``` + +* **rotation-only** (`scale = FALSE`): orientations are aligned but the original + sizes are kept, so the curves stay nested. +* **scale-only** (`rotation = FALSE`): sizes are equalised but orientations are + kept, so the curves fan out at one common size. +* **rotation + scale** (the full shape quotient): both are removed, and the three + congruent curves collapse onto a single shape. + +`tf_scales()` reports the per-curve size factors that were removed (relative to +the template; `1` whenever scaling is off): + +```{r shape-demo-scales} +data.frame(curve = c("a", "b", "c"), + injected = scales, + full = round(as.numeric(tf_scales(reg_full)), 3), + rot_only = round(as.numeric(tf_scales(reg_rot)), 3), + scale_only = round(as.numeric(tf_scales(reg_scale)), 3)) +``` + +## Modes of variation via FPC + +Back on the gait sample, with the most visible phase variation reduced (we carry +the knee-aligned curves forward), fit a per-component FPC basis +[@ramsay2005functional]. The leading PCs now mostly describe within-component +amplitude modes: + +```{r gait-fpc-helper, class.source = "fold-hide"} +fpc_var <- function(comp) attr(comp, "score_variance") +``` + +```{r gait-fpc, fig.height = 3.8} +g_aligned <- tf_aligned(r_knee) +g_b <- tfb_mv(g_aligned, basis = "fpc", verbose = FALSE) + +v_hip <- fpc_var(tf_component(g_b, "hip")); v_hip <- v_hip / sum(v_hip) +v_knee <- fpc_var(tf_component(g_b, "knee")); v_knee <- v_knee / sum(v_knee) + +op <- par(mfrow = c(1, 2), mar = c(4, 4, 2, 1)) +barplot(head(v_hip, 6), names.arg = 1:6, main = "hip: variance share", + ylab = "proportion", col = "grey70") +barplot(head(v_knee, 6), names.arg = 1:6, main = "knee: variance share", + ylab = "proportion", col = "grey70") +par(op) + +# residual: how well does the low-rank FPC basis approximate the curves? +g_round <- vctrs::vec_cast(g_b, g_aligned) +resid <- g_aligned - g_round +rmse_per_subject <- sqrt(unlist(lapply(tf_evaluations(resid), function(m) { + mean(as.matrix(m[, -1L, drop = FALSE])^2) +}))) +summary(rmse_per_subject) +``` + +The first 2-3 FPCs explain most of the remaining within-component variance, +and the RMSE between the registered and FPC-reconstructed `(hip, knee)` +curves is well below a degree for most subjects. + +## Joint modes of variation via MFPCA + +The per-component FPC above gives each component its *own* scores: a subject's +hip-mode-1 score and knee-mode-1 score are separate numbers, so the basis +cannot, on its own, express that the two joints co-vary. **Multivariate FPCA** +[@happ2018], via `tfb_mfpc()`, instead produces a *single* score per +subject per mode, paired with a **vector-valued eigenfunction** +$\Psi_m = (\Psi_m^{\text{hip}}, \Psi_m^{\text{knee}})$. One shared score +$s_{im}$ scales both halves at once, so each mode encodes coupled hip-knee +co-variation in a single coordinate system: +$$ f_i(t) \approx \mu(t) + \sum_m s_{im}\,\Psi_m(t). $$ + +The default `weights = "inverse_variance"` normalises each component to equal +total variance before combining, so the (larger-range) knee does not simply +dominate the hip; `"snr"`, `"equal"`, or a numeric vector are also available. + +```{r gait-mfpc} +g_m <- tfb_mfpc(g_aligned, pve = 0.95) +g_m + +scores <- tf_mfpc_scores(g_m) # 39 x M, shared across both components +nu <- attr(g_m, "mfpc")$evalues # joint (multivariate) eigenvalues +ve <- nu / sum(nu) # variance share per shared mode +dim(scores) +round(head(ve, 4), 3) +``` + +The multivariate eigenfunctions come back as a `tfd_mv` -- one bivariate "curve" +per mode -- so each MFPC can be inspected component by component. The leading +mode's hip and knee parts move together, which is exactly the coupling the +independent per-component analysis could not represent: + +```{r gait-mfpc-efun, fig.width = 8.2, fig.height = 3.6} +psi <- tf_mfpc_efunctions(g_m) +k_show <- min(3L, length(psi)) +op <- par(mfrow = c(1, 2), mar = c(4, 4, 2, 1)) +plot(tf_component(psi, "hip")[seq_len(k_show)], col = seq_len(k_show), lwd = 2, + main = "MFPC eigenfunctions: hip", ylab = "loading") +plot(tf_component(psi, "knee")[seq_len(k_show)], col = seq_len(k_show), lwd = 2, + main = "MFPC eigenfunctions: knee", ylab = "loading") +legend("topright", bty = "n", lwd = 2, col = seq_len(k_show), + legend = paste("MFPC", seq_len(k_show)), cex = 0.85) +par(op) +``` + +The shared scores live in one coordinate system, so MFPCA summarises each +subject with a single set of `M` numbers rather than one set per component. +That is a genuine trade-off: the independent per-component basis is free to +optimise each component separately, so it reconstructs more accurately for a +given per-component truncation, while MFPCA spends fewer, *coupled* scores and +accepts a little more error in exchange for the shared coordinate system: + +```{r gait-mfpc-rmse, class.source = "fold-hide"} +rmse_mv <- function(approx) { + resid <- g_aligned - vctrs::vec_cast(approx, g_aligned) + sqrt(mean(unlist(lapply(tf_evaluations(resid), function(m) { + as.matrix(m[, -1L, drop = FALSE])^2 + })))) +} +n_indep <- sum(vapply(tf_components(g_b), + function(co) length(attr(co, "score_variance")), integer(1))) +data.frame( + representation = c("independent FPC", "joint MFPCA"), + stored_scores = c(n_indep, attr(g_m, "mfpc")$npc), + rmse = round(c(rmse_mv(g_b), rmse_mv(g_m)), 3) +) +``` + +```{r gait-mfpc-recap, echo = FALSE, results = "asis"} +cat(sprintf( + "The first %d shared modes already capture %.0f%% of the joint variance. ", + min(2L, length(ve)), 100 * sum(head(ve, 2)) +)) +cat(sprintf( + "New subjects can be projected onto this fitted basis with `tf_rebase()`, which re-scores them jointly rather than component by component." +)) +``` + +# 3. Atlantic storms as 4-dimensional curves + +`dplyr::storms` records four time-varying quantities for every Atlantic +tropical storm or hurricane from `r min(dplyr::storms$year)` to +`r max(dplyr::storms$year)`, sampled every 6 hours along the storm's life: +position (longitude, latitude) and intensity (sustained wind speed, central +pressure). The natural object is a single +**four-component** vector-valued curve per storm, `f_i: [0, T_i] -> R^4` +-- spatial trajectory and intensity life-cycle bundled together. + +Two practical points: + +* `tracks_km` -- the *spatial* `(x_km, y_km)` view in physical units + (projecting each storm's `long` / `lat` into a per-storm local-km + frame at its own mean latitude) -- is what we use for arc length and + forward speed, because deg/h speeds wrongly upweight east-west motion + at high latitudes (a longitude degree is ~111 km at the equator but + only ~78 km at 45 N). +* Most life-cycle comparisons between short and long storms only make + sense on a **normalised time axis** `phase = t / T_i in [0, 1]`. We + build a normalised 4-d object on top of the real-time one and use the + appropriate version per question. + +```{r storms-build, class.source = "fold-hide"} +KM_PER_DEG <- 111.32 + +storms_clean <- storms |> + mutate( + storm_id = paste(name, year), + ts = as.POSIXct(ISOdate(year, month, day, hour), tz = "UTC") + ) |> + group_by(storm_id) |> + distinct(ts, .keep_all = TRUE) |> # dedupe duplicate-timestamp rows + mutate( + t_hours = as.numeric(ts - min(ts), units = "hours"), + phase = if (max(t_hours) > 0) t_hours / max(t_hours) else 0, + ref_lat = mean(lat), + x_km = (long - mean(long)) * KM_PER_DEG * cos(ref_lat * pi / 180), + y_km = (lat - ref_lat) * KM_PER_DEG + ) |> + filter(n() >= 16, !is.na(wind), !is.na(pressure)) |> # >= 4 days + ungroup() + +# spatial (km, real time): the compact data-frame constructor. +tracks_km <- storms_clean |> + transmute(storm_id, t_hours, x = x_km, y = y_km) |> + tfd_mv(id = "storm_id", arg = "t_hours", value = c("x", "y")) + +# Equivalent spelling if the component tfd vectors are already available. +tracks_km_from_components <- tfd_mv(list( + x = tfd(storms_clean, id = "storm_id", arg = "t_hours", value = "x_km"), + y = tfd(storms_clean, id = "storm_id", arg = "t_hours", value = "y_km") +)) + +# full 4-d, on normalised life-cycle phase +tracks4 <- tfd_mv(list( + long = tfd(storms_clean, id = "storm_id", arg = "phase", value = "long"), + lat = tfd(storms_clean, id = "storm_id", arg = "phase", value = "lat"), + wind = tfd(storms_clean, id = "storm_id", arg = "phase", value = "wind"), + pres = tfd(storms_clean, id = "storm_id", arg = "phase", value = "pressure") +)) +tracks4 + +# storm-level metadata +peak <- storms |> + group_by(name, year) |> + summarise(peak_cat = suppressWarnings(max(category, na.rm = TRUE)), + .groups = "drop") |> + mutate(peak_cat = if_else(is.finite(peak_cat), peak_cat, 0), + peak_cat = as.integer(peak_cat), + storm_id = paste(name, year)) + +df <- tibble::tibble( + storm_id = names(tracks4), + track = tracks4, + track_km = tracks_km +) |> + left_join(peak, by = "storm_id") |> + mutate(strength = factor( + pmin(peak_cat, 4), + levels = 0:4, + labels = c("TS/TD", "Cat 1", "Cat 2", "Cat 3", "Cat 4+") + )) +``` + +Before analysing the storms, inspect the irregular object directly. Counts +vary by storm because each lifetime has a different number of 6-hourly +observations, while the long data-frame representation keeps the components +aligned by `(storm, phase)`. + +```{r storms-irregular} +head(tf_count(tracks4)) +as.data.frame(tracks4[1:2], unnest = TRUE) |> head() +``` + +## Movement workflow: regularize, then describe + +@joo2019 describe movement analysis as a workflow built around +tracking records `(x, y, t)`: clean the fixes, regularize or reconstruct +the path when needed, visualize the track, then extract descriptors such +as speed, heading and turning angles. The storm data follow the same +pattern. The build step above already performed basic preprocessing: +duplicate timestamps were removed, very short tracks were dropped, and +longitude/latitude were projected into local kilometre coordinates. + +For diagnostic plots it is useful to regularize a few storms onto a +common lifecycle grid. Differentiating the regularized `(x, y)` curves +gives velocity components; their norm is forward speed, and `atan2(v_y, +v_x)` gives heading. Turning angle is the wrapped difference between +successive headings. + +```{r storms-movement-workflow, fig.width = 8.2, fig.height = 6.6} +movement_ids <- c("Andrew 1992", "Katrina 2005", "Sandy 2012") +movement_ids <- movement_ids[movement_ids %in% df$storm_id] +movement_rows <- match(movement_ids, df$storm_id) +movement_grid <- seq(0, 1, length.out = 81) +movement_duration <- vapply( + tf_arg(df$track_km[movement_rows]), + \(t) max(t) - min(t), + numeric(1) +) + +movement_eval <- lapply(seq_along(movement_rows), function(k) { + tf_evaluate( + df$track_km[movement_rows[k]], + arg = movement_grid * movement_duration[k] + )[[1]] +}) +movement_x <- do.call(rbind, lapply(movement_eval, `[[`, "x")) +movement_y <- do.call(rbind, lapply(movement_eval, `[[`, "y")) + +movement_track <- tfd_mv(list( + x = tfd(movement_x, arg = movement_grid, domain = c(0, 1)), + y = tfd(movement_y, arg = movement_grid, domain = c(0, 1)) +), domain = c(0, 1)) +names(movement_track) <- movement_ids + +movement_velocity <- tf_derive(movement_track) +movement_speed <- tf_norm(movement_velocity) / movement_duration + +vx <- as.matrix(tf_component(movement_velocity, "x"), + arg = movement_grid, interpolate = TRUE) +vy <- as.matrix(tf_component(movement_velocity, "y"), + arg = movement_grid, interpolate = TRUE) +heading_mat <- atan2(vy, vx) * 180 / pi +heading <- tfd(heading_mat, arg = movement_grid, domain = c(0, 1)) +names(heading) <- movement_ids + +wrap_degrees <- function(x) ((x + 180) %% 360) - 180 +turning_mat <- t(apply(heading_mat, 1, function(x) { + c(NA_real_, wrap_degrees(diff(x))) +})) +turning <- tfd(turning_mat, arg = movement_grid, domain = c(0, 1)) +names(turning) <- movement_ids + +movement_cols <- c("#1b9e77", "#d95f02", "#7570b3")[seq_along(movement_ids)] +xy <- as.matrix(movement_track, interpolate = TRUE) + +op <- par(mfrow = c(2, 2), mar = c(4, 4, 2, 1)) +plot(range(xy[, , "x"], na.rm = TRUE), range(xy[, , "y"], na.rm = TRUE), + type = "n", asp = 1, xlab = "x (km)", ylab = "y (km)", + main = "regularized tracks") +lines(movement_track, col = movement_cols, lwd = 2) +legend("topleft", bty = "n", lwd = 2, col = movement_cols, + legend = movement_ids, cex = 0.85) +plot(movement_speed, col = movement_cols, lwd = 2, alpha = 1, + xlab = "lifecycle phase", ylab = "km / h", main = "forward speed") +plot(heading, col = movement_cols, lwd = 2, alpha = 1, + xlab = "lifecycle phase", ylab = "degrees", main = "heading") +plot(turning, col = movement_cols, lwd = 2, alpha = 1, + xlab = "lifecycle phase", ylab = "degrees", main = "turning angle") +par(op) +``` + +The result is a movement-data diagnostic rather than just a map: storms +with visually similar tracks can differ sharply in when they accelerate, +when they turn, and how abruptly their heading changes during recurvature. + +## A single 4-d curve: Hurricane Katrina (2005) + +For a single storm the 4-component `tf_mv` is naturally displayed +in facet mode (`type = "facet"`) -- one panel per component, all on the +same normalised cycle phase axis: + +```{r storms-katrina, fig.width = 8, fig.height = 5.4} +katrina <- df |> filter(storm_id == "Katrina 2005") |> pull(track) +plot(katrina, type = "facet", lwd = 2) +``` + +Wind and pressure are anti-correlated -- pressure dips below 920 mbar as +wind peaks above 150 knots near phase ~0.55 -- and the `(long, lat)` +panels show Katrina sliding northwest across the Gulf and turning north +into Louisiana. Bundling intensity and trajectory as a single `tfd_mv` +lets every operation downstream (subsetting, summaries, arithmetic, +plotting) treat them as one object. + +## Track map, faceted by peak intensity + +The geographic view uses the `(long, lat)` components only. Faceting by +peak Saffir-Simpson category separates the populations cleanly, and a +coastline backdrop (from `maps::map`, if installed) anchors the geography: + +```{r storms-plot-helpers, class.source = "fold-hide"} +have_maps <- requireNamespace("maps", quietly = TRUE) +pal <- c("grey50", "#fed976", "#feb24c", "#fd8d3c", "#e31a1c") + +draw_coast <- function(xlim, ylim) { + if (!have_maps) return(invisible()) + m <- maps::map("world", plot = FALSE, + xlim = xlim, ylim = ylim, fill = FALSE) + lines(m$x, m$y, col = "grey55", lwd = 0.6) +} +``` + +```{r storms-map, fig.width = 8.2, fig.height = 6.8} +xlim <- range(unlist(tf_evaluations(tf_component(df$track, "long"))), na.rm = TRUE) +ylim <- range(unlist(tf_evaluations(tf_component(df$track, "lat"))), na.rm = TRUE) + +op <- par(mfrow = c(2, 3), mar = c(3.4, 3.4, 2, 1), mgp = c(2, 0.7, 0)) +for (k in seq_along(levels(df$strength))) { + lev <- levels(df$strength)[k] + trks <- df$track[df$strength == lev] + spatial <- tfd_mv(list( + long = tf_component(trks, "long"), + lat = tf_component(trks, "lat") + )) + plot(range(xlim), range(ylim), type = "n", + xlab = "long", ylab = "lat", + main = sprintf("%s (n = %d)", lev, length(trks))) + draw_coast(xlim, ylim) + lines(spatial, col = pal[k], alpha = 0.4, lwd = 1) +} +plot.new() +legend("center", bty = "n", lwd = 2, col = pal, legend = levels(df$strength), + title = "peak intensity", cex = 1.05) +par(op) +``` + +Tropical storms / depressions stay in the southern, western basin; cat-3 +and cat-4+ tracks reach further north and east -- they live longer, get +caught by the westerlies, and recurve out to sea. + +## Scalar features extracted from the 4-d object + +Every component of a `tf_mv` is a real `tf` vector and supports the full +univariate API, so per-storm scalar features fall out as one-liners: + +```{r storms-scalars} +df <- df |> mutate( + path_km = tf_arclength(track_km), + duration = vapply(tf_arg(track_km), \(t) max(t) - min(t), numeric(1)), + mean_speed = path_km / duration, # km/h, lifetime average + peak_wind = vapply(tf_evaluations(tf_component(track, "wind")), max, numeric(1)), + min_pres = vapply(tf_evaluations(tf_component(track, "pres")), min, numeric(1)) +) + +df |> + group_by(strength) |> + summarise( + n = dplyr::n(), + median_path_km = round(median(path_km)), + median_speed_kmh = round(median(mean_speed), 1), + median_peak_wind = median(peak_wind), + median_min_pres = median(min_pres), + .groups = "drop" + ) +``` + +Median path length grows strongly from tropical-storm to cat-4+ storms, +whereas lifetime-average forward speed increases more modestly. The +canonical wind / pressure gap between categories is visible. + +## Intensity and forward-speed life-cycles per category + +Plotting all forward-speed and intensity time courses at once is hopeless +(hundreds of irregular noodles). What we actually want is the +*mean curve per intensity stratum* on the normalised lifecycle phase +axis. Evaluate each component on a common phase grid, average within each +stratum, and re-wrap the result as a length-`G` univariate `tfd` so we +can use the standard `plot.tf` / `lines.tf` machinery: + +```{r storms-lifecycle-prep, class.source = "fold-hide"} +phase_grid <- seq(0, 1, length.out = 41) + +# forward speed on normalised time: re-arg each storm's km-speed by +# t / T_i, so all storms share phase domain [0, 1]. +speed_km <- tf_speed(df$track_km) +durations <- df$duration +speed_long <- do.call(rbind, lapply(seq_along(speed_km), function(i) { + data.frame( + id = names(speed_km)[i], + phase = tf_arg(speed_km[i])[[1]] / durations[i], + value = tf_evaluations(speed_km[i])[[1]] + ) +})) +speed_phase <- tfd(speed_long, id = "id", arg = "phase", value = "value", + domain = c(0, 1)) + +# per-stratum mean curve on the regular phase grid, packaged as length-G tfd +stratum_mean_tfd <- function(comp, grp, grid = phase_grid) { + mat <- as.matrix(comp, arg = grid, interpolate = TRUE) + means <- vapply(levels(grp), function(g) { + rows <- which(grp == g) + if (length(rows) < 2L) return(rep(NA_real_, length(grid))) + colMeans(mat[rows, , drop = FALSE], na.rm = TRUE) + }, numeric(length(grid))) + out <- tfd(t(means), arg = grid, domain = c(0, 1)) + names(out) <- levels(grp) + out +} + +wind_avg <- stratum_mean_tfd(tf_component(df$track, "wind"), df$strength) +pres_avg <- stratum_mean_tfd(tf_component(df$track, "pres"), df$strength) +speed_avg <- stratum_mean_tfd(speed_phase, df$strength) +``` + +```{r storms-lifecycle, fig.width = 8.2, fig.height = 6.8} +op <- par(mfrow = c(2, 2), mar = c(4, 4, 2, 1)) +plot(wind_avg, col = pal, lwd = 2, alpha = 1, + xlab = "lifecycle phase", ylab = "wind (knots)", + main = "mean sustained wind") +legend("topright", bty = "n", lwd = 2, col = pal, legend = levels(df$strength), + cex = 0.9, title = "peak intensity") +plot(pres_avg, col = pal, lwd = 2, alpha = 1, + xlab = "lifecycle phase", ylab = "pressure (mbar)", + main = "mean central pressure") +plot(speed_avg, col = pal, lwd = 2, alpha = 1, + xlab = "lifecycle phase", ylab = "forward speed (km/h)", + main = "mean forward speed") +plot.new() +text(0.5, 0.7, "wind and pressure peak\nnear lifecycle phase 0.5;", + adj = 0.5, cex = 1.05) +text(0.5, 0.35, "forward speed peaks\nlate, during recurvature", + adj = 0.5, cex = 1.05) +par(op) +``` + +Three clean stories on a single `tf_mv` object: intensity peaks +mid-life-cycle (around `phase ~ 0.5`) and the gap between categories is +roughly uniform across the cycle; central pressure mirrors wind; forward +speed peaks much later (around `phase ~ 0.75`) and the strongest storms +accelerate the most -- the canonical recurvature signature. + +## Smoothing on normalised time + +Fitting a basis representation on raw real-time data has a tractable +domain-extrapolation problem -- the longest storm runs 600+ hours, but +most tracks end after 100-200 hours, and a shared basis over [0, 600] +extrapolates wildly. On the **normalised phase** version this just +disappears, because every storm spans `[0, 1]`: + +```{r storms-tfb, fig.width = 8.2, fig.height = 4.8, warning = FALSE} +top6 <- df |> arrange(desc(path_km)) |> slice(1:6) |> pull(storm_id) + +# fit a per-component spline basis on (long, lat, wind, pres) over [0, 1] +tb <- tfb_mv(df$track[df$storm_id %in% top6], k = 12, verbose = FALSE) + +# pull just the (long, lat) components out of the 4-d objects so plot.tf_mv +# defaults to the trajectory (long, lat) view +raw_xy <- df$track[df$storm_id %in% top6, , c("long", "lat")] +sm_xy <- tb[, , c("long", "lat")] + +op <- par(mfrow = c(1, 2), mar = c(3.6, 3.6, 2, 1), mgp = c(2, 0.7, 0)) +plot(range(xlim), range(ylim), type = "n", + xlab = "long", ylab = "lat", main = "raw observations") +draw_coast(xlim, ylim) +lines(raw_xy, col = 1:6, lwd = 1.6) +plot(range(xlim), range(ylim), type = "n", + xlab = "long", ylab = "lat", main = "spline-smoothed") +draw_coast(xlim, ylim) +lines(sm_xy, col = 1:6, lwd = 1.6) +par(op) +``` + +The smoothed trajectories pick up the gross recurvature shape without the +6-hourly sampling jitter and -- thanks to the normalised time axis -- +without any extrapolation pathology. The same `tb` object also carries +clean smoothed wind / pressure curves for each storm; for example: + +```{r storms-tfb-intensity, fig.width = 8.2, fig.height = 3.4} +op <- par(mfrow = c(1, 2), mar = c(4, 4, 2, 1)) +plot(tf_component(tb, "wind"), col = 1:6, lwd = 2, + xlab = "lifecycle phase", ylab = "wind (knots)", + main = "smoothed wind") +plot(tf_component(tb, "pres"), col = 1:6, lwd = 2, + xlab = "lifecycle phase", ylab = "pressure (mbar)", + main = "smoothed pressure") +par(op) +``` + +# 4. Recap + +The two case studies above exercised exactly the same surface: + +* construction from per-component `tf` vectors (`tfd_mv(list(...))`), from a 3-d + `[curve, arg, component]` array, or from a long/wide data frame; +* `tf_components()`, `$component`, `tf_ncomp()` for component access; +* facet vs. trajectory plotting (`plot(..., type = ...)`); +* vctrs-native arithmetic / `mean()` / `sd()` returning a length-1 `tf_mv`; +* `tf_arclength()` and `tf_speed()` as geometric primitives on the bundle; +* `tfb_mv()` for smoothing (`basis = "spline"`) or per-component PC + decomposition (`basis = "fpc"`), and `tfb_mfpc()` for joint multivariate + FPCA with a single set of shared scores per curve [@happ2018]; +* the alignment ladder: `tf_reparam_arclength()` for constant-speed (shape-only) + reparametrization, `tf_register()` for shared-warp alignment -- from a single + reference component (`method = "cc"`) or jointly from all components + (`method = "srvf_mv"`) -- and `tf_register_shape()` for full elastic shape + registration, whose `tf_rotations()` / `tf_scales()` accessors expose the + estimated rotations and (template-relative) scale factors; +* full dplyr / tibble integration via the underlying vctrs proxy. + +The common pattern is simple: keep coupled signals bundled as one object, use +component accessors when a scalar or univariate summary is needed, and let the +multivariate methods reuse the existing univariate kernels component-wise. + +# References diff --git a/inst/testdata/make-test-data.R b/inst/testdata/make-test-data.R index 0d52401e..9479099d 100644 --- a/inst/testdata/make-test-data.R +++ b/inst/testdata/make-test-data.R @@ -14,3 +14,11 @@ irr_list <- tf_evaluations(irr) irr_matrix <- suppressWarnings(as.matrix(irr)) irr_df <- as.data.frame(irr, unnest = TRUE) narrow_df <- as.data.frame(narrow, unnest = TRUE) + +# vector-valued (f: R -> R^d) fixtures: 2-d "movement" trajectories +traj_mv <- tfd_mv(list(x = tf_rgp(10), y = tf_rgp(10))) +traj_mv_irr <- tfd_mv(list( + x = tf_sparsify(tf_rgp(10)), + y = tf_sparsify(tf_rgp(10)) +)) +traj_mvb <- tfb_mv(traj_mv, verbose = FALSE) diff --git a/man/converters-mv.Rd b/man/converters-mv.Rd new file mode 100644 index 00000000..858d9e61 --- /dev/null +++ b/man/converters-mv.Rd @@ -0,0 +1,75 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/convert-mv.R +\name{converters-mv} +\alias{converters-mv} +\alias{as.matrix.tf_mv} +\alias{as.data.frame.tf_mv} +\title{Coerce a \code{tf_mv} to a matrix or data.frame} +\usage{ +\method{as.matrix}{tf_mv}(x, arg, interpolate = FALSE, ...) + +\method{as.data.frame}{tf_mv}( + x, + row.names = NULL, + optional = FALSE, + unnest = FALSE, + long = TRUE, + arg = NULL, + interpolate = TRUE, + ... +) +} +\arguments{ +\item{x}{a \code{tf_mv} object.} + +\item{arg}{optional evaluation grid (numeric vector or per-curve list). +When \code{NULL} (default for \code{as.data.frame.tf_mv}; equivalent to "missing" +for \code{as.matrix.tf_mv}), the per-curve union of all components' native +argument grids is used.} + +\item{interpolate}{forwarded to the underlying \code{tf} evaluation. \code{tfb} +components are always interpolated.} + +\item{...}{passed through.} + +\item{row.names, optional}{standard \code{as.data.frame} plumbing.} + +\item{unnest}{if \code{TRUE}, return an evaluated data.frame (see \code{long}); if +\code{FALSE} (default), a one-column data.frame wrapping \code{x}.} + +\item{long}{when \code{unnest = TRUE}, controls the schema. \code{long = TRUE} +(default) returns a 4-column data.frame +\verb{(id, arg, component, value)} -- the multivariate analogue of the +univariate \verb{(id, arg, value)} contract, with \code{component} a \code{factor} over +\code{attr(x, "comp_names")}. \code{long = FALSE} returns the wide +\verb{(id, arg, comp1, ..., compd)} schema.} +} +\value{ +a 3-d array (\code{as.matrix.tf_mv}) or a data.frame (\code{as.data.frame.tf_mv}). +} +\description{ +\code{as.matrix.tf_mv} returns a \strong{3-d} array \verb{[curve, arg, component]} -- the +natural shape for a vector-valued evaluation. This is deliberately +different from \code{\link{as.matrix.tf}} (2-d, \verb{[curve, arg]}); see \verb{@seealso}. +} +\details{ +\code{as.data.frame.tf_mv} returns either a single-column wrapping data.frame +(\code{unnest = FALSE}, for storing a \code{tf_mv} in a tibble column) or an +evaluated long/wide data.frame (\code{unnest = TRUE}). +} +\examples{ +arg <- seq(0, 1, length.out = 11) +xf <- tfd(t(sapply(1:3, function(i) sin(2 * pi * arg + i))), arg = arg) +yf <- tfd(t(sapply(1:3, function(i) cos(2 * pi * arg + i))), arg = arg) +mv <- tfd_mv(list(x = xf, y = yf)) +dim(as.matrix(mv)) +head(as.data.frame(mv, unnest = TRUE)) +} +\seealso{ +\code{\link[=as.matrix.tf]{as.matrix.tf()}} (2-d sibling), \code{\link[=as.data.frame.tf]{as.data.frame.tf()}} (univariate +contract), \code{\link[=tf_evaluate]{tf_evaluate()}}. + +Other tidyfun converters: +\code{\link[=as.data.frame.tf]{as.data.frame.tf()}} +} +\concept{tidyfun converters} diff --git a/man/converters.Rd b/man/converters.Rd index 00f0ac7d..5c0152c4 100644 --- a/man/converters.Rd +++ b/man/converters.Rd @@ -58,4 +58,8 @@ as.matrix(f) fun <- as.function(f) fun(c(0, 0.5, 1)) } +\seealso{ +Other tidyfun converters: +\code{\link{converters-mv}} +} \concept{tidyfun converters} diff --git a/man/ensure_list.Rd b/man/ensure_list.Rd index 52919678..48415027 100644 --- a/man/ensure_list.Rd +++ b/man/ensure_list.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/utils.R \name{ensure_list} \alias{ensure_list} -\title{Turns any object into a list} +\title{Wrap a non-list object in a list} \usage{ ensure_list(x) } @@ -10,18 +10,22 @@ ensure_list(x) \item{x}{any input.} } \value{ -\code{x} turned into a list. +\code{x} if it is a list, otherwise \code{list(x)}. } \description{ -See above. +Returns \code{x} unchanged if it is already a list, otherwise wraps it in +a one-element list. Used internally to normalize \code{arg} inputs that may +be either a single numeric vector or a list of per-curve vectors. } \examples{ ensure_list(1:3) -ensure_list(list(1, 2)) +ensure_list(list(1:3, 4:6)) } \seealso{ -Other tidyfun developer tools: -\code{\link{prep_plotting_arg}()}, -\code{\link{unique_id}()} +Other tidyfun utility functions: +\code{\link[=in_range]{in_range()}}, +\code{\link[=tf_arg]{tf_arg()}}, +\code{\link[=tf_zoom]{tf_zoom()}}, +\code{\link[=unique_id]{unique_id()}} } -\concept{tidyfun developer tools} +\concept{tidyfun utility functions} diff --git a/man/fivenum.Rd b/man/fivenum.Rd index 9083847d..91901880 100644 --- a/man/fivenum.Rd +++ b/man/fivenum.Rd @@ -37,7 +37,7 @@ f <- tf_rgp(7) fivenum(f) } \seealso{ -Other tidyfun summary functions: +Other tidyfun summary functions: \code{\link{functionwise}}, \code{\link{tfsummaries}} } diff --git a/man/fpc_wsvd.Rd b/man/fpc_wsvd.Rd index 129e16a9..829be4f9 100644 --- a/man/fpc_wsvd.Rd +++ b/man/fpc_wsvd.Rd @@ -44,42 +44,40 @@ orthonormal eigen\emph{functions} \eqn{\phi_j(t)}, not eigen\emph{vectors} given quadrature weights \eqn{\Delta_i}, not \eqn{\phi_j'\phi_j = \sum_i \phi_j(t_i)^2 = 1};\cr \eqn{\int_T \phi_j(t) \phi_k(t) dt = 0} not -\eqn{\phi_j'\phi_k = \sum_i \phi_j(t_i)\phi_k(t_i) = 0}, -c.f. \code{mogsa::wsvd()}.\cr -For incomplete data, this uses an adaptation of \code{softImpute::softImpute()}, -see references. Note that will not work well for data on a common grid if more -than a few percent of data points are missing, and it breaks down completely -for truly irregular data with no/few common timepoints, even if observed very -densely. For such data, either re-evaluate on a common grid first or use more -advanced FPCA approaches like \code{refund::fpca_sc()}, +\eqn{\phi_j'\phi_k = \sum_i \phi_j(t_i)\phi_k(t_i) = 0}.\cr +For incomplete data, this uses a soft-impute iterative-SVD scheme +(see references). Note that this will not work well for data on a common grid +if more than a few percent of data points are missing, and it breaks down +completely for truly irregular data with no/few common timepoints, even if +observed very densely. For such data, either re-evaluate on a common grid +first or use more advanced FPCA approaches like \code{refund::fpca_sc()}, see last example for \code{\link[=tfb_fpc]{tfb_fpc()}} } +\examples{ +arg <- seq(0, 1, length.out = 41) +data <- t(sapply(seq(0, 2 * pi, length.out = 10), + function(p) sin(2 * pi * arg + p))) +fpc <- fpc_wsvd(data, arg = arg, pve = 0.99) +fpc$npc +} \references{ -code adapted from / inspired by \code{mogsa::wsvd()} by Chen Meng -and \code{softImpute::softImpute()} by Trevor Hastie and Rahul Mazumder.\cr -Meng C (2023). -\emph{mogsa: Multiple omics data integrative clustering and gene set analysis}. -\doi{10.18129/B9.bioc.mogsa}, \url{https://bioconductor.org/packages/mogsa}. - +the soft-impute SVD algorithm for incomplete data is described in Mazumder, Rahul, Hastie, Trevor, Tibshirani, Robert (2010). \dQuote{Spectral Regularization Algorithms for Learning Large Incomplete Matrices.} \emph{The Journal of Machine Learning Research}, \bold{11}, 2287--2322. - -Hastie T, Mazumder R (2021). -\emph{softImpute: Matrix Completion via Iterative Soft-Thresholded SVD}. -\doi{10.32614/CRAN.package.softImpute}, R package version 1.4-1, \url{https://CRAN.R-project.org/package=softImpute}. } \seealso{ -Other tfb-class: -\code{\link{tfb}}, -\code{\link{tfb_fpc}()}, -\code{\link{tfb_spline}()} +Other tfb-class: +\code{\link[=tfb]{tfb()}}, +\code{\link[=tfb_fpc]{tfb_fpc()}}, +\code{\link[=tfb_spline]{tfb_spline()}} -Other tfb_fpc-class: -\code{\link{tfb_fpc}()} +Other tfb_fpc-class: +\code{\link[=tfb_fpc]{tfb_fpc()}}, +\code{\link[=tfb_mfpc]{tfb_mfpc()}} } \author{ -Trevor Hastie, Rahul Mazumder, Chen Meng, Fabian Scheipl +Fabian Scheipl } \concept{tfb-class} \concept{tfb_fpc-class} diff --git a/man/functionwise.Rd b/man/functionwise.Rd index 51330f51..4c377719 100644 --- a/man/functionwise.Rd +++ b/man/functionwise.Rd @@ -8,8 +8,11 @@ \alias{tf_fmedian} \alias{tf_frange} \alias{tf_fmean} +\alias{tf_fmean.tf_mv} \alias{tf_fvar} +\alias{tf_fvar.tf_mv} \alias{tf_fsd} +\alias{tf_fsd.tf_mv} \alias{tf_crosscov} \alias{tf_crosscor} \title{Summarize each \code{tf} in a vector (function-wise)} @@ -26,10 +29,16 @@ tf_frange(x, arg = tf_arg(x), na.rm = FALSE, finite = FALSE) tf_fmean(x, arg = tf_arg(x)) +\method{tf_fmean}{tf_mv}(x, arg = tf_arg(x)) + tf_fvar(x, arg = tf_arg(x)) +\method{tf_fvar}{tf_mv}(x, arg = tf_arg(x)) + tf_fsd(x, arg = tf_arg(x)) +\method{tf_fsd}{tf_mv}(x, arg = tf_arg(x)) + tf_crosscov(x, y, arg = tf_arg(x)) tf_crosscor(x, y, arg = tf_arg(x)) @@ -84,12 +93,18 @@ and \code{values} internally, so the function/formula in \code{.f} gets a data.f \item \code{tf_fmean()}: mean of each function: \eqn{\tfrac{1}{|T|}\int_T x_i(t) dt} +\item \code{tf_fmean(tf_mv)}: component-wise means of each vector-valued function + \item \code{tf_fvar()}: variance of each function: \eqn{\tfrac{1}{|T|}\int_T (x_i(t) - \bar x(t))^2 dt} +\item \code{tf_fvar(tf_mv)}: component-wise variances of each vector-valued function + \item \code{tf_fsd()}: standard deviation of each function: \eqn{\sqrt{\tfrac{1}{|T|}\int_T (x_i(t) - \bar x(t))^2 dt}} +\item \code{tf_fsd(tf_mv)}: component-wise standard deviations of each vector-valued function + \item \code{tf_crosscov()}: cross-covariances between two functional vectors: \eqn{\tfrac{1}{|T|}\int_T (x_i(t) - \bar x(t)) (y_i(t)-\bar y(t)) dt} @@ -118,8 +133,8 @@ tf_crosscor(x, -x) tf_crosscov(x, x) == tf_fvar(x) } \seealso{ -Other tidyfun summary functions: -\code{\link{fivenum}()}, +Other tidyfun summary functions: +\code{\link[=fivenum]{fivenum()}}, \code{\link{tfsummaries}} } \concept{tidyfun summary functions} diff --git a/man/gait.Rd b/man/gait.Rd index 8503482e..d6a191e3 100644 --- a/man/gait.Rd +++ b/man/gait.Rd @@ -8,8 +8,8 @@ A data frame with 39 rows and 3 variables: \describe{ \item{subject_id}{subject identifier} -\item{knee_angle}{knee joint angles in degrees} -\item{hip_angle}{hip joint angle in degrees} +\item{knee_angle}{a \code{tfd} column of knee-joint angles (degrees)} +\item{hip_angle}{a \code{tfd} column of hip-joint angles (degrees)} } } \usage{ @@ -17,10 +17,13 @@ gait } \description{ Hip and knee angle measurements in degrees through a 20-point movement cycle -for 39 boys. The data represents the angular positions of hip and knee +for 39 children. The data represents the angular positions of hip and knee joints during normal walking gait, captured at evenly spaced time points throughout the gait cycle. } +\details{ +Data is also included in the \CRANpkg{fda} package in another format. +} \examples{ head(gait) } @@ -28,7 +31,5 @@ head(gait) Olshen, A R, Biden, N E, Wyatt, P M, Sutherland, H D (1989). \dQuote{Gait Analysis and the Bootstrap.} \emph{The Annals of Statistics}, \bold{17}(4), 1419--1440. - -Data is also include in the \code{datasets} package in another format. } \keyword{datasets} diff --git a/man/growth.Rd b/man/growth.Rd index 79f3da38..acfa141f 100644 --- a/man/growth.Rd +++ b/man/growth.Rd @@ -7,8 +7,8 @@ \format{ A data frame with 93 rows and 2 variables: \describe{ -\item{gender}{sex of the subject (boy/girl)} -\item{height}{height in centimeters} +\item{gender}{sex of the subject, a factor with levels \code{female}/\code{male}} +\item{height}{a \code{tfd} column of height-over-age curves (cm)} } } \usage{ @@ -20,7 +20,7 @@ of the Berkeley Growth Study. The data tracks physical development over time with measurements at 31 different ages that are not equally spaced. } \details{ -Data is also include in the \CRANpkg{fda} package in another format. +Data is also included in the \CRANpkg{fda} package in another format. } \examples{ head(growth) @@ -29,7 +29,8 @@ head(growth) Ramsay, O. J, Hooker, Giles, Graves, Spencer (2009). \emph{Functional Data Analysis with R and MATLAB}, series Use R!, 1 edition. Springer New York, New York. -ISBN 978-0-387-98184-0, \doi{10.1007/978-0-387-98185-7}. +ISBN 978-0-387-98184-0. +\doi{10.1007/978-0-387-98185-7}. Ramsay, O. J, Silverman, W. B (2005). \emph{Functional Data Analysis}, series Springer Series in Statistics, 2nd edition. diff --git a/man/in_range.Rd b/man/in_range.Rd index 3da79ca6..a9593724 100644 --- a/man/in_range.Rd +++ b/man/in_range.Rd @@ -27,8 +27,10 @@ in_range(1:10, c(3, 7)) 1:10 \%inr\% c(3, 7) } \seealso{ -Other tidyfun utility functions: -\code{\link{tf_arg}()}, -\code{\link{tf_zoom}()} +Other tidyfun utility functions: +\code{\link[=ensure_list]{ensure_list()}}, +\code{\link[=tf_arg]{tf_arg()}}, +\code{\link[=tf_zoom]{tf_zoom()}}, +\code{\link[=unique_id]{unique_id()}} } \concept{tidyfun utility functions} diff --git a/man/pinch.Rd b/man/pinch.Rd index 32ec428a..9cbfa37c 100644 --- a/man/pinch.Rd +++ b/man/pinch.Rd @@ -5,7 +5,7 @@ \alias{pinch} \title{Pinch force data} \format{ -An object of class \code{tfd_reg} (inherits from \code{tfd}, \code{tf}, \code{vctrs_vctr}, \code{list}) of length 20. +A \code{tfd} vector of length 20 containing the pinch-force curves. } \usage{ pinch @@ -16,7 +16,7 @@ recorded every 2 milliseconds over 300 milliseconds. The data captures the dynamics of finger pinch force applied during controlled motor tasks. } \details{ -Data is also include in the \CRANpkg{fda} package in another format. +Data is also included in the \CRANpkg{fda} package in another format. } \examples{ pinch @@ -25,7 +25,8 @@ pinch Ramsay, O. J, Hooker, Giles, Graves, Spencer (2009). \emph{Functional Data Analysis with R and MATLAB}, series Use R!, 1 edition. Springer New York, New York. -ISBN 978-0-387-98184-0, \doi{10.1007/978-0-387-98185-7}. +ISBN 978-0-387-98184-0. +\doi{10.1007/978-0-387-98185-7}. Ramsay, O. J, Silverman, W. B (2005). \emph{Functional Data Analysis}, series Springer Series in Statistics, 2nd edition. diff --git a/man/plot.tf_mv.Rd b/man/plot.tf_mv.Rd new file mode 100644 index 00000000..3f6786de --- /dev/null +++ b/man/plot.tf_mv.Rd @@ -0,0 +1,57 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/plot-mv.R +\name{plot.tf_mv} +\alias{plot.tf_mv} +\alias{lines.tf_mv} +\title{Plot vector-valued functional data} +\usage{ +\method{plot}{tf_mv}(x, y, ..., type = NULL) + +\method{lines}{tf_mv}(x, ..., type = NULL) +} +\arguments{ +\item{x}{a \code{tf_mv} object.} + +\item{y}{ignored.} + +\item{...}{passed to the underlying plotting calls. Per-curve graphical +parameters (\code{col}, \code{lty}, \code{lwd}, ...) are recycled across curves.} + +\item{type}{\code{"trajectory"} or \code{"facet"}. Defaults to \code{"trajectory"} for +two-component (\code{d == 2}) objects and to \code{"facet"} otherwise.} +} +\value{ +\code{x}, invisibly. +} +\description{ +Two simple display modes for \code{tf_mv} objects: \code{"facet"} draws one panel per +output dimension (delegating to the univariate \code{\link[=plot.tf]{plot.tf()}}); +\code{"trajectory"} (only for \code{d == 2}) draws the curves in the plane, i.e. +\eqn{y(t)} against \eqn{x(t)} -- the natural view for movement data. +} +\details{ +In \code{"trajectory"} mode the two components must be paired at common argument +values to form \eqn{(x(t), y(t))} points. When the components are sampled on +different (or per-curve irregular) grids they are therefore evaluated on the +union of their argument grids with \code{interpolate = TRUE} (values outside a +component's observed range become \code{NA} and are skipped). For components that +already share a grid this is a no-op. +} +\examples{ +arg <- seq(0, 1, length.out = 31) +xf <- tfd(t(sapply(1:5, function(i) sin(2 * pi * arg + i / 5))), arg = arg) +yf <- tfd(t(sapply(1:5, function(i) cos(2 * pi * arg + i / 5))), arg = arg) +mv <- tfd_mv(list(x = xf, y = yf)) +plot(mv, type = "trajectory") +plot(mv, type = "facet") +} +\seealso{ +Other tf_mv-class: +\code{\link[=tf_arclength]{tf_arclength()}}, +\code{\link{tf_geom}}, +\code{\link{tf_mv_methods}}, +\code{\link[=tfb_mfpc]{tfb_mfpc()}}, +\code{\link[=tfb_mv]{tfb_mv()}}, +\code{\link[=tfd_mv]{tfd_mv()}} +} +\concept{tf_mv-class} diff --git a/man/prep_plotting_arg.Rd b/man/prep_plotting_arg.Rd index 994b5283..8f23935f 100644 --- a/man/prep_plotting_arg.Rd +++ b/man/prep_plotting_arg.Rd @@ -21,9 +21,4 @@ a semi-regular grid rounded down to appropriate resolution. f <- tfd(sin(seq(0, 2 * pi, length.out = 21)), arg = seq(0, 1, length.out = 21)) prep_plotting_arg(f, n_grid = 50) } -\seealso{ -Other tidyfun developer tools: -\code{\link{ensure_list}()}, -\code{\link{unique_id}()} -} \concept{tidyfun developer tools} diff --git a/man/savgol.Rd b/man/savgol.Rd new file mode 100644 index 00000000..c9d376b2 --- /dev/null +++ b/man/savgol.Rd @@ -0,0 +1,29 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/smooth.R +\name{savgol} +\alias{savgol} +\title{Savitzky-Golay smoothing filter} +\usage{ +savgol(T, fl, forder = 4, dorder = 0) +} +\arguments{ +\item{T}{a numeric vector to smooth.} + +\item{fl}{filter window length (odd integer > 1, must be greater than +\code{forder}).} + +\item{forder}{polynomial order of the local fit (non-negative integer, +default 4).} + +\item{dorder}{derivative order (non-negative integer not greater than +\code{forder}, default 0).} +} +\value{ +a smoothed numeric vector of the same length as \code{T}. +} +\description{ +Local polynomial least-squares smoother. Re-implementation of +\code{pracma::savgol()} to avoid the extra dependency; numerically equivalent +(Savitzky & Golay 1964). +} +\keyword{internal} diff --git a/man/tf-package.Rd b/man/tf-package.Rd index cb771281..6236612e 100644 --- a/man/tf-package.Rd +++ b/man/tf-package.Rd @@ -40,6 +40,7 @@ Useful links: Authors: \itemize{ + \item Fabian Scheipl \email{fabian.scheipl@googlemail.com} (\href{https://orcid.org/0000-0001-8172-3603}{ORCID}) [copyright holder] \item Jeff Goldsmith \item Maximilian Mücke (\href{https://orcid.org/0009-0000-9432-9795}{ORCID}) } @@ -48,9 +49,6 @@ Other contributors: \itemize{ \item Julia Wrobel (\href{https://orcid.org/0000-0001-6783-1421}{ORCID}) [contributor] \item Sebastian Fischer (\href{https://orcid.org/0000-0002-9609-3197}{ORCID}) [contributor] - \item Trevor Hastie (softImpute author) [contributor] - \item Rahul Mazumder (softImpute author) [contributor] - \item Chen Meng (mogsa author) [contributor] } } diff --git a/man/tf_align.Rd b/man/tf_align.Rd index 4c23ce2f..f5e5c5f0 100644 --- a/man/tf_align.Rd +++ b/man/tf_align.Rd @@ -39,11 +39,12 @@ aligned <- tf_align(x, warps) plot(aligned, col = 1:3) } \seealso{ -Other registration functions: -\code{\link{tf_estimate_warps}()}, -\code{\link{tf_landmarks_extrema}()}, -\code{\link{tf_register}()}, +Other registration functions: +\code{\link[=tf_estimate_warps]{tf_estimate_warps()}}, +\code{\link[=tf_landmarks_extrema]{tf_landmarks_extrema()}}, +\code{\link[=tf_register]{tf_register()}}, +\code{\link[=tf_register_shape]{tf_register_shape()}}, \code{\link{tf_registration}}, -\code{\link{tf_warp}()} +\code{\link[=tf_warp]{tf_warp()}} } \concept{registration functions} diff --git a/man/tf_approx.Rd b/man/tf_approx.Rd index edbfb4d2..5b20e51d 100644 --- a/man/tf_approx.Rd +++ b/man/tf_approx.Rd @@ -49,7 +49,7 @@ The list: \itemize{ \item \code{tf_approx_linear} for linear interpolation without extrapolation (i.e., \code{\link[zoo:na.approx]{zoo::na.approx()}} with \code{na.rm = FALSE}) -- this is the default, -\item \code{tf_approx_spline} for cubic spline interpolation, (i.e., \code{\link[zoo:na.approx]{zoo::na.spline()}} +\item \code{tf_approx_spline} for cubic spline interpolation, (i.e., \code{\link[zoo:na.spline]{zoo::na.spline()}} with \code{na.rm = FALSE}), \item \code{tf_approx_none} in order to not inter-/extrapolate ever (i.e., \code{\link[zoo:na.fill]{zoo::na.fill()}} with \code{fill = NA}) \item \code{tf_approx_fill_extend} for linear interpolation and constant extrapolation @@ -77,28 +77,8 @@ tf_approx_linear( \seealso{ \code{\link[=tfd]{tfd()}} -Other tidyfun inter/extrapolation functions: -\code{\link{tf_evaluate}()}, -\code{\link{tf_interpolate}()} - -Other tidyfun inter/extrapolation functions: -\code{\link{tf_evaluate}()}, -\code{\link{tf_interpolate}()} - -Other tidyfun inter/extrapolation functions: -\code{\link{tf_evaluate}()}, -\code{\link{tf_interpolate}()} - -Other tidyfun inter/extrapolation functions: -\code{\link{tf_evaluate}()}, -\code{\link{tf_interpolate}()} - -Other tidyfun inter/extrapolation functions: -\code{\link{tf_evaluate}()}, -\code{\link{tf_interpolate}()} - -Other tidyfun inter/extrapolation functions: -\code{\link{tf_evaluate}()}, -\code{\link{tf_interpolate}()} +Other tidyfun inter/extrapolation functions: +\code{\link[=tf_evaluate]{tf_evaluate()}}, +\code{\link[=tf_interpolate]{tf_interpolate()}} } \concept{tidyfun inter/extrapolation functions} diff --git a/man/tf_arclength.Rd b/man/tf_arclength.Rd new file mode 100644 index 00000000..51128de1 --- /dev/null +++ b/man/tf_arclength.Rd @@ -0,0 +1,79 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/geometry-mv.R +\name{tf_arclength} +\alias{tf_arclength} +\alias{tf_arclength.default} +\alias{tf_arclength.tf_mv} +\title{Arc length of vector-valued functional data} +\usage{ +tf_arclength(f, ...) + +\method{tf_arclength}{default}(f, ...) + +\method{tf_arclength}{tf_mv}( + f, + arg = NULL, + lower = tf_domain(f)[1], + upper = tf_domain(f)[2], + definite = TRUE, + method = c("polyline", "derive"), + ... +) +} +\arguments{ +\item{f}{a \code{tf_mv} object.} + +\item{...}{forwarded to \code{\link[=tf_integrate]{tf_integrate()}} when \code{method = "derive"}.} + +\item{arg, lower, upper}{optional evaluation/integration grid and limits.} + +\item{definite}{\code{TRUE} (default) returns a numeric vector of total arc +lengths per curve; \code{FALSE} returns the cumulative arc length +\eqn{s(t) = \int_a^t \lVert f'(u) \rVert\, du} as a univariate \code{tfd}.} + +\item{method}{\code{"polyline"} (default) or \code{"derive"}.} +} +\value{ +a numeric vector (definite) or a univariate \code{tfd} (indefinite). +} +\description{ +For a vector-valued curve \verb{f: [a, b] -> R^d}, the arc length is +\eqn{\int_a^b \lVert f'(t) \rVert\, dt} -- the length traced out by \code{f} in +\code{R^d}. +} +\details{ +Two methods are supported: +\itemize{ +\item \strong{\code{"polyline"}} (default): sum of the Euclidean lengths of the line +segments between consecutive sample points (in \code{R^d}). Each curve is +evaluated on the union of its components' argument grids (or a supplied +\code{arg}) and the segment-sum is computed in closed form. For raw \code{tfd_mv} +data this is more accurate than \code{"derive"} because it avoids the +compounding error of numerical differentiation followed by quadrature. +\item \strong{\code{"derive"}}: composes the existing verbs -- per-component +differentiation (\code{\link[=tf_derive]{tf_derive()}}), pointwise speed \code{\link[=tf_speed]{tf_speed()}}, then +\code{\link[=tf_integrate]{tf_integrate()}}. Best for \code{tfb_mv} (analytical derivatives) or when a +custom \code{tf_integrate(...)} argument is needed. +} +} +\examples{ +# unit circle parameterised on [0, 1] -- arc length is 2*pi +t <- seq(0, 1, length.out = 401) +circ <- tfd_mv(list( + x = tfd(matrix(cos(2 * pi * t), nrow = 1), arg = t), + y = tfd(matrix(sin(2 * pi * t), nrow = 1), arg = t) +)) +tf_arclength(circ) +tf_arclength(circ, lower = 0, upper = 0.25) # quarter -> pi/2 +tf_arclength(circ, definite = FALSE) # cumulative s(t) +} +\seealso{ +Other tf_mv-class: +\code{\link[=plot.tf_mv]{plot.tf_mv()}}, +\code{\link{tf_geom}}, +\code{\link{tf_mv_methods}}, +\code{\link[=tfb_mfpc]{tfb_mfpc()}}, +\code{\link[=tfb_mv]{tfb_mv()}}, +\code{\link[=tfd_mv]{tfd_mv()}} +} +\concept{tf_mv-class} diff --git a/man/tf_depth.Rd b/man/tf_depth.Rd index c1e2c5fa..1af5179e 100644 --- a/man/tf_depth.Rd +++ b/man/tf_depth.Rd @@ -93,7 +93,7 @@ Bočinec, Filip, Nagy, Stanislav, Yeon, Hyemin (2026). \emph{arXiv preprint arXiv:2602.22877}. } \seealso{ -Other tidyfun ordering and ranking functions: +Other tidyfun ordering and ranking functions: \code{\link{tf_minmax}}, \code{\link{tf_order}} } diff --git a/man/tf_derive.Rd b/man/tf_derive.Rd index cf851e4a..afb7be36 100644 --- a/man/tf_derive.Rd +++ b/man/tf_derive.Rd @@ -84,7 +84,7 @@ Fornberg, Bengt (1988). \emph{Mathematics of Computation}, \bold{51}(184), 699--706. } \seealso{ -Other tidyfun calculus functions: -\code{\link{tf_integrate}()} +Other tidyfun calculus functions: +\code{\link[=tf_integrate]{tf_integrate()}} } \concept{tidyfun calculus functions} diff --git a/man/tf_estimate_warps.Rd b/man/tf_estimate_warps.Rd index 19d7570b..dfe82dbd 100644 --- a/man/tf_estimate_warps.Rd +++ b/man/tf_estimate_warps.Rd @@ -8,7 +8,7 @@ tf_estimate_warps( x, ..., template = NULL, - method = c("srvf", "cc", "affine", "landmark"), + method = c("srvf", "srvf_mv", "cc", "affine", "landmark"), max_iter = 3L, tol = 0.01 ) @@ -27,6 +27,9 @@ Not used for \code{method = "landmark"}.} \itemize{ \item \code{"srvf"}: Square Root Velocity Framework (elastic registration). For details, see \code{\link[fdasrvf:time_warping]{fdasrvf::time_warping()}}. Default template is the Karcher mean. +\item \code{"srvf_mv"}: true multivariate SRVF time registration for \code{tf_mv} +objects on a regular shared grid. This method estimates one shared warp +from all components jointly and does not rotate or rescale curves. \item \code{"cc"}: continuous-criterion registration via a tf-native dense-grid optimizer with monotone spline warps. Default template is the arithmetic mean. @@ -140,12 +143,13 @@ plot(warps) \dontshow{\}) # examplesIf} } \seealso{ -Other registration functions: -\code{\link{tf_align}()}, -\code{\link{tf_landmarks_extrema}()}, -\code{\link{tf_register}()}, +Other registration functions: +\code{\link[=tf_align]{tf_align()}}, +\code{\link[=tf_landmarks_extrema]{tf_landmarks_extrema()}}, +\code{\link[=tf_register]{tf_register()}}, +\code{\link[=tf_register_shape]{tf_register_shape()}}, \code{\link{tf_registration}}, -\code{\link{tf_warp}()} +\code{\link[=tf_warp]{tf_warp()}} } \author{ Maximilian Muecke, Fabian Scheipl, Claude Opus 4.6 diff --git a/man/tf_evaluate.Rd b/man/tf_evaluate.Rd index 38221fcb..88286950 100644 --- a/man/tf_evaluate.Rd +++ b/man/tf_evaluate.Rd @@ -47,8 +47,8 @@ tf_evaluate(f, arg = new_grid) |> str() f[, new_grid] } \seealso{ -Other tidyfun inter/extrapolation functions: -\code{\link{tf_approx_linear}()}, -\code{\link{tf_interpolate}()} +Other tidyfun inter/extrapolation functions: +\code{\link[=tf_approx_linear]{tf_approx_linear()}}, +\code{\link[=tf_interpolate]{tf_interpolate()}} } \concept{tidyfun inter/extrapolation functions} diff --git a/man/tf_geom.Rd b/man/tf_geom.Rd new file mode 100644 index 00000000..8b049925 --- /dev/null +++ b/man/tf_geom.Rd @@ -0,0 +1,102 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/geometry-mv.R +\name{tf_geom} +\alias{tf_geom} +\alias{tf_norm} +\alias{tf_norm.default} +\alias{tf_norm.tf} +\alias{tf_norm.tf_mv} +\alias{tf_speed} +\alias{tf_inner} +\alias{tf_inner.default} +\alias{tf_inner.tf} +\alias{tf_inner.tf_mv} +\alias{tf_distance} +\alias{tf_tangent} +\alias{tf_tangent.default} +\alias{tf_tangent.tf} +\alias{tf_tangent.tf_mv} +\alias{tf_reparam_arclength} +\title{Pointwise norm and inner product for functional data} +\usage{ +tf_norm(f) + +\method{tf_norm}{default}(f) + +\method{tf_norm}{tf}(f) + +\method{tf_norm}{tf_mv}(f) + +tf_speed(f) + +tf_inner(f, g) + +\method{tf_inner}{default}(f, g) + +\method{tf_inner}{tf}(f, g) + +\method{tf_inner}{tf_mv}(f, g) + +tf_distance(f, g) + +tf_tangent(f) + +\method{tf_tangent}{default}(f) + +\method{tf_tangent}{tf}(f) + +\method{tf_tangent}{tf_mv}(f) + +tf_reparam_arclength(f) +} +\arguments{ +\item{f, g}{\code{tf_mv} objects, or univariate \code{tf} (\code{tfd}/\code{tfb}) objects (with +identical \code{d} and component names where two \code{tf_mv} arguments are required).} +} +\value{ +a univariate \code{tfd} for \code{tf_norm}/\code{tf_speed}/\code{tf_inner}/\code{tf_distance}; +\code{tf_tangent} returns a \code{tf_mv} (or a univariate \code{tf} for univariate input) +and \code{tf_reparam_arclength} a \code{tf_mv}. +} +\description{ +Small geometric helpers defined by component-wise composition of the existing +univariate \code{Ops} / \code{Math} machinery: +\itemize{ +\item \code{tf_norm(f)} -- pointwise Euclidean norm \eqn{\lVert f(t) \rVert}; +\item \code{tf_speed(f)} -- pointwise speed \eqn{\lVert f'(t) \rVert}; +\item \code{tf_inner(f, g)} -- pointwise inner product \eqn{\langle f(t), g(t) \rangle}; +\item \code{tf_distance(f, g)} -- pointwise Euclidean distance \eqn{\lVert f(t) - g(t) \rVert}; +\item \code{tf_tangent(f)} -- unit tangent \eqn{f'(t) / \lVert f'(t) \rVert} +(undefined where the speed is zero -- callers get \code{NaN}s there); +\item \code{tf_reparam_arclength(f)} -- re-parametrize the curve at constant speed +(i.e. by its normalized cumulative arc length). +} +} +\details{ +These also apply to \emph{univariate} \code{tfd}/\code{tfb} (treated as scalar-valued curves +\eqn{f: T \to \mathbb{R}}), where they reduce to their one-dimensional +specializations: \eqn{\lVert f(t) \rVert = |f(t)|}, +\eqn{\langle f(t), g(t) \rangle = f(t)\,g(t)}, and the unit tangent +\eqn{f'(t) / |f'(t)| = \mathrm{sign}(f'(t))}. +} +\examples{ +set.seed(1) +f <- tfd_mv(list(x = tf_rgp(2), y = tf_rgp(2))) +tf_norm(f) +tf_speed(f) +tf_distance(f, tfd_mv(list(x = tf_rgp(2), y = tf_rgp(2)))) +# univariate: tf_norm reduces to the pointwise absolute value +u <- tf_rgp(2) +tf_norm(u) +tf_inner(u, tf_rgp(2)) +} +\seealso{ +Other tf_mv-class: +\code{\link[=plot.tf_mv]{plot.tf_mv()}}, +\code{\link[=tf_arclength]{tf_arclength()}}, +\code{\link{tf_mv_methods}}, +\code{\link[=tfb_mfpc]{tfb_mfpc()}}, +\code{\link[=tfb_mv]{tfb_mv()}}, +\code{\link[=tfd_mv]{tfd_mv()}} +} +\concept{tf_mv-class} diff --git a/man/tf_integrate.Rd b/man/tf_integrate.Rd index 8a291e8f..672eb80a 100644 --- a/man/tf_integrate.Rd +++ b/man/tf_integrate.Rd @@ -59,6 +59,17 @@ alternatively for \code{definite = FALSE} the \emph{anti-derivative} on \verb{[lower, upper]}, e.g. a \code{tfd} or \code{tfb} object representing \eqn{F(t) \approx \int^{t}_{lower}f(s)ds}, for \eqn{t \in}\verb{[lower, upper]}, is returned. } +\details{ +When \code{f} is irregular \strong{and} \code{lower} / \code{upper} are not supplied explicitly, +they default to each curve's own observed arg range (i.e., the range of its +\code{tf_arg()} values) rather than the (shared) domain endpoints; for regular \code{tfd} +the defaults remain the domain endpoints. +Without this per-curve default, curves that do not span the full domain +would silently NA-poison the trapezoidal sum, because the default linear +evaluator does not extrapolate. Pass explicit \code{lower} / \code{upper} to integrate +over a fixed sub-interval, or switch to an extrapolating evaluator +(e.g. \code{\link[=tf_approx_fill_extend]{tf_approx_fill_extend()}}) to integrate over the full domain. +} \examples{ arg <- seq(0, 1, length.out = 11) x <- tfd(rbind(arg, arg^2), arg = arg) @@ -67,7 +78,7 @@ anti <- tf_integrate(x, definite = FALSE) tf_arg(anti) } \seealso{ -Other tidyfun calculus functions: -\code{\link{tf_derive}()} +Other tidyfun calculus functions: +\code{\link[=tf_derive]{tf_derive()}} } \concept{tidyfun calculus functions} diff --git a/man/tf_interpolate.Rd b/man/tf_interpolate.Rd index 8b8c1e97..ac21ce2b 100644 --- a/man/tf_interpolate.Rd +++ b/man/tf_interpolate.Rd @@ -4,6 +4,7 @@ \alias{tf_interpolate} \alias{tf_interpolate.tfb} \alias{tf_interpolate.tfd} +\alias{tf_interpolate.tf_mv} \title{Re-evaluate \code{tf}-objects on a new grid of argument values.} \usage{ tf_interpolate(object, arg, ...) @@ -11,6 +12,8 @@ tf_interpolate(object, arg, ...) \method{tf_interpolate}{tfb}(object, arg, ...) \method{tf_interpolate}{tfd}(object, arg, ...) + +\method{tf_interpolate}{tf_mv}(object, arg, ...) } \arguments{ \item{object}{an object inheriting from \code{tf}.} @@ -68,9 +71,9 @@ tf_interpolate(sparse_irregular, arg = seq(0, 1, length.out = 51)) \seealso{ \code{\link[=tf_rebase]{tf_rebase()}}, which is more general. -Other tidyfun inter/extrapolation functions: -\code{\link{tf_approx_linear}()}, -\code{\link{tf_evaluate}()} +Other tidyfun inter/extrapolation functions: +\code{\link[=tf_approx_linear]{tf_approx_linear()}}, +\code{\link[=tf_evaluate]{tf_evaluate()}} } \concept{tidyfun inter/extrapolation functions} \concept{tidyfun setters} diff --git a/man/tf_jiggle.Rd b/man/tf_jiggle.Rd index 3f8e7350..26a009b5 100644 --- a/man/tf_jiggle.Rd +++ b/man/tf_jiggle.Rd @@ -2,12 +2,18 @@ % Please edit documentation in R/rng.R \name{tf_jiggle} \alias{tf_jiggle} +\alias{tf_jiggle.tf_mv} \alias{tf_sparsify} +\alias{tf_sparsify.tf_mv} \title{Make a \code{tf} (more) irregular} \usage{ tf_jiggle(f, amount = 0.4, ...) -tf_sparsify(f, dropout = 0.5) +\method{tf_jiggle}{tf_mv}(f, amount = 0.4, same_arg = TRUE, ...) + +tf_sparsify(f, dropout = 0.5, ...) + +\method{tf_sparsify}{tf_mv}(f, dropout = 0.5, same_arg = TRUE, ...) } \arguments{ \item{f}{a \code{tfd} object.} @@ -19,6 +25,10 @@ than 0.5.} \item{...}{additional args for the returned \code{tfd} in \code{tf_jiggle}.} +\item{same_arg}{for \code{tf_mv} objects, should all components receive the same +random argument-grid changes? Defaults to \code{TRUE}; use \code{FALSE} to jitter or +sparsify each component independently.} + \item{dropout}{what proportion of values of \code{f} to drop, on average. Defaults to half.} } \value{ @@ -39,7 +49,7 @@ set.seed(1) c(is_irreg(x_jig), is_irreg(x_sp)) } \seealso{ -Other tidyfun RNG functions: -\code{\link{tf_rgp}()} +Other tidyfun RNG functions: +\code{\link[=tf_rgp]{tf_rgp()}} } \concept{tidyfun RNG functions} diff --git a/man/landmarks.Rd b/man/tf_landmarks_extrema.Rd similarity index 51% rename from man/landmarks.Rd rename to man/tf_landmarks_extrema.Rd index 8ab76add..8db0f5eb 100644 --- a/man/landmarks.Rd +++ b/man/tf_landmarks_extrema.Rd @@ -2,52 +2,30 @@ % Please edit documentation in R/landmarks.R \name{tf_landmarks_extrema} \alias{tf_landmarks_extrema} -\alias{detect_landmarks} -\alias{cluster_landmarks} -\alias{build_landmark_matrix} \title{Find Extrema Locations in Functional Data} \usage{ tf_landmarks_extrema(x, which = "all", threshold = 0.5, boundary_tol = NULL) - -detect_landmarks(x, arg_list, which) - -cluster_landmarks(features, n, bandwidth, threshold) - -build_landmark_matrix(features, clusters, n, bandwidth) } \arguments{ -\item{x}{tf object (already smoothed if needed)} +\item{x}{a \code{tf} vector.} -\item{which}{character vector: subset of c("max", "min", "zero")} +\item{which}{character: which features to detect. Either \code{"all"} (maxima, +minima, and zero crossings), \code{"both"} (maxima and minima), or any subset +of \code{c("max", "min", "zero")}.} -\item{threshold}{minimum proportion of curves for a cluster to be retained} +\item{threshold}{numeric in (0, 1]: minimum proportion of curves that must +contain a feature for it to be retained as a landmark. Defaults to \code{0.5}.} \item{boundary_tol}{numeric: features within this distance of the domain boundary are dropped (they are redundant with the boundary anchors in landmark registration). Defaults to 2x the grid spacing. Set to \code{0} to keep all features.} - -\item{arg_list}{list of numeric vectors: per-curve evaluation grids} - -\item{features}{list of per-curve data.frames} - -\item{n}{number of curves} - -\item{bandwidth}{matching distance} - -\item{clusters}{data.frame from cluster_landmarks()} } \value{ A numeric matrix with one row per function and one column per landmark, sorted left-to-right on the domain. Has attribute \code{"feature_types"} (character vector of \code{"max"}, \code{"min"}, or \code{"zero"} for each column). Contains \code{NA} where a curve is missing a landmark. - -\code{detect_landmarks}: list of n data.frames with columns (position, type) - -\code{cluster_landmarks}: data.frame with columns: center, type, count, proportion - -\code{build_landmark_matrix}: n x k matrix with feature_types attribute } \description{ Find landmark locations for registration @@ -58,21 +36,6 @@ returns a landmark matrix suitable for \code{\link[=tf_register]{tf_register()}} \code{method = "landmark"}. Uses position-based clustering across curves to establish feature correspondence and majority-count filtering to discard unstable landmarks. - -\itemize{ -\item \code{detect_landmarks} detects local extrema and zero crossings per curve. -} - -\itemize{ -\item \code{cluster_landmarks} clusters within each feature type separately -(max with max, min with min, etc.) to avoid merging adjacent features of -different types. Then combines and sorts by position. -} - -\itemize{ -\item \code{build_landmark_matrix} creates a landmark matrix by matching per-curve -features to clusters. -} } \examples{ t <- seq(0, 1, length.out = 101) @@ -83,12 +46,12 @@ tf_landmarks_extrema(x, "both") \seealso{ \code{\link[=tf_register]{tf_register()}} with \code{method = "landmark"} -Other registration functions: -\code{\link{tf_align}()}, -\code{\link{tf_estimate_warps}()}, -\code{\link{tf_register}()}, +Other registration functions: +\code{\link[=tf_align]{tf_align()}}, +\code{\link[=tf_estimate_warps]{tf_estimate_warps()}}, +\code{\link[=tf_register]{tf_register()}}, +\code{\link[=tf_register_shape]{tf_register_shape()}}, \code{\link{tf_registration}}, -\code{\link{tf_warp}()} +\code{\link[=tf_warp]{tf_warp()}} } \concept{registration functions} -\keyword{internal} diff --git a/man/tf_minmax.Rd b/man/tf_minmax.Rd index cb978901..debd8b45 100644 --- a/man/tf_minmax.Rd +++ b/man/tf_minmax.Rd @@ -45,8 +45,8 @@ max(x, depth = "MHI") \seealso{ \code{\link[=tf_depth]{tf_depth()}}, \code{\link[=rank.tf]{rank.tf()}} -Other tidyfun ordering and ranking functions: -\code{\link{tf_depth}()}, +Other tidyfun ordering and ranking functions: +\code{\link[=tf_depth]{tf_depth()}}, \code{\link{tf_order}} } \concept{tidyfun ordering and ranking functions} diff --git a/man/tf_mv_methods.Rd b/man/tf_mv_methods.Rd new file mode 100644 index 00000000..2983729e --- /dev/null +++ b/man/tf_mv_methods.Rd @@ -0,0 +1,69 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/accessors-mv.R +\name{tf_mv_methods} +\alias{tf_mv_methods} +\alias{tf_ncomp} +\alias{tf_components} +\alias{tf_component} +\alias{tf_component<-} +\title{Accessors and methods for vector-valued functional data} +\usage{ +tf_ncomp(f) + +tf_components(f) + +tf_component(f, which) + +tf_component(f, which) <- value +} +\arguments{ +\item{f}{a \code{tf_mv} object.} + +\item{which}{a component name or index.} + +\item{value}{a univariate \code{tf} vector (replacement) of matching length and +domain.} +} +\value{ +\code{tf_ncomp()}: an integer. \code{tf_components()}: a named list of \code{tf} +vectors. \code{tf_component()}: a single univariate \code{tf} vector. +} +\description{ +Utilities for \code{tf_mv} objects (see \code{\link[=tfd_mv]{tfd_mv()}} / \code{\link[=tfb_mv]{tfb_mv()}}). \code{tf_ncomp()} +returns the number of output dimensions \eqn{d}, \code{tf_components()} the list +of the \code{d} underlying univariate \code{tf} vectors, and \code{tf_component()} extracts +or replaces a single one (also available via the \code{$} operator, e.g. \code{f$x}). +} +\details{ +Most univariate \code{tf} verbs also work on \code{tf_mv} objects by acting on each +component: \code{\link[=tf_rebase]{tf_rebase()}} (and hence \code{tfd_mv}/\code{tfb_mv} conversion), +\code{\link[=tf_derive]{tf_derive()}}, \code{\link[=tf_integrate]{tf_integrate()}} (definite integrals return an \verb{n x d} +matrix), \code{\link[=tf_smooth]{tf_smooth()}} and \code{\link[=tf_zoom]{tf_zoom()}}. Registration +(\code{\link[=tf_register]{tf_register()}} / \code{\link[=tf_estimate_warps]{tf_estimate_warps()}} / \code{\link[=tf_warp]{tf_warp()}} / \code{\link[=tf_align]{tf_align()}}) +estimates a \emph{single, shared} time-warp per curve and applies it jointly to +every component. The registration signal is, by default, the first +component; use \code{ref_component} to pick another component (by name/index), +\code{"norm"} for the pointwise Euclidean norm, or a function mapping the +\code{tf_mv} to a univariate \code{tf} vector. + +\code{is.na()} flags a curve as missing if \strong{any} of its components is missing +(the union, not the intersection), which also drives the \code{na.rm} behaviour +of \code{\link[=mean]{mean()}} / \code{\link[=median]{median()}} etc. +} +\examples{ +f <- tfd_mv(list(x = tf_rgp(3), y = tf_rgp(3))) +tf_ncomp(f) +tf_components(f) +tf_component(f, "y") +f$y +} +\seealso{ +Other tf_mv-class: +\code{\link[=plot.tf_mv]{plot.tf_mv()}}, +\code{\link[=tf_arclength]{tf_arclength()}}, +\code{\link{tf_geom}}, +\code{\link[=tfb_mfpc]{tfb_mfpc()}}, +\code{\link[=tfb_mv]{tfb_mv()}}, +\code{\link[=tfd_mv]{tfd_mv()}} +} +\concept{tf_mv-class} diff --git a/man/tf_mv_unimplemented.Rd b/man/tf_mv_unimplemented.Rd new file mode 100644 index 00000000..5bd68a92 --- /dev/null +++ b/man/tf_mv_unimplemented.Rd @@ -0,0 +1,20 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/mv-stubs.R +\name{tf_mv_unimplemented} +\alias{tf_mv_unimplemented} +\title{Methods registered on vector-valued (\code{tf_mv}) classes} +\description{ +\code{tf_mv} classes (\code{tfd_mv} / \code{tfb_mv}) inherit from \code{"tf"} so that +\code{tf_domain()}, the type predicates (\code{is_tf()}, \code{is_tf_mv()}, ...) and S4 +generic reuse continue to work. \strong{Behaviour} on \code{tf_mv} objects, however, +is supplied \emph{only} by explicitly registered \code{.tf_mv} methods: any generic +without one aborts with a classed \code{tf_mv_method_unimplemented} condition +(e.g. \verb{tf_depth()}, \verb{summary()}). This avoids silent +fall-through to the univariate method, which would otherwise produce +wrong-shape results or deep internal errors. +} +\details{ +Real component-wise semantics (joint vs. per-component, norm-based, ...) are +being designed verb-by-verb in \url{https://github.com/tidyfun/tf/issues/255}. +} +\keyword{internal} diff --git a/man/tf_order.Rd b/man/tf_order.Rd index 98016b94..4e8756ba 100644 --- a/man/tf_order.Rd +++ b/man/tf_order.Rd @@ -87,8 +87,8 @@ rank(x, depth = "MBD") \seealso{ \code{\link[=tf_depth]{tf_depth()}}, \code{\link[=min.tf]{min.tf()}}, \code{\link[=max.tf]{max.tf()}} -Other tidyfun ordering and ranking functions: -\code{\link{tf_depth}()}, +Other tidyfun ordering and ranking functions: +\code{\link[=tf_depth]{tf_depth()}}, \code{\link{tf_minmax}} } \concept{tidyfun ordering and ranking functions} diff --git a/man/tf_register.Rd b/man/tf_register.Rd index 7755ea3d..7345c9e5 100644 --- a/man/tf_register.Rd +++ b/man/tf_register.Rd @@ -8,7 +8,7 @@ tf_register( x, ..., template = NULL, - method = c("srvf", "cc", "affine", "landmark"), + method = c("srvf", "srvf_mv", "cc", "affine", "landmark"), max_iter = 3L, tol = 0.01, store_x = TRUE @@ -28,6 +28,8 @@ Not used for \code{method = "landmark"}.} \item{method}{the registration method to use: \itemize{ \item \code{"srvf"}: Square Root Velocity Framework (elastic registration). +\item \code{"srvf_mv"}: true multivariate SRVF time registration for \code{tf_mv} +objects on a regular shared grid. \item \code{"cc"}: continuous-criterion registration via a tf-native dense-grid optimizer with monotone spline warps. \item \code{"affine"}: affine (linear) registration. @@ -144,12 +146,13 @@ Tucker JD, Wu W, Srivastava A (2013). \doi{10.1016/j.csda.2012.12.001}. } \seealso{ -Other registration functions: -\code{\link{tf_align}()}, -\code{\link{tf_estimate_warps}()}, -\code{\link{tf_landmarks_extrema}()}, +Other registration functions: +\code{\link[=tf_align]{tf_align()}}, +\code{\link[=tf_estimate_warps]{tf_estimate_warps()}}, +\code{\link[=tf_landmarks_extrema]{tf_landmarks_extrema()}}, +\code{\link[=tf_register_shape]{tf_register_shape()}}, \code{\link{tf_registration}}, -\code{\link{tf_warp}()} +\code{\link[=tf_warp]{tf_warp()}} } \author{ Maximilian Muecke, Fabian Scheipl, Claude Opus 4.6 diff --git a/man/tf_register_shape.Rd b/man/tf_register_shape.Rd new file mode 100644 index 00000000..ef410bb1 --- /dev/null +++ b/man/tf_register_shape.Rd @@ -0,0 +1,97 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/register-mv.R +\name{tf_register_shape} +\alias{tf_register_shape} +\title{Register vector-valued curves in elastic shape space} +\usage{ +tf_register_shape( + x, + ..., + template = NULL, + max_iter = 3L, + tol = 0.01, + rotation = TRUE, + scale = TRUE, + mode = c("O", "C"), + lambda = 0, + store_x = TRUE +) +} +\arguments{ +\item{x}{a regular shared-grid \code{tfd_mv} object.} + +\item{...}{additional arguments passed to fdasrvf alignment routines, such as +\code{ncores} or \code{verbose}.} + +\item{template}{optional length-one \code{tf_mv} template. If \code{NULL}, a template +is refined from the first curve by iterative group alignment.} + +\item{max_iter}{integer: maximum template refinement iterations when +\code{template = NULL}. Default \code{3L}.} + +\item{tol}{numeric: relative template convergence tolerance.} + +\item{rotation}{logical: allow rotations? Default \code{TRUE}.} + +\item{scale}{logical: allow scale changes? Default \code{TRUE}.} + +\item{mode}{character: curve mode for fdasrvf. Only open curves (\code{"O"}, +the default) are currently supported; \code{"C"} (closed curves) is reserved +for a future release (see Details).} + +\item{lambda}{numeric: non-negative elastic penalty passed to the fdasrvf +alignment routines. Larger values penalise warping. Default \code{0}.} + +\item{store_x}{logical: store original data in the result object?} +} +\value{ +A \code{tf_shape_registration} object. Access aligned curves with +\code{\link[=tf_aligned]{tf_aligned()}}, inverse warps with \code{\link[=tf_inv_warps]{tf_inv_warps()}}, the template with +\code{\link[=tf_template]{tf_template()}}, rotations with \code{\link[=tf_rotations]{tf_rotations()}}, and scales with +\code{\link[=tf_scales]{tf_scales()}}. +} +\description{ +\code{tf_register_shape()} aligns vector-valued \code{tf_mv} curves with SRVF curve +alignment, optionally allowing rotations and rescaling in addition to time +warping. Unlike \code{\link[=tf_register]{tf_register()}}, this is a shape-registration interface: the +aligned curves live in centered shape space and the result stores rotations +and scale factors. +} +\details{ +The per-curve scale factors returned by \code{\link[=tf_scales]{tf_scales()}} are expressed +\emph{relative to the template}: each is the ratio of the template's SRVF norm to +the curve's own SRVF norm, so a value \verb{> 1} means the curve was scaled up to +match the template and \verb{< 1} means it was scaled down. With \code{template = NULL} +the returned \code{\link[=tf_template]{tf_template()}} is the empirical mean of the aligned shape-space +curves rather than any single input curve. + +Only open curves (\code{mode = "O"}) are supported. Closed curves (\code{mode = "C"}) +additionally optimise over a circular seed shift that the returned warping +functions do not represent, which would make the stored warps inconsistent +with the aligned curves; \code{mode = "C"} is therefore rejected for now. +} +\examples{ +\dontshow{if (rlang::is_installed("fdasrvf")) withAutoprint(\{ # examplesIf} +t <- seq(0, 1, length.out = 51) +base <- rbind(t, t^2) +beta <- array(NA_real_, dim = c(3, length(t), 2)) +for (i in 1:3) { + beta[i,, 1] <- base[1, ] + beta[i,, 2] <- base[2, ] +} +curves <- tfd_mv(beta, arg = t) +reg <- tf_register_shape(curves, max_iter = 1) +tf_rotations(reg) +tf_scales(reg) +\dontshow{\}) # examplesIf} +} +\seealso{ +Other registration functions: +\code{\link[=tf_align]{tf_align()}}, +\code{\link[=tf_estimate_warps]{tf_estimate_warps()}}, +\code{\link[=tf_landmarks_extrema]{tf_landmarks_extrema()}}, +\code{\link[=tf_register]{tf_register()}}, +\code{\link{tf_registration}}, +\code{\link[=tf_warp]{tf_warp()}} +} +\concept{registration functions} diff --git a/man/tf_registration.Rd b/man/tf_registration.Rd index a1b00b7c..7d5c1fc0 100644 --- a/man/tf_registration.Rd +++ b/man/tf_registration.Rd @@ -5,12 +5,18 @@ \alias{tf_aligned} \alias{tf_inv_warps} \alias{tf_template} +\alias{tf_rotations} +\alias{tf_scales} \alias{print.tf_registration} +\alias{print.tf_shape_registration} \alias{summary.tf_registration} \alias{print.summary.tf_registration} +\alias{summary.tf_shape_registration} +\alias{print.summary.tf_shape_registration} \alias{plot.tf_registration} \alias{[.tf_registration} \alias{length.tf_registration} +\alias{[.tf_shape_registration} \title{Registration Result Object} \usage{ tf_aligned(x) @@ -19,20 +25,32 @@ tf_inv_warps(x) tf_template(x) +tf_rotations(x) + +tf_scales(x) + \method{print}{tf_registration}(x, ...) +\method{print}{tf_shape_registration}(x, ...) + \method{summary}{tf_registration}(object, ...) \method{print}{summary.tf_registration}(x, ...) +\method{summary}{tf_shape_registration}(object, ...) + +\method{print}{summary.tf_shape_registration}(x, ...) + \method{plot}{tf_registration}(x, ...) \method{[}{tf_registration}(x, i) \method{length}{tf_registration}(x) + +\method{[}{tf_shape_registration}(x, i) } \arguments{ -\item{x}{a \code{tf_registration} object} +\item{x}{a \code{tf_registration} or \code{tf_shape_registration} object} \item{...}{additional arguments (currently unused)} @@ -46,7 +64,9 @@ For \code{tf_registration} objects: a list with entries \code{registered} (inverse warping functions aligning \code{x} to the template function), the \code{template} function, the original data \code{x} (if \code{store_x = TRUE} was used in \code{\link[=tf_register]{tf_register()}}), and the \code{call} to \code{\link[=tf_register]{tf_register()}} that created the -object. Accessors return the respective component. +object. \code{tf_shape_registration} objects additionally contain the forward +\code{warps}, \code{rotations}, and \code{scales}. Accessors return the respective +component. } \description{ \code{tf_registration} objects store the result of \code{\link[=tf_register]{tf_register()}}, including @@ -54,6 +74,9 @@ the aligned (registered) curves, estimated inverse warping functions \eqn{h_i^{-1}} (observed \eqn{\to} aligned time), and the template used. Use accessors \code{\link[=tf_aligned]{tf_aligned()}}, \code{\link[=tf_inv_warps]{tf_inv_warps()}}, and \code{\link[=tf_template]{tf_template()}} to extract components. +\code{tf_shape_registration} objects, returned by \code{\link[=tf_register_shape]{tf_register_shape()}}, extend +this structure with shape-space rotations and scale factors. Use +\code{\link[=tf_rotations]{tf_rotations()}} and \code{\link[=tf_scales]{tf_scales()}} to extract those components. } \section{Summary diagnostics}{ @@ -102,6 +125,12 @@ curves outside the original domain. Domain-preserving methods (\code{srvf}, \eqn{h_i^{-1}(t)} that map observed time to aligned time (\code{tfd} vector). Use \code{\link[=tf_invert]{tf_invert()}} on the result to obtain forward warps if needed. \item \code{tf_template(x)}: extract the template function (\code{tf} vector of length 1). +\item \code{tf_rotations(x)}: extract the per-curve rotation matrices from a +\code{tf_shape_registration} object. +\item \code{tf_scales(x)}: extract the per-curve scale factors from a +\code{tf_shape_registration} object. Each factor is relative to the template +(template SRVF norm divided by the curve's SRVF norm); see +\code{\link[=tf_register_shape]{tf_register_shape()}}. } } @@ -112,12 +141,13 @@ summary(reg) plot(reg) } \seealso{ -Other registration functions: -\code{\link{tf_align}()}, -\code{\link{tf_estimate_warps}()}, -\code{\link{tf_landmarks_extrema}()}, -\code{\link{tf_register}()}, -\code{\link{tf_warp}()} +Other registration functions: +\code{\link[=tf_align]{tf_align()}}, +\code{\link[=tf_estimate_warps]{tf_estimate_warps()}}, +\code{\link[=tf_landmarks_extrema]{tf_landmarks_extrema()}}, +\code{\link[=tf_register]{tf_register()}}, +\code{\link[=tf_register_shape]{tf_register_shape()}}, +\code{\link[=tf_warp]{tf_warp()}} } \author{ Fabian Scheipl, Claude Opus 4.6 diff --git a/man/tf_rgp.Rd b/man/tf_rgp.Rd index d8c039d9..2b5f67eb 100644 --- a/man/tf_rgp.Rd +++ b/man/tf_rgp.Rd @@ -33,7 +33,7 @@ domain divided by 10.} \item{nugget}{nugget effect for additional white noise / unstructured variability. Defaults to \code{scale/200} (so: very little white noise).} -\item{order}{order of the Matèrn covariance (if used, must be >0), defaults +\item{order}{order of the Matérn covariance (if used, must be >0), defaults to 1.5. The higher, the smoother the process. Evaluation of the covariance function becomes numerically unstable for large (>20) \code{order}, use "squareexp".} @@ -54,7 +54,7 @@ accepts user-defined covariance functions (without "nugget" effect, see \delta_{t}(t')}. \item \emph{Wiener} process: \eqn{Cov(x(t), x(t')) = \min(t',t)/\phi + \sigma^2 \delta_{t}(t')}, -\item \href{https://en.wikipedia.org/wiki/Mat\%C3\%A9rn_covariance_function#Definition}{\emph{Matèrn} process}: +\item \href{https://en.wikipedia.org/wiki/Mat\%C3\%A9rn_covariance_function#Definition}{\emph{Matérn} process}: \eqn{Cov(x(t), x(t')) = \tfrac{2^{1-o}}{\Gamma(o)} (\tfrac{\sqrt{2o}|t-t'|}{\phi})^o \text{Bessel}_o(\tfrac{\sqrt{2o}|t-t'|}{s}) + \sigma^2 \delta_{t}(t')} @@ -68,7 +68,7 @@ accepts user-defined covariance functions (without "nugget" effect, see tf_rgp(2, arg = list(sort(runif(25)), sort(runif(34)))) } \seealso{ -Other tidyfun RNG functions: -\code{\link{tf_jiggle}()} +Other tidyfun RNG functions: +\code{\link[=tf_jiggle]{tf_jiggle()}} } \concept{tidyfun RNG functions} diff --git a/man/tf_smooth.Rd b/man/tf_smooth.Rd index 55ffbd4e..226eca0d 100644 --- a/man/tf_smooth.Rd +++ b/man/tf_smooth.Rd @@ -25,7 +25,8 @@ tf_smooth(x, ...) \item{verbose}{give lots of diagnostic messages? Defaults to \code{TRUE}.} \item{method}{one of \code{"lowess"} (see \code{\link[stats:lowess]{stats::lowess()}}), \code{"rollmean"}, -\code{"rollmedian"} (see \code{\link[zoo:rollmean]{zoo::rollmean()}}) or \code{"savgol"} (see \code{\link[pracma:savgol]{pracma::savgol()}}).} +\code{"rollmedian"} (see \code{\link[zoo:rollmean]{zoo::rollmean()}}) or \code{"savgol"} (a Savitzky-Golay +filter, see \code{\link[=savgol]{savgol()}}).} } \value{ a smoothed version of the input. For some methods/options, the @@ -52,7 +53,6 @@ grid points$>$/10 (i.e., the nearest odd integer to that). } \examples{ library(zoo) -library(pracma) f <- tf_sparsify(tf_jiggle(tf_rgp(4, 201, nugget = 0.05))) f_lowess <- tf_smooth(f, "lowess") # these methods ignore the distances between arg-values: diff --git a/man/tf_splitcombine.Rd b/man/tf_splitcombine.Rd index 291f762d..08d80a63 100644 --- a/man/tf_splitcombine.Rd +++ b/man/tf_splitcombine.Rd @@ -51,7 +51,7 @@ tf_split(x, splits = c(20, 80), include = "right") tfs2_sparse <- tf_sparsify(tfs[[2]]) tfs3_spline <- tfb(tfs[[3]]) tf_combine(tfs[[1]], tfs2_sparse, tfs3_spline) - # combine(.., strict = F) can be used to coalesce different measurements + # combine(.., strict = FALSE) can be used to coalesce different measurements # of the same process over different grids: x1 <- tfd(x, arg = tf_arg(x)[seq(1, 51, by = 2)]) x2 <- tfd(x, arg = tf_arg(x)[seq(2, 50, by = 2)]) diff --git a/man/tf_warp.Rd b/man/tf_warp.Rd index 7b3117d7..9beccf5e 100644 --- a/man/tf_warp.Rd +++ b/man/tf_warp.Rd @@ -78,11 +78,12 @@ plot(tf_inv_warps(reg), col = 1:5); lines(tf_invert(warp), lty = 3, lwd = 1.5, c plot(tf_aligned(reg), col = 1:5, points = FALSE); lines(template, lty = 2) } \seealso{ -Other registration functions: -\code{\link{tf_align}()}, -\code{\link{tf_estimate_warps}()}, -\code{\link{tf_landmarks_extrema}()}, -\code{\link{tf_register}()}, +Other registration functions: +\code{\link[=tf_align]{tf_align()}}, +\code{\link[=tf_estimate_warps]{tf_estimate_warps()}}, +\code{\link[=tf_landmarks_extrema]{tf_landmarks_extrema()}}, +\code{\link[=tf_register]{tf_register()}}, +\code{\link[=tf_register_shape]{tf_register_shape()}}, \code{\link{tf_registration}} } \author{ diff --git a/man/tf_where.Rd b/man/tf_where.Rd index 6074021a..1c385f50 100644 --- a/man/tf_where.Rd +++ b/man/tf_where.Rd @@ -54,6 +54,16 @@ Entries in \code{f} that do not fulfill \code{cond} anywhere yield \code{numeric containing a single entry in \code{f} with columns \code{arg} and \code{value}, so most of the usual \code{dplyr} tricks are available as well, see examples.\cr Any \code{cond}ition evaluates to \code{NA} on \code{NA}-entries in \code{f}. + +For vector-valued (\code{tf_mv}) input, the \code{data.frame} on which \code{cond} is +evaluated has one column per component, named like the components, instead +of a single \code{value} column: conditions are \emph{joint} conditions across +components, e.g. \code{tf_where(f, x > 0 & y < 1)} or +\code{tf_where(f, sqrt(x^2 + y^2) > 1)} for components \code{"x"} and \code{"y"}. To apply +a condition to a single component, extract it first: +\code{tf_where(f[, component = "x"], value > 0)}. All components have to be +observed on a common grid -- use \code{\link[=tf_interpolate]{tf_interpolate()}} to align them first, or +supply \code{arg} explicitly. } \examples{ \dontshow{if (rlang::is_installed("dplyr")) withAutoprint(\{ # examplesIf} @@ -83,6 +93,12 @@ tf_where( tf_where(f, arg > 0.5 & value > 0) # does the function ever exceed? tf_anywhere(f, value > 1) + +# vector-valued input: conditions refer to components by name +fm <- tfd_mv(list(x = tf_rgp(3, 11L), y = tf_rgp(3, 11L))) +tf_where(fm, x > 0 & y < 0) +tf_where(fm, sqrt(x^2 + y^2) > 1, "first") +tf_anywhere(fm, x > y) \dontshow{\}) # examplesIf} } \concept{tidyfun query-functions} diff --git a/man/tf_zoom.Rd b/man/tf_zoom.Rd index 8f4c6284..2c552085 100644 --- a/man/tf_zoom.Rd +++ b/man/tf_zoom.Rd @@ -43,8 +43,10 @@ tf_zoom(x, seq(0, 0.5, length.out = 10), seq(0.5, 1, length.out = 10)) |> lines(col = "blue", lty = 3) } \seealso{ -Other tidyfun utility functions: -\code{\link{in_range}()}, -\code{\link{tf_arg}()} +Other tidyfun utility functions: +\code{\link[=ensure_list]{ensure_list()}}, +\code{\link[=in_range]{in_range()}}, +\code{\link[=tf_arg]{tf_arg()}}, +\code{\link[=unique_id]{unique_id()}} } \concept{tidyfun utility functions} diff --git a/man/tfb.Rd b/man/tfb.Rd index 835749e8..1e19e482 100644 --- a/man/tfb.Rd +++ b/man/tfb.Rd @@ -53,14 +53,9 @@ xb as.tfb(x, basis = "spline", k = 8) } \seealso{ -Other tfb-class: -\code{\link{fpc_wsvd}()}, -\code{\link{tfb_fpc}()}, -\code{\link{tfb_spline}()} - -Other tfb-class: -\code{\link{fpc_wsvd}()}, -\code{\link{tfb_fpc}()}, -\code{\link{tfb_spline}()} +Other tfb-class: +\code{\link[=fpc_wsvd]{fpc_wsvd()}}, +\code{\link[=tfb_fpc]{tfb_fpc()}}, +\code{\link[=tfb_spline]{tfb_spline()}} } \concept{tfb-class} diff --git a/man/tfb_fpc.Rd b/man/tfb_fpc.Rd index 6582f3f8..0fb7b612 100644 --- a/man/tfb_fpc.Rd +++ b/man/tfb_fpc.Rd @@ -144,13 +144,14 @@ lines(x_pc, col = 2, lty = 2) \seealso{ \code{\link[=fpc_wsvd]{fpc_wsvd()}} for FPCA options. -Other tfb-class: -\code{\link{fpc_wsvd}()}, -\code{\link{tfb}}, -\code{\link{tfb_spline}()} +Other tfb-class: +\code{\link[=fpc_wsvd]{fpc_wsvd()}}, +\code{\link[=tfb]{tfb()}}, +\code{\link[=tfb_spline]{tfb_spline()}} -Other tfb_fpc-class: -\code{\link{fpc_wsvd}()} +Other tfb_fpc-class: +\code{\link[=fpc_wsvd]{fpc_wsvd()}}, +\code{\link[=tfb_mfpc]{tfb_mfpc()}} } \concept{tfb-class} \concept{tfb_fpc-class} diff --git a/man/tfb_mfpc.Rd b/man/tfb_mfpc.Rd new file mode 100644 index 00000000..46b06441 --- /dev/null +++ b/man/tfb_mfpc.Rd @@ -0,0 +1,143 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/tfb-mfpc.R +\name{tfb_mfpc} +\alias{tfb_mfpc} +\alias{tfb_mfpc.tf_mv} +\alias{tfb_mfpc.list} +\alias{tfb_mfpc.default} +\alias{is_tfb_mfpc} +\alias{tf_mfpc_scores} +\alias{tf_mfpc_efunctions} +\title{Multivariate functional principal component analysis (\code{f: R -> R^d})} +\usage{ +tfb_mfpc(data, ...) + +\method{tfb_mfpc}{tf_mv}( + data, + weights = c("inverse_variance", "snr", "equal"), + pve = 0.995, + npc = NULL, + uni_pve = 0.995, + method = fpc_wsvd, + ... +) + +\method{tfb_mfpc}{list}(data, ...) + +\method{tfb_mfpc}{default}(data, ...) + +is_tfb_mfpc(x) + +tf_mfpc_scores(x) + +tf_mfpc_efunctions(x) +} +\arguments{ +\item{data}{a \code{\link[=tfd_mv]{tfd_mv()}} / \code{tfb_mv} object, a (named) \code{list} of univariate +\code{tf} vectors, or anything \code{\link[=tfd_mv]{tfd_mv()}} accepts.} + +\item{...}{further arguments forwarded to the univariate \code{method}. As in +\code{\link[=tfb_mv]{tfb_mv()}}, a \code{...} argument given as a list named by the component names +is distributed per component.} + +\item{weights}{component weighting scheme for the joint analysis. Either a +string -- \code{"inverse_variance"} (default; \eqn{w_j = 1 / \sum_l +\lambda^{(j)}_l}, so each component contributes equal total variance), +\code{"snr"} (signal-to-noise: retained variance over the discarded-variance +tail of the univariate fit), or \code{"equal"} (\eqn{w_j = 1}) -- or a numeric +vector of \code{d} non-negative weights. Weights are rescaled to sum to \code{d} +(so \code{"equal"} gives all-ones).} + +\item{pve}{proportion of variance explained used to truncate the +\emph{multivariate} components (default \code{0.995}). Ignored if \code{npc} is given.} + +\item{npc}{number of multivariate FPCs to retain (overrides \code{pve}).} + +\item{uni_pve}{proportion of variance explained for the \emph{univariate} FPCA of +each component (default \code{0.995}); forwarded as \code{pve} to the univariate +\code{method}.} + +\item{method}{univariate FPCA method, see \code{\link[=tfb_fpc]{tfb_fpc()}}. Defaults to +\code{\link[=fpc_wsvd]{fpc_wsvd()}}.} + +\item{x}{a \code{tfb_mv} object, ideally one returned by \code{\link[=tfb_mfpc]{tfb_mfpc()}}.} +} +\value{ +a \code{tfb_mv} object whose \code{d} components are \code{\link[=tfb_fpc]{tfb_fpc()}} objects with +shared per-curve scores; \code{is_tfb_mfpc()} is \code{TRUE} for it. Use +\code{\link[=tf_mfpc_scores]{tf_mfpc_scores()}} and \code{\link[=tf_mfpc_efunctions]{tf_mfpc_efunctions()}} to extract the shared scores +and the multivariate eigenfunctions. + +\code{is_tfb_mfpc()}: a logical flag. + +\code{tf_mfpc_scores()}: an \verb{n x M} matrix of shared multivariate FPC +scores (rows = curves, columns = components). + +\code{tf_mfpc_efunctions()}: a \code{tfd_mv} of length \code{M} holding the +multivariate eigenfunctions \eqn{\Psi_m} (one "curve" per component). +} +\description{ +\code{tfb_mfpc()} computes a \emph{multivariate} functional principal component +analysis (MFPCA) of vector-valued functional data in the sense of +Happ & Greven (2018): a single set of scalar scores per curve, shared across +all \code{d} components, together with vector-valued eigenfunctions +\eqn{\Psi_m: \mathcal{T} \to \mathbb{R}^d}, so that +\eqn{f_i(t) \approx \mu(t) + \sum_m s_{im}\,\Psi_m(t)}. +} +\details{ +This is qualitatively different from \code{tfb_mv(data, basis = "fpc")}, which +fits an \emph{independent} FPCA per component (separate eigenfunctions \strong{and} +separate scores) and so cannot capture joint variation across dimensions. + +The estimator first runs the univariate FPCA (see \code{\link[=tfb_fpc]{tfb_fpc()}} / \code{\link[=fpc_wsvd]{fpc_wsvd()}}) +on each component to obtain univariate scores \eqn{\xi^{(j)}} and +eigenfunctions \eqn{\phi^{(j)}}, then eigendecomposes the joint covariance of +the (weighted) stacked scores. With component weights \eqn{w_j > 0} the +shared scores and multivariate eigenfunctions are +\deqn{s_{im} = \sum_j \sqrt{w_j} \sum_l [c_m]^{(j)}_l \xi^{(j)}_{il}, \qquad + \Psi_m^{(j)} = \frac{1}{\sqrt{w_j}} \sum_l [c_m]^{(j)}_l \phi^{(j)}_l,} +where \eqn{c_m} are the eigenvectors of the weighted joint score covariance. + +The returned object is a \code{\link[=tfb_mv]{tfb_mv()}} whose components are \code{\link[=tfb_fpc]{tfb_fpc()}} objects +sharing identical per-curve scores; cast it back to evaluations with +\code{\link[=as.tfd_mv]{as.tfd_mv()}} / \code{vec_cast()}, and project \strong{new} \code{tfd_mv} data onto the +fitted basis with \code{\link[=tf_rebase]{tf_rebase()}}. Like the univariate FPCA, the estimator +targets data observed on a common grid per component; re-scoring new data +evaluates it on each component's estimation grid, so new curves must be +observable there. +} +\examples{ +set.seed(1) +g <- tfd_mv(list(hip = tf_rgp(20), knee = tf_rgp(20))) +m <- tfb_mfpc(g, pve = 0.99) +m +dim(tf_mfpc_scores(m)) +tf_mfpc_efunctions(m) +# reconstruct and project new data: +plot(as.tfd_mv(m), type = "facet") +g_new <- tfd_mv(list(hip = tf_rgp(3), knee = tf_rgp(3))) +tf_rebase(g_new, m) +} +\references{ +Happ, Clara, Greven, Sonja (2018). +\dQuote{Multivariate functional principal component analysis for data observed on different (dimensional) domains.} +\emph{Journal of the American Statistical Association}, \bold{113}(522), 649--659. +} +\seealso{ +\code{\link[=tfb_mv]{tfb_mv()}} for independent per-component FPCA, \code{\link[=tfb_fpc]{tfb_fpc()}} / +\code{\link[=fpc_wsvd]{fpc_wsvd()}} for the univariate machinery. + +Other tf_mv-class: +\code{\link[=plot.tf_mv]{plot.tf_mv()}}, +\code{\link[=tf_arclength]{tf_arclength()}}, +\code{\link{tf_geom}}, +\code{\link{tf_mv_methods}}, +\code{\link[=tfb_mv]{tfb_mv()}}, +\code{\link[=tfd_mv]{tfd_mv()}} + +Other tfb_fpc-class: +\code{\link[=fpc_wsvd]{fpc_wsvd()}}, +\code{\link[=tfb_fpc]{tfb_fpc()}} +} +\concept{tf_mv-class} +\concept{tfb_fpc-class} diff --git a/man/tfb_mv.Rd b/man/tfb_mv.Rd new file mode 100644 index 00000000..d7cd4e37 --- /dev/null +++ b/man/tfb_mv.Rd @@ -0,0 +1,70 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/tfb-mv.R +\name{tfb_mv} +\alias{tfb_mv} +\alias{tfb_mv.tf_mv} +\alias{tfb_mv.list} +\alias{tfb_mv.default} +\alias{as.tfb_mv} +\alias{as.tfb_mv.default} +\title{Vector-valued functional data in basis representation (\code{f: R -> R^d})} +\usage{ +tfb_mv(data, ...) + +\method{tfb_mv}{tf_mv}(data, basis = c("spline", "fpc"), ...) + +\method{tfb_mv}{list}(data, basis = c("spline", "fpc"), ...) + +\method{tfb_mv}{default}(data, basis = c("spline", "fpc"), ...) + +as.tfb_mv(data, ...) + +\method{as.tfb_mv}{default}(data, ...) +} +\arguments{ +\item{data}{a \code{\link[=tfd_mv]{tfd_mv()}} / \code{tfb_mv} object, a (named) \code{list} of univariate +\code{tf} vectors, or anything \code{\link[=tfd_mv]{tfd_mv()}} accepts (it is converted to \code{tfd_mv} +first and then each component is expanded into a basis).} + +\item{...}{forwarded to the univariate \code{\link[=tfb]{tfb()}} constructor.} + +\item{basis}{spline (default) or fpc basis, see \code{\link[=tfb]{tfb()}}.} +} +\value{ +a \code{tfb_mv} object. +} +\description{ +\code{tfb_mv} is the basis-representation analogue of \code{\link[=tfd_mv]{tfd_mv()}}: it bundles \code{d} +univariate \code{\link[=tfb]{tfb()}} vectors (one per output dimension / component) into a +single vctrs vector of vector-valued functions \eqn{f: \mathbb{R} \to +\mathbb{R}^d}. Each component is fitted independently with the usual +univariate \code{\link[=tfb]{tfb()}} machinery (spline or FPC basis), so all of its arguments +(\code{k}, \code{bs}, \code{penalized}, \code{basis}, ...) apply per component. +} +\details{ +By default a single \code{...} is shared across all components (every component +gets the same \code{k}, \code{bs}, \code{sp}, etc.). To pass \emph{different} basis arguments +to different components, give the argument as a list named by component +names -- e.g. \code{tfb_mv(f, k = list(x = 5, y = 12))} fits component \code{x} with +\code{k = 5} and component \code{y} with \code{k = 12}. Any list-valued \code{...} whose names +do \strong{not} match the component names is treated as a shared argument value. +(Already-\code{tfb} components passed via \code{tfb_mv.list()} are kept as-is, which +is the most permissive way to mix entirely different basis kinds across +components.) +} +\examples{ +traj <- tfd_mv(list(x = tf_rgp(5), y = tf_rgp(5))) +tb <- tfb_mv(traj, k = 7, verbose = FALSE) +tb +tf_ncomp(tb) +} +\seealso{ +Other tf_mv-class: +\code{\link[=plot.tf_mv]{plot.tf_mv()}}, +\code{\link[=tf_arclength]{tf_arclength()}}, +\code{\link{tf_geom}}, +\code{\link{tf_mv_methods}}, +\code{\link[=tfb_mfpc]{tfb_mfpc()}}, +\code{\link[=tfd_mv]{tfd_mv()}} +} +\concept{tf_mv-class} diff --git a/man/tfb_spline.Rd b/man/tfb_spline.Rd index 909e77d6..4998f3de 100644 --- a/man/tfb_spline.Rd +++ b/man/tfb_spline.Rd @@ -227,10 +227,10 @@ fit \seealso{ \code{\link[mgcv:smooth.terms]{mgcv::smooth.terms()}} for spline basis options. -Other tfb-class: -\code{\link{fpc_wsvd}()}, -\code{\link{tfb}}, -\code{\link{tfb_fpc}()} +Other tfb-class: +\code{\link[=fpc_wsvd]{fpc_wsvd()}}, +\code{\link[=tfb]{tfb()}}, +\code{\link[=tfb_fpc]{tfb_fpc()}} } \concept{tfb-class} \concept{tfb_spline-class} diff --git a/man/tfbrackets.Rd b/man/tfbrackets.Rd index d17e7457..8eb342c7 100644 --- a/man/tfbrackets.Rd +++ b/man/tfbrackets.Rd @@ -1,11 +1,17 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/brackets.R -\name{tfbrackets} +% Please edit documentation in R/brackets-mv.R, R/brackets.R +\name{[.tf_mv} +\alias{[.tf_mv} +\alias{[<-.tf_mv} \alias{tfbrackets} \alias{[.tf} \alias{[<-.tf} \title{Accessing, evaluating, subsetting and subassigning \code{tf} vectors} \usage{ +\method{[}{tf_mv}(x, i, j, component = NULL, interpolate = TRUE, matrix = TRUE) + +\method{[}{tf_mv}(x, i) <- value + \method{[}{tf}(x, i, j, interpolate = TRUE, matrix = TRUE) \method{[}{tf}(x, i) <- value @@ -25,6 +31,12 @@ vectors. \emph{NOT} interpreted as a column number but as the argument value of the respective functional datum. If \code{j} is missing but \code{matrix} is explicitly given, \code{j} defaults to \link[=tf_arg]{tf_arg(x)}.} +\item{component}{for \code{tf_mv} objects only: optionally restrict evaluation / +extraction to a subset of the output dimensions, given by name or integer +index. A single name/index drops to the univariate component (a \code{tfd} or +\code{tfb}); a vector of length > 1 returns a sub-\code{tf_mv} containing just those +components. \code{NULL} (default) keeps all \code{d} components.} + \item{interpolate}{should functions be evaluated (i.e., inter-/extrapolated) for values in \code{arg} for which no original data is available? Only relevant for the raw data class \code{tfd}, for which it defaults to \code{TRUE}. Basis-represented diff --git a/man/tfd.Rd b/man/tfd.Rd index 5ad33927..849bfe81 100644 --- a/man/tfd.Rd +++ b/man/tfd.Rd @@ -107,7 +107,7 @@ Available \code{evaluator}-functions: \itemize{ \item \code{tf_approx_linear} for linear interpolation without extrapolation (i.e., \code{\link[zoo:na.approx]{zoo::na.approx()}} with \code{na.rm = FALSE}) -- this is the default, -\item \code{tf_approx_spline} for cubic spline interpolation, (i.e., \code{\link[zoo:na.approx]{zoo::na.spline()}} +\item \code{tf_approx_spline} for cubic spline interpolation, (i.e., \code{\link[zoo:na.spline]{zoo::na.spline()}} with \code{na.rm = FALSE}), \item \code{tf_approx_fill_extend} for linear interpolation and constant extrapolation (i.e., \code{\link[zoo:na.fill]{zoo::na.fill()}} with \code{fill = "extend"}) diff --git a/man/tfd_mv.Rd b/man/tfd_mv.Rd new file mode 100644 index 00000000..5cc8b8ad --- /dev/null +++ b/man/tfd_mv.Rd @@ -0,0 +1,134 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/tfd-mv.R +\name{tfd_mv} +\alias{tfd_mv} +\alias{tfd_mv.list} +\alias{tfd_mv.array} +\alias{tfd_mv.data.frame} +\alias{tfd_mv.tf_mv} +\alias{tfd_mv.default} +\alias{as.tfd_mv} +\alias{as.tfd_mv.default} +\alias{as.tfd_mv.tf_mv} +\title{Constructors for vector-valued functional data (\code{f: R -> R^d})} +\usage{ +tfd_mv(data, ...) + +\method{tfd_mv}{list}(data, arg = NULL, domain = NULL, evaluator = tf_approx_linear, ...) + +\method{tfd_mv}{array}(data, arg = NULL, domain = NULL, evaluator = tf_approx_linear, ...) + +\method{tfd_mv}{data.frame}( + data, + id = 1, + arg = 2, + value = 3, + domain = NULL, + evaluator = tf_approx_linear, + ... +) + +\method{tfd_mv}{tf_mv}(data, arg = NULL, domain = NULL, evaluator = NULL, ...) + +\method{tfd_mv}{default}(data, arg = NULL, domain = NULL, ...) + +as.tfd_mv(data, ...) + +\method{as.tfd_mv}{default}(data, ...) + +\method{as.tfd_mv}{tf_mv}(data, ...) +} +\arguments{ +\item{data}{one of: a (named) \code{list} of univariate \code{tf} vectors (used +directly, one per component); a (named) \code{list} of numeric matrices / +data.frames (one \emph{per component}, each turned into a \code{\link[=tfd]{tfd()}}); a 3-d +numeric \code{array} with dimensions \verb{[curve, arg, component]}; or a long +\code{data.frame} with an \code{id} column, an \code{arg} column and one or more \code{value} +columns (one component per \code{value} column).} + +\item{...}{forwarded to the univariate \code{\link[=tfd]{tfd()}} constructor.} + +\item{arg}{evaluation grid, see \code{\link[=tfd]{tfd()}}.} + +\item{domain}{range of \code{arg}, see \code{\link[=tfd]{tfd()}}.} + +\item{evaluator}{inter-/extrapolation function, see \code{\link[=tfd]{tfd()}}.} + +\item{id, value}{for the \code{data.frame} method: the column defining function +\code{id}, the column defining the \code{arg} grid, and the (possibly several) +columns containing component evaluations (one component per \code{value} +column).} +} +\value{ +a \code{tfd_mv} object (a vctrs vector of length \code{n}). +} +\description{ +\code{tfd_mv} represents \emph{vector-valued} functional data -- vectors of functions +\eqn{f: \mathcal{T} \subset \mathbb{R} \to \mathbb{R}^d}, such as movement +trajectories \eqn{(x(t), y(t))} or other multivariate-output curves. +} +\details{ +A \code{tfd_mv} object of length \code{n} bundles \code{d} \emph{univariate} \code{\link[=tfd]{tfd()}} vectors +(one per output dimension / component), each of length \code{n}. All numeric work +(evaluation, arithmetic, smoothing, ...) is delegated to these components, so +regular and irregular sampling, the choice of \code{evaluator}, etc. all behave +exactly as in the univariate case -- and components may even live on +different argument grids. Use \code{\link[=tfb_mv]{tfb_mv()}} for a basis representation. +} +\section{Inheritance contract}{ + +\code{tf_mv} classes inherit from \code{"tf"} \emph{only} for the purpose of \code{tf_domain()}, +type predicates (\code{is_tf()}, \code{is_tf_mv()}, ...) and S4 generic reuse. +\strong{Behaviour} on \code{tf_mv} comes \emph{only} from explicitly registered \code{.tf_mv} +methods: any generic without one aborts with a classed +\code{tf_mv_method_unimplemented} condition. The earlier promise of automatic +"right thing component-wise" dispatch via inheritance was incorrect -- +silent fall-through produced wrong-shape results or deep internal errors, +so it has been replaced with fail-fast stubs. The stubbed (i.e., \emph{not} +implemented) verbs are listed in \link{tf_mv_unimplemented}; design of real +component-wise semantics is tracked at +\url{https://github.com/tidyfun/tf/issues/255}. When you need to \emph{distinguish} +univariate-only from any-\code{tf} inside a helper, use \code{\link[=is_tf_1d]{is_tf_1d()}}: it returns +\code{TRUE} for \code{tfd} / \code{tfb} and \code{FALSE} for \code{tfd_mv} / \code{tfb_mv}. +} + +\examples{ +# (a) from a (named) list of univariate tfd vectors -- one per component: +traj <- tfd_mv(list(x = tf_rgp(3), y = tf_rgp(3))) +traj +tf_ncomp(traj) +traj$x + +# (b) from a list of matrices (one [curve, arg] matrix per component): +t <- seq(0, 1, length.out = 50) +mx <- matrix(sin(2 * pi * outer(1:3, t)), nrow = 3) +my <- matrix(cos(2 * pi * outer(1:3, t)), nrow = 3) +tfd_mv(list(x = mx, y = my), arg = t) + +# (c) from a 3-d array with dimensions [curve, arg, component]: +arr <- array(c(mx, my), dim = c(3, 50, 2), + dimnames = list(NULL, NULL, c("x", "y"))) +tfd_mv(arr, arg = t) + +# (d) from a long data.frame (id, arg, one value column per component): +df <- data.frame( + id = rep(1:3, each = 50), + arg = rep(t, times = 3), + x = as.vector(t(mx)), + y = as.vector(t(my)) +) +tfd_mv(df, id = "id", arg = "arg", value = c("x", "y")) +} +\seealso{ +\code{\link[=tfb_mv]{tfb_mv()}} for basis representation; \code{\link[=tf_components]{tf_components()}}, +\code{\link[=tf_ncomp]{tf_ncomp()}} and the \code{$} operator to access components. + +Other tf_mv-class: +\code{\link[=plot.tf_mv]{plot.tf_mv()}}, +\code{\link[=tf_arclength]{tf_arclength()}}, +\code{\link{tf_geom}}, +\code{\link{tf_mv_methods}}, +\code{\link[=tfb_mfpc]{tfb_mfpc()}}, +\code{\link[=tfb_mv]{tfb_mv()}} +} +\concept{tf_mv-class} diff --git a/man/tfmethods.Rd b/man/tfmethods.Rd index 5675c048..dc557ada 100644 --- a/man/tfmethods.Rd +++ b/man/tfmethods.Rd @@ -15,9 +15,11 @@ \alias{tf_arg<-.tfb} \alias{coef.tfb} \alias{rev.tf} +\alias{rev.tf_mv} \alias{is.na.tf} \alias{is.na.tfd_irreg} \alias{is_tf} +\alias{is_tf_1d} \alias{is_tfd} \alias{is_reg} \alias{is_tfd_reg} @@ -26,6 +28,9 @@ \alias{is_tfb} \alias{is_tfb_spline} \alias{is_tfb_fpc} +\alias{is_tf_mv} +\alias{is_tfd_mv} +\alias{is_tfb_mv} \title{Utility functions for \code{tf}-objects} \usage{ tf_arg(f) @@ -56,12 +61,16 @@ tf_arg(x) <- value \method{rev}{tf}(x) +\method{rev}{tf_mv}(x) + \method{is.na}{tf}(x) \method{is.na}{tfd_irreg}(x) is_tf(x) +is_tf_1d(x) + is_tfd(x) is_reg(x) @@ -77,6 +86,12 @@ is_tfb(x) is_tfb_spline(x) is_tfb_fpc(x) + +is_tf_mv(x) + +is_tfd_mv(x) + +is_tfb_mv(x) } \arguments{ \item{f}{an \code{tf} object.} @@ -95,9 +110,9 @@ the domain. \cr} \code{tf_arg(f)}? Defaults to \code{FALSE}, which returns the matrix of basis functions (columns) evaluated on \code{tf_arg(f)} (rows).} -\item{object}{as usual} +\item{object}{a \code{tfb} object.} -\item{...}{dots} +\item{...}{not used.} } \value{ either the respective attribute or, for setters (assignment functions), @@ -106,6 +121,11 @@ the input object with modified properties. \description{ A bunch of methods & utilities that do what they say: get or set the respective attributes of a \code{tf}-object. + +\code{is_tf_1d()} distinguishes \emph{univariate} \code{tf} vectors (length-\code{n} +samples of \code{f: R -> R}) from any \code{tf} vector. Returns \code{TRUE} for \code{tfd} / +\code{tfb} and \code{FALSE} for \code{tfd_mv} / \code{tfb_mv}. Useful as a dispatch / guard +predicate inside helpers that assume scalar per-arg evaluations. } \examples{ x <- tf_rgp(3) @@ -125,8 +145,10 @@ tf_basis(xb)(c(0, .1, .2)) c(is_tfb(xb), is_tfb_spline(xb), is_tfb_fpc(xb)) } \seealso{ -Other tidyfun utility functions: -\code{\link{in_range}()}, -\code{\link{tf_zoom}()} +Other tidyfun utility functions: +\code{\link[=ensure_list]{ensure_list()}}, +\code{\link[=in_range]{in_range()}}, +\code{\link[=tf_zoom]{tf_zoom()}}, +\code{\link[=unique_id]{unique_id()}} } \concept{tidyfun utility functions} diff --git a/man/tfsummaries.Rd b/man/tfsummaries.Rd index 9517ecf4..de0a6fda 100644 --- a/man/tfsummaries.Rd +++ b/man/tfsummaries.Rd @@ -79,8 +79,8 @@ summary(x) \seealso{ \code{\link[=tf_fwise]{tf_fwise()}} -Other tidyfun summary functions: -\code{\link{fivenum}()}, +Other tidyfun summary functions: +\code{\link[=fivenum]{fivenum()}}, \code{\link{functionwise}} } \concept{tidyfun summary functions} diff --git a/man/tfviz.Rd b/man/tfviz.Rd index 7e8b5bbe..42ba073d 100644 --- a/man/tfviz.Rd +++ b/man/tfviz.Rd @@ -28,10 +28,11 @@ ) } \arguments{ -\item{x}{an \code{tf} object.} +\item{x}{a \code{tf} object.} -\item{y}{(optional) numeric vector to be used as \code{arg} -(i.e., for the \strong{x}-axis...!).} +\item{y}{(optional) numeric vector of evaluation points to use as \code{arg} +for plotting (passed as the function's input grid, not the y-axis +values).} \item{n_grid}{minimal size of equidistant grid used for plotting, defaults to \code{50}. See details.} diff --git a/man/unique_id.Rd b/man/unique_id.Rd index cbb3a331..42ca5633 100644 --- a/man/unique_id.Rd +++ b/man/unique_id.Rd @@ -7,20 +7,27 @@ unique_id(x) } \arguments{ -\item{x}{any input.} +\item{x}{any input that can be coerced to character.} } \value{ -\code{x} turned into a list. +A character vector of unique, syntactically valid names of +the same length as \code{x}. } \description{ -See above. +Coerces \code{x} to character and returns syntactically valid, unique +identifiers. Empty strings are replaced with \code{"NA"} before +deduplication. If \code{x} already has no duplicates it is returned +unchanged. } \examples{ -unique_id(c("a", "b", "a")) +unique_id(c("a", "a", "b")) +unique_id(c(1, 1, 2)) } \seealso{ -Other tidyfun developer tools: -\code{\link{ensure_list}()}, -\code{\link{prep_plotting_arg}()} +Other tidyfun utility functions: +\code{\link[=ensure_list]{ensure_list()}}, +\code{\link[=in_range]{in_range()}}, +\code{\link[=tf_arg]{tf_arg()}}, +\code{\link[=tf_zoom]{tf_zoom()}} } -\concept{tidyfun developer tools} +\concept{tidyfun utility functions} diff --git a/man/vctrs.Rd b/man/vctrs.Rd index 50004717..2051572f 100644 --- a/man/vctrs.Rd +++ b/man/vctrs.Rd @@ -1,7 +1,15 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/vctrs-cast.R, R/vctrs-ptype2.R +% Please edit documentation in R/vctrs-mv.R, R/vctrs-cast.R, R/vctrs-ptype2.R \name{vctrs} \alias{vctrs} +\alias{vec_ptype2.tfd_mv.tfd_mv} +\alias{vec_ptype2.tfb_mv.tfb_mv} +\alias{vec_ptype2.tfd_mv.tfb_mv} +\alias{vec_ptype2.tfb_mv.tfd_mv} +\alias{vec_cast.tfd_mv.tfd_mv} +\alias{vec_cast.tfb_mv.tfb_mv} +\alias{vec_cast.tfd_mv.tfb_mv} +\alias{vec_cast.tfb_mv.tfd_mv} \alias{vec_cast.tfd_reg.tfd_reg} \alias{vec_cast.tfd_reg.tfd_irreg} \alias{vec_cast.tfd_reg.tfb_spline} @@ -36,6 +44,22 @@ \alias{vec_ptype2.tfb_fpc.tfd_irreg} \title{\code{vctrs} methods for \code{tf} objects} \usage{ +\method{vec_ptype2}{tfd_mv.tfd_mv}(x, y, ...) + +\method{vec_ptype2}{tfb_mv.tfb_mv}(x, y, ...) + +\method{vec_ptype2}{tfd_mv.tfb_mv}(x, y, ...) + +\method{vec_ptype2}{tfb_mv.tfd_mv}(x, y, ...) + +\method{vec_cast}{tfd_mv.tfd_mv}(x, to, ...) + +\method{vec_cast}{tfb_mv.tfb_mv}(x, to, ...) + +\method{vec_cast}{tfd_mv.tfb_mv}(x, to, ...) + +\method{vec_cast}{tfb_mv.tfd_mv}(x, to, ...) + \method{vec_cast}{tfd_reg.tfd_reg}(x, to, ...) \method{vec_cast}{tfd_reg.tfd_irreg}(x, to, ...) @@ -103,13 +127,13 @@ \arguments{ \item{x}{Vectors to cast.} -\item{to}{Type to cast to. If \code{NULL}, \code{x} will be returned as is.} +\item{y}{vectors to cast.} \item{...}{For \code{vec_cast_common()}, vectors to cast. For \code{vec_cast()}, \code{vec_cast_default()}, and \code{vec_restore()}, these dots are only for future extensions and should be empty.} -\item{y}{vectors to cast.} +\item{to}{Type to cast to. If \code{NULL}, \code{x} will be returned as is.} } \value{ for \code{vec_cast}: the casted \code{tf}-vector, for \code{vec_ptype2}: the common prototype diff --git a/tests/testthat/helper-tf.R b/tests/testthat/helper-tf.R new file mode 100644 index 00000000..ccc9c4a2 --- /dev/null +++ b/tests/testthat/helper-tf.R @@ -0,0 +1,14 @@ +# Test helper: expect that a tf / tf_mv object satisfies all internal +# invariants checked by validate_tf(). +expect_valid_tf <- function(object, ...) { + act <- testthat::quasi_label(rlang::enquo(object), arg = "object") + res <- tryCatch(validate_tf(act$val), error = identity) + if (inherits(res, "error")) { + testthat::fail( + paste0(act$lab, " is not a valid tf: ", conditionMessage(res)) + ) + } else { + testthat::succeed() + } + invisible(act$val) +} diff --git a/tests/testthat/test-approx.R b/tests/testthat/test-approx.R new file mode 100644 index 00000000..a7822e47 --- /dev/null +++ b/tests/testthat/test-approx.R @@ -0,0 +1,61 @@ +test_that("tf_approx_linear interpolates and returns NA outside", { + res <- tf_approx_linear( + x = c(-0.1, 0, 0.25, 0.5, 1, 1.1), + arg = c(0, 0.5, 1), + evaluations = c(0, 1, 2) + ) + expect_equal(res[2:5], c(0, 0.5, 1, 2)) + expect_true(is.na(res[1]) && is.na(res[6])) +}) + +test_that("tf_approx_spline interpolates linear data exactly", { + arg <- seq(0, 1, length.out = 11) + evals <- 2 * arg + 1 + res <- tf_approx_spline(x = c(0.05, 0.27, 0.95), arg = arg, evaluations = evals) + expect_equal(res, 2 * c(0.05, 0.27, 0.95) + 1, tolerance = 1e-8) +}) + +test_that("tf_approx_none returns NA at unseen args, passes through seen ones", { + res <- tf_approx_none( + x = c(0, 0.25, 0.5, 1), + arg = c(0, 0.5, 1), + evaluations = c(0, 1, 2) + ) + expect_equal(res[c(1, 3, 4)], c(0, 1, 2)) + expect_true(is.na(res[2])) +}) + +test_that("tf_approx_fill_extend extrapolates with boundary values", { + res <- tf_approx_fill_extend( + x = c(-0.5, 0, 0.5, 1, 2), + arg = c(0, 1), + evaluations = c(3, 7) + ) + expect_equal(res, c(3, 3, 5, 7, 7)) +}) + +test_that("tf_approx_locf carries last observation forward", { + res <- tf_approx_locf( + x = c(0, 0.3, 0.5, 0.7, 1), + arg = c(0, 0.5, 1), + evaluations = c(10, 20, 30) + ) + expect_equal(res, c(10, 10, 20, 20, 30)) +}) + +test_that("tf_approx_nocb carries next observation backward", { + res <- tf_approx_nocb( + x = c(0, 0.3, 0.5, 0.7, 1), + arg = c(0, 0.5, 1), + evaluations = c(10, 20, 30) + ) + expect_equal(res, c(10, 20, 20, 30, 30)) +}) + +test_that("evaluators are wired up via tf_evaluator<- on tfd", { + x <- tfd(matrix(c(0, 1, 2), nrow = 1), arg = c(0, 0.5, 1), + domain = c(-1, 2)) + expect_true(is.na(as.numeric(x[, 1.5]))) + tf_evaluator(x) <- tf_approx_fill_extend + expect_equal(as.numeric(x[, 1.5]), 2) +}) diff --git a/tests/testthat/test-arg-setter.R b/tests/testthat/test-arg-setter.R new file mode 100644 index 00000000..59bfd8ee --- /dev/null +++ b/tests/testthat/test-arg-setter.R @@ -0,0 +1,33 @@ +test_that("tf_arg<-.tfd_irreg preserves values on round-trip (#234)", { + set.seed(1234) + x <- tf_sparsify(tf_rgp(2)) + evals_before <- tf_evaluations(x) + arg_before <- tf_arg(x) + suppressWarnings(tf_arg(x) <- tf_arg(x)) + expect_identical(tf_evaluations(x), evals_before) + expect_identical(tf_arg(x), arg_before) +}) + +test_that("tf_arg<-.tfd_irreg keeps NA entries as NULL (#234)", { + set.seed(99) + x <- tf_sparsify(tf_rgp(3)) + # zero out one entry via arithmetic with NA to produce a NULL entry + x_with_na <- x + c(0, NA_real_, 0) + na_mask_before <- is.na(x_with_na) + suppressWarnings(tf_arg(x_with_na) <- tf_arg(x_with_na)) + expect_identical(is.na(x_with_na), na_mask_before) +}) + +test_that("tf_arg<-.tfd_irreg accepts a single shared arg vector (#234)", { + x <- tfd( + list(c(1, 2, 3), c(4, 5, 6)), + arg = list(c(0, 0.5, 1), c(0, 0.5, 1)) + ) + x <- as.tfd_irreg(x) + new_arg <- c(0.1, 0.5, 0.9) + suppressWarnings(tf_arg(x) <- new_arg) + expect_identical(tf_arg(x)[[1]], new_arg) + expect_identical(tf_arg(x)[[2]], new_arg) + expect_equal(tf_evaluations(x)[[1]], c(1, 2, 3)) + expect_equal(tf_evaluations(x)[[2]], c(4, 5, 6)) +}) diff --git a/tests/testthat/test-calculus.R b/tests/testthat/test-calculus.R index 6ab609ec..a71e125e 100644 --- a/tests/testthat/test-calculus.R +++ b/tests/testthat/test-calculus.R @@ -51,6 +51,9 @@ test_that("basic derivatives work", { tolerance = 0.1, ignore_attr = TRUE ) + expect_valid_tf(tf_derive(cubic)) + expect_valid_tf(tf_derive(cubic_irreg)) + expect_valid_tf(tf_derive(cubic_b)) }) test_that("basic definite integration works", { @@ -81,6 +84,9 @@ test_that("basic antiderivatives work", { tolerance = 0.1, ignore_attr = TRUE ) + expect_valid_tf(tf_integrate(square, definite = FALSE)) + expect_valid_tf(tf_integrate(square_irreg, definite = FALSE)) + expect_valid_tf(tf_integrate(square_b, definite = FALSE)) }) test_that("calculus works for tfb_spline with non-identity link", { @@ -190,3 +196,29 @@ test_that("derivative at extremum is near zero", { f_deriv <- tf_derive(f_quad) expect_equal(as.numeric(f_deriv[, 2]), 0, tolerance = 1e-4) }) + +test_that("tf_integrate antiderivative for irregular tfd does not crash (#237)", { + set.seed(11) + f <- tf_sparsify(tf_rgp(3)) + anti <- expect_no_error(tf_integrate(f, definite = FALSE)) + expect_s3_class(anti, "tfd") + expect_length(anti, length(f)) + # cumsum-based antiderivative starts at 0 at each curve's first arg + expect_equal( + vapply(tf_evaluations(anti), `[`, numeric(1), 1), + rep(0, length(anti)), + ignore_attr = TRUE + ) +}) +test_that("tf_integrate definite for irregular tfd is non-NA under defaults (#253)", { + set.seed(11) + f <- tf_sparsify(tf_rgp(3)) + res <- expect_no_warning(tf_integrate(f)) + expect_true(all(!is.na(res))) + # compare to explicit per-curve limits: should agree exactly + ranges <- lapply(tf_arg(f), range) + lo <- vapply(ranges, `[`, numeric(1), 1) + up <- vapply(ranges, `[`, numeric(1), 2) + res_explicit <- tf_integrate(f, lower = lo, upper = up) + expect_equal(unname(res), unname(res_explicit)) +}) diff --git a/tests/testthat/test-evaluate.R b/tests/testthat/test-evaluate.R index ee768b49..0fc39eeb 100644 --- a/tests/testthat/test-evaluate.R +++ b/tests/testthat/test-evaluate.R @@ -49,3 +49,20 @@ test_that("tf_evaluate.tfb keeps NA entries for shared arg", { expect_length(eval_grid, length(b_na)) expect_equal(eval_grid[[2]], rep(NA_real_, 5)) }) + +test_that("tf_evaluate.tfd returns values at correct positions for duplicated args (#236)", { + arg <- seq(0, 1, length.out = 11) + x <- tfd(matrix(arg, nrow = 1), arg = arg) + # duplicated unseen args must all be placed correctly + expect_equal( + tf_evaluate(x, c(0.25, 0.25, 0.35))[[1]], + c(0.25, 0.25, 0.35) + ) + # duplicated values, all unseen, more than one duplicate + expect_equal( + tf_evaluate(x, c(0.25, 0.25, 0.35, 0.35))[[1]], + c(0.25, 0.25, 0.35, 0.35) + ) + # `[` operator path reaches the same code + expect_equal(as.numeric(x[1, c(0.25, 0.25, 0.35)]), c(0.25, 0.25, 0.35)) +}) diff --git a/tests/testthat/test-fwise.R b/tests/testthat/test-fwise.R index f82f6d11..d620b599 100644 --- a/tests/testthat/test-fwise.R +++ b/tests/testthat/test-fwise.R @@ -90,3 +90,73 @@ test_that("fwise summaries work for tfb_fpc", { expect_equal(tf_crosscov(x, x), tf_fvar(x)) }) + +test_that("function-wise scalar summaries work componentwise for tf_mv", { + x <- tfd(rbind(c(0, 1, 2), c(1, 2, 3)), arg = 0:2) + y <- tfd(rbind(c(0, 2, 4), c(2, 4, 6)), arg = 0:2) + fm <- tfd_mv(list(x = x, y = y)) + names(fm) <- c("a", "b") + + expected_mean <- cbind(x = tf_fmean(fm$x), y = tf_fmean(fm$y)) + expected_var <- cbind(x = tf_fvar(fm$x), y = tf_fvar(fm$y)) + expected_sd <- cbind(x = tf_fsd(fm$x), y = tf_fsd(fm$y)) + rownames(expected_var) <- names(fm) + rownames(expected_sd) <- names(fm) + + expect_equal(tf_fmean(fm), expected_mean) + expect_equal(tf_fvar(fm), expected_var) + expect_equal(tf_fsd(fm), expected_sd) + + custom_arg <- seq(0, 2, length.out = 5) + expect_equal( + tf_fmean(fm, arg = custom_arg), + cbind( + x = tf_fmean(fm$x, arg = custom_arg), + y = tf_fmean(fm$y, arg = custom_arg) + ) + ) +}) + +test_that("function-wise scalar summaries use per-component args for tf_mv", { + x_arg <- seq(0, 1, length.out = 3) + y_arg <- seq(0, 1, length.out = 5) + x <- tfd(rbind(a = x_arg, b = x_arg^2), arg = x_arg) + y <- tfd(rbind(a = sin(y_arg), b = cos(y_arg)), arg = y_arg) + fm <- tfd_mv(list(x = x, y = y)) + + expected <- cbind(x = tf_fmean(x), y = tf_fmean(y)) + expected_var <- cbind(x = tf_fvar(x), y = tf_fvar(y)) + expected_sd <- cbind(x = tf_fsd(x), y = tf_fsd(y)) + rownames(expected) <- names(fm) + rownames(expected_var) <- names(fm) + rownames(expected_sd) <- names(fm) + + expect_equal(tf_fmean(fm), expected) + expect_equal(tf_fvar(fm), expected_var) + expect_equal(tf_fsd(fm), expected_sd) +}) + +test_that("tf_fwise works componentwise for tf_mv", { + x <- tfd(rbind(c(0, 1, 2), c(1, 2, 3)), arg = 0:2) + y <- tfd(rbind(c(0, 2, 4), c(2, 4, 6)), arg = 0:2) + fm <- tfd_mv(list(x = x, y = y)) + names(fm) <- c("a", "b") + + out <- tf_fwise(fm, \(df) max(df$value)) + + expect_named(out, c("a", "b")) + expect_named(out[[1]], c("x", "y")) + expect_equal( + out, + list( + a = list( + x = tf_fwise(fm$x, \(df) max(df$value))[[1]], + y = tf_fwise(fm$y, \(df) max(df$value))[[1]] + ), + b = list( + x = tf_fwise(fm$x, \(df) max(df$value))[[2]], + y = tf_fwise(fm$y, \(df) max(df$value))[[2]] + ) + ) + ) +}) diff --git a/tests/testthat/test-graphics.R b/tests/testthat/test-graphics.R new file mode 100644 index 00000000..cda67dbc --- /dev/null +++ b/tests/testthat/test-graphics.R @@ -0,0 +1,40 @@ +test_that("plot.tf(points = TRUE) passes col (not 'ol') to matlines (#248)", { + x <- tf_rgp(3) + pdf(NULL) + on.exit(dev.off(), add = TRUE) + + # Capture the call matlines() receives. Use an env in globalenv() so the + # tracer (which runs in the traced function's frame) can reach it. + capture_env <- new.env(parent = emptyenv()) + assign(".tf_matlines_call", capture_env, envir = globalenv()) + on.exit(rm(".tf_matlines_call", envir = globalenv()), add = TRUE) + + trace( + graphics::matlines, + tracer = quote( + assign( + "call", + match.call(), + envir = get(".tf_matlines_call", envir = globalenv()) + ) + ), + print = FALSE + ) + on.exit(suppressMessages(untrace(graphics::matlines)), add = TRUE) + + plot(x, points = TRUE) + + arg_names <- names(as.list(capture_env$call)[-1]) + expect_true("col" %in% arg_names) + expect_false("ol" %in% arg_names) +}) + +test_that("plot.tf smoke test for basic types", { + x <- tf_rgp(3) + pdf(NULL) + on.exit(dev.off(), add = TRUE) + expect_no_error(plot(x)) + expect_no_error(plot(x, points = TRUE)) + expect_no_error(plot(x, type = "lasagna")) + expect_no_error(lines(x)) +}) diff --git a/tests/testthat/test-math.R b/tests/testthat/test-math.R new file mode 100644 index 00000000..8cc74bd4 --- /dev/null +++ b/tests/testthat/test-math.R @@ -0,0 +1,54 @@ +test_that("Math.tfd threads `...` to the generic (#246)", { + arg <- seq(0, 1, length.out = 11) + x <- tfd(matrix(c(1.234, 2.345, 3.456, 4.567, 5.678, + 6.789, 7.890, 8.901, 9.012, 10.123, 11.234), nrow = 1), + arg = arg) + + # round honors digits arg + r1 <- as.numeric(round(x, 1)[, arg]) + r0 <- as.numeric(round(x)[, arg]) + expect_false(isTRUE(all.equal(r1, r0))) + expect_equal(r1, round(as.numeric(x[, arg]), 1)) + + # log honors base arg + l10 <- as.numeric(log(x, base = 10)[, arg]) + le <- as.numeric(log(x)[, arg]) + expect_false(isTRUE(all.equal(l10, le))) + expect_equal(l10, log10(as.numeric(x[, arg]))) + + # signif honors digits arg + s2 <- as.numeric(signif(x, 2)[, arg]) + expect_equal(s2, signif(as.numeric(x[, arg]), 2)) +}) + +test_that("Math.tfd basic ops without extra args still work", { + arg <- seq(0.1, 1, length.out = 10) + x <- tfd(matrix(arg, nrow = 1), arg = arg) + expect_equal(as.numeric(sqrt(x)[, arg]), sqrt(arg)) + expect_equal(as.numeric(exp(x)[, arg]), exp(arg)) + expect_equal(as.numeric(abs(-x)[, arg]), arg) +}) + +test_that("Math.tfb threads `...` (round-trip via tfd)", { + arg <- seq(0, 1, length.out = 51) + raw <- tfd(matrix(1.2345 + sin(2 * pi * arg), nrow = 1), arg = arg) + b <- suppressMessages({ + capture.output(b <- tfb(raw, k = 15, penalized = FALSE, verbose = FALSE)) + b + }) + r1 <- suppressWarnings(round(b, 1)) + r0 <- suppressWarnings(round(b)) + expect_false(isTRUE(all.equal( + as.numeric(r1[, arg]), + as.numeric(r0[, arg]) + ))) +}) + +test_that("Math.tfd works on irregular tfd", { + irr <- tfd(list(c(1.1, 2.2, 3.3), c(2.6, 4.7)), + arg = list(c(0, 0.5, 1), c(0, 1))) + r <- round(irr, 0) + vals <- tf_evaluations(r) + expect_equal(vals[[1]], c(1, 2, 3)) + expect_equal(vals[[2]], c(3, 5)) +}) diff --git a/tests/testthat/test-mfpc.R b/tests/testthat/test-mfpc.R new file mode 100644 index 00000000..8b8f04b1 --- /dev/null +++ b/tests/testthat/test-mfpc.R @@ -0,0 +1,322 @@ +test_that("tfb_mfpc returns a tfb_mv with shared-score tfb_fpc components", { + set.seed(1) + g <- tfd_mv(list(x = tf_rgp(15), y = tf_rgp(15))) + m <- tfb_mfpc(g, pve = 0.99) + expect_s3_class(m, "tfb_mv") + expect_true(is_tfb_mv(m)) + expect_true(is_tfb_mfpc(m)) + expect_false(is_tfb_mfpc(tfb_mv(g, basis = "fpc", verbose = FALSE))) + expect_true(all(map_lgl(tf_components(m), is_tfb_fpc))) + expect_length(m, 15) + expect_identical(tf_ncomp(m), 2L) +}) + +test_that("MFPC scores are shared (identical) across all components", { + set.seed(2) + g <- tfd_mv(list(x = tf_rgp(12), y = tf_rgp(12), z = tf_rgp(12))) + m <- tfb_mfpc(g, npc = 4) + # the per-curve coefficient vectors (1, scores) are identical component-wise + c1 <- unclass(tf_component(m, 1)) + c2 <- unclass(tf_component(m, 2)) + c3 <- unclass(tf_component(m, 3)) + expect_equal(c1, c2, ignore_attr = TRUE) + expect_equal(c1, c3, ignore_attr = TRUE) + S <- tf_mfpc_scores(m) + expect_equal(dim(S), c(12L, 4L)) +}) + +test_that("reconstruction is exact at full rank with equal weights", { + set.seed(3) + g <- tfd_mv(list(x = tf_rgp(10), y = tf_rgp(10))) + m <- tfb_mfpc(g, weights = "equal", uni_pve = 1, pve = 1) + rec <- as.tfd_mv(m) + expect_equal(as.matrix(rec), as.matrix(g), tolerance = 1e-10) +}) + +test_that("multivariate eigenfunctions are weighted-orthonormal", { + set.seed(4) + g <- tfd_mv(list(x = tf_rgp(40, nugget = .05), y = tf_rgp(40, nugget = .05))) + m <- tfb_mfpc(g, weights = "inverse_variance", npc = 5) + w <- attr(m, "mfpc")$weights + ef <- tf_mfpc_efunctions(m) # tfd_mv, length M + M <- length(ef) + gram <- matrix(0, M, M) + for (j in seq_len(tf_ncomp(ef))) { + mat <- as.matrix(tf_component(ef, j)) + arg <- as.numeric(attr(mat, "arg")) + qw <- trapezoid_weights(arg) + gram <- gram + w[j] * (mat %*% (qw * t(mat))) + } + expect_equal(unname(gram), diag(M), tolerance = 1e-6) +}) + +test_that("multivariate eigenvalues account for the total weighted variance", { + set.seed(5) + g <- tfd_mv(list(x = tf_rgp(30, nugget = .05), y = tf_rgp(30, nugget = .05))) + m <- tfb_mfpc(g, weights = "equal", uni_pve = 1, pve = 1) + nu <- attr(m, "mfpc")$evalues + w <- attr(m, "mfpc")$weights + ind <- tfb_mv(g, basis = "fpc", pve = 1, verbose = FALSE) + tv <- map_dbl(tf_components(ind), \(co) sum(attr(co, "score_variance"))) + expect_equal(sum(nu), sum(w * tv), tolerance = 1e-6) + # eigenvalues are non-increasing and non-negative + expect_true(all(nu >= -1e-8)) + expect_false(is.unsorted(rev(nu))) +}) + +test_that("weight schemes and user weights produce valid, normalized weights", { + set.seed(6) + g <- tfd_mv(list(x = tf_rgp(25), y = tf_rgp(25))) + m_iv <- tfb_mfpc(g, weights = "inverse_variance", npc = 3) + m_eq <- tfb_mfpc(g, weights = "equal", npc = 3) + m_snr <- tfb_mfpc(g, weights = "snr", uni_pve = 0.9, npc = 3) + m_num <- tfb_mfpc(g, weights = c(3, 1), npc = 3) + # all schemes rescale to sum to d (= 2) + for (m in list(m_iv, m_eq, m_snr, m_num)) { + expect_equal(sum(attr(m, "mfpc")$weights), 2) + } + expect_equal(unname(attr(m_eq, "mfpc")$weights), c(1, 1)) + expect_equal(unname(attr(m_num, "mfpc")$weights), c(1.5, 0.5)) + # wrong-length numeric weights error + expect_error(tfb_mfpc(g, weights = c(1, 2, 3)), "weights") + # zero / negative weights error (would yield Inf eigenfunctions via 1/sqrt(w)) + expect_error(tfb_mfpc(g, weights = c(1, 0)), "strictly positive") + expect_error(tfb_mfpc(g, weights = c(2, -1)), "strictly positive") +}) + +test_that("re-scoring rejects a custom arg and incompatible domains", { + set.seed(15) + g <- tfd_mv(list(x = tf_rgp(15), y = tf_rgp(15))) + m <- tfb_mfpc(g, npc = 3) + expect_error(tf_rebase(g, m, arg = seq(0, 1, length.out = 10)), "custom") +}) + +test_that("joint re-scoring round-trips the training data exactly", { + set.seed(7) + g <- tfd_mv(list(x = tf_rgp(20), y = tf_rgp(20))) + m <- tfb_mfpc(g, pve = 0.99) + S <- tf_mfpc_scores(m) + # re-scoring the *training* data must recover the stored scores + m_re <- tf_rebase(g, m) + expect_true(is_tfb_mfpc(m_re)) + expect_equal(tf_mfpc_scores(m_re), S, tolerance = 1e-8) + # the same via vec_cast + m_cast <- vctrs::vec_cast(g, m) + expect_equal(tf_mfpc_scores(m_cast), S, tolerance = 1e-8) +}) + +test_that("re-scoring new data is well-defined and component-compatible", { + set.seed(8) + g <- tfd_mv(list(x = tf_rgp(30), y = tf_rgp(30))) + m <- tfb_mfpc(g, npc = 4) + g_new <- tfd_mv(list(x = tf_rgp(5), y = tf_rgp(5))) + proj <- tf_rebase(g_new, m) + expect_s3_class(proj, "tfb_mv") + expect_true(is_tfb_mfpc(proj)) + expect_length(proj, 5) + expect_equal(dim(tf_mfpc_scores(proj)), c(5L, 4L)) + # mismatched component count errors + g_bad <- tfd_mv(list(x = tf_rgp(5), y = tf_rgp(5), z = tf_rgp(5))) + expect_error(tf_rebase(g_bad, m)) +}) + +test_that("uni_pve / per-component args control the univariate truncation", { + set.seed(9) + g <- tfd_mv(list(x = tf_rgp(30, nugget = .05), y = tf_rgp(30, nugget = .05))) + m_lo <- tfb_mfpc(g, uni_pve = 0.5, pve = 1) + m_hi <- tfb_mfpc(g, uni_pve = 0.99, pve = 1) + # fewer univariate PCs => fewer available multivariate PCs + expect_lte(attr(m_lo, "mfpc")$npc, attr(m_hi, "mfpc")$npc) +}) + +test_that("scoring a single MFPC component directly is an error", { + set.seed(10) + g <- tfd_mv(list(x = tf_rgp(10), y = tf_rgp(10))) + m <- tfb_mfpc(g, npc = 3) + comp <- tf_component(m, 1) + expect_s3_class(comp, "tfb_fpc") + # the stored per-component scoring function refuses to run + expect_error( + attr(comp, "scoring_function")(), + "single component" + ) +}) + +test_that("slicing and same-fit concatenation preserve the MFPC spec", { + set.seed(11) + g <- tfd_mv(list(x = tf_rgp(12), y = tf_rgp(12))) + m <- tfb_mfpc(g, npc = 3) + # subsetting keeps the curve-independent eigenbasis -> still re-scorable + expect_true(is_tfb_mfpc(m[1:5])) + expect_equal(dim(tf_mfpc_scores(m[1:5])), c(5L, 3L)) + # concatenation of slices of the *same* fit keeps the spec + expect_true(is_tfb_mfpc(c(m[1:6], m[7:12]))) + expect_true(is_tfb_mfpc(c(m, m))) +}) + +# Capture warning messages from an expression without aborting on them. +collect_warnings <- function(expr) { + ws <- character() + val <- withCallingHandlers( + expr, + warning = function(w) { + ws <<- c(ws, conditionMessage(w)) + invokeRestart("muffleWarning") + } + ) + list(value = val, warnings = ws) +} + +test_that("multi-component subsetting preserves MFPC specs when possible", { + set.seed(111) + mf <- tfb_mfpc( + tfd_mv(list(x = tf_rgp(12), y = tf_rgp(12), z = tf_rgp(12))), + npc = 3 + ) + + full <- mf[, component = c("x", "y", "z")] + expect_true(is_tfb_mfpc(full)) + expect_identical(attr(full, "mfpc")$comp_names, c("x", "y", "z")) + expect_no_error(tf_rebase(as.tfd_mv(full), full)) + + reordered <- mf[, component = c("z", "x", "y")] + expect_true(is_tfb_mfpc(reordered)) + expect_identical(attr(reordered, "mfpc")$comp_names, c("z", "x", "y")) + expect_no_error(tf_rebase(as.tfd_mv(reordered), reordered)) + expect_equal(tf_mfpc_scores(reordered), tf_mfpc_scores(mf)) + + rescored <- tf_rebase(as.tfd_mv(mf), mf) + rescored_reordered <- tf_rebase( + as.tfd_mv(mf)[, component = c("z", "x", "y")], + reordered + ) + expect_equal(tf_mfpc_scores(rescored_reordered), tf_mfpc_scores(rescored)) + + cap <- collect_warnings(mf[, component = c("x", "z")]) + expect_true(any(grepl("demot|mfpc|MFPC|joint", cap$warnings))) + expect_false(is_tfb_mfpc(cap$value)) + expect_no_error(suppressWarnings(cap$value + 1)) +}) + +test_that("tf_interpolate preserves usable MFPC specs", { + set.seed(112) + mf <- tfb_mfpc( + tfd_mv(list(x = tf_rgp(10), y = tf_rgp(10))), + npc = 2 + ) + + out <- tf_interpolate(mf, arg = seq(0, 1, length.out = 31)) + + expect_true(is_tfb_mfpc(out)) + expect_identical(attr(out, "mfpc")$comp_names, attr(mf, "mfpc")$comp_names) + expect_identical(tf_arg(out$x), seq(0, 1, length.out = 31)) + expect_no_error(tf_rebase(as.tfd_mv(out), out)) +}) + +test_that("tfb_mfpc protects its joint spec", { + set.seed(1) + mf <- tfb_mfpc(tfd_mv(list(x = tf_rgp(20), y = tf_rgp(20))), pve = 0.95) + # 1. Arithmetic demotes with a clear warning + cap <- collect_warnings(mf + 1) + expect_true(any(grepl("demot|mfpc|MFPC|joint", cap$warnings))) + expect_false(is_tfb_mfpc(cap$value)) + # 2. $<- demotes with a clear warning + cap <- collect_warnings({ + mf2 <- mf + mf2$x <- mf$x + mf2 + }) + expect_true(any(grepl("demot|mfpc|MFPC|joint", cap$warnings))) + expect_false(is_tfb_mfpc(cap$value)) + # 3. c() of slices of the same fit preserves the spec + expect_true(is_tfb_mfpc(c(mf[1:4], mf[5:8]))) +}) + +test_that("post-demotion tfb_mv stays functional", { + set.seed(42) + mf <- tfb_mfpc(tfd_mv(list(x = tf_rgp(10), y = tf_rgp(10))), pve = 0.95) + mf2 <- mf + suppressWarnings(mf2$x <- mf$x) + expect_false(is_tfb_mfpc(mf2)) + # evaluating the demoted object must not explode in the per-component + # scoring stub + expect_no_error(tf_evaluate(mf2)) + # arithmetic re-scores via the components' scoring_function, so it must not + # hit the abort stub either -- this exercises the rebuilt standalone bases, + # including the assigned `value` (which came from the same MFPC fit). + expect_no_error(suppressWarnings(out <- mf2 + 1)) + expect_true(all(is.finite(unlist(tf_evaluations(out))))) + expect_no_error(suppressWarnings(tf_rebase(mf2, mf2))) +}) + +test_that("both-mfpc arithmetic warns about demotion exactly once", { + set.seed(21) + mf <- tfb_mfpc(tfd_mv(list(x = tf_rgp(10), y = tf_rgp(10))), pve = 0.95) |> + suppressWarnings() + n_demotion_warnings <- 0L + withCallingHandlers( + mf + mf, + tf_mfpc_demotion = function(w) { + n_demotion_warnings <<- n_demotion_warnings + 1L + invokeRestart("muffleWarning") + }, + warning = function(w) invokeRestart("muffleWarning") + ) + expect_identical(n_demotion_warnings, 1L) +}) + +test_that("demoted tfb_mfpc supports chained Math/Ops", { + set.seed(1) + mf <- tfb_mfpc(tfd_mv(list(x = tf_rgp(20), y = tf_rgp(20))), pve = 0.95) |> + suppressWarnings() + expect_no_error(out1 <- suppressWarnings((mf + 1) - 1)) + expect_no_error(out2 <- suppressWarnings(log(mf + 2))) + expect_no_error(out3 <- suppressWarnings(-mf)) + expect_no_error(out4 <- suppressWarnings(mf * 2 / 2)) + # reconstructed values are sane (finite) after a round-trip through ops + expect_true(all(is.finite(unlist(tf_evaluations(out1))))) +}) + +test_that("demotion warning has class 'tf_mfpc_demotion'", { + set.seed(8) + mf <- tfb_mfpc(tfd_mv(list(x = tf_rgp(10), y = tf_rgp(10))), pve = 0.95) |> + suppressWarnings() + classes <- character() + withCallingHandlers( + mf + 1, + tf_mfpc_demotion = function(w) { + classes <<- c(classes, class(w)) + invokeRestart("muffleWarning") + }, + warning = function(w) invokeRestart("muffleWarning") + ) + expect_true("tf_mfpc_demotion" %in% classes) +}) + +test_that("c() of MFPC fits with different specs demotes with a warning", { + set.seed(1) + mf1 <- tfb_mfpc(tfd_mv(list(x = tf_rgp(10), y = tf_rgp(10))), pve = 0.95) + set.seed(2) + mf2 <- tfb_mfpc(tfd_mv(list(x = tf_rgp(10), y = tf_rgp(10))), pve = 0.95) + cap <- collect_warnings(c(mf1, mf2)) + expect_true(any(grepl("demot|mfpc|MFPC|joint", cap$warnings))) + expect_false(is_tfb_mfpc(cap$value)) +}) + +test_that("mixing a tfd_mv with an MFPC tfb_mv demotes (no spec carried)", { + set.seed(12) + g <- tfd_mv(list(x = tf_rgp(8), y = tf_rgp(8))) + m <- tfb_mfpc(g, npc = 2) + expect_false(is_tfb_mfpc(c(as.tfd_mv(m), g))) +}) + +test_that("npc beyond available components warns and is capped", { + set.seed(13) + g <- tfd_mv(list(x = tf_rgp(6), y = tf_rgp(6))) + expect_warning(m <- tfb_mfpc(g, uni_pve = 0.5, npc = 1000), "exceeds") + expect_lte(attr(m, "mfpc")$npc, length(g) * 2) +}) + +test_that("empty / degenerate inputs error informatively", { + expect_error(tfb_mfpc(tfd_mv(list())), "no components") +}) diff --git a/tests/testthat/test-mv-contract.R b/tests/testthat/test-mv-contract.R new file mode 100644 index 00000000..e65da414 --- /dev/null +++ b/tests/testthat/test-mv-contract.R @@ -0,0 +1,352 @@ +# Contract regressions: tf vs. tf_mv inheritance + coercion shape stability. +# These pin the behaviour the multivariate audit (June 2026 integration into +# tidyfun's ggplot layer) flagged as wrong-or-inconsistent. + +# --- is_tf_1d() distinguishes univariate from multivariate ------------------- + +test_that("is_tf_1d() is TRUE for tfd/tfb and FALSE for tfd_mv/tfb_mv", { + set.seed(1) + fu <- tf_rgp(2) + fb <- tfb(fu, k = 4, penalized = FALSE, verbose = FALSE) + fm <- tfd_mv(list(x = tf_rgp(2), y = tf_rgp(2))) + fmb <- tfb_mv(fm, verbose = FALSE) + expect_true(is_tf_1d(fu)) + expect_true(is_tf_1d(fb)) + expect_false(is_tf_1d(fm)) + expect_false(is_tf_1d(fmb)) + # raw scalars / data.frames are not tf + expect_false(is_tf_1d(1:5)) + expect_false(is_tf_1d(data.frame(a = 1))) +}) + +# --- Summary on tf_mv dispatches to the multivariate method ------------------ + +test_that("Summary() / max() on a tf_mv dispatches to the tf_mv method", { + set.seed(2) + fm <- tfd_mv(list(x = tf_rgp(3), y = tf_rgp(3))) + # Summary.tf_mv returns a tf_mv (length 1) -- the univariate path would + # silently flatten to a single tfd, losing component structure. + out <- max(fm) + expect_s3_class(out, "tf_mv") + expect_identical(tf_ncomp(out), 2L) +}) + +# --- tf_2_df delegates to .tf_mv path when given a tf_mv --------------------- + +test_that("tf_2_df on a tf_mv delegates to as.data.frame.tf_mv", { + set.seed(3) + fm <- tfd_mv(list(x = tf_rgp(2, arg = 4L), y = tf_rgp(2, arg = 4L))) + df <- tf_2_df(fm) + # tf_2_df now returns the mv-aware long form: (id, arg, component, value) + expect_named(df, c("id", "arg", "component", "value")) + expect_s3_class(df$component, "factor") + expect_identical(levels(df$component), c("x", "y")) +}) + +# --- as.data.frame.tf_mv long/wide + arg + interpolate ----------------------- + +test_that("as.data.frame.tf_mv defaults to long (id, arg, component, value)", { + set.seed(4) + fm <- tfd_mv(list(x = tf_rgp(2, arg = 4L), y = tf_rgp(2, arg = 4L))) + d_long <- as.data.frame(fm, unnest = TRUE) + expect_named(d_long, c("id", "arg", "component", "value")) + expect_s3_class(d_long$component, "factor") + expect_identical(levels(d_long$component), c("x", "y")) + expect_identical(nrow(d_long), 2L * 4L * 2L) + # opt-in wide schema + d_wide <- as.data.frame(fm, unnest = TRUE, long = FALSE) + expect_named(d_wide, c("id", "arg", "x", "y")) + expect_identical(nrow(d_wide), 2L * 4L) +}) + +test_that("as.data.frame.tf_mv accepts a custom arg grid", { + set.seed(5) + fm <- tfd_mv(list(x = tf_rgp(2, arg = 11L), y = tf_rgp(2, arg = 11L))) + arg <- c(0.1, 0.5, 0.9) + d_long <- as.data.frame(fm, unnest = TRUE, arg = arg) + # 2 curves * 3 arg * 2 components + expect_identical(nrow(d_long), 2L * 3L * 2L) + expect_setequal(unique(d_long$arg), arg) + d_wide <- as.data.frame(fm, unnest = TRUE, long = FALSE, arg = arg) + expect_identical(nrow(d_wide), 2L * 3L) + expect_setequal(unique(d_wide$arg), arg) +}) + +test_that("as.data.frame.tf_mv on a zero-component tf_mv returns an empty df", { + empty <- tfd_mv(list()) + d_long <- as.data.frame(empty, unnest = TRUE) + expect_named(d_long, c("id", "arg", "component", "value")) + expect_identical(nrow(d_long), 0L) + d_wide <- as.data.frame(empty, unnest = TRUE, long = FALSE) + expect_named(d_wide, c("id", "arg")) + expect_identical(nrow(d_wide), 0L) +}) + +test_that("single component extraction without arg preserves tf class", { + set.seed(51) + fm <- tfd_mv(list(x = tf_rgp(3), y = tf_rgp(3))) + out <- fm[, component = "x"] + expect_s3_class(out, "tfd") + expect_false(is_tf_mv(out)) + expect_equal(out, tf_component(fm, "x")) + + out_i <- fm[1:2, component = 1] + expect_s3_class(out_i, "tfd") + expect_equal(out_i, tf_component(fm, 1)[1:2]) + + mat <- fm[, c(0.25, 0.5), component = "x"] + expect_true(is.matrix(mat)) + expect_identical(dim(mat), c(3L, 2L)) + + fb <- tfb_mv(fm, k = 5, penalized = FALSE, verbose = FALSE) + expect_s3_class(fb[, component = "x"], "tfb") +}) + +# --- tf_evaluate.tf_mv uniform per-curve data.frame -------------------------- + +test_that("tf_evaluate() returns uniform list-of-data.frames", { + set.seed(6) + fm <- tfd_mv(list(x = tf_rgp(3), y = tf_rgp(3))) + out <- tf_evaluate(fm, arg = c(0.1, 0.5, 0.9)) + expect_length(out, 3L) + for (df in out) { + expect_s3_class(df, "data.frame") + expect_identical(colnames(df), c("arg", "x", "y")) + expect_identical(nrow(df), 3L) + } +}) + +test_that("tf_evaluate() returns data.frames (no list-shape)", { + set.seed(7) + fx <- tf_rgp(2, arg = 5L) + fy <- tf_jiggle(tf_rgp(2, arg = 7L)) # irregular + fm <- tfd_mv(list(x = fx, y = fy)) + out <- tf_evaluate(fm) # native (union) grids per curve + for (df in out) { + expect_s3_class(df, "data.frame") + expect_identical(colnames(df), c("arg", "x", "y")) + } +}) + +# --- as.matrix.tf_mv stays 3-d ----------------------------------------------- + +test_that("as.matrix.tf_mv returns a 3-d array with named third dim", { + set.seed(8) + fm <- tfd_mv(list(x = tf_rgp(3, arg = 5L), y = tf_rgp(3, arg = 5L))) + m <- as.matrix(fm, arg = seq(0, 1, length.out = 5), interpolate = TRUE) + expect_identical(length(dim(m)), 3L) + expect_identical(dim(m), c(3L, 5L, 2L)) + expect_identical(dimnames(m)[[3]], c("x", "y")) +}) + +test_that("as.matrix.tf_mv treats arg NULL like a missing arg", { + set.seed(81) + fm <- tfd_mv(list(x = tf_rgp(2, arg = 5L), y = tf_rgp(2, arg = 5L))) + expect_equal(as.matrix(fm, arg = NULL), as.matrix(fm)) +}) + +# --- mat_2_df id column is plain factor, not ordered ------------------------- + +test_that("as.data.frame on a tfd/tfb yields a plain-factor id column", { + set.seed(9) + f <- tf_rgp(3) + d <- as.data.frame(f, unnest = TRUE) + expect_s3_class(d$id, "factor") + expect_false(inherits(d$id, "ordered")) +}) + +# --- tf_norm rebases components onto a paired grid before pointwise norm ----- + +test_that("tf_norm on components with different argument grids works", { + # x evaluated at c(0, 1), y at c(0.5, 1.5); the univariate `+` path would + # have errored or misaligned. The new norm path evaluates on each curve's + # union grid and NA-fills outside each component's support. + fx <- tfd(matrix(c(3, 3), nrow = 1), arg = c(0, 1)) + fy <- tfd(matrix(c(4, 4), nrow = 1), arg = c(0.5, 1.5)) + f <- tfd_mv(list(x = fx, y = fy), domain = c(0, 1.5)) + n <- tf_norm(f) + expect_s3_class(n, "tfd") + # at arg = 0.5, both components are well-defined (3, 4) -> norm = 5 + expect_equal(unlist(tf_evaluate(n, arg = 0.5)), 5, ignore_attr = TRUE) +}) + +# --- tf_integrate on zero-component tf_mv returns a shape-appropriate empty -- + +test_that("tf_integrate on a zero-component tf_mv returns an empty result", { + empty <- tfd_mv(list()) + out <- tf_integrate(empty) + expect_true(is.matrix(out)) + expect_identical(dim(out), c(0L, 0L)) +}) + +# --- Fail-fast contract for un-stubbed generics ------------------------------ +# For each S3 generic in NAMESPACE that has a univariate-tf method (tf / tfd / +# tfb / tfd_reg / tfd_irreg / tfb_spline / tfb_fpc), calling it on a `tf_mv` +# must either succeed (an explicit tf_mv method exists) or abort with the +# classed `tf_mv_method_unimplemented` condition -- never an obscure deep +# internal error, never NA garbage. See tidyfun/tf#255. + +# small probe set with explicit invocation recipes for verbs whose signatures +# need a second argument or otherwise resist a generic walker call. +mv_probe_calls <- function(fm) { + list( + summary = function() summary(fm), + fivenum = function() fivenum(fm), + quantile = function() quantile(fm), + tf_depth = function() tf_depth(fm), + tf_crosscov = function() tf_crosscov(fm, fm), + tf_crosscor = function() tf_crosscor(fm, fm), + tf_invert = function() tf_invert(fm), + sort = function() sort(fm), + rank = function() rank(fm), + xtfrm = function() xtfrm(fm) + ) +} + +test_that("explicitly probed unimplemented verbs abort with classed condition", { + set.seed(255) + fm <- tfd_mv(list(x = tf_rgp(3), y = tf_rgp(3))) + probes <- mv_probe_calls(fm) + for (nm in names(probes)) { + expect_error( + probes[[nm]](), + class = "tf_mv_method_unimplemented", + info = sprintf( + "%s() on tf_mv should signal tf_mv_method_unimplemented", + nm + ) + ) + } +}) + +test_that("every univariate-tf generic either has a tf_mv method or aborts cleanly", { + set.seed(2550) + fm <- tfd_mv(list(x = tf_rgp(3), y = tf_rgp(3))) + + # parse NAMESPACE: for each generic, collect the set of classes it's + # registered for. For an installed package (R CMD check, test_dir() on a + # library install) NAMESPACE sits at the package root; under + # devtools::load_all() the pkgload shim resolves the same call to the + # source-tree NAMESPACE. + ns_path <- system.file("NAMESPACE", package = "tf") + if (!nzchar(ns_path)) { + # not installed at all: fall back to the source-tree NAMESPACE. + ns_path <- testthat::test_path("..", "..", "NAMESPACE") + } + ns_lines <- readLines(ns_path) + expect_true(any(grepl("^S3method\\(", ns_lines))) + s3 <- grep("^S3method\\(", ns_lines, value = TRUE) + m <- regmatches(s3, regexec("^S3method\\(([^,]+),(.+)\\)$", s3)) + pairs <- do.call( + rbind.data.frame, + lapply(m, function(x) { + if (length(x) == 3) { + data.frame( + generic = gsub('"', "", trimws(x[2])), + class = gsub('"', "", trimws(x[3])), + stringsAsFactors = FALSE + ) + } + }) + ) + by_gen <- split(pairs$class, pairs$generic) + + univariate_classes <- c( + "tf", + "tfd", + "tfb", + "tfd_reg", + "tfd_irreg", + "tfb_spline", + "tfb_fpc" + ) + + has_univariate <- function(classes) any(classes %in% univariate_classes) + has_tf_mv <- function(classes) "tf_mv" %in% classes + + # restrict to single-arg generics we can blindly call; everything else is + # already covered by the explicit probe table above. + walkable <- c( + "format", + "print", + "plot", + "lines", + "tf_arg", + "tf_domain", + "tf_evaluations", + "tf_count", + "as.data.frame", + "as.matrix", + "mean", + "median", + "sd", + "var", + "is.na", + "tf_derive", + "tf_integrate", + "tf_inner", + "tf_norm", + "tf_tangent", + "tf_smooth", + "tf_evaluate", + "tf_arclength", + "tf_rebase", + "tf_zoom", + "tf_warp", + "tf_align", + "rev", + "sort", + "rank", + "xtfrm", + "summary", + "fivenum", + "quantile", + "tf_depth", + "tf_interpolate", + "tf_invert" + ) + + # The contract: a walked single-argument generic must either *succeed* + # (a real tf_mv method exists, just not registered under that name -- e.g. + # group generics) or abort with the classed `tf_mv_method_unimplemented` + # condition. Any other error -- classed or not -- means a stub is missing + # and the call fell through to a univariate method. + fails_cleanly <- function(call_expr) { + tryCatch( + { + eval(call_expr) + TRUE + }, + tf_mv_method_unimplemented = function(e) TRUE, + error = function(e) { + structure(FALSE, message = conditionMessage(e)) + } + ) + } + + checked <- character() + for (gen in names(by_gen)) { + classes <- by_gen[[gen]] + if (!has_univariate(classes)) next + if (has_tf_mv(classes)) next + if (!(gen %in% walkable)) next + # try a one-arg call; skip anything that genuinely needs >1 argument + call_expr <- str2lang(sprintf("%s(fm)", gen)) + expect_true( + fails_cleanly(call_expr), + info = sprintf( + "Generic %s(): tf_mv path must either succeed or abort with tf_mv_method_unimplemented; any other error means a fail-fast stub is missing.", + gen + ) + ) + checked <- c(checked, gen) + } + # Sanity: walker observed *some* generics with univariate methods (else the + # NAMESPACE/test wiring is wrong). + expect_true(any(has_univariate(unlist(by_gen)))) + # Sanity: the walkable list must actually overlap registered S3 generics -- + # otherwise the walker silently iterates over nothing and the test rots into + # a no-op. (`checked` itself can legitimately be 0 once every univariate-tf + # generic has an explicit tf_mv method; that's the desired end state.) + expect_gt(length(intersect(walkable, names(by_gen))), 10) +}) diff --git a/tests/testthat/test-mv-edge.R b/tests/testthat/test-mv-edge.R new file mode 100644 index 00000000..d8518113 --- /dev/null +++ b/tests/testthat/test-mv-edge.R @@ -0,0 +1,650 @@ +# Edge cases, NA handling, and branches not exercised elsewhere. + +# ---- empty / length-0 --------------------------------------------------------- + +test_that("empty tf_mv prototype: accessors, ops, c(), tibble", { + f0 <- tfd_mv(list()) + expect_length(f0, 0L) + expect_identical(tf_ncomp(f0), 0L) + expect_identical(length(tf_components(f0)), 0L) + expect_identical(tf_arg(f0), numeric(0)) + expect_identical(tf_evaluations(f0), list()) + expect_identical(is.na(f0), logical(0)) + expect_identical(dim(as.matrix(f0)), c(0L, 0L, 0L)) + expect_identical(dim(tf_count(f0)), c(0L, 0L)) + # c() with empty stays length 0 + expect_length(c(f0, f0), 0L) + expect_silent(format(f0)) + expect_invisible(print(f0)) + # tibble column with 0 rows + skip_if_not_installed("tibble") + expect_identical(nrow(tibble::tibble(traj = f0)), 0L) +}) + +test_that("Reduce-based ops on a zero-component prototype stay zero-length", { + # d == 0 means the underlying component list is empty, so Reduce() has no + # accumulator and would return NULL without an explicit guard. + f0 <- tfd_mv(list()) + expect_identical(f0 == f0, logical(0)) + expect_identical(f0 != f0, logical(0)) + expect_s3_class(tf_norm(f0), "tfd") + expect_length(tf_norm(f0), 0L) + expect_s3_class(tf_inner(f0, f0), "tfd") + expect_length(tf_inner(f0, f0), 0L) + expect_s3_class(tf_distance(f0, f0), "tfd") +}) + +test_that("tfb_mv prototype is constructible and identifiable", { + tb0 <- tfb_mv(list()) + expect_s3_class(tb0, "tfb_mv") + expect_length(tb0, 0L) + # default-method path + tb0d <- tfb_mv(numeric(0)) + expect_s3_class(tb0d, "tfb_mv") + expect_s3_class(tfb_mv(tfd_mv(list())), "tfb_mv") +}) + +test_that("zero-curve tf_mv with components has empty matrix/count shapes", { + f0 <- tfd_mv(list(x = tfd(), y = tfd())) + expect_length(f0, 0L) + expect_identical(tf_ncomp(f0), 2L) + expect_identical(dim(as.matrix(f0)), c(0L, 0L, 2L)) + counts <- tf_count(f0) + expect_identical(dim(counts), c(0L, 2L)) + expect_identical(colnames(counts), c("x", "y")) +}) + +# ---- n = 1, d = 1 ------------------------------------------------------------- + +test_that("single-curve and single-component tf_mv work end-to-end", { + set.seed(11) + one <- tfd_mv(list(x = tf_rgp(1), y = tf_rgp(1))) + expect_length(one, 1L) + expect_length(c(one, one), 2L) + expect_identical(dim(one[1, c(.2, .5)]), c(1L, 2L, 2L)) + + single <- tfd_mv(list(only = tf_rgp(4))) + expect_identical(tf_ncomp(single), 1L) + expect_s3_class(single + single, "tfd_mv") + # vec_ptype_full reports d = 1 + expect_match(vctrs::vec_ptype_full(single), "d=1") +}) + +# ---- NA handling -------------------------------------------------------------- + +test_that("NA in any component marks the curve as NA, ops propagate NAs", { + set.seed(12) + fx <- tf_rgp(4) + fx[2] <- NA + fy <- tf_rgp(4) + fy[3] <- NA + f <- tfd_mv(list(x = fx, y = fy)) + # any-component-NA => curve NA + expect_equal(unname(is.na(f)), c(FALSE, TRUE, TRUE, FALSE)) + m_all <- suppressWarnings(mean(f)) + expect_length(m_all, 1L) + m_complete <- mean(f, na.rm = TRUE) + expect_equal(m_complete, mean(f[!is.na(f)])) + # subset preserves NA status + expect_true(is.na(f[2])) + # arithmetic with NA curves: result NA at NA positions + ff <- suppressWarnings(f + f) + expect_equal(unname(is.na(ff)), c(FALSE, TRUE, TRUE, FALSE)) +}) + +test_that("all-NA mv curve is handled in tf_evaluations()", { + set.seed(13) + fx <- tf_rgp(3) + fx[1] <- NA + fy <- tf_rgp(3) + fy[1] <- NA + f <- tfd_mv(list(x = fx, y = fy)) + ev <- tf_evaluations(f) + # new uniform shape: list of per-curve (arg, x, y) data.frames. For the + # all-NA curve the arg grid is still well-defined (the union of the + # components' native grids) but the value columns are NA-filled. + expect_s3_class(ev[[1]], "data.frame") + expect_identical(colnames(ev[[1]]), c("arg", "x", "y")) + expect_true(all(is.na(ev[[1]]$x)) && all(is.na(ev[[1]]$y))) + expect_s3_class(ev[[2]], "data.frame") + expect_identical(colnames(ev[[2]]), c("arg", "x", "y")) + expect_false(anyNA(ev[[2]]$x) || anyNA(ev[[2]]$y)) +}) + +# ---- Summary group generic and stat methods ---------------------------------- + +test_that("Summary group generic on tf_mv is component-wise", { + set.seed(14) + f <- tfd_mv(list(x = tf_rgp(3), y = tf_rgp(3))) + s <- sum(f) + expect_s3_class(s, "tfd_mv") + expect_length(s, 1L) + expect_equal(s$x, sum(f$x)) + expect_equal(s$y, sum(f$y)) + expect_equal(min(f)$x, min(f$x)) + expect_equal(max(f)$y, max(f$y)) +}) + +test_that("var and sd on tf_mv are component-wise and return length-1 mv", { + set.seed(15) + f <- tfd_mv(list(x = tf_rgp(5), y = tf_rgp(5))) + v <- var(f) + s <- sd(f) + expect_s3_class(v, "tfd_mv") + expect_length(v, 1L) + expect_s3_class(s, "tfd_mv") + expect_length(s, 1L) + expect_equal( + tf_evaluations(v$x)[[1]], + tf_evaluations(var(f$x))[[1]] + ) + expect_equal(tf_evaluations(s$y)[[1]], tf_evaluations(sd(f$y))[[1]]) +}) + +# ---- Arithmetic edge cases ---------------------------------------------------- + +test_that("unary minus on tf_mv works (vec_arith.tf_mv.MISSING)", { + set.seed(16) + f <- tfd_mv(list(x = tf_rgp(3), y = tf_rgp(3))) + nf <- -f + expect_s3_class(nf, "tfd_mv") + expect_equal( + tf_evaluations((nf + f)$x)[[1]], + rep(0, length(tf_arg(f$x))), + tolerance = 1e-9 + ) +}) + +test_that("incompatible arithmetic op errors via vec_arith.tf_mv.default", { + f <- tfd_mv(list(x = tf_rgp(2), y = tf_rgp(2))) + expect_error(vctrs::vec_arith("+", f, "abc")) +}) + +# ---- tfb_mv non-tf inputs and vctrs paths ------------------------------------ + +test_that("tfb_mv.list accepts a list of pre-built tfb components", { + set.seed(17) + tx <- tfb(tf_rgp(3), verbose = FALSE) + ty <- tfb(tf_rgp(3), verbose = FALSE) + tb <- tfb_mv(list(x = tx, y = ty)) + expect_s3_class(tb, "tfb_mv") + expect_true(all(map_lgl(tf_components(tb), is_tfb))) +}) + +test_that("tfb_mv.list refits non-tfb components via tfd_mv", { + set.seed(171) + mx <- matrix(rnorm(33), nrow = 3) + my <- matrix(rnorm(33), nrow = 3) + tb <- suppressWarnings(suppressMessages( + tfb_mv(list(x = mx, y = my)) + )) + expect_s3_class(tb, "tfb_mv") + expect_identical(tf_ncomp(tb), 2L) +}) + +test_that("c(tfb_mv, tfb_mv) and tibble column work for tfb_mv", { + set.seed(18) + tb1 <- tfb_mv(tfd_mv(list(x = tf_rgp(3), y = tf_rgp(3))), verbose = FALSE) + tb2 <- tfb_mv(tfd_mv(list(x = tf_rgp(2), y = tf_rgp(2))), verbose = FALSE) + cc <- suppressWarnings(c(tb1, tb2)) + expect_s3_class(cc, "tfb_mv") + expect_length(cc, 5L) + expect_match(vctrs::vec_ptype_abbr(tb1), "tfb_mv") + expect_match(vctrs::vec_ptype_full(tb1), "tfb_mv") +}) + +# ---- tfd_mv.tf_mv (re-evaluation on a new arg grid) -------------------------- + +test_that("tfd_mv(, arg = ...) re-evaluates on a new grid", { + set.seed(19) + f <- tfd_mv(list(x = tf_rgp(3), y = tf_rgp(3))) + new_grid <- seq(0, 1, length.out = 21) + g <- tfd_mv(f, arg = new_grid) + expect_s3_class(g, "tfd_mv") + expect_equal(tf_arg(g), new_grid) + expect_equal(g$x, tfd(f$x, arg = new_grid)) + expect_equal(g$y, tfd(f$y, arg = new_grid)) +}) + +# ---- tf_rebase with an mv basis_from ----------------------------------------- + +test_that("tf_rebase(mv, mv_basis) uses each component as its own basis", { + set.seed(20) + f <- tfd_mv(list(x = tf_rgp(3), y = tf_rgp(3))) + basis_from <- tfb_mv( + tfd_mv(list(x = tf_rgp(1), y = tf_rgp(1))), + k = 8, + verbose = FALSE + ) + r <- tf_rebase(f, basis_from) + expect_s3_class(r, "tfb_mv") + expect_equal(r$x, tf_rebase(f$x, basis_from$x)) + expect_equal(r$y, tf_rebase(f$y, basis_from$y)) +}) + +# ---- tf_evaluate.tf_mv direct call ------------------------------------------- + +test_that("tf_evaluate() returns per-curve data.frames on requested arg", { + set.seed(21) + f <- tfd_mv(list(x = tf_rgp(3), y = tf_rgp(3))) + out <- tf_evaluate(f, arg = c(0.2, 0.5, 0.8)) + expect_length(out, 3L) + # uniform shape: list of n per-curve data.frames (arg, comp1, ..., compd) + expect_s3_class(out[[1]], "data.frame") + expect_identical(colnames(out[[1]]), c("arg", "x", "y")) + expect_identical(nrow(out[[1]]), 3L) + expect_identical(out[[1]]$arg, c(0.2, 0.5, 0.8)) + expect_equal(out[[1]]$x, tf_evaluate(f$x, arg = c(0.2, 0.5, 0.8))[[1]]) + expect_equal(out[[3]]$y, tf_evaluate(f$y, arg = c(0.2, 0.5, 0.8))[[3]]) +}) + +# ---- as.matrix(arg=...) and as.data.frame both modes ------------------------- + +test_that("as.matrix(, arg = ...) re-evaluates on a new grid", { + set.seed(22) + f <- tfd_mv(list(x = tf_rgp(3, arg = 11L), y = tf_rgp(3, arg = 11L))) + m <- as.matrix(f, arg = seq(0, 1, length.out = 5), interpolate = TRUE) + expect_identical(dim(m), c(3L, 5L, 2L)) + expect_equal( + m[,, "x"], + as.matrix(f$x, arg = seq(0, 1, length.out = 5), interpolate = TRUE), + ignore_attr = TRUE + ) + expect_equal( + m[,, "y"], + as.matrix(f$y, arg = seq(0, 1, length.out = 5), interpolate = TRUE), + ignore_attr = TRUE + ) +}) + +test_that("as.matrix() uses the union grid for mixed regular component grids", { + x <- tfd(matrix(1:3, nrow = 1), arg = 1:3) + y <- tfd(matrix(11:14, nrow = 1), arg = 1:4) + f <- tfd_mv(list(x = x, y = y)) + expect_warning(m <- as.matrix(f), "interpolate = FALSE") + expect_identical(dim(m), c(1L, 4L, 2L)) + expect_true(is.na(m[1, "4", "x"])) + expect_equal(unname(m[1, "4", "y"]), 14) +}) + +test_that("[.tf_mv(matrix = FALSE) uses per-curve union grids when j is missing", { + x <- tfd(matrix(1:3, nrow = 1), arg = 1:3) + y <- tfd(matrix(11:14, nrow = 1), arg = 1:4) + f <- tfd_mv(list(x = x, y = y)) + expect_warning( + out <- f[,, interpolate = FALSE, matrix = FALSE], + "interpolate = FALSE" + ) + expect_length(out, 1L) + expect_equal(out[[1]]$arg, 1:4) + expect_true(is.na(out[[1]]$x[4])) + expect_equal(out[[1]]$y, 11:14) +}) + +test_that("as.data.frame() supports both unnested and 1-column forms", { + set.seed(23) + f <- tfd_mv(list(x = tf_rgp(2, arg = 5L), y = tf_rgp(2, arg = 5L))) + d1 <- as.data.frame(f) + expect_identical(nrow(d1), 2L) + expect_named(d1, "data") + # default schema is long: (id, arg, component, value), 2 * 5 * 2 = 20 rows + d2 <- as.data.frame(f, unnest = TRUE) + expect_named(d2, c("id", "arg", "component", "value")) + expect_s3_class(d2$component, "factor") + expect_identical(levels(d2$component), c("x", "y")) + expect_identical(nrow(d2), 2L * 5L * 2L) + # x values match the univariate as.data.frame on the x-component + expect_equal(d2$value[d2$component == "x"], + as.data.frame(f$x, unnest = TRUE)$value) + expect_equal(d2$value[d2$component == "y"], + as.data.frame(f$y, unnest = TRUE)$value) + # opt-in to legacy wide schema + d2w <- as.data.frame(f, unnest = TRUE, long = FALSE) + expect_named(d2w, c("id", "arg", "x", "y")) + expect_identical(nrow(d2w), 2L * 5L) + expect_equal(d2w$x, as.data.frame(f$x, unnest = TRUE)$value) + expect_equal(d2w$y, as.data.frame(f$y, unnest = TRUE)$value) +}) + +# ---- registration: ref_component = "norm" path ------------------------------- + +test_that("ref_component = 'norm' runs the norm-based registration path", { + set.seed(24) + t <- seq(0, 1, length.out = 51) + shifts <- c(-0.05, 0, 0.05) + # use a bump so the norm carries phase information (avoids sin/cos degeneracy) + bump <- function(s) exp(-30 * (t - (0.5 + s))^2) + x <- tfd(t(sapply(shifts, \(s) bump(s))), arg = t) + y <- tfd(t(sapply(shifts, \(s) 0.5 * bump(s))), arg = t) + f <- tfd_mv(list(x = x, y = y)) + w <- suppressWarnings(suppressMessages( + tf_estimate_warps( + f, + method = "affine", + type = "shift", + ref_component = "norm" + ) + )) + aligned <- suppressWarnings(suppressMessages(tf_align(f, w))) + meanvar <- function(mv) { + mean(sapply(tf_components(mv), function(comp) { + suppressWarnings(mean(tf_evaluations(var(comp))[[1]], na.rm = TRUE)) + })) + } + expect_s3_class(w, "tfd") + expect_length(w, 3L) + expect_equal( + as.matrix(w), + as.matrix(suppressWarnings(suppressMessages( + tf_estimate_warps(tf_norm(f), method = "affine", type = "shift") + ))) + ) + expect_lt(meanvar(aligned), 0.1 * meanvar(f)) +}) + +# ---- tf_component<- adds new components, length mismatch errors -------------- + +test_that("tfb_mv distributes a component-named list ... per component", { + set.seed(202) + f <- tfd_mv(list(x = tf_rgp(3, arg = 101L), y = tf_rgp(3, arg = 101L))) + # per-component k via a list keyed by component names + tb <- tfb_mv(f, k = list(x = 5, y = 15), verbose = FALSE) + expect_s3_class(tb, "tfb_mv") + lab_x <- attr(tf_components(tb)$x, "basis_label") + lab_y <- attr(tf_components(tb)$y, "basis_label") + expect_match(lab_x, "k = 5") + expect_match(lab_y, "k = 15") + # mixing shared (bs) and per-component (k) + tb2 <- tfb_mv(f, k = list(x = 5, y = 15), bs = "tp", verbose = FALSE) + expect_match(attr(tf_components(tb2)$x, "basis_label"), 'bs = "tp"') + expect_match(attr(tf_components(tb2)$y, "basis_label"), 'k = 15') + # back-compat: scalar k is shared across components + tb3 <- tfb_mv(f, k = 8, verbose = FALSE) + expect_match(attr(tf_components(tb3)$x, "basis_label"), "k = 8") + expect_match(attr(tf_components(tb3)$y, "basis_label"), "k = 8") +}) + +test_that("tfb_mv.list forwards basis arguments after tfd_mv conversion", { + set.seed(203) + mx <- matrix(rnorm(3 * 20), nrow = 3) + my <- matrix(rnorm(3 * 20), nrow = 3) + tb <- suppressWarnings(suppressMessages( + tfb_mv(list(x = mx, y = my), k = 5, verbose = FALSE) + )) + expect_match(attr(tf_components(tb)$x, "basis_label"), "k = 5") + expect_match(attr(tf_components(tb)$y, "basis_label"), "k = 5") + expect_equal( + tb$x, + suppressWarnings(suppressMessages(tfb(tfd(mx), k = 5, verbose = FALSE))) + ) + expect_equal( + tb$y, + suppressWarnings(suppressMessages(tfb(tfd(my), k = 5, verbose = FALSE))) + ) +}) + +test_that("tfb_mv.list distributes component-named basis arguments for tf inputs", { + set.seed(204) + f <- list(x = tf_rgp(3, arg = 101L), y = tf_rgp(3, arg = 101L)) + tb <- tfb_mv(f, k = list(x = 5, y = 15), verbose = FALSE) + expect_match(attr(tf_components(tb)$x, "basis_label"), "k = 5") + expect_match(attr(tf_components(tb)$y, "basis_label"), "k = 15") + expect_equal(tb$x, tfb(f$x, k = 5, verbose = FALSE)) + expect_equal(tb$y, tfb(f$y, k = 15, verbose = FALSE)) +}) + +test_that("tfb_mv: a non-component-named list is treated as a shared arg, not distributed", { + f <- tfd_mv(list(x = tf_rgp(2, arg = 51L), y = tf_rgp(2, arg = 51L))) + # list whose names don't match component names is NOT distributed (treated as + # a single arg-value; even though mgcv rejects this particular shape, my + # dispatcher must still treat both components identically -- both end up with + # the same shared k.) + tb <- suppressWarnings(suppressMessages( + tfb_mv(f, k = 6, sp = list(foo = 0, bar = 0), verbose = FALSE) + )) + expect_match(attr(tf_components(tb)$x, "basis_label"), "k = 6") + expect_match(attr(tf_components(tb)$y, "basis_label"), "k = 6") +}) + +test_that("mixed regular/irregular components work across the API", { + set.seed(101) + reg <- tf_rgp(3) + irr <- tf_sparsify(tf_rgp(3)) + f <- tfd_mv(list(x = reg, y = irr)) + expect_s3_class(f, "tfd_mv") + expect_true(is_reg(tf_components(f)$x)) + expect_true(is_irreg(tf_components(f)$y)) + # tf_arg returns a per-component list (vec for reg, list-of-vecs for irreg) + a <- tf_arg(f) + expect_named(a, c("x", "y")) + expect_true(is.numeric(a$x)) + expect_true(is.list(a$y)) + # tf_evaluations[[i]] is a per-curve data.frame (arg, x, y) on the union grid; + # NA-filled where a component lacks a native observation at that arg. + ev <- tf_evaluations(f)[[1]] + expect_s3_class(ev, "data.frame") + expect_named(ev, c("arg", "x", "y")) + expect_true(anyNA(ev$x) || anyNA(ev$y)) + # tf_count is n x d + expect_identical(dim(tf_count(f)), c(3L, 2L)) + # subset preserves component classes + g <- f[2:3] + expect_true(is_reg(g$x) && is_irreg(g$y)) + # arithmetic preserves component classes + s <- f + f + expect_true(is_reg(s$x) && is_irreg(s$y)) + # c() preserves component classes + cc <- c(f, f) + expect_length(cc, 6L) + expect_true(is_reg(cc$x) && is_irreg(cc$y)) + # as.data.frame(unnest = TRUE, long = FALSE) full-outer-joins on (id, arg) + # so that rows where only the regular component has a value get NAs in the + # irregular column (this used to error with a row-count mismatch). + df <- as.data.frame(f, unnest = TRUE, long = FALSE) + expect_named(df, c("id", "arg", "x", "y")) + expect_true(anyNA(df$y)) # irregular y is NA at most reg-grid points + expect_false(anyNA(df$x)) # regular x is observed everywhere +}) + +test_that("tf_component<- can add a new component and rejects mismatched length", { + set.seed(25) + f <- tfd_mv(list(x = tf_rgp(3), y = tf_rgp(3))) + f2 <- f + tf_component(f2, "z") <- tf_rgp(3) + expect_identical(tf_ncomp(f2), 3L) + expect_named(tf_components(f2), c("x", "y", "z")) + expect_error(tf_component(f, "x") <- tf_rgp(4), "length") +}) + +test_that("tf_mv constructors preserve and require compatible curve names", { + x <- tf_rgp(2) + y <- tf_rgp(2) + names(x) <- c("a", "b") + names(y) <- c("a", "b") + f <- tfd_mv(list(x = x, y = y)) + expect_identical(names(f), c("a", "b")) + + names(y) <- c("c", "d") + expect_error(tfd_mv(list(x = x, y = y)), "identical curve names") +}) + +test_that("tf_component() uses exact component names", { + f <- tfd_mv(list(xpos = tf_rgp(2), ypos = tf_rgp(2))) + expect_error(tf_component(f, "x"), "Unknown component") + expect_s3_class(tf_component(f, "xpos"), "tfd") +}) + +test_that("tf_component validates the index/name selector", { + f <- tfd_mv(list(x = tf_rgp(2), y = tf_rgp(2))) + expect_error(tf_component(f, 5), "between 1 and 2") + expect_error(tf_component(f, 0), "between 1 and 2") + expect_error(tf_component(f, 1.5), "between 1 and 2") + expect_error(tf_component(f, c(1, 2)), "single component") + expect_error(tf_component(42, 1), "tf_mv") + # valid selectors still work + expect_s3_class(tf_component(f, 2L), "tfd") + expect_s3_class(tf_component(f, "y"), "tfd") +}) + +test_that("tf_component<- validates the index but still appends new names", { + f <- tfd_mv(list(x = tf_rgp(2), y = tf_rgp(2))) + expect_error( + { + f2 <- f + tf_component(f2, 7) <- tf_rgp(2) + }, + "between 1 and 2" + ) + # appending a brand-new component by name remains allowed + f3 <- f + tf_component(f3, "z") <- tf_rgp(2) + expect_named(tf_components(f3), c("x", "y", "z")) +}) + +test_that("tf_estimate_warps validates max_iter, tol and ref_component", { + f <- tfd_mv(list(x = tf_rgp(3), y = tf_rgp(3))) + expect_error(tf_estimate_warps(f, max_iter = -1), "max_iter") + expect_error(tf_estimate_warps(f, tol = -0.1), "tol") + expect_error(tf_estimate_warps(f, ref_component = 9), "between 1 and 2") +}) + +test_that("tf_component<- rejects multi-length and empty selectors cleanly", { + f <- tfd_mv(list(x = tf_rgp(2), y = tf_rgp(2))) + # multi-length character selector must not hit the `&&` length>1 crash + expect_error( + { + f2 <- f + tf_component(f2, c("a", "b")) <- tf_rgp(2) + }, + "length 1" + ) + # selecting from a component-less object gives a clear message + e <- tfd_mv(list()) + expect_error(tf_component(e, 1), "no components") +}) + +test_that("tf_inner.tf_mv rejects a non-tf_mv second argument informatively", { + f <- tfd_mv(list(x = tf_rgp(2), y = tf_rgp(2))) + expect_error(tf_inner(f, tf_rgp(2)), "tf_mv") + expect_error(tf_inner(f, 1:2), "tf_mv") +}) + +# ---- bracket seams: NA index, broadcast, and var() y handling ---------------- + +test_that("[<-.tf_mv rejects a univariate tf and other non-NA scalars (#244)", { + set.seed(244) + g <- tfd_mv(list(x = tf_rgp(3), y = tf_rgp(3))) + # broadcasting a univariate tf would silently corrupt every component: + expect_error(g[1] <- tf_rgp(1), "logical.*NA|tf_mv") + # bare numeric scalars are not a valid "all components NA" sentinel either + expect_error(g[1] <- 0, "logical.*NA|tf_mv") + # NA still works (the documented broadcast use-case) + g_na <- g + g_na[1] <- NA + expect_true(is.na(g_na$x[1])) + expect_true(is.na(g_na$y[1])) +}) + +test_that("[<-.tf_mv rejects typed NAs and validates length upfront (#244)", { + set.seed(2441) + g <- tfd_mv(list(x = tf_rgp(3), y = tf_rgp(3))) + # Typed NAs are rejected with the new message rather than the downstream + # "Can't combine and " complaint. + expect_error(g[1] <- NA_real_, "logical.*NA|tf_mv") + expect_error(g[1] <- NA_integer_, "logical.*NA|tf_mv") + # Length-mismatched all-NA value is rejected upfront with a clean message + # rather than the downstream vec_slice<- "Can't recycle" error. + expect_error(g[1] <- c(NA, NA), "length") + # logical(0) was already rejected; pin the behaviour. + expect_error(g[1] <- logical(0), "logical.*NA|tf_mv") +}) + +test_that("var(, y = NULL) explicitly works (#245)", { + set.seed(2451) + f <- tf_rgp(5) + fm <- tfd_mv(list(x = tf_rgp(4), y = tf_rgp(4))) + # passing an explicit NULL is the deliberate use case and must not error + expect_s3_class(var(f, y = NULL), "tf") + expect_s3_class(var(fm, y = NULL), "tf_mv") + # direct method calls too + expect_s3_class(var.tf(f, y = NULL), "tf") + expect_s3_class(var.tf_mv(fm, y = NULL), "tf_mv") +}) + +test_that("var.tf and var.tf_mv error on a non-NULL y (#245)", { + set.seed(245) + f <- tf_rgp(5) + g <- tf_rgp(5) + expect_error(var(f, g), "tf_crosscov|tf_crosscor") + fm <- tfd_mv(list(x = tf_rgp(4), y = tf_rgp(4))) + gm <- tfd_mv(list(x = tf_rgp(4), y = tf_rgp(4))) + expect_error(var(fm, gm), "tf_crosscov|tf_crosscor") + # var(x) without y still works + expect_s3_class(var(f), "tf") + expect_s3_class(var(fm), "tf_mv") +}) + +test_that("[.tf_mv rejects NA indices and out-of-bounds (#252)", { + set.seed(252) + f <- tfd_mv(list(x = tf_rgp(3), y = tf_rgp(3))) + expect_error(f[c(1, NA)], "[Mm]issing|NA") + expect_error(f[c(1L, NA_integer_)], "[Mm]issing|NA") + # out-of-bounds also errors (was already implicit via vec_slice, kept for + # parity with univariate `[.tf`) + expect_error(f[10]) +}) + +test_that("[.tf_mv emits the basis 'interpolate ignored' inform once (#252)", { + set.seed(2521) + fb <- tfb(tf_rgp(3), verbose = FALSE) + fbm <- tfb_mv(list(x = fb, y = fb)) + msgs <- capture_messages(out <- fbm[1:2, , interpolate = FALSE]) + # exactly one inform, not one per component + expect_length(grep("interpolate", msgs), 1L) +}) + +test_that("[.tf_mv honours interpolate = FALSE for tfd components (#270)", { + # For all-tfd tf_mv, no tfb-driven override fires, the user's + # `interpolate = FALSE` is honoured per component and we get the standard + # univariate `[.tf` NA-warning on out-of-grid j. Pinned here so the + # per-component override logic stays per-component rather than reverting + # to the old all-or-nothing global override (which would silently force + # interpolate = TRUE for tfd components whenever any future mixed + # tf_mv contains a single tfb component). + x <- tfd(matrix(1:3, nrow = 2, ncol = 3, byrow = TRUE), arg = 1:3) + y <- tfd(matrix(4:6, nrow = 2, ncol = 3, byrow = TRUE), arg = 1:3) + f <- tfd_mv(list(x = x, y = y)) + msgs <- capture_messages( + suppressWarnings( + arr <- f[, c(1, 2.5), interpolate = FALSE] + ) + ) + # no 'interpolate ignored' inform when no component is basis-represented + expect_length(grep("interpolate ignored", msgs), 0L) + # 2.5 is not on the native grid -> NA under interpolate = FALSE + expect_true(all(is.na(arr[, "2.5", ]))) + # 1 is on the native grid -> value preserved (= 1 for both rows of x) + expect_equal(unname(arr[, "1", "x"]), c(1, 1)) +}) + +test_that("[<-.tf_mv length validation handles logical indices (#270)", { + set.seed(2701) + g <- tfd_mv(list(x = tf_rgp(3), y = tf_rgp(3))) + # logical i with 2 TRUEs == 2 locations; 2 NAs should be accepted + g2 <- g + expect_no_error(g2[c(TRUE, FALSE, TRUE)] <- c(NA, NA)) + expect_true(is.na(g2$x[1])) + expect_false(is.na(g2$x[2])) + expect_true(is.na(g2$x[3])) + # 2 TRUEs but 3 NAs -> length mismatch, clean tf_mv-level error + expect_error( + g[c(TRUE, FALSE, TRUE)] <- c(NA, NA, NA), + "length|tf_mv" + ) + # negative indices: -1 = 2 locations on length-3, 2 NAs ok + g3 <- g + expect_no_error(g3[-1] <- c(NA, NA)) + expect_false(is.na(g3$x[1])) + expect_true(is.na(g3$x[2])) + expect_true(is.na(g3$x[3])) +}) diff --git a/tests/testthat/test-mv-geom.R b/tests/testthat/test-mv-geom.R new file mode 100644 index 00000000..99e9e8f0 --- /dev/null +++ b/tests/testthat/test-mv-geom.R @@ -0,0 +1,274 @@ +# Tier-1 geometric primitives for tf_mv (tf_norm / tf_speed / tf_inner / +# tf_distance / tf_tangent / tf_reparam_arclength). + +test_that("tf_norm is the pointwise Euclidean norm of the components", { + t <- seq(0, 1, length.out = 51) + f <- tfd_mv(list( + x = tfd(matrix(rep(3, 51), nrow = 1), arg = t), + y = tfd(matrix(rep(4, 51), nrow = 1), arg = t) + )) + # constant (3, 4) -> ||.|| == 5 + expect_s3_class(tf_norm(f), "tfd") + expect_equal(tf_evaluations(tf_norm(f))[[1]], rep(5, 51)) +}) + +test_that("tf_speed equals tf_norm(tf_derive(.))", { + set.seed(11) + f <- tfd_mv(list(x = tf_rgp(2), y = tf_rgp(2))) + expect_equal(tf_speed(f), tf_norm(tf_derive(f))) +}) + +test_that("tf_inner is component-wise dot product, agrees with hand calc", { + t <- seq(0, 1, length.out = 51) + f <- tfd_mv(list( + x = tfd(matrix(rep(2, 51), nrow = 1), arg = t), + y = tfd(matrix(rep(3, 51), nrow = 1), arg = t) + )) + g <- tfd_mv(list( + x = tfd(matrix(rep(4, 51), nrow = 1), arg = t), + y = tfd(matrix(rep(5, 51), nrow = 1), arg = t) + )) + # <(2,3), (4,5)> = 2*4 + 3*5 = 23 everywhere + expect_equal(tf_evaluations(tf_inner(f, g))[[1]], rep(23, 51)) + # incompatible structures -> error + expect_error( + tf_inner(f, tfd_mv(list(a = tf_rgp(1), b = tf_rgp(1), c = tf_rgp(1)))), + class = "vctrs_error_incompatible_type" + ) +}) + +test_that("tf_inner evaluates mixed component grids on the paired union grid", { + x <- tfd(matrix(c(1, 2, 3), nrow = 1), arg = c(0, 0.5, 1)) + y <- tfd(matrix(c(10, 20, 30, 40), nrow = 1), arg = c(0, 0.25, 0.75, 1)) + f <- tfd_mv(list(x = x, y = y), domain = c(0, 1)) + grid <- c(0, 0.25, 0.5, 0.75, 1) + + paired <- tf_evaluate(f, arg = grid)[[1]] + inner <- tf_inner(f, f) + + expect_equal(tf_arg(inner), grid) + expect_equal( + tf_evaluate(inner, arg = grid)[[1]], + paired$x^2 + paired$y^2 + ) +}) + +test_that("tf_inner restricts to the common domain on partial overlap", { + # f(t) = (t, t) on [0, 1], g(t) = (t, t) on [0.5, 1.5]; overlap is [0.5, 1], + # where = t^2 + t^2 = 2 t^2. + f <- tfd_mv( + list( + x = tfd(matrix(c(0, 0.5, 1), nrow = 1), arg = c(0, 0.5, 1)), + y = tfd(matrix(c(0, 0.5, 1), nrow = 1), arg = c(0, 0.5, 1)) + ), + domain = c(0, 1) + ) + g <- tfd_mv( + list( + x = tfd(matrix(c(0.5, 1, 1.5), nrow = 1), arg = c(0.5, 1, 1.5)), + y = tfd(matrix(c(0.5, 1, 1.5), nrow = 1), arg = c(0.5, 1, 1.5)) + ), + domain = c(0.5, 1.5) + ) + inner <- tf_inner(f, g) + expect_equal(tf_domain(inner), c(0.5, 1)) + expect_equal(tf_arg(inner), c(0.5, 1)) + expect_equal(tf_evaluate(inner, arg = c(0.5, 1))[[1]], c(2 * 0.5^2, 2 * 1^2)) +}) + +test_that("tf_inner errors on non-overlapping domains", { + f <- tfd_mv( + list( + x = tfd(matrix(c(0, 1), nrow = 1), arg = c(0, 1)), + y = tfd(matrix(c(0, 1), nrow = 1), arg = c(0, 1)) + ), + domain = c(0, 1) + ) + g <- tfd_mv( + list( + x = tfd(matrix(c(2, 3), nrow = 1), arg = c(2, 3)), + y = tfd(matrix(c(2, 3), nrow = 1), arg = c(2, 3)) + ), + domain = c(2, 3) + ) + expect_error(tf_inner(f, g), "non-overlapping") +}) + +test_that("tf_inner of size-0 and size-1 operands is the empty tfd", { + set.seed(7) + f1 <- tfd_mv(list(x = tf_rgp(1), y = tf_rgp(1))) + f0 <- f1[0] + expect_equal(vec_size(tf_inner(f0, f1)), 0L) + expect_equal(vec_size(tf_inner(f1, f0)), 0L) + expect_s3_class(tf_inner(f0, f1), "tfd") +}) + +test_that("tf_distance equals tf_norm(f - g)", { + set.seed(12) + f <- tfd_mv(list(x = tf_rgp(2), y = tf_rgp(2))) + g <- tfd_mv(list(x = tf_rgp(2), y = tf_rgp(2))) + expect_equal(tf_distance(f, g), tf_norm(f - g)) + # distance to self is 0 + expect_equal( + tf_evaluations(tf_distance(f, f))[[1]], + rep(0, length(tf_arg(f))), + tolerance = 1e-12 + ) +}) + +test_that("tf_tangent has unit speed everywhere except where speed = 0", { + # unit circle: tangent is (-sin(2pi t), cos(2pi t)), always unit length + t <- seq(0, 1, length.out = 401) + circ <- tfd_mv(list( + x = tfd(matrix(cos(2 * pi * t), nrow = 1), arg = t), + y = tfd(matrix(sin(2 * pi * t), nrow = 1), arg = t) + )) + tan <- tf_tangent(circ) + expect_s3_class(tan, "tfd_mv") + expect_identical(tf_ncomp(tan), 2L) + # speed of the tangent at the middle of the domain should be ~ 1 + expect_equal( + unlist(tf_evaluate(tf_norm(tan), arg = 0.5)), + 1, + tolerance = 1e-2, + ignore_attr = TRUE + ) +}) + +test_that("tf_tangent aligns mixed derivative grids before scaling", { + x_arg <- c(0, 0.5, 1) + y_arg <- c(0, 0.25, 0.75, 1) + f <- tfd_mv( + list( + x = tfd(matrix(x_arg, nrow = 1), arg = x_arg), + y = tfd(matrix(y_arg, nrow = 1), arg = y_arg) + ), + domain = c(0, 1) + ) + grid <- sort(unique(c(x_arg, y_arg))) + + tan <- tf_tangent(f) + expect_s3_class(tan, "tfd_mv") + expect_equal( + tf_evaluate(tf_norm(tan), arg = grid)[[1]], + rep(1, length(grid)), + tolerance = 1e-10 + ) +}) + +test_that("tf_reparam_arclength yields a (nearly) constant-speed curve", { + # an unevenly-parameterized line: f(t) = (t^2, 0) on [0,1]. + # the curve is the segment from (0,0) to (1,0) (length 1), traversed + # non-uniformly. After arclength reparameterization the speed should be + # approximately constant (= 1). + t <- seq(0, 1, length.out = 401) + f <- tfd_mv(list( + x = tfd(matrix(t^2, nrow = 1), arg = t), + y = tfd(matrix(rep(0, 401), nrow = 1), arg = t) + )) + g <- tf_reparam_arclength(f) + expect_s3_class(g, "tfd_mv") + # sample speed on the interior of the domain (avoid boundary FD artefacts) + sp <- tf_evaluate(tf_speed(g), arg = c(0.3, 0.5, 0.7))[[1]] + expect_equal(sp, rep(1, 3), tolerance = 0.1) +}) + +test_that("tf_reparam_arclength preserves non-unit domains", { + t <- seq(0, 10, length.out = 11) + f <- tfd_mv(list( + x = tfd(matrix(t, nrow = 1), arg = t), + y = tfd(matrix(0, nrow = 1, ncol = length(t)), arg = t) + )) + r <- tf_reparam_arclength(f) + m <- as.matrix(r, arg = t, interpolate = TRUE) + expect_false(anyNA(m)) + expect_equal(unname(m[1, , "x"]), t, tolerance = 1e-8) + expect_equal(unname(m[1, , "y"]), rep(0, length(t)), tolerance = 1e-8) +}) + +test_that("tf_reparam_arclength leaves zero-length (constant) curves unchanged", { + t <- seq(0, 1, length.out = 30) + # curve 1: a real (non-degenerate) curve; curve 2: constant in both components + f <- tfd_mv(list( + x = tfd(rbind(cos(2 * pi * t), rep(2, 30)), arg = t), + y = tfd(rbind(sin(2 * pi * t), rep(3, 30)), arg = t) + )) + expect_warning(g <- tf_reparam_arclength(f), "zero/undefined arc length") + expect_s3_class(g, "tfd_mv") + expect_length(g, 2L) + # degenerate curve returned untouched, well-defined curve free of NaN + expect_equal( + as.matrix(g[2]), + as.matrix(f[2]), + tolerance = 1e-8, + ignore_attr = TRUE + ) + expect_false(any(is.nan(as.matrix(g[1])))) +}) + +# Univariate generalization of the geometric primitives ------------------------ + +test_that("tf_norm/tf_inner/tf_distance reduce to scalar ops for univariate tf", { + set.seed(21) + u <- tf_rgp(3) + v <- tf_rgp(3) + # univariate norm is the pointwise absolute value + expect_equal(tf_norm(u), abs(u)) + # univariate inner product is the pointwise product + expect_equal(tf_inner(u, v), u * v) + # univariate distance is |u - v| + expect_equal(tf_distance(u, v), abs(u - v)) + # tf_speed and tf_tangent compose the same way + expect_equal(tf_speed(u), abs(tf_derive(u))) + expect_s3_class(tf_tangent(u), "tfd") +}) + +test_that("univariate geometric primitives also work for tfb", { + set.seed(22) + u <- tfb(tf_rgp(2), verbose = FALSE) + # tfb arithmetic warns about the lossy round-trip through tfd; that documented + # behaviour is not what we are testing here. + expect_s3_class(suppressWarnings(tf_norm(u)), "tfb") + expect_s3_class(suppressWarnings(tf_inner(u, u)), "tfb") +}) + +test_that("geometric primitives error informatively on non-tf input", { + expect_error(tf_norm(1:3), "not defined for") + expect_error(tf_tangent("a"), "not defined for") + expect_error(tf_inner(1:3, 1:3), "not defined for") + # univariate tf vs tf_mv is a mismatch + set.seed(23) + f <- tfd_mv(list(x = tf_rgp(2), y = tf_rgp(2))) + expect_error(tf_inner(tf_rgp(2), f), "univariate") +}) + +test_that("tf_arclength rejects lower > upper", { + set.seed(24) + f <- tfd_mv(list(x = tf_rgp(2), y = tf_rgp(2))) + expect_error(tf_arclength(f, lower = 0.8, upper = 0.2), "must not exceed") +}) + +test_that("tf_arclength handles irregular curves with sub-domain support", { + # two curves on a [0, 10] domain, each observed on a different sub-interval: + # curve A on [0, 3] -- straight line of length sqrt(2) * 3 in 2-D + # curve B on [5, 10] -- straight line of length sqrt(2) * 5 + curveA <- data.frame( + id = "A", + t = c(0, 1, 2, 3), + x = c(0, 1, 2, 3), + y = c(0, 1, 2, 3) + ) + curveB <- data.frame( + id = "B", + t = c(5, 6, 8, 10), + x = c(0, 1, 3, 5), + y = c(0, 1, 3, 5) + ) + long <- rbind(curveA, curveB) + trk <- tfd_mv(list( + x = tfd(long, id = "id", arg = "t", value = "x", domain = c(0, 10)), + y = tfd(long, id = "id", arg = "t", value = "y", domain = c(0, 10)) + )) + al <- tf_arclength(trk) + expect_equal(unname(al), c(3 * sqrt(2), 5 * sqrt(2)), tolerance = 1e-8) +}) diff --git a/tests/testthat/test-mv-methods.R b/tests/testthat/test-mv-methods.R new file mode 100644 index 00000000..f9020076 --- /dev/null +++ b/tests/testthat/test-mv-methods.R @@ -0,0 +1,249 @@ +test_that("bracket evaluation returns a [curve, arg, component] array", { + set.seed(1) + f <- tfd_mv(list(x = tf_rgp(4), y = tf_rgp(4))) + arr <- f[1:3, c(0.2, 0.5, 0.8)] + expect_identical(dim(arr), c(3L, 3L, 2L)) + expect_identical(dimnames(arr)[[3]], c("x", "y")) + # consistent with the univariate component bracket + expect_equal( + arr[,, "x"], + unclass(f$x[1:3, c(0.2, 0.5, 0.8)]), + ignore_attr = TRUE + ) +}) + +test_that("matrix-index extraction returns one row per (function, arg) pair", { + set.seed(2) + f <- tfd_mv(list(x = tf_rgp(3), y = tf_rgp(3))) + idx <- cbind(1:3, c(0, 0.5, 1)) + out <- f[idx] + expect_true(is.matrix(out)) + expect_identical(dim(out), c(3L, 2L)) + expect_identical(colnames(out), c("x", "y")) + expect_equal(out[, "x"], f$x[idx], ignore_attr = TRUE) + expect_equal(out[, "y"], f$y[idx], ignore_attr = TRUE) +}) + +test_that("component= drops to the univariate result", { + set.seed(3) + f <- tfd_mv(list(x = tf_rgp(3), y = tf_rgp(3))) + m <- f[1:2, c(0.1, 0.9), component = "x"] + expect_true(is.matrix(m)) + expect_identical(dim(m), c(2L, 2L)) + expect_equal(m, f$x[1:2, c(0.1, 0.9)], ignore_attr = TRUE) +}) + +test_that("component= with multiple names returns a sub-tf_mv", { + set.seed(31) + f <- tfd_mv(list(x = tf_rgp(3), y = tf_rgp(3), z = tf_rgp(3))) + sub <- f[,, c("x", "y")] + expect_s3_class(sub, "tfd_mv") + expect_identical(tf_ncomp(sub), 2L) + expect_identical(names(tf_components(sub)), c("x", "y")) + expect_length(sub, length(f)) + expect_equal(tf_component(sub, "x"), f$x) + expect_equal(tf_component(sub, "y"), f$y) +}) + +test_that("component= with multiple indices and curve subset returns sub-tf_mv", { + set.seed(32) + f <- tfd_mv(list(a = tf_rgp(4), b = tf_rgp(4), c = tf_rgp(4))) + sub <- f[2:3, , c(1L, 3L)] + expect_s3_class(sub, "tfd_mv") + expect_identical(tf_ncomp(sub), 2L) + expect_identical(names(tf_components(sub)), c("a", "c")) + expect_length(sub, 2L) +}) + +test_that("component= with multiple names + j evaluates to a 3-d array", { + set.seed(33) + f <- tfd_mv(list(x = tf_rgp(3), y = tf_rgp(3), z = tf_rgp(3))) + arr <- f[, c(0.1, 0.5, 0.9), c("x", "z")] + expect_true(is.array(arr)) + expect_identical(dim(arr), c(3L, 3L, 2L)) + expect_identical(dimnames(arr)[[3]], c("x", "z")) +}) + +test_that("multi-component selection rejects unknown names", { + set.seed(34) + f <- tfd_mv(list(x = tf_rgp(2), y = tf_rgp(2))) + expect_error(f[,, c("x", "not_there")], "Unknown component") +}) + +test_that("multi-component selection on tfb_mv stays tfb_mv (no refit)", { + set.seed(35) + fb <- tfb_mv( + tfd_mv(list(a = tf_rgp(3), b = tf_rgp(3), c = tf_rgp(3))), + verbose = FALSE + ) + sub <- fb[,, c("a", "b")] + expect_s3_class(sub, "tfb_mv") + expect_identical(tf_ncomp(sub), 2L) + expect_equal(tf_component(sub, "a"), fb$a) +}) + +test_that("matrix = FALSE returns per-curve data.frames", { + set.seed(4) + f <- tfd_mv(list(x = tf_rgp(2), y = tf_rgp(2))) + out <- f[1:2, c(0.2, 0.7), matrix = FALSE] + expect_type(out, "list") + expect_length(out, 2) + expect_named(out[[1]], c("arg", "x", "y")) + expect_equal(out[[1]]$x, f$x[1, c(0.2, 0.7), matrix = FALSE][[1]]$value) + expect_equal(out[[2]]$y, f$y[2, c(0.2, 0.7), matrix = FALSE][[1]]$value) +}) + +test_that("arithmetic is component-wise", { + set.seed(5) + f <- tfd_mv(list(x = tf_rgp(3), y = tf_rgp(3))) + s <- f + f + expect_s3_class(s, "tfd_mv") + expect_equal(tf_evaluations(s$x)[[1]], 2 * tf_evaluations(f$x)[[1]]) + d <- f - f + # tf_evaluations now returns per-curve data.frames (arg, x, y); + # the component columns should all be ~0. + for (cdf in tf_evaluations(d)) { + expect_true(all(abs(cdf$x) < 1e-9)) + expect_true(all(abs(cdf$y) < 1e-9)) + } + scaled <- 3 * f + expect_equal(tf_evaluations(scaled$y)[[2]], 3 * tf_evaluations(f$y)[[2]]) +}) + +test_that("Math and Summary group generics are component-wise", { + set.seed(6) + f <- tfd_mv(list(x = tf_rgp(3), y = tf_rgp(3))) + el <- exp(log(abs(f) + 1)) - 1 + expect_s3_class(el, "tfd_mv") + expect_equal( + tf_evaluations(el$x)[[1]], + abs(tf_evaluations(f$x)[[1]]), + tolerance = 1e-6 + ) +}) + +test_that("Summary group generics use all tf_mv inputs", { + f <- tfd_mv(list( + x = tfd(matrix(0, nrow = 1, ncol = 3), arg = 1:3), + y = tfd(matrix(0, nrow = 1, ncol = 3), arg = 1:3) + )) + g <- tfd_mv(list( + x = tfd(matrix(10, nrow = 1, ncol = 3), arg = 1:3), + y = tfd(matrix(10, nrow = 1, ncol = 3), arg = 1:3) + )) + expect_equal(as.matrix(max(f, g)), as.matrix(g)) +}) + +test_that("mean / median return a length-1 tf_mv", { + set.seed(7) + f <- tfd_mv(list(x = tf_rgp(5), y = tf_rgp(5))) + m <- mean(f) + expect_s3_class(m, "tfd_mv") + expect_length(m, 1) + expect_equal(tf_evaluations(m$x)[[1]], tf_evaluations(mean(f$x))[[1]]) + med <- median(f) + expect_length(med, 1) + expect_equal(med$x, median(f$x)) + expect_equal(med$y, median(f$y)) +}) + +test_that("sd.tf_mv and var.tf_mv accept na.rm", { + f <- suppressWarnings(tfd_mv(list( + x = tfd(rbind(c(1, 2, 3), c(NA, NA, NA), c(4, 5, 6)), arg = 1:3), + y = tfd(rbind(c(2, 3, 4), c(NA, NA, NA), c(5, 6, 7)), arg = 1:3) + ))) + fsd <- sd(f, na.rm = TRUE) + fvar <- var(f, na.rm = TRUE) + complete <- f[!is.na(f)] + expect_equal(fsd, sd(complete)) + expect_equal(fvar, var(complete)) +}) + +test_that("equality is component-wise", { + set.seed(8) + f <- tfd_mv(list(x = tf_rgp(3), y = tf_rgp(3))) + expect_true(all(f == f)) + expect_false(any(f != f)) +}) + +test_that("rev.tf_mv reverses the vector", { + set.seed(81) + f <- tfd_mv(list(x = tf_rgp(3), y = tf_rgp(3))) + names(f) <- c("a", "b", "c") + out <- rev(f) + expect_s3_class(out, "tfd_mv") + expect_identical(names(out), c("c", "b", "a")) + expect_equal(out$x, rev(f$x)) + expect_equal(out$y, rev(f$y)) +}) + +test_that("as.matrix returns a [curve, arg, component] array", { + set.seed(9) + f <- tfd_mv(list(x = tf_rgp(3, arg = 11L), y = tf_rgp(3, arg = 11L))) + m <- as.matrix(f) + expect_identical(dim(m), c(3L, 11L, 2L)) + expect_identical(dimnames(m)[[3]], c("x", "y")) + expect_equal(m[,, "x"], as.matrix(f$x), ignore_attr = TRUE) + expect_equal(m[,, "y"], as.matrix(f$y), ignore_attr = TRUE) +}) + +test_that("plot/lines default to trajectory for d == 2 and facet otherwise", { + set.seed(7) + f2 <- tfd_mv(list(x = tf_rgp(3), y = tf_rgp(3))) + f3 <- tfd_mv(list(x = tf_rgp(3), y = tf_rgp(3), z = tf_rgp(3))) + expect_identical(mv_plot_type(NULL, tf_components(f2)), "trajectory") + expect_identical(mv_plot_type(NULL, tf_components(f3)), "facet") + # partial matching and explicit override both honoured + expect_identical(mv_plot_type("traj", tf_components(f3)), "trajectory") + expect_identical(mv_plot_type("facet", tf_components(f2)), "facet") + + # the plotting calls run without error, including per-curve `col` recycling + pf <- withr::local_tempfile(fileext = ".pdf") + grDevices::pdf(pf) + on.exit(grDevices::dev.off(), add = TRUE) + expect_no_error(plot(f2, col = 1:3)) + expect_no_error(lines(f2, col = 1:3)) + expect_no_error(plot(f3)) + expect_error(plot(f3, type = "trajectory"), "requires exactly 2 components") +}) + +test_that("trajectory plotting handles components on different / irregular grids", { + # components on different regular grids would give x/y matrices of differing + # width: the trajectory must evaluate both on a common grid first. + set.seed(8) + mixed <- tfd_mv(list( + x = tfd(matrix(rnorm(2 * 10), 2), arg = seq(0, 1, length.out = 10)), + y = tfd(matrix(rnorm(2 * 25), 2), arg = seq(0, 1, length.out = 25)) + )) + irr <- tfd_mv(list( + x = tfd(list(c(0, 0.5, 1)), list(c(1, 2, 3))), + y = tfd(list(c(0, 0.5, 1)), list(c(4, 5, 6))) + )) + pf <- withr::local_tempfile(fileext = ".pdf") + grDevices::pdf(pf) + on.exit(grDevices::dev.off(), add = TRUE) + expect_no_error(plot(mixed, col = 1:2)) + expect_no_error(lines(mixed, col = 1:2)) + expect_no_error(plot(irr)) +}) + +test_that("print reports per-component grid / interpolator / basis info", { + set.seed(9) + # shared grid -> collapsed single info line + f <- tfd_mv(list(x = tf_rgp(3), y = tf_rgp(3))) + expect_output(print(f), "components based on \\d+ evaluations each") + expect_output(print(f), "interpolation by tf_approx_linear") + + # different grids -> per-component lines + mixed <- tfd_mv(list( + x = tfd(matrix(rnorm(2 * 10), 2), arg = seq(0, 1, length.out = 10)), + y = tfd(matrix(rnorm(2 * 25), 2), arg = seq(0, 1, length.out = 25)) + )) + expect_output(print(mixed), "x: based on 10 evaluations each") + expect_output(print(mixed), "y: based on 25 evaluations each") + + # basis representation reports the basis spec + tb <- tfb_mv(f, k = 7, verbose = FALSE) + expect_output(print(tb), "in basis representation") + expect_output(print(tb), "k = 7") +}) diff --git a/tests/testthat/test-mv-tidyverse.R b/tests/testthat/test-mv-tidyverse.R new file mode 100644 index 00000000..621223ac --- /dev/null +++ b/tests/testthat/test-mv-tidyverse.R @@ -0,0 +1,161 @@ +# tf_mv columns inside tibbles / data.frames, plus the standard tidyverse +# verbs (filter / mutate / summarize / arrange / slice / group_by / nest / +# left_join / bind_rows / pull / distinct). These all exercise vctrs (vec_proxy +# / vec_restore / vec_slice / vec_c / vec_ptype2 / vec_cast) one way or +# another; tf_mv was designed so the dplyr/tidyr surface "just works" via +# component-wise dispatch on the proxy. + +skip_if_not_installed("tibble") +skip_if_not_installed("dplyr") +skip_if_not_installed("tidyr") + +mk_data <- function(n = 4, seed = 1) { + set.seed(seed) + tibble::tibble( + id = seq_len(n), + g = rep(c("A", "B"), length.out = n), + path = tfd_mv(list(x = tf_rgp(n), y = tf_rgp(n))) + ) +} + +test_that("tf_mv works as a tibble column (constructor, ptype, length)", { + tbl <- mk_data() + expect_identical(nrow(tbl), 4L) + expect_s3_class(tbl$path, "tfd_mv") + expect_length(tbl$path, 4L) + expect_match(vctrs::vec_ptype_abbr(tbl$path), "tfd_mv") +}) + +test_that("dplyr::filter on a tibble keeps the tf_mv column aligned", { + tbl <- mk_data(6) + sub <- dplyr::filter(tbl, g == "B") + expect_identical(nrow(sub), 3L) + expect_length(sub$path, 3L) + expect_s3_class(sub$path, "tfd_mv") + expect_identical(names(tf_components(sub$path)), c("x", "y")) + expect_equal(sub$path, tbl$path[tbl$g == "B"]) +}) + +test_that("dplyr::filter accepts a tf_mv-derived predicate", { + tbl <- mk_data(6) + threshold <- mean(tf_arclength(tbl$path)) + sub <- dplyr::filter(tbl, tf_arclength(path) > threshold) + expect_true(all(tf_arclength(sub$path) > threshold)) +}) + +test_that("dplyr::mutate can derive scalar and tfd columns from a tf_mv", { + tbl <- mk_data() + m <- dplyr::mutate( + tbl, + arclen = tf_arclength(path), + speed = tf_speed(path), + path_scaled = 2 * path + ) + expect_type(m$arclen, "double") + expect_length(m$arclen, nrow(tbl)) + expect_s3_class(m$speed, "tfd") + expect_length(m$speed, nrow(tbl)) + expect_s3_class(m$path_scaled, "tfd_mv") + expect_identical(tf_ncomp(m$path_scaled), 2L) + expect_equal(m$arclen, tf_arclength(tbl$path)) + expect_equal(m$speed, tf_speed(tbl$path)) + expect_equal(m$path_scaled$x, 2 * tbl$path$x) + expect_equal(m$path_scaled$y, 2 * tbl$path$y) +}) + +test_that("dplyr::summarize returns a length-1 tfd_mv via mean()", { + tbl <- mk_data(6) + s <- dplyr::summarize( + tbl, + mean_path = mean(path), + total_len = sum(tf_arclength(path)) + ) + expect_s3_class(s$mean_path, "tfd_mv") + expect_length(s$mean_path, 1L) + expect_type(s$total_len, "double") + expect_equal(s$mean_path, mean(tbl$path)) + expect_equal(s$total_len, sum(tf_arclength(tbl$path))) +}) + +test_that("group_by + summarize yields one tfd_mv entry per group", { + tbl <- mk_data(6) + gs <- tbl |> + dplyr::group_by(g) |> + dplyr::summarize(m = mean(path), n_curves = dplyr::n()) + expect_identical(nrow(gs), 2L) + expect_s3_class(gs$m, "tfd_mv") + expect_length(gs$m, 2L) + expect_equal(gs$m[gs$g == "A"], mean(tbl$path[tbl$g == "A"])) + expect_equal(gs$m[gs$g == "B"], mean(tbl$path[tbl$g == "B"])) + expect_equal(gs$n_curves, c(3L, 3L)) +}) + +test_that("arrange / slice keep the tf_mv column row-aligned", { + tbl <- mk_data(6) + out <- tbl |> dplyr::arrange(dplyr::desc(id)) |> dplyr::slice(1:3) + expect_identical(out$id, c(6L, 5L, 4L)) + expect_length(out$path, 3L) + # the path that ended up at row 1 of out is the same as the path at row 6 + # of the original tibble + expect_equal( + tf_evaluations(out$path[1])[[1]], + tf_evaluations(tbl$path[6])[[1]] + ) +}) + +test_that("bind_rows concatenates a tf_mv column via vctrs c()", { + tbl <- mk_data(4) + br <- dplyr::bind_rows(tbl, tbl) + expect_identical(nrow(br), 8L) + expect_length(br$path, 8L) + expect_s3_class(br$path, "tfd_mv") + expect_equal(br$path[1:4], tbl$path) + expect_equal(br$path[5:8], tbl$path) +}) + +test_that("left_join preserves tf_mv column and fills NA rows correctly", { + tbl <- mk_data(4) + extra <- tibble::tibble(id = c(1, 3), label = c("a", "c")) + lj <- dplyr::left_join(tbl, extra, by = "id") + expect_identical(nrow(lj), 4L) + expect_length(lj$path, 4L) + expect_equal(lj$label, c("a", NA, "c", NA)) + expect_equal(lj$path, tbl$path) +}) + +test_that("pull returns the tf_mv vector unchanged", { + tbl <- mk_data() + p <- dplyr::pull(tbl, path) + expect_s3_class(p, "tfd_mv") + expect_length(p, nrow(tbl)) + expect_equal(p, tbl$path) +}) + +test_that("distinct on a key column keeps tf_mv aligned", { + tbl <- mk_data(4) + d <- dplyr::distinct(tbl, g, .keep_all = TRUE) + expect_identical(nrow(d), 2L) + expect_length(d$path, 2L) + expect_equal(d$path, tbl$path[c(1, 2)]) +}) + +test_that("tidyr nest / unnest round-trip a tf_mv column", { + tbl <- mk_data(4) + nst <- tidyr::nest(tbl, data = c(id, path)) + expect_identical(nrow(nst), 2L) + # each nested tibble carries the tfd_mv intact + expect_s3_class(nst$data[[1]]$path, "tfd_mv") + unn <- tidyr::unnest(nst, data) + expect_identical(nrow(unn), nrow(tbl)) + expect_s3_class(unn$path, "tfd_mv") + expect_equal(dplyr::arrange(unn, id)$path, tbl$path) +}) + +test_that("rowwise mutate with a tf_mv column works", { + tbl <- mk_data() + rw <- tbl |> + dplyr::rowwise() |> + dplyr::mutate(len = tf_arclength(path)) |> + dplyr::ungroup() + expect_equal(rw$len, tf_arclength(tbl$path)) +}) diff --git a/tests/testthat/test-mv-vctrs.R b/tests/testthat/test-mv-vctrs.R new file mode 100644 index 00000000..262d6bb9 --- /dev/null +++ b/tests/testthat/test-mv-vctrs.R @@ -0,0 +1,107 @@ +test_that("subsetting a tf_mv keeps components and names aligned", { + set.seed(1) + f <- tfd_mv(list(x = tf_rgp(5), y = tf_rgp(5))) + names(f) <- letters[1:5] + g <- f[2:4] + expect_s3_class(g, "tfd_mv") + expect_length(g, 3) + expect_identical(tf_ncomp(g), 2L) + expect_equal(tf_evaluations(g$x)[[1]], tf_evaluations(f$x)[[2]]) +}) + +test_that("c() concatenates tf_mv component-wise", { + set.seed(2) + f <- tfd_mv(list(x = tf_rgp(3), y = tf_rgp(3))) + g <- tfd_mv(list(x = tf_rgp(2), y = tf_rgp(2))) + cc <- c(f, g) + expect_s3_class(cc, "tfd_mv") + expect_length(cc, 5) + expect_equal(tf_evaluations(cc$x)[[4]], tf_evaluations(g$x)[[1]]) + expect_equal(cc[1:3], f) + expect_equal(cc[4:5], g) +}) + +test_that("vec_ptype2 / vec_cast work for tf_mv", { + set.seed(3) + f <- tfd_mv(list(x = tf_rgp(3), y = tf_rgp(3))) + tb <- tfb_mv(f, verbose = FALSE) + # tfd_mv <-> tfb_mv combine to a common (tfd_mv) type + cc <- suppressWarnings(c(f, tb)) + expect_s3_class(cc, "tfd_mv") + expect_length(cc, 6) + # explicit cast + cast <- suppressWarnings(vctrs::vec_cast(tb, f)) + expect_s3_class(cast, "tfd_mv") + expect_equal(cc[1:3], f) + expect_equal(cc[4:6], cast) + expect_equal(cast$x, as.tfd(tb$x)) + expect_equal(cast$y, as.tfd(tb$y)) +}) + +test_that("combining incompatible tf_mv errors", { + f2 <- tfd_mv(list(x = tf_rgp(2), y = tf_rgp(2))) + f3 <- tfd_mv(list(x = tf_rgp(2), y = tf_rgp(2), z = tf_rgp(2))) + expect_error(c(f2, f3), class = "vctrs_error_incompatible_type") + named_diff <- tfd_mv(list(a = tf_rgp(2), b = tf_rgp(2))) + expect_error(c(f2, named_diff), class = "vctrs_error_incompatible_type") +}) + +test_that("tf_mv works as a tibble / data.frame column", { + skip_if_not_installed("tibble") + set.seed(4) + f <- tfd_mv(list(x = tf_rgp(4), y = tf_rgp(4))) + tbl <- tibble::tibble(id = 1:4, traj = f) + expect_identical(nrow(tbl), 4L) + expect_s3_class(tbl$traj, "tfd_mv") + expect_identical(vctrs::vec_ptype_abbr(f), "tfd_mv") + sub <- tbl[2:3, ] + expect_length(sub$traj, 2) + expect_equal(sub$traj, f[2:3]) +}) + +test_that("vec_ptype_full reports the dimension", { + f <- tfd_mv(list(x = tf_rgp(2), y = tf_rgp(2))) + expect_match(vctrs::vec_ptype_full(f), "tfd_mv") +}) + +test_that("subset-assignment replaces curves component-wise", { + set.seed(31) + f <- tfd_mv(list(x = tf_rgp(4), y = tf_rgp(4))) + g <- f + g[2] <- f[1] + expect_s3_class(g, "tfd_mv") + expect_identical(tf_ncomp(g), 2L) + expect_identical(tf_domain(g), tf_domain(f)) + # replaced curve matches the source, others untouched + expect_equal(tf_evaluations(g$x)[[2]], tf_evaluations(f$x)[[1]]) + expect_equal(tf_evaluations(g$y)[[2]], tf_evaluations(f$y)[[1]]) + expect_equal(tf_evaluations(g$x)[[3]], tf_evaluations(f$x)[[3]]) + # logical index and length-1 recycling + g2 <- f + g2[c(TRUE, FALSE, TRUE, FALSE)] <- f[3] + expect_equal(tf_evaluations(g2$x)[[1]], tf_evaluations(f$x)[[3]]) + expect_equal(tf_evaluations(g2$x)[[3]], tf_evaluations(f$x)[[3]]) + # incompatible component count errors + expect_error( + f[1] <- tfd_mv(list(a = tf_rgp(1), b = tf_rgp(1), c = tf_rgp(1))), + class = "vctrs_error_incompatible_type" + ) +}) + +test_that("NA subset-assignment marks the whole curve missing", { + set.seed(32) + f <- tfd_mv(list(x = tf_rgp(3), y = tf_rgp(3))) + f[2] <- NA + expect_identical(unname(is.na(f)), c(FALSE, TRUE, FALSE)) +}) + +test_that("curve names round-trip through names<- and subsetting", { + set.seed(33) + f <- tfd_mv(list(x = tf_rgp(4), y = tf_rgp(4))) + names(f) <- c("a", "b", "c", "d") + expect_identical(names(f), c("a", "b", "c", "d")) + # names live on the components, so they survive subset and concatenation + expect_identical(names(f$x), c("a", "b", "c", "d")) + expect_identical(names(f[2:3]), c("b", "c")) + expect_identical(names(c(f[1], f[3])), c("a", "c")) +}) diff --git a/tests/testthat/test-mv-verbs.R b/tests/testthat/test-mv-verbs.R new file mode 100644 index 00000000..e2d85fe6 --- /dev/null +++ b/tests/testthat/test-mv-verbs.R @@ -0,0 +1,403 @@ +test_that("tf_rebase works component-wise for tf_mv", { + set.seed(1) + f <- tfd_mv(list(x = tf_rgp(4, arg = 101L), y = tf_rgp(4, arg = 101L))) + # rebase to a spline basis -> tfb_mv (basis spec comes from basis_from) + basis_from <- tfb(tf_rgp(1), k = 25, verbose = FALSE) + tb <- tf_rebase(f, basis_from) + expect_s3_class(tb, "tfb_mv") + expect_identical(tf_ncomp(tb), 2L) + expect_equal(tb$x, tf_rebase(f$x, basis_from)) + expect_equal(tb$y, tf_rebase(f$y, basis_from)) + # rebase to a new tfd grid + tfd_basis <- tfd(tf_rgp(1)) + g <- tf_rebase(f, tfd_basis, arg = seq(0, 1, length.out = 21)) + expect_s3_class(g, "tfd_mv") + expect_equal(tf_arg(g), seq(0, 1, length.out = 21)) + expect_equal(g$x, tf_rebase(f$x, tfd_basis, arg = seq(0, 1, length.out = 21))) + expect_equal(g$y, tf_rebase(f$y, tfd_basis, arg = seq(0, 1, length.out = 21))) +}) + +test_that("tf_interpolate works component-wise for tf_mv", { + set.seed(11) + f <- tfd_mv(list(x = tf_rgp(3, arg = 11L), y = tf_rgp(3, arg = 11L))) + arg <- seq(0, 1, length.out = 7) + + out <- tf_interpolate(f, arg = arg) + + expect_s3_class(out, "tfd_mv") + expect_identical(tf_arg(out), arg) + expect_equal(out$x, tf_interpolate(f$x, arg = arg)) + expect_equal(out$y, tf_interpolate(f$y, arg = arg)) +}) + +test_that("tf_sparsify and tf_jiggle delegate to univariate methods for tf_mv", { + set.seed(12) + f <- tfd_mv(list(x = tf_rgp(3, arg = 21L), y = tf_rgp(3, arg = 21L))) + + set.seed(121) + sp <- tf_sparsify(f, dropout = 0.3) + set.seed(121) + sp_x <- tf_sparsify(f$x, dropout = 0.3) + set.seed(121) + sp_y <- tf_sparsify(f$y, dropout = 0.3) + expect_equal(sp$x, sp_x) + expect_equal(sp$y, sp_y) + expect_equal(tf_arg(sp$x), tf_arg(sp$y)) + + set.seed(122) + jig <- tf_jiggle(f, amount = 0.2) + set.seed(122) + jig_x <- tf_jiggle(f$x, amount = 0.2) + set.seed(122) + jig_y <- tf_jiggle(f$y, amount = 0.2) + expect_equal(jig$x, jig_x) + expect_equal(jig$y, jig_y) + expect_equal(tf_arg(jig$x), tf_arg(jig$y)) +}) + +test_that("tf_sparsify and tf_jiggle can operate independently per component", { + set.seed(13) + f <- tfd_mv(list( + x = tf_rgp(3, arg = seq(0, 1, length.out = 21)), + y = tf_rgp(3, arg = seq(0, 1, length.out = 31)) + )) + + expect_error(tf_sparsify(f), "same_arg = TRUE") + expect_error(tf_jiggle(f), "same_arg = TRUE") + expect_no_error(sp <- tf_sparsify(f, dropout = 0.3, same_arg = FALSE)) + expect_no_error(jig <- tf_jiggle(f, amount = 0.2, same_arg = FALSE)) + expect_s3_class(sp, "tfd_mv") + expect_s3_class(jig, "tfd_mv") +}) + +test_that("tf_sparsify and tf_jiggle reuse random changes on shared irregular grids", { + set.seed(131) + x <- tf_sparsify(tf_rgp(3, arg = 21L), dropout = 0.2) + y <- x + 1 + f <- tfd_mv(list(x = x, y = y)) + + set.seed(132) + sp <- tf_sparsify(f, dropout = 0.3) + expect_equal(tf_arg(sp$x), tf_arg(sp$y)) + + set.seed(133) + jig <- tf_jiggle(f, amount = 0.2) + expect_equal(tf_arg(jig$x), tf_arg(jig$y)) +}) + +test_that("tf_sparsify and tf_jiggle handle zero-length inputs", { + set.seed(14) + x <- tf_rgp(3, arg = 11L) + empty_x <- x[integer()] + empty_mv <- tfd_mv(list(x = x, y = x))[integer()] + + expect_s3_class(tf_sparsify(empty_x), "tfd_irreg") + expect_length(tf_sparsify(empty_x), 0L) + expect_s3_class(tf_jiggle(empty_x), "tfd_irreg") + expect_length(tf_jiggle(empty_x), 0L) + + sp <- tf_sparsify(empty_mv) + jig <- tf_jiggle(empty_mv) + expect_s3_class(sp, "tfd_mv") + expect_s3_class(jig, "tfd_mv") + expect_identical(vec_size(sp), 0L) + expect_identical(vec_size(jig), 0L) + expect_identical(tf_ncomp(sp), 2L) + expect_identical(tf_ncomp(jig), 2L) +}) + +test_that("tf_derive is component-wise", { + set.seed(2) + f <- tfd_mv(list(x = tf_rgp(3), y = tf_rgp(3))) + d <- tf_derive(f) + expect_s3_class(d, "tfd_mv") + expect_equal( + tf_evaluations(d$x)[[1]], + tf_evaluations(tf_derive(f$x))[[1]] + ) + expect_equal( + tf_evaluations(d$y)[[2]], + tf_evaluations(tf_derive(f$y))[[2]] + ) +}) + +test_that("tf_integrate returns an n x d matrix (definite) or tfd_mv (indefinite)", { + set.seed(3) + f <- tfd_mv(list(x = tf_rgp(4), y = tf_rgp(4))) + m <- tf_integrate(f) + expect_true(is.matrix(m)) + expect_identical(dim(m), c(4L, 2L)) + expect_identical(colnames(m), c("x", "y")) + expect_equal(m[, "x"], tf_integrate(f$x), ignore_attr = TRUE) + expect_equal(m[, "y"], tf_integrate(f$y), ignore_attr = TRUE) + ind <- tf_integrate(f, definite = FALSE) + expect_s3_class(ind, "tfd_mv") + expect_equal(ind$x, tf_integrate(f$x, definite = FALSE)) + expect_equal(ind$y, tf_integrate(f$y, definite = FALSE)) +}) + +test_that("tf_zoom and tf_smooth are component-wise", { + set.seed(4) + f <- tfd_mv(list(x = tf_rgp(3), y = tf_rgp(3))) + z <- tf_zoom(f, 0.25, 0.75) + expect_s3_class(z, "tfd_mv") + expect_true(all(tf_domain(z$x) == c(0.25, 0.75))) + expect_equal(z$x, tf_zoom(f$x, 0.25, 0.75)) + expect_equal(z$y, tf_zoom(f$y, 0.25, 0.75)) + s <- tf_smooth(f, verbose = FALSE) + expect_s3_class(s, "tfd_mv") + expect_equal(s$x, tf_smooth(f$x, verbose = FALSE)) + expect_equal(s$y, tf_smooth(f$y, verbose = FALSE)) +}) + +test_that("tf_warp / tf_align apply one shared warp to all components", { + set.seed(5) + t <- seq(0, 1, length.out = 101) + n <- 4 + warp <- { + w <- tf_rgp(n, arg = t) + w <- exp(w - mean(w)) + tf_integrate(w, definite = FALSE) / tf_integrate(w) + } + f <- tfd_mv(list(x = tf_rgp(n, arg = t), y = tf_rgp(n, arg = t))) + warped <- tf_warp(f, warp) + expect_s3_class(warped, "tfd_mv") + expect_identical(tf_ncomp(warped), 2L) + # warping each component individually gives the same result + expect_equal( + tf_evaluations(warped$x)[[1]], + tf_evaluations(tf_warp(f$x, warp))[[1]] + ) + expect_equal(warped$y, tf_warp(f$y, warp)) + aligned <- tf_align(f, warp) + expect_equal(aligned$x, tf_align(f$x, warp)) + expect_equal(aligned$y, tf_align(f$y, warp)) +}) + +test_that("tf_register on tf_mv aligns all components with a shared warp", { + t <- seq(0, 1, length.out = 101) + shifts <- c(-0.08, -0.04, 0, 0.04, 0.08) + bump <- function(shift) exp(-80 * (t - (0.5 + shift))^2) + x <- tfd(t(sapply(shifts, bump)), arg = t) + y <- tfd(t(sapply(shifts, \(shift) 0.5 * bump(shift))), arg = t) + f <- tfd_mv(list(x = x, y = y)) + w <- suppressWarnings(suppressMessages( + tf_estimate_warps(f, method = "affine", type = "shift") + )) + aligned <- suppressWarnings(suppressMessages(tf_align(f, w))) + reg <- suppressWarnings(suppressMessages( + tf_register(f, method = "affine", type = "shift") + )) + expect_s3_class(reg, "tf_registration") + expect_s3_class(tf_aligned(reg), "tfd_mv") + expect_s3_class(tf_template(reg), "tfd_mv") + expect_s3_class(tf_inv_warps(reg), "tfd") # warps are univariate + expect_identical(length(tf_inv_warps(reg)), 5L) + # registration should reduce cross-curve (phase) variability + meanvar <- function(mv) { + mean(sapply(tf_components(mv), function(c) { + suppressWarnings(mean(tf_evaluations(var(c))[[1]], na.rm = TRUE)) + })) + } + expect_lt(meanvar(aligned), 0.1 * meanvar(f)) + expect_equal(aligned$x, suppressWarnings(tf_align(x, w))) + expect_equal(aligned$y, suppressWarnings(tf_align(y, w))) + expect_equal(tf_aligned(reg), aligned) +}) + +test_that("SRVF registration for tf_mv applies one shared warp to every component", { + skip_if_not_installed("fdasrvf") + + t <- seq(0, 1, length.out = 101) + centers <- c(0.4, 0.5, 0.6) + bump <- function(center) exp(-80 * (t - center)^2) + x <- tfd(t(sapply(centers, bump)), arg = t) + y <- tfd(t(sapply(centers, \(center) 0.5 * bump(center))), arg = t) + f <- tfd_mv(list(x = x, y = y)) + + w <- suppressWarnings(suppressMessages( + tf_estimate_warps(f, method = "srvf") + )) + aligned <- suppressWarnings(suppressMessages(tf_align(f, w))) + + meanvar <- function(mv) { + mean(sapply(tf_components(mv), function(comp) { + suppressWarnings(mean(tf_evaluations(var(comp))[[1]], na.rm = TRUE)) + })) + } + + expect_s3_class(w, "tfd") + expect_s3_class(aligned, "tfd_mv") + expect_lt(meanvar(aligned), 0.1 * meanvar(f)) + expect_equal(aligned$x, suppressWarnings(tf_align(x, w))) + expect_equal(aligned$y, suppressWarnings(tf_align(y, w))) +}) + +test_that("tf_estimate_warps respects ref_component", { + t <- seq(0, 1, length.out = 101) + shifts <- c(-0.08, 0, 0.08) + x <- tfd(t(sapply(shifts, \(s) exp(-80 * (t - (0.5 + s))^2))), arg = t) + y <- tfd(t(sapply(shifts, \(s) exp(-80 * (t - (0.3 - s))^2))), arg = t) + f <- tfd_mv(list(x = x, y = y)) + w_first <- suppressWarnings(suppressMessages( + tf_estimate_warps(f, method = "affine", type = "shift") + )) + w_y <- suppressWarnings(suppressMessages( + tf_estimate_warps(f, method = "affine", type = "shift", ref_component = "y") + )) + expect_s3_class(w_first, "tfd") + expect_length(w_first, 3) + expect_length(w_y, 3) + expect_equal( + as.matrix(w_first), + as.matrix(suppressWarnings(suppressMessages( + tf_estimate_warps(x, method = "affine", type = "shift") + ))) + ) + expect_equal( + as.matrix(w_y), + as.matrix(suppressWarnings(suppressMessages( + tf_estimate_warps(y, method = "affine", type = "shift") + ))) + ) + expect_false(isTRUE(all.equal(as.matrix(w_first), as.matrix(w_y)))) +}) + +# ---- tf_arclength ------------------------------------------------------------ + +test_that("tf_arclength on the unit circle returns ~ 2*pi", { + t <- seq(0, 1, length.out = 401) + circ <- tfd_mv(list( + x = tfd(matrix(cos(2 * pi * t), nrow = 1), arg = t), + y = tfd(matrix(sin(2 * pi * t), nrow = 1), arg = t) + )) + expect_equal(tf_arclength(circ), 2 * pi, tolerance = 1e-2) +}) + +test_that("tf_arclength is vectorised over curves (k-loop has length 2*pi*k)", { + t <- seq(0, 1, length.out = 401) + ks <- 1:3 + xs <- tfd(do.call(rbind, lapply(ks, \(k) cos(2 * pi * k * t))), arg = t) + ys <- tfd(do.call(rbind, lapply(ks, \(k) sin(2 * pi * k * t))), arg = t) + mv <- tfd_mv(list(x = xs, y = ys)) + ls <- tf_arclength(mv) + expect_length(ls, 3L) + expect_equal(ls, 2 * pi * ks, tolerance = 5e-2) +}) + +test_that("tf_arclength definite = FALSE returns cumulative s(t) as a tfd", { + t <- seq(0, 1, length.out = 401) + circ <- tfd_mv(list( + x = tfd(matrix(cos(2 * pi * t), nrow = 1), arg = t), + y = tfd(matrix(sin(2 * pi * t), nrow = 1), arg = t) + )) + s <- tf_arclength(circ, definite = FALSE) + expect_s3_class(s, "tfd") + expect_length(s, 1L) + # s(0) = 0, s(0.5) = pi, s(1) = 2*pi (uniform-speed parameterisation) + vals <- unlist(tf_evaluate(s, arg = c(0, 0.5, 1))) + expect_equal(vals, c(0, pi, 2 * pi), tolerance = 1e-2) +}) + +test_that("tf_arclength respects lower / upper limits", { + t <- seq(0, 1, length.out = 401) + circ <- tfd_mv(list( + x = tfd(matrix(cos(2 * pi * t), nrow = 1), arg = t), + y = tfd(matrix(sin(2 * pi * t), nrow = 1), arg = t) + )) + expect_equal( + tf_arclength(circ, lower = 0, upper = 0.25), + pi / 2, + tolerance = 1e-2 + ) + expect_equal( + tf_arclength(circ, lower = 0.25, upper = 0.75), + pi, + tolerance = 1e-2 + ) + expect_equal(tf_arclength(circ, lower = 0.5, upper = 0.5), 0) + expect_equal( + tf_arclength(circ, lower = 0.5, upper = 0.5, method = "derive"), + 0 + ) + point <- tfd_mv( + list( + x = tfd(matrix(1, nrow = 1), arg = 0.5, domain = c(0, 1)), + y = tfd(matrix(2, nrow = 1), arg = 0.5, domain = c(0, 1)) + ), + domain = c(0, 1) + ) + expect_equal(tf_arclength(point), 0) +}) + +test_that("tf_arclength rejects limits outside the domain (both methods)", { + t <- seq(0, 1, length.out = 41) + circ <- tfd_mv(list( + x = tfd(matrix(cos(2 * pi * t), nrow = 1), arg = t), + y = tfd(matrix(sin(2 * pi * t), nrow = 1), arg = t) + )) + # a zero-width interval *outside* the domain must error, not silently give 0 + expect_error(tf_arclength(circ, lower = 2, upper = 2), "within the domain") + expect_error( + tf_arclength(circ, lower = 2, upper = 2, method = "derive"), + "within the domain" + ) + expect_error(tf_arclength(circ, lower = -1, upper = 0.5), "within the domain") + expect_error(tf_arclength(circ, lower = 0.5, upper = 5), "within the domain") +}) + +test_that("tf_arclength(definite = FALSE) rejects a zero-width interval", { + t <- seq(0, 1, length.out = 41) + circ <- tfd_mv(list( + x = tfd(matrix(cos(2 * pi * t), nrow = 1), arg = t), + y = tfd(matrix(sin(2 * pi * t), nrow = 1), arg = t) + )) + expect_error( + tf_arclength(circ, lower = 0.5, upper = 0.5, definite = FALSE), + "zero-width" + ) + # a proper interval still returns a cumulative tfd + expect_s3_class( + tf_arclength(circ, lower = 0.25, upper = 0.75, definite = FALSE), + "tfd" + ) +}) + +test_that("tf_arclength polyline is more accurate than derive on raw tfd", { + t <- seq(0, 1, length.out = 401) + circ <- tfd_mv(list( + x = tfd(matrix(cos(2 * pi * t), nrow = 1), arg = t), + y = tfd(matrix(sin(2 * pi * t), nrow = 1), arg = t) + )) + err_poly <- abs(tf_arclength(circ, method = "polyline") - 2 * pi) + err_der <- abs(tf_arclength(circ, method = "derive") - 2 * pi) + expect_lt(err_poly, err_der) + # both still in the right ballpark + expect_equal(tf_arclength(circ, method = "derive"), 2 * pi, tolerance = 1e-2) +}) + +test_that("tf_arclength works for a 3-d helix", { + # one full turn of a unit-radius helix climbing 2pi*c in z over t in [0, 1]: + # f(t) = (cos(2*pi*t), sin(2*pi*t), 2*pi*c*t) + # arc length = sqrt((2*pi)^2 + (2*pi*c)^2) = 2*pi*sqrt(1 + c^2) + t <- seq(0, 1, length.out = 401) + c0 <- 0.5 + hx <- tfd(matrix(cos(2 * pi * t), nrow = 1), arg = t) + hy <- tfd(matrix(sin(2 * pi * t), nrow = 1), arg = t) + hz <- tfd(matrix(2 * pi * c0 * t, nrow = 1), arg = t) + helix <- tfd_mv(list(x = hx, y = hy, z = hz)) + expect_equal(tf_arclength(helix), 2 * pi * sqrt(1 + c0^2), tolerance = 1e-2) +}) + +test_that("tf_arclength errors clearly when component domains only partially overlap", { + x <- tfd(matrix(c(0, 1), nrow = 1), arg = c(0, 1)) + y <- tfd(matrix(c(0, 0.5, 1), nrow = 1), arg = c(0.5, 0.75, 1)) + f <- tfd_mv(list(x = x, y = y)) + + expect_error(tf_arclength(f), "missing paired component") + expect_equal( + tf_arclength(f, lower = 0.5, upper = 1), + sqrt(1.25), + tolerance = 1e-8 + ) +}) diff --git a/tests/testthat/test-mv-where.R b/tests/testthat/test-mv-where.R new file mode 100644 index 00000000..e851939d --- /dev/null +++ b/tests/testthat/test-mv-where.R @@ -0,0 +1,176 @@ +# tf_where / tf_anywhere on vector-valued (tf_mv) input: conditions are joint +# across components, referring to them by name (no `value` column). + +grid <- seq(-1, 1, by = 0.2) +# x: curve 1 = identity, curve 2 = 2 * identity; y: curve 1 = -identity, +# curve 2 = identity -- simple lines with known sign patterns. +x_mv <- tfd(unname(rbind(grid, 2 * grid)), arg = grid) +y_mv <- tfd(unname(rbind(-grid, grid)), arg = grid) +fm <- tfd_mv(list(x = x_mv, y = y_mv)) + +test_that("tf_where on tf_mv evaluates joint conditions across components", { + expect_equal( + tf_where(fm, x > 0 & y > 0), + list(numeric(0), seq(0.2, 1, by = 0.2)), + ignore_attr = TRUE + ) + expect_equal( + unname(tf_where(fm, x + y > 0, "first")), + c(NA, 0.2) + ) + expect_equal( + unname(tf_where(fm, x + y > 0, "last")), + c(NA, 1) + ) + expect_equal( + tf_where(fm, x <= 0 & y >= 0, "range"), + data_frame(begin = c(-1, 0), end = 0), + ignore_attr = TRUE + ) + expect_equal( + unname(tf_where(fm, x^2 + y^2 > 4, "any")), + c(FALSE, TRUE) + ) + # `arg` is available in cond, as for univariate input + expect_equal( + tf_where(fm, arg > 0.5 & y > 0), + list(numeric(0), c(0.6, 0.8, 1)), + ignore_attr = TRUE + ) +}) + +test_that("tf_anywhere on tf_mv is tf_where(..., return = 'any')", { + expect_identical( + tf_anywhere(fm, x^2 + y^2 > 4), + tf_where(fm, x^2 + y^2 > 4, "any") + ) +}) + +test_that("single-component conditions agree with univariate tf_where", { + expect_identical( + tf_where(fm, x > 0.5), + tf_where(fm$x, value > 0.5) + ) + expect_identical( + tf_anywhere(fm, y > 0.5), + tf_anywhere(fm$y, value > 0.5) + ) +}) + +test_that("`value` in cond on tf_mv aborts with a classed hint", { + expect_error( + tf_where(fm, value > 0), + class = "tf_mv_where_value" + ) + expect_error( + tf_anywhere(fm, value > 0), + class = "tf_mv_where_value" + ) + # ... unless a component is actually named `value` + fmv <- tfd_mv(list(value = x_mv, y = y_mv)) + expect_identical( + tf_where(fmv, value > 0.5), + tf_where(fmv$value, value > 0.5) + ) +}) + +test_that("tf_where on tf_mv requires a common grid unless arg is supplied", { + grid_fine <- seq(-1, 1, by = 0.1) + y_fine <- tfd(unname(rbind(-grid_fine, grid_fine)), arg = grid_fine) + fmi <- tfd_mv(list(x = x_mv, y = y_fine)) + expect_error( + tf_where(fmi, x > 0 & y > 0), + class = "tf_mv_incommensurate_args" + ) + expect_error( + tf_anywhere(fmi, x > 0), + class = "tf_mv_incommensurate_args" + ) + # explicit arg picks the evaluation grid and lifts the requirement + expect_equal( + tf_where(fmi, x > 0 & y > 0, arg = grid), + tf_where(fm, x > 0 & y > 0) + ) + expect_equal( + tf_anywhere(fmi, x^2 + y^2 > 4, arg = grid), + tf_anywhere(fm, x^2 + y^2 > 4) + ) + # ... but the per-component list that tf_arg() returns for such objects is + # rejected -- it would be misread as per-curve grids + expect_error( + tf_where(fmi, x > 0, arg = tf_arg(fmi)), + class = "tf_mv_incommensurate_args" + ) +}) + +test_that("'arg' is rejected as a component name (would shadow the grid column)", { + expect_error( + tfd_mv(list(arg = x_mv, y = y_mv)), + "reserved" + ) +}) + +test_that("tf_where on zero-length tf_mv works for all return modes", { + fm0 <- fm[0] + expect_identical(unname(tf_where(fm0, x > 0)), list()) + expect_identical(unname(tf_where(fm0, x > 0, "first")), numeric(0)) + expect_identical(unname(tf_where(fm0, x > 0, "last")), numeric(0)) + expect_identical(unname(tf_where(fm0, x > 0, "any")), logical(0)) + expect_identical( + tf_where(fm0, x > 0, "range"), + data.frame(begin = numeric(0), end = numeric(0)) + ) +}) + +test_that("NA curves in tf_mv propagate as in the univariate case", { + fm_na <- fm + fm_na[1] <- NA + expect_true(is.na(tf_where(fm_na, x > 0)[[1]])) + expect_identical( + tf_where(fm_na, x > 0)[-1], + tf_where(fm, x > 0)[-1] + ) + expect_true(is.na(tf_where(fm_na, x > 0, "first")[[1]])) + # partial NA: a curve missing in *any* component counts as missing + fm_pna <- fm + x_na <- x_mv + x_na[1] <- NA + fm_pna$x <- x_na + expect_true(is.na(tf_where(fm_pna, y > 0)[[1]])) + expect_identical( + tf_where(fm_pna, y > 0)[-1], + tf_where(fm, y > 0)[-1] + ) +}) + +test_that("tf_where works with more than two components", { + fm3 <- tfd_mv(list(x = x_mv, y = y_mv, z = x_mv + y_mv)) + expect_identical( + tf_where(fm3, x + y == z), + tf_where(fm3, TRUE) + ) + expect_equal( + unname(tf_anywhere(fm3, z > x + y)), + c(FALSE, FALSE) + ) +}) + +test_that("tf_where works on tfb_mv input", { + set.seed(1212) + fd <- tfd_mv(list(x = tf_rgp(3), y = tf_rgp(3))) + fb <- tfb_mv(fd, k = 15, verbose = FALSE) + expect_true(is_tfb_mv(fb)) + res <- tf_where(fb, x > 0 & y > 0) + expect_type(res, "list") + expect_length(res, 3) + expect_identical( + res, + tf_where(fb, x > 0 & y > 0, arg = tf_arg(fb)) + ) + # agrees with joint condition on the evaluated components + expect_identical( + unname(tf_anywhere(fb, x > y)), + unname(map_lgl(tf_evaluations(fb), \(df) any(df$x > df$y))) + ) + expect_error(tf_where(fb, value > 0), class = "tf_mv_where_value") +}) diff --git a/tests/testthat/test-names.R b/tests/testthat/test-names.R index 163dc545..83f0bcc1 100644 --- a/tests/testthat/test-names.R +++ b/tests/testthat/test-names.R @@ -33,3 +33,14 @@ test_that("vec_arith keeps names", { expect_named(xn - mean(x), names(xn)) expect_named(mean(xn) - xn, names(xn - mean(x))) }) + +test_that("coef.tfb preserves names", { + set.seed(1) + x <- tfb(tf_rgp(3, arg = seq(0, 1, length.out = 21))) + names(x) <- c("a", "b", "c") + expect_named(coef(x), c("a", "b", "c")) + + # null names stay null + names(x) <- NULL + expect_null(names(coef(x))) +}) diff --git a/tests/testthat/test-ops.R b/tests/testthat/test-ops.R index 82c5b94b..d90b3c5d 100644 --- a/tests/testthat/test-ops.R +++ b/tests/testthat/test-ops.R @@ -172,3 +172,17 @@ test_that("tfb arithmetic operations with other tfb", { expect_no_error(x + x[1]) expect_no_error(x[3] + x) }) + +test_that("==.tfb dispatches like ==.tfd (PR C target 7)", { + xb <- suppressWarnings(tfb(tf_rgp(3))) + yb <- xb + yb[2] <- NA + # Comparison is per-curve via isTRUE(all.equal()) and never returns NA: + # an NA entry compares equal to itself and unequal to any non-NA entry. + expect_equal(unname(xb == xb), rep(TRUE, 3)) + expect_equal(unname(xb != xb), rep(FALSE, 3)) + expect_equal(unname(yb == yb), rep(TRUE, 3)) + expect_equal(unname(yb != yb), rep(FALSE, 3)) + expect_equal(unname(xb == yb), c(TRUE, FALSE, TRUE)) + expect_equal(unname(xb != yb), c(FALSE, TRUE, FALSE)) +}) diff --git a/tests/testthat/test-rank-order.R b/tests/testthat/test-rank-order.R index 0d8d6a1f..20f41110 100644 --- a/tests/testthat/test-rank-order.R +++ b/tests/testthat/test-rank-order.R @@ -75,7 +75,7 @@ test_that("rank.default still works for numeric", { # ---- order (via xtfrm) ------------------------------------------------------ test_that("order works on tf via xtfrm", { - o <- order(parallel) + o <- suppressWarnings(order(parallel)) expect_equal(o, 1:7) # Reversed: o_dec <- order(parallel, decreasing = TRUE) @@ -83,13 +83,22 @@ test_that("order works on tf via xtfrm", { }) test_that("xtfrm.tf returns MHI values", { - xt <- xtfrm(parallel) + xt <- suppressWarnings(xtfrm(parallel)) expect_type(xt, "double") expect_length(xt, 7) # For parallel lines, xtfrm should increase expect_true(all(diff(xt) > 0)) }) +test_that("xtfrm.tf warns once per session about depth-based semantics", { + rlang::reset_warning_verbosity("tf_xtfrm") + expect_warning(xtfrm(parallel), "depth-based total order") + # Subsequent invocations in the same session do not warn again. + expect_silent(xtfrm(parallel)) + expect_silent(order(parallel)) + expect_silent(sort(parallel)) +}) + # ---- sort -------------------------------------------------------------------- test_that("sort.tf sorts by MHI", { diff --git a/tests/testthat/test-rebase.R b/tests/testthat/test-rebase.R index c6cd4d1c..b86b58cf 100644 --- a/tests/testthat/test-rebase.R +++ b/tests/testthat/test-rebase.R @@ -30,6 +30,7 @@ for (i in seq_along(l)) { l[[i]] |> tf_arg() ) expect_named(x_rebase, names(x)) + expect_valid_tf(x_rebase) skip_on_cran() # to avoid non-reproducible BS-error on Fedora 36 - MKL expect_true( compare_tf_attribs(x_rebase, l[[i]], check_attrib = FALSE) |> all() @@ -77,6 +78,7 @@ for (i in seq_along(l)) { l[[i]] |> tf_arg() ) expect_named(x_rebase, names(x)) + expect_valid_tf(x_rebase) skip_on_cran() # to avoid non-reproducible BS-error on Fedora 36 - MKL expect_true( compare_tf_attribs(x_rebase, l[[i]], check_attrib = FALSE) |> all() @@ -119,9 +121,113 @@ for (i in seq_along(l)) { l[[i]] |> tf_arg() ) expect_named(x_rebase, names(x)) + expect_valid_tf(x_rebase) skip_on_cran() # to avoid non-reproducible BS-error on Fedora 36 - MKL expect_true( compare_tf_attribs(x_rebase, l[[i]], check_attrib = FALSE) |> all() ) }) } + +#------------------------------------------------------------------------------- +# regression tests for #238, #239, #240 + +test_that("#238 tf_rebase.tfb.tfd accepts dots without double-splicing", { + set.seed(238) + b <- tfb(tf_rgp(3), verbose = FALSE) + raw <- tf_rgp(3) + # any dot collided with itself before the fix ("matched by multiple actual arguments") + expect_silent(res1 <- tf_rebase(b, raw, evaluator = "tf_approx_spline")) + expect_s3_class(res1, "tfd") + expect_identical(attr(res1, "evaluator_name"), "tf_approx_spline") +}) + +test_that("tf_rebase with function-valued evaluator is documented as broken (see #261)", { + # Tracked in #261: tfd.tf needs to accept function-valued evaluator. + # When that issue is fixed, this test should be flipped to expect_no_error. + xb <- tfb(tf_rgp(3), verbose = FALSE) + x <- tf_rgp(3) + expect_error(tf_rebase(xb, x, evaluator = tf_approx_spline)) +}) + +test_that("#239 tf_rebase(tfd, tfb_spline) fits on the target spline grid", { + set.seed(239) + x <- tf_rgp(3, arg = seq(0, 1, length.out = 11)) + b <- tfb(tf_rgp(3, arg = seq(0, 1, length.out = 51)), k = 25, verbose = FALSE) + res <- tf_rebase(x, b) + expect_true(tf:::same_basis(res, b)) + # combining is now warning-free since both share the same basis + expect_warning(res + b, NA) +}) + +test_that("tf_rebase(tfd, tfb_spline) honors custom arg", { + set.seed(2391) + x <- tf_rgp(3, arg = seq(0, 1, length.out = 11)) + b <- tfb(tf_rgp(3, arg = seq(0, 1, length.out = 51)), k = 15, verbose = FALSE) + custom_arg <- seq(0, 1, length.out = 21) + + res <- suppressWarnings(tf_rebase(x, b, arg = custom_arg)) + + expect_identical(tf_arg(res), custom_arg) + expect_equal(attr(res, "basis_matrix"), attr(b, "basis")(custom_arg)) +}) + +test_that("#269 tf_rebase(tfd, tfb_spline) projects directly w/o interpolation step", { + # Per Fabian's review: rebase should project object's native evaluations onto + # basis_from's basis, *not* interpolate first (which would compound errors). + set.seed(269) + arg_x <- seq(0, 1, length.out = 41) + x <- tf_rgp(3, arg = arg_x) |> + tf_smooth() |> + suppressMessages() + b <- tfb( + tf_rgp(5, arg = seq(0, 1, length.out = 101)) |> + tf_smooth() |> + suppressMessages(), + k = 25, + verbose = FALSE + ) + + res <- tf_rebase(x, b) + + # Result lives in basis_from's basis (and arg) -- not lossy on subsequent ops + expect_true(tf:::same_basis(res, b)) + expect_identical(tf_arg(res), tf_arg(b)) + expect_warning(res + b[1:3], NA) + + # Evaluating result at x's native grid should reproduce x to within basis + # approximation error -- baseline = same projection performed directly using + # basis_from's basis and unpenalized LS at arg_x; rebase result should be no + # worse than this baseline (no additional interpolation-error budget). + B_at_x <- attr(b, "basis")(arg_x) + ev_x <- tf_evaluations(x) + ev_res_at_x <- map(vec_data(res), \(coef) drop(B_at_x %*% coef)) + rebase_err <- map2_dbl(ev_x, ev_res_at_x, \(a, b) max(abs(a - b))) + # Reference: unpenalized LS projection onto the same basis at arg_x. + proj_err <- map_dbl(ev_x, \(y) { + coef <- qr.coef(qr(B_at_x), y) + max(abs(y - drop(B_at_x %*% coef))) + }) + # Penalized projection is at most modestly worse than unpenalized LS + # (small multiplicative slack for the smoothing penalty). + expect_true(all(rebase_err <= pmax(proj_err * 3, 0.05))) +}) + +test_that("#269 tf_rebase(tfd, tfb_spline) under-determined fit works via penalty", { + # object has fewer unique args than basis_from's k -- penalty must rescue. + set.seed(2690) + x <- tf_rgp(3, arg = seq(0, 1, length.out = 11)) |> suppressMessages() + b <- tfb(tf_rgp(3, arg = seq(0, 1, length.out = 51)), k = 25, verbose = FALSE) + # default tfb is penalized, so this should succeed (perhaps with sparse warning) + expect_no_error(res <- suppressWarnings(tf_rebase(x, b))) + expect_true(tf:::same_basis(res, b)) +}) + +test_that("#240 default tfb_spline/tfb_fpc methods return length-0 prototypes", { + proto_s <- suppressWarnings(tfb_spline()) + expect_s3_class(proto_s, "tfb_spline") + expect_length(proto_s, 0) + proto_f <- suppressMessages(tfb_fpc()) + expect_s3_class(proto_f, "tfb_fpc") + expect_length(proto_f, 0) +}) diff --git a/tests/testthat/test-register-mv-srvf.R b/tests/testthat/test-register-mv-srvf.R new file mode 100644 index 00000000..822dbf71 --- /dev/null +++ b/tests/testthat/test-register-mv-srvf.R @@ -0,0 +1,230 @@ +mv_registration_meanvar <- function(mv) { + mean(sapply(tf_components(mv), function(comp) { + suppressWarnings(mean(tf_evaluations(var(comp))[[1]], na.rm = TRUE)) + })) +} + +make_shifted_bump_mv <- function(t = seq(0, 1, length.out = 51)) { + centers <- c(0.4, 0.5, 0.6) + bump <- function(center) exp(-80 * (t - center)^2) + x <- tfd(t(sapply(centers, bump)), arg = t) + y <- tfd(t(sapply(centers, \(center) 0.5 * bump(center))), arg = t) + tfd_mv(list(x = x, y = y)) +} + +make_shape_mv <- function(t = seq(0, 1, length.out = 51)) { + base <- rbind(t, t^2) + scales <- c(1, 0.7, 1.3) + angles <- c(0, 0.4, -0.25) + offsets <- rbind(c(0.2, -0.1), c(0.4, -0.2), c(0.6, -0.3)) + beta <- array( + NA_real_, + dim = c(3, length(t), 2), + dimnames = list(c("a", "b", "c"), NULL, c("x", "y")) + ) + + for (i in seq_len(3)) { + rot <- matrix( + c(cos(angles[i]), sin(angles[i]), -sin(angles[i]), cos(angles[i])), + nrow = 2 + ) + curve <- scales[i] * + (rot %*% base) + + matrix(offsets[i, ], nrow = 2, ncol = length(t)) + beta[i, , 1] <- curve[1, ] + beta[i, , 2] <- curve[2, ] + } + tfd_mv(beta, arg = t) +} + +test_that("srvf_mv estimates true multivariate SRVF warps for tf_mv", { + skip_if_not_installed("fdasrvf") + + f <- make_shifted_bump_mv() + warps <- tf_estimate_warps(f, method = "srvf_mv", max_iter = 2) + aligned <- tf_align(f, warps) + reg <- tf_register(f, method = "srvf_mv", max_iter = 2) + + expect_s3_class(warps, "tfd") + expect_s3_class(attr(warps, "template"), "tfd_mv") + expect_s3_class(aligned, "tfd_mv") + expect_s3_class(reg, "tf_registration") + expect_s3_class(tf_aligned(reg), "tfd_mv") + expect_s3_class(tf_template(reg), "tfd_mv") + expect_s3_class(tf_inv_warps(reg), "tfd") + expect_identical(tf_domain(warps), tf_domain(f)) + expect_identical(tf_arg(warps), tf_arg(f)) + expect_lt( + mv_registration_meanvar(tf_aligned(reg)), + 0.1 * mv_registration_meanvar(f) + ) +}) + +test_that("srvf_mv supports explicit tf_mv templates", { + skip_if_not_installed("fdasrvf") + + f <- make_shifted_bump_mv() + template <- f[2] + warps <- tf_estimate_warps( + f, + method = "srvf_mv", + template = template + ) + + expect_s3_class(warps, "tfd") + expect_identical(attr(warps, "template"), template) + expect_length(warps, length(f)) + expect_identical(tf_arg(warps), tf_arg(f)) +}) + +test_that("srvf_mv keeps shape options out of tf_register", { + skip_if_not_installed("fdasrvf") + + f <- make_shifted_bump_mv() + expect_error( + tf_estimate_warps(f, method = "srvf_mv", rotation = TRUE), + "tf_register_shape" + ) + expect_error( + tf_estimate_warps(f$x, method = "srvf_mv"), + "only available for .*tf_mv" + ) +}) + +test_that("srvf_mv rejects non shared-grid tf_mv inputs", { + skip_if_not_installed("fdasrvf") + + t1 <- seq(0, 1, length.out = 31) + t2 <- seq(0, 1, length.out = 41) + f <- tfd_mv(list( + x = tfd(matrix(sin(2 * pi * t1), nrow = 1), arg = t1), + y = tfd(matrix(cos(2 * pi * t2), nrow = 1), arg = t2) + )) + + expect_error( + tf_estimate_warps(f, method = "srvf_mv"), + "one shared argument grid" + ) +}) + +test_that("tf_register_shape aligns translated, rotated, and scaled curves", { + skip_if_not_installed("fdasrvf") + + f <- make_shape_mv() + reg <- tf_register_shape(f, max_iter = 2) + + expect_s3_class(reg, "tf_shape_registration") + expect_s3_class(reg, "tf_registration") + expect_s3_class(tf_aligned(reg), "tfd_mv") + expect_s3_class(tf_inv_warps(reg), "tfd") + expect_s3_class(tf_template(reg), "tfd_mv") + expect_equal(dim(tf_rotations(reg)), c(2, 2, length(f))) + expect_length(tf_scales(reg), length(f)) + expect_true(all(is.finite(tf_scales(reg)))) + expect_true(all(tf_scales(reg) > 0)) + expect_lt( + mv_registration_meanvar(tf_aligned(reg)), + 0.1 * mv_registration_meanvar(f) + ) +}) + +test_that("tf_register_shape rejects closed-curve mode", { + skip_if_not_installed("fdasrvf") + + f <- make_shape_mv() + expect_error( + tf_register_shape(f, max_iter = 1, mode = "C"), + "not supported" + ) +}) + +test_that("tf_register_shape can disable rotation and scaling", { + skip_if_not_installed("fdasrvf") + + f <- make_shape_mv() + reg <- tf_register_shape(f, max_iter = 1, rotation = FALSE, scale = FALSE) + + expect_equal(unname(tf_scales(reg)), rep(1, length(f))) + expect_equal(names(tf_scales(reg)), names(f)) + expect_equal( + unname(tf_rotations(reg)), + array(diag(2), dim = c(2, 2, length(f))) + ) + expect_equal(dimnames(tf_rotations(reg))[[3]], names(f)) +}) + +test_that("tf_shape_registration subsetting keeps shape outputs aligned", { + skip_if_not_installed("fdasrvf") + + f <- make_shape_mv() + reg <- tf_register_shape(f, max_iter = 1) + sub <- reg[1:2] + sub_named <- reg[c("a", "c")] + + expect_s3_class(sub, "tf_shape_registration") + expect_length(tf_aligned(sub), 2) + expect_length(tf_inv_warps(sub), 2) + expect_equal(dim(tf_rotations(sub)), c(2, 2, 2)) + expect_length(tf_scales(sub), 2) + expect_equal(dim(tf_rotations(sub_named)), c(2, 2, 2)) + expect_equal(names(tf_scales(sub_named)), c("a", "c")) +}) + +test_that("summary.tf_registration reports a numeric amplitude-variance reduction for mv (#249)", { + skip_if_not_installed("fdasrvf") + set.seed(123) + fm <- tfd_mv(list(x = tf_rgp(5), y = tf_rgp(5))) + reg <- tf_register(fm, method = "srvf_mv") + expect_no_warning(s <- summary(reg)) + expect_true(is.numeric(s$amp_var_reduction)) + expect_true(is.finite(s$amp_var_reduction)) + out <- capture.output(print(s)) + expect_false(any(grepl("not computable", out))) +}) + +test_that("summary.tf_shape_registration reports rotation angles and scale deciles", { + skip_if_not_installed("fdasrvf") + f <- make_shape_mv() + reg <- tf_register_shape(f, max_iter = 1) + expect_no_warning(s <- summary(reg)) + expect_s3_class(s, "summary.tf_shape_registration") + expect_true(is.numeric(s$rotation_angles_deg)) + expect_true(is.numeric(s$scale_quantiles)) + out <- capture.output(print(s)) + expect_true(any(grepl("Rotation angles", out))) + expect_true(any(grepl("Scale factors", out))) +}) + +test_that("shape_rotation_angles handles 2D and 3D correctly (#271)", { + # 2D: rotation by pi/4 + theta <- pi / 4 + R2 <- matrix(c(cos(theta), sin(theta), -sin(theta), cos(theta)), 2, 2) + rot2 <- array(R2, dim = c(2, 2, 1)) + expect_equal(tf:::shape_rotation_angles(rot2), theta) + + # 2D: signed (negative) rotation + R2neg <- matrix(c(cos(-theta), sin(-theta), -sin(-theta), cos(-theta)), 2, 2) + rot2neg <- array(R2neg, dim = c(2, 2, 1)) + expect_equal(tf:::shape_rotation_angles(rot2neg), -theta) + + # 3D: rotation by pi/3 about x-axis -- acos((tr(R) - 1) / 2) == pi/3 + phi <- pi / 3 + R3 <- matrix(c( + 1, 0, 0, + 0, cos(phi), sin(phi), + 0, -sin(phi), cos(phi) + ), 3, 3) + rot3 <- array(R3, dim = c(3, 3, 1)) + expect_equal(tf:::shape_rotation_angles(rot3), phi) +}) + +test_that("shape_rotation_angles warns and returns NA for d > 3 (#271)", { + # 4x4 identity stacks - skip the random-rotation construction; the formula + # is not meaningful for d > 3 regardless of the matrix's content. + rot4 <- array(diag(4), dim = c(4, 4, 2)) + expect_warning( + angles <- tf:::shape_rotation_angles(rot4), + "not implemented for" + ) + expect_equal(angles, rep(NA_real_, 2)) +}) diff --git a/tests/testthat/test-register.R b/tests/testthat/test-register.R index a1b496ce..3c2e600c 100644 --- a/tests/testthat/test-register.R +++ b/tests/testthat/test-register.R @@ -240,6 +240,28 @@ test_that("tf_registration summary works", { expect_false(s2$has_original) }) +test_that("summary.tf_registration is robust to all-NA inputs (#271)", { + t <- seq(0, 2 * pi, length.out = 101) + x <- tfd(t(sapply(c(-0.3, 0, 0.3), \(s) sin(t + s))), arg = t) + reg <- quiet_expected_registration_warnings( + tf_register(x, method = "affine", type = "shift") + ) + # Force mean_pointwise_variance() into the degenerate path by replacing + # stored data with an all-NA tfd. var() on this will not produce useful + # evaluations and may error / warn -- summary() must still return NA + # gracefully without erroring or emitting warnings. + reg_na <- reg + reg_na$x <- tfd( + matrix(NA_real_, nrow = length(x), ncol = length(t)), + arg = t + ) + expect_no_warning(s_na <- summary(reg_na)) + expect_true(is.na(s_na$amp_var_reduction)) + + # Directly exercise mean_pointwise_variance on a NULL-evaluations input + expect_identical(tf:::mean_pointwise_variance(NULL), NA_real_) +}) + test_that("tf_registration plot works", { t <- seq(0, 2 * pi, length.out = 101) x <- tfd(t(sapply(c(-0.3, 0, 0.3), \(s) sin(t + s))), arg = t) @@ -330,6 +352,25 @@ test_that("tf_estimate_warps works for SRVF and CC methods", { ) }) +test_that("SRVF warps are monotone when domain is wider than arg range (#242)", { + skip_if_not_installed("fdasrvf") + set.seed(1) + x <- tfd( + matrix(rnorm(5 * 9), 5), + arg = seq(0.1, 0.9, length.out = 9), + domain = c(0, 1) + ) + w <- tf_estimate_warps(x, method = "srvf") + arg_x <- as.numeric(tf_arg(x)) + for (i in seq_along(w)) { + v <- tf_evaluations(w)[[i]] + expect_true(all(diff(v) > 0)) + expect_equal(v[1], min(arg_x)) + expect_equal(v[length(v)], max(arg_x)) + } + expect_no_error(tf_register(x, method = "srvf")) +}) + test_that("tf_estimate_warps returns tfd (not tf_registration)", { t <- seq(0, 2 * pi, length.out = 101) x <- tfd(t(sapply(c(-0.3, 0, 0.3), \(s) sin(t + s))), arg = t) @@ -814,6 +855,17 @@ test_that("register_landmark handles NA landmarks correctly", { } }) +test_that("landmark registration errors clearly on NA-induced non-monotone template (#243)", { + # NA pattern arranged so that default colMeans(landmarks, na.rm=TRUE) crosses: + # col 1 mean = mean(0.1, 0.9) = 0.5, col 2 mean = 0.3 -> not strictly increasing. + lm <- matrix(c(0.1, NA, NA, 0.3, 0.9, NA), nrow = 3, byrow = TRUE) + x <- tf_rgp(3) + expect_error( + tf_estimate_warps(x, landmarks = lm, method = "landmark"), + "monoton|landmark" + ) +}) + test_that("tf_estimate_warps landmark method validates input", { t <- seq(0, 1, length.out = 51) x <- tfd(t(cbind(sin(t * pi), sin(t * pi + 0.1))), arg = t) diff --git a/tests/testthat/test-rgp.R b/tests/testthat/test-rgp.R index 252d1b4b..ef40fa40 100644 --- a/tests/testthat/test-rgp.R +++ b/tests/testthat/test-rgp.R @@ -21,3 +21,21 @@ test_that("user defined covariance works", { "2 formal arguments" ) }) + +test_that("tf_rgp reproducibility (regression test for base-R rmvnorm replacement)", { + # Pin a digest of the output for a fixed seed. Guards against accidental + # changes to the GP sampling path (which now uses a base-R eigen-decomposition + # sampler instead of mvtnorm::rmvnorm). + set.seed(20260610) + x_a <- tf_rgp(3, arg = 21L, cov = "squareexp", nugget = 0) + set.seed(20260610) + x_b <- tf_rgp(3, arg = 21L, cov = "squareexp", nugget = 0) + expect_equal(x_a, x_b) + + # Check sampled values have the right empirical scale (variance roughly 1 + # for the squared-exp kernel with no nugget at f_cov(t, t) = 1). + set.seed(1) + x <- tf_rgp(200, arg = 51L, cov = "squareexp", nugget = 0) + evals <- do.call(rbind, tf_evaluations(x)) + expect_true(abs(mean(apply(evals, 2, var)) - 1) < 0.2) +}) diff --git a/tests/testthat/test-smooth.R b/tests/testthat/test-smooth.R new file mode 100644 index 00000000..edf25628 --- /dev/null +++ b/tests/testthat/test-smooth.R @@ -0,0 +1,106 @@ +test_that("tf_smooth works with lowess", { + set.seed(1) + x <- tf_rgp(3, arg = 51L) + out <- suppressMessages(tf_smooth(x, method = "lowess")) + expect_s3_class(out, "tfd") + expect_equal(length(out), length(x)) + expect_equal(tf_arg(out), tf_arg(x)) +}) + +test_that("tf_smooth works with savgol", { + set.seed(1) + x <- tf_rgp(3, arg = 51L) + out <- suppressMessages(tf_smooth(x, method = "savgol")) + expect_s3_class(out, "tfd") + expect_equal(length(out), length(x)) + out2 <- suppressMessages(tf_smooth(x, method = "savgol", fl = 5)) + expect_s3_class(out2, "tfd") +}) + +test_that("tf_smooth works with rollmean", { + set.seed(1) + x <- tf_rgp(3, arg = 51L) + out <- suppressMessages(tf_smooth(x, method = "rollmean")) + expect_s3_class(out, "tfd") + expect_equal(length(out), length(x)) +}) + +test_that("tf_smooth works with rollmedian", { + set.seed(1) + x <- tf_rgp(3, arg = 51L) + out <- suppressMessages(tf_smooth(x, method = "rollmedian", k = 5)) + expect_s3_class(out, "tfd") + expect_equal(length(out), length(x)) +}) + +test_that("tf_smooth.tfb returns input unchanged", { + set.seed(1) + x <- suppressMessages(tfb(tf_rgp(3, arg = 51L))) + expect_identical(suppressMessages(tf_smooth(x)), x) +}) + +test_that("savgol matches pracma::savgol numerically (regression)", { + # Pin numerical equivalence between the inline savgol() and the upstream + # implementation it replaces. Snapshot values computed against + # pracma::savgol() at the time of the dependency removal. + set.seed(20260610) + x <- cumsum(rnorm(101)) + + # Default (fl, forder = 4, dorder = 0) + out_default <- savgol(x, fl = 11) + expect_length(out_default, length(x)) + expect_equal( + out_default[c(1, 25, 50, 75, 101)], + c( + -0.4616933452485163, + 4.9930783306980793, + 6.2228728562669682, + 4.4474764097120572, + 7.2186156201758047 + ), + tolerance = 1e-10 + ) + + # First derivative + out_d1 <- savgol(x, fl = 11, forder = 3, dorder = 1) + expect_equal( + out_d1[c(1, 25, 50, 75, 101)], + c( + -0.0062313892718525, + 0.0091651046404730, + -0.4741681396791447, + 0.2145733291250493, + -2.4397772718621287 + ), + tolerance = 1e-10 + ) + + # Caching does not change the answer + out_default_again <- savgol(x, fl = 11) + expect_identical(out_default, out_default_again) +}) + +test_that("savgol input validation", { + expect_error(savgol(1:10, fl = 4), "odd integer") + expect_error(savgol(1:10, fl = 1), "fl") + expect_error(savgol(1:10, fl = 2.5), "fl") + expect_error(savgol(1:10, fl = 5, forder = 1.5), "forder") + expect_error(savgol(1:10, fl = 5, forder = -1), "forder") + expect_error(savgol(1:10, fl = 5, forder = 2, dorder = 3), "dorder") + expect_error(savgol(1:10, fl = 3, forder = 4), "greater than") + expect_error( + suppressMessages(tf_smooth( + tf_rgp(2, arg = 51L), + method = "savgol", + fl = 2.5 + )) + ) +}) + +test_that("tf_smooth(savgol) still works after dropping pracma", { + set.seed(1) + f <- tf_rgp(2, arg = 51L, nugget = 0.05) + f_sg <- suppressMessages(tf_smooth(f, "savgol", verbose = FALSE)) + expect_s3_class(f_sg, "tfd") + expect_length(f_sg, length(f)) +}) diff --git a/tests/testthat/test-soft-impute-svd.R b/tests/testthat/test-soft-impute-svd.R new file mode 100644 index 00000000..a1f6058f --- /dev/null +++ b/tests/testthat/test-soft-impute-svd.R @@ -0,0 +1,108 @@ +test_that("simpute_svd matches svd() on a complete matrix with lambda = 0", { + set.seed(42) + x <- matrix(rnorm(60), 12, 5) + s_ref <- svd(x) + s_new <- simpute_svd(x) + # leading min(dim)-1 singular values should match (signs of u/v can flip, + # but their outer products / d should be identical) + k <- length(s_new$d) + expect_equal(s_new$d, s_ref$d[seq_len(k)], tolerance = 1e-10) + expect_equal( + s_new$u %*% (s_new$d * t(s_new$v)), + s_ref$u[, seq_len(k), drop = FALSE] %*% + (s_ref$d[seq_len(k)] * t(s_ref$v[, seq_len(k), drop = FALSE])), + tolerance = 1e-10 + ) +}) + +test_that("simpute_svd imputed entries equal the rank-J reconstruction", { + set.seed(7) + n <- 25 + m <- 8 + x_full <- tcrossprod(matrix(rnorm(n * 3), n, 3), matrix(rnorm(m * 3), m, 3)) + x_full <- x_full + 0.01 * matrix(rnorm(n * m), n, m) + x <- x_full + nas <- matrix(runif(n * m) < 0.1, n, m) + x[nas] <- NA + s <- simpute_svd(x) + + # reconstruct from returned factors + recon <- s$u %*% (s$d * t(s$v)) + # at convergence: filled[nas] are well-approximated by the rank-J recon. + # We check the fixed-point property: re-running SVD on (observed | recon[nas]) + # yields singular values close to s$d (the convergence threshold inside + # simpute_svd is on a relative-Frobenius ratio, so individual singular values + # match only up to ~ sqrt(thresh) in relative terms). + x_filled <- x + x_filled[nas] <- recon[nas] + s_check <- svd(x_filled) + expect_equal(s_check$d[seq_along(s$d)], s$d, tolerance = 1e-2) +}) + +test_that("simpute_svd is a no-op when there are no NAs (rank-J slice)", { + set.seed(3) + x <- matrix(rnorm(40), 10, 4) + s_new <- simpute_svd(x) + s_ref <- svd(x) + # full reconstruction should match + expect_equal( + s_new$u %*% (s_new$d * t(s_new$v)), + s_ref$u[, seq_along(s_new$d), drop = FALSE] %*% + (s_ref$d[seq_along(s_new$d)] * + t(s_ref$v[, seq_along(s_new$d), drop = FALSE])), + tolerance = 1e-10 + ) +}) + +test_that("simpute_svd clamps J to the available rank and handles 1-row inputs", { + set.seed(5) + # skinny matrix, J larger than min(dim) must not cause out-of-bounds errors + x <- matrix(rnorm(6), 3, 2) + x[1, 2] <- NA + s <- simpute_svd(x, J = 10) + expect_lte(length(s$d), min(dim(x))) + expect_equal(dim(s$u), c(nrow(x), length(s$d))) + expect_equal(dim(s$v), c(ncol(x), length(s$d))) + + # 1-row input: default J would be 0, must be clamped to 1 + x1 <- matrix(rnorm(5), 1, 5) + x1[1, 3] <- NA + s1 <- simpute_svd(x1) + expect_equal(length(s1$d), 1L) + expect_equal(dim(s1$u), c(1L, 1L)) + expect_equal(dim(s1$v), c(5L, 1L)) + + # complete-data branch with 1 row + s1c <- simpute_svd(matrix(rnorm(5), 1, 5)) + expect_equal(length(s1c$d), 1L) +}) + +test_that("simpute_svd warns instead of erroring when maxit leaves no iterations", { + set.seed(8) + x <- matrix(rnorm(20), 5, 4) + x[2, 3] <- NA + expect_warning(simpute_svd(x, maxit = 0), "Convergence not achieved") +}) + +test_that("fpc_wsvd weighted SVD matches svd() on uniform-grid centered data", { + set.seed(11) + n <- 30 + m <- 21 + arg <- seq(0, 1, length.out = m) + data <- matrix(rnorm(n * m), n, m) + res <- fpc_wsvd(data, arg = arg, pve = 1) + + # Reproduce expected structure: trapezoidal weights on uniform grid + delta <- c(0, diff(arg)) + weights <- 0.5 * c(delta[-1] + head(delta, -1), tail(delta, 1)) + data_wc <- t((t(data) - colMeans(data)) * sqrt(weights)) + s_ref <- svd(data_wc, nu = 0, nv = min(dim(data))) + efun_ref <- s_ref$v[, seq_len(res$npc), drop = FALSE] / sqrt(weights) + + expect_equal(abs(res$efunctions), abs(efun_ref), tolerance = 1e-10) + expect_equal((res$evalues), (s_ref$d[seq_len(res$npc)])^2, tolerance = 1e-10) + + # eigenfunctions are L2-orthonormal under the trapezoidal inner product + gram <- t(res$efunctions) %*% (weights * res$efunctions) + expect_equal(gram, diag(res$npc), tolerance = 1e-8) +}) diff --git a/tests/testthat/test-split-combine.R b/tests/testthat/test-split-combine.R index 1b3578b4..e6609f3a 100644 --- a/tests/testthat/test-split-combine.R +++ b/tests/testthat/test-split-combine.R @@ -28,6 +28,19 @@ test_that("tf_split works as expected", { expect_identical(map(tfs_r, tf_domain), list(c(0, 0.3), c(0.301, 1))) }) +test_that("tf_split handles splits at domain boundaries (#251)", { + x <- tf_rgp(3) + # splits only at domain boundaries -> no split + expect_identical(tf_split(x, splits = 0), list(x)) + expect_identical(tf_split(x, splits = 1), list(x)) + expect_identical(tf_split(x, splits = c(0, 1)), list(x)) + # include = "left" with a single split should not error + expect_no_error(tf_split(x, splits = 0.5, include = "left")) + # single-split inputs with boundary-equal splits should not error + expect_no_error(tf_split(x, splits = 0, include = "left")) + expect_no_error(tf_split(x, splits = 1, include = "left")) +}) + test_that("tf_combine works as expected", { x <- tf_rgp(3) @@ -56,8 +69,8 @@ test_that("tf_combine works as expected", { # expect_identical( - do.call(tf_combine, tf_split(x, 0.3)) |> suppressMessages(), - do.call(tf_combine, rev(tf_split(x, 0.3))) |> suppressMessages() + do.call(tf_combine, tf_split(x, 0.3)) |> suppressWarnings(), + do.call(tf_combine, rev(tf_split(x, 0.3))) |> suppressWarnings() ) expect_error( @@ -65,6 +78,13 @@ test_that("tf_combine works as expected", { "multiple values" ) + # duplicate-point branch issues a real, suppressible warning + expect_warning( + tf_combine(x, x), + "duplicated points" + ) + expect_silent(suppressWarnings(tf_combine(x, x))) + expect_class(tf_combine(x, tf_jiggle(x)), "tfd_irreg") expect_error( diff --git a/tests/testthat/test-summarize.R b/tests/testthat/test-summarize.R index 44524270..7a5b47ae 100644 --- a/tests/testthat/test-summarize.R +++ b/tests/testthat/test-summarize.R @@ -243,3 +243,17 @@ test_that("cum* functions work for tfb objects", { # ) # } }) + +test_that("sd/var return visibly for tfb (#250)", { + xb <- suppressWarnings(tfb(tf_rgp(5), verbose = FALSE)) + expect_true(withVisible(sd(xb))$visible) + expect_true(withVisible(var(xb))$visible) +}) + +test_that("Summary.tf error messages do not crash cli (#247)", { + x <- tf_rgp(3) + expect_error(all(x), "not defined") + expect_error(any(x), "not defined") + # prod is a Summary-group generic but should not land in the error branch + expect_no_error(prod(x)) +}) diff --git a/tests/testthat/test-tfb-fpc.R b/tests/testthat/test-tfb-fpc.R index abd1e924..bc3f40b8 100644 --- a/tests/testthat/test-tfb-fpc.R +++ b/tests/testthat/test-tfb-fpc.R @@ -45,7 +45,7 @@ test_that("fpc_wsvd works for smooth non-equidistant data", { test_that("fpc_wsvd works for partially missing data", { expect_s3_class(tfb_fpc(sparse), "tfb_fpc") |> suppressMessages() expect_message(tfb_fpc(sparse), "High `pve`") |> suppressMessages() - expect_message(tfb_fpc(sparse), "Using softImpute") |> suppressMessages() + expect_message(tfb_fpc(sparse), "Using soft-impute") |> suppressMessages() set.seed(1312) x <- tf_rgp(50) x_sp_pc <- x |> @@ -78,5 +78,8 @@ test_that("tfb_fpc defaults work for all kinds of regular input", { tolerance = 1e-1, ignore_attr = TRUE ) + expect_valid_tf(smoo_) } + expect_valid_tf(tfb_fpc(smoo)) + expect_valid_tf(tfb_fpc(smoo, pve = 0.9999)) }) diff --git a/tests/testthat/test-tfb-mv.R b/tests/testthat/test-tfb-mv.R new file mode 100644 index 00000000..c174c00d --- /dev/null +++ b/tests/testthat/test-tfb-mv.R @@ -0,0 +1,81 @@ +test_that("tfb_mv fits a basis per component", { + set.seed(1) + f <- tfd_mv(list(x = tf_rgp(4), y = tf_rgp(4))) + tb <- tfb_mv(f, k = 8, verbose = FALSE) + expect_s3_class(tb, "tfb_mv") + expect_true(is_tfb_mv(tb)) + expect_false(is_tfb(tb)) # not a univariate tfb + expect_length(tb, 4) + expect_identical(tf_ncomp(tb), 2L) + expect_true(all(map_lgl(tf_components(tb), is_tfb))) + expect_equal(tb$x, tfb(f$x, k = 8, verbose = FALSE)) + expect_equal(tb$y, tfb(f$y, k = 8, verbose = FALSE)) + expect_valid_tf(tb) +}) + +test_that("tfb_mv round-trips tfd_mv -> tfb_mv -> tfd_mv approximately", { + set.seed(2) + arg <- seq(0, 1, length.out = 101) + f <- tfd_mv(list(x = tf_rgp(3, arg = arg), y = tf_rgp(3, arg = arg))) + tb <- tfb_mv(f, k = 25, verbose = FALSE) + back <- as.tfd_mv(tb) + expect_s3_class(back, "tfd_mv") + diff_x <- max(abs( + unlist(tf_evaluations(f$x)) - unlist(tf_evaluations(back$x)) + )) + diff_y <- max(abs( + unlist(tf_evaluations(f$y)) - unlist(tf_evaluations(back$y)) + )) + expect_lt(diff_x, 0.1) + expect_lt(diff_y, 0.1) + expect_valid_tf(tb) + expect_valid_tf(back) +}) + +test_that("tfb_mv supports fpc basis", { + set.seed(3) + f <- tfd_mv(list(x = tf_rgp(10), y = tf_rgp(10))) + tb <- tfb_mv(f, basis = "fpc", verbose = FALSE) + expect_s3_class(tb, "tfb_mv") + expect_true(all(map_lgl(tf_components(tb), is_tfb_fpc))) + expect_equal(tb$x, tfb(f$x, basis = "fpc", verbose = FALSE)) + expect_equal(tb$y, tfb(f$y, basis = "fpc", verbose = FALSE)) + expect_valid_tf(tb) +}) + +test_that("per-component basis is reachable via tf_components()", { + set.seed(4) + tb <- tfb_mv(tfd_mv(list(x = tf_rgp(3), y = tf_rgp(3))), verbose = FALSE) + b <- map(tf_components(tb), tf_basis) + expect_length(b, 2) + expect_true(all(map_lgl(b, is.function))) + expect_equal(b$x(tf_arg(tb$x)), tf_basis(tb$x)(tf_arg(tb$x))) + expect_equal(b$y(tf_arg(tb$y)), tf_basis(tb$y)(tf_arg(tb$y))) +}) + +test_that("tfb_mv() distributes a properly-named per-component list arg", { + set.seed(41) + d <- tfd_mv(list(x = tf_rgp(3), y = tf_rgp(3))) + tb <- tfb_mv(d, k = list(x = 4, y = 6), penalized = FALSE, verbose = FALSE) + # per-component k values flow through to the underlying tfb basis + expect_identical(length(attr(tb$x, "basis_matrix")[1, ]), 4L) + expect_identical(length(attr(tb$y, "basis_matrix")[1, ]), 6L) +}) + +test_that("tfb_mv() rejects per-component lists with duplicated names", { + set.seed(42) + d <- tfd_mv(list(x = tf_rgp(3), y = tf_rgp(3))) + # `list(x = 4, x = 6)` previously slipped through the distribution predicate + # (length 2, all names %in% comp_names) and silently passed NULL to tfb() + # for component "y". Now it falls back to "treat the whole list as a single + # scalar arg to tfb()", which tfb rejects as a malformed k. + expect_error( + tfb_mv(d, k = list(x = 4, x = 6), penalized = FALSE, verbose = FALSE) + ) +}) + +test_that("tf_count aborts with a clear message on basis-represented data", { + set.seed(5) + tb <- tfb_mv(tfd_mv(list(x = tf_rgp(3), y = tf_rgp(3))), verbose = FALSE) + expect_error(tf_count(tb), "not defined for basis-represented") +}) diff --git a/tests/testthat/test-tfb-spline.R b/tests/testthat/test-tfb-spline.R index 2c44dc40..81a7dcbd 100644 --- a/tests/testthat/test-tfb-spline.R +++ b/tests/testthat/test-tfb-spline.R @@ -20,7 +20,9 @@ test_that("tfb_spline defaults work for all kinds of regular input", { tolerance = 1e-3, ignore_attr = TRUE ) + expect_valid_tf(smoo_) } + expect_valid_tf(tfb_spline(smoo, verbose = FALSE)) }) test_that("tfb_spline works for fda::fd input", { @@ -142,7 +144,10 @@ test_that("tfb_spline defaults work for all kinds of irregular input", { tolerance = 1e-1, ignore_attr = TRUE ) + expect_valid_tf(irr_tfb_) } + expect_valid_tf(irr_tfb_1) + expect_valid_tf(irr_tfb_2) }) test_that("unpenalized tfb_spline works", { @@ -240,6 +245,7 @@ test_that("mgcv spline basis options work", { knots = NULL )) ) + expect_valid_tf(smoo_) } }) diff --git a/tests/testthat/test-tfd-class.R b/tests/testthat/test-tfd-class.R index 04efd46d..3c0bbf81 100644 --- a/tests/testthat/test-tfd-class.R +++ b/tests/testthat/test-tfd-class.R @@ -20,25 +20,26 @@ test_that("tfd.numeric works", { expect_function(attr(f, "evaluator"), args = c("x", "arg", "evaluations")) expect_identical(attr(f, "evaluator_name"), "tf_approx_linear") - # empty data + # empty data: length-0 prototype with NA sentinel domain f <- tfd(numeric()) expect_s3_class(f, "tfd_reg") expect_length(f, 0) expect_identical(attr(f, "arg"), list(integer())) - expect_identical(attr(f, "domain"), c(0, 0)) + expect_identical(attr(f, "domain"), c(NA_real_, NA_real_)) expect_function(attr(f, "evaluator"), args = c("x", "arg", "evaluations")) expect_identical(attr(f, "evaluator_name"), "tf_approx_linear") - # single NA + # single NA -> length-1 vector of NA functions (#241) for (x in list(NA_real_, NA_integer_)) { - f <- tfd(x) + f <- suppressWarnings(tfd(x)) expect_s3_class(f, "tfd_reg") - expect_length(f, 0) - expect_identical(attr(f, "arg"), list(1L)) - expect_identical(attr(f, "domain"), c(0, 0)) + expect_length(f, 1) + expect_true(is.na(f)) expect_function(attr(f, "evaluator"), args = c("x", "arg", "evaluations")) expect_identical(attr(f, "evaluator_name"), "tf_approx_linear") } + # warn on NA-only input + expect_warning(tfd(NA_real_), "NA") # evaluations must be inside the domain x <- 1:10 @@ -55,6 +56,12 @@ test_that("tfd.numeric works", { tfd(x, domain = c(2, 9)), "Evaluations must be inside the domain." ) + + # constructor invariants + expect_valid_tf(tfd(runif(100))) + x_na <- runif(100); x_na[c(2, 4, 6)] <- NA + expect_valid_tf(tfd(x_na)) + expect_valid_tf(tfd(numeric())) }) test_that("tfd works consistently for partially missing data", { @@ -81,6 +88,9 @@ test_that("tfd works consistently for partially missing data", { tfd(x_df) |> suppressWarnings(), tfd(x_mat) |> suppressWarnings() ) + + expect_valid_tf(suppressWarnings(tfd(x_df))) + expect_valid_tf(suppressWarnings(tfd(x_mat))) }) test_that("NA creation warning uses singular/plural wording and lists indices", { @@ -120,3 +130,45 @@ test_that("NA creation warning uses singular/plural wording and lists indices", "Affected indices: 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, \\.{3}" ) }) + +test_that("tfd.list errors on mismatched per-entry lengths (#235)", { + # different lengths with shared arg -> must not silently return tfd_reg + expect_error( + tfd(list(1:3, 1:5), arg = 1:5), + "arg" + ) + # different lengths and arg list -> irregular path catches it + expect_error( + tfd(list(1:3, 1:5), arg = list(1:3, 1:4)), + "do not match" + ) +}) + +test_that("all-NA input yields length-n NA functions, not length-0 (#241)", { + # vector of NAs + x <- rep(NA_real_, 3) + expect_warning(f <- tfd(x), "NA") + expect_s3_class(f, "tfd") + expect_length(f, 1) # one curve, all NA evaluations + expect_true(is.na(f)) + + # matrix with all-NA rows -> one NA function per row + m <- matrix(NA_real_, nrow = 3, ncol = 5) + expect_warning(f <- tfd(m, arg = 1:5), "NA") + expect_length(f, 3) + expect_true(all(is.na(f))) +}) + +test_that("tfd() length-0 prototype uses NA sentinel domain (#241)", { + f <- tfd(numeric()) + expect_identical(attr(f, "domain"), c(NA_real_, NA_real_)) + expect_length(f, 0) +}) + +test_that("tfd.list infers irregular when lengths differ but arg list matches (#235)", { + f <- tfd(list(1:3, 1:5), arg = list(1:3, 1:5)) + expect_s3_class(f, "tfd_irreg") + expect_length(f, 2) + expect_equal(tf_evaluations(f)[[1]], 1:3) + expect_equal(tf_evaluations(f)[[2]], 1:5) +}) diff --git a/tests/testthat/test-tfd-mv.R b/tests/testthat/test-tfd-mv.R new file mode 100644 index 00000000..3925521e --- /dev/null +++ b/tests/testthat/test-tfd-mv.R @@ -0,0 +1,176 @@ +test_that("tfd_mv construction from a list of tf vectors works", { + set.seed(1) + fx <- tf_rgp(4) + fy <- tf_rgp(4) + f <- tfd_mv(list(x = fx, y = fy)) + expect_s3_class(f, "tfd_mv") + expect_s3_class(f, "tf_mv") + expect_s3_class(f, "tf") + expect_length(f, 4) + expect_identical(tf_ncomp(f), 2L) + expect_identical(names(tf_components(f)), c("x", "y")) + expect_equal(tf_component(f, "x"), fx, ignore_attr = TRUE) + expect_equal(f$y, fy, ignore_attr = TRUE) + expect_valid_tf(f) +}) + +test_that("tfd_mv is not a univariate tfd", { + f <- tfd_mv(list(a = tf_rgp(2), b = tf_rgp(2))) + expect_true(is_tf(f)) + expect_true(is_tf_mv(f)) + expect_true(is_tfd_mv(f)) + expect_false(is_tfd(f)) + expect_false(is_tfb_mv(f)) +}) + +test_that("tfd_mv construction from a list of matrices works", { + arg <- seq(0, 1, length.out = 11) + mx <- matrix(rnorm(33), nrow = 3) + my <- matrix(rnorm(33), nrow = 3) + f <- tfd_mv(list(x = mx, y = my), arg = arg) + expect_s3_class(f, "tfd_mv") + expect_length(f, 3) + expect_equal(tf_arg(f), arg) + expect_equal(f$x, tfd(mx, arg = arg)) + expect_equal(f$y, tfd(my, arg = arg)) + expect_valid_tf(f) +}) + +test_that("tfd_mv construction from a 3-d array works", { + arr <- array( + rnorm(3 * 11 * 2), + dim = c(3, 11, 2), + dimnames = list(NULL, NULL, c("x", "y")) + ) + f <- tfd_mv(arr, arg = seq(0, 1, length.out = 11)) + expect_length(f, 3) + expect_identical(tf_ncomp(f), 2L) + expect_identical(names(tf_components(f)), c("x", "y")) + expect_equal(f$x, tfd(arr[,, "x"], arg = seq(0, 1, length.out = 11))) + expect_equal(f$y, tfd(arr[,, "y"], arg = seq(0, 1, length.out = 11))) + expect_valid_tf(f) +}) + +test_that("tfd_mv construction from a long data.frame works", { + df <- data.frame( + id = rep(1:3, each = 5), + t = rep(seq(0, 1, length.out = 5), 3), + x = rnorm(15), + y = rnorm(15) + ) + f <- tfd_mv(df, id = "id", arg = "t", value = c("x", "y")) + expect_length(f, 3) + expect_identical(names(tf_components(f)), c("x", "y")) + expect_equal( + f$x, + tfd(df[, c("id", "t", "x")], id = "id", arg = "t", value = "x") + ) + expect_equal( + f$y, + tfd(df[, c("id", "t", "y")], id = "id", arg = "t", value = "y") + ) + expect_valid_tf(f) +}) + +test_that("tfd_mv supports regular and irregular components", { + set.seed(3) + reg <- tfd_mv(list(x = tf_rgp(3), y = tf_rgp(3))) + expect_true(all(map_lgl(tf_components(reg), is_reg))) + irr <- tfd_mv(list(x = tf_sparsify(tf_rgp(3)), y = tf_sparsify(tf_rgp(3)))) + expect_true(all(map_lgl(tf_components(irr), is_irreg))) + # per-component args may differ -> tf_arg returns a list + expect_type(tf_arg(irr), "list") + expect_true(is.matrix(tf_count(irr))) + expect_identical(dim(tf_count(irr)), c(3L, 2L)) + expect_valid_tf(reg) + expect_valid_tf(irr) +}) + +test_that("tfd_mv accessors and replacement work", { + set.seed(4) + f <- tfd_mv(list(x = tf_rgp(3), y = tf_rgp(3))) + expect_equal(tf_domain(f), c(0, 1)) + evs <- tf_evaluations(f) + expect_length(evs, 3) + expect_s3_class(evs[[1]], "data.frame") + expect_identical(colnames(evs[[1]]), c("arg", "x", "y")) + # replace a component + f2 <- f + f2$x <- f$x * 2 + expect_equal(tf_evaluations(f2$x)[[1]], 2 * tf_evaluations(f$x)[[1]]) + expect_equal(f2$y, f$y) + # add a new component by name + f3 <- f + z <- tf_rgp(3) + tf_component(f3, "z") <- z + expect_identical(tf_ncomp(f3), 3L) + expect_equal(f3$z, z) +}) + +test_that("tfd_mv handles NA curves (any component NA)", { + set.seed(5) + fx <- tf_rgp(3) + fx[2] <- NA + f <- tfd_mv(list(x = fx, y = tf_rgp(3))) + expect_equal(unname(is.na(f)), c(FALSE, TRUE, FALSE)) +}) + +test_that("tfd_mv length-0 prototype works", { + f0 <- tfd_mv(list()) + expect_s3_class(f0, "tfd_mv") + expect_length(f0, 0) + expect_identical(tf_ncomp(f0), 0L) + expect_valid_tf(f0) +}) + +test_that("tfd_mv errors on incompatible component lengths", { + expect_error(tfd_mv(list(x = tf_rgp(3), y = tf_rgp(4))), "same length") +}) + +test_that("tfd_mv unions differing component domains by default", { + f <- tfd_mv(list( + x = tf_rgp(2, arg = seq(0, 1, length.out = 5)), + y = tf_rgp(2, arg = seq(0, 2, length.out = 5)) + )) + expect_equal(tf_domain(f), c(0, 2)) + # both components got widened to the union + expect_true(all(sapply(tf_components(f), \(c) all(tf_domain(c) == c(0, 2))))) + expect_valid_tf(f) +}) + +test_that("tfd_mv accepts a user-supplied common domain", { + f <- tfd_mv( + list( + x = tf_rgp(2, arg = seq(0, 1, length.out = 5)), + y = tf_rgp(2, arg = seq(0, 1, length.out = 5)) + ), + domain = c(-1, 2) + ) + expect_equal(tf_domain(f), c(-1, 2)) +}) + +test_that("tfd_mv rejects a domain that doesn't contain the components", { + expect_error( + tfd_mv( + list( + x = tf_rgp(2, arg = seq(0, 1, length.out = 5)), + y = tf_rgp(2, arg = seq(0, 2, length.out = 5)) + ), + domain = c(0, 1) + ), + "not contained" + ) +}) + +test_that("tf_arg collapses when all-irregular components share per-curve args", { + set.seed(99) + args <- lapply(1:3, \(i) sort(runif(sample(5:8, 1)))) + f <- tfd_mv(list( + x = tfd(lapply(args, \(a) rnorm(length(a))), arg = args), + y = tfd(lapply(args, \(a) rnorm(length(a))), arg = args) + )) + a <- tf_arg(f) + # collapsed to one per-curve list (not list-of-list) + expect_true(is.list(a) && length(a) == 3L && all(map_lgl(a, is.numeric))) + expect_equal(a, args, ignore_attr = TRUE) +}) diff --git a/tests/testthat/test-utils.R b/tests/testthat/test-utils.R index 04fa10e5..7e75e89f 100644 --- a/tests/testthat/test-utils.R +++ b/tests/testthat/test-utils.R @@ -22,3 +22,40 @@ test_that("format_bib", { bibentries <- list(checkmate = citation("checkmate"), R = citation()) expect_string(format_bib("checkmate", "R")) }) + +test_that("trapezoid_weights matches the trapezoidal rule", { + # On an equidistant grid all interior weights equal the spacing, + # boundary weights equal half the spacing -> the discrete integral + # of v == 1 on [0, 1] is 1. + arg <- seq(0, 1, length.out = 11) + w <- trapezoid_weights(arg) + expect_equal(sum(w), 1) + expect_equal(w[1], 0.05) + expect_equal(w[length(w)], 0.05) + expect_true(all(abs(w[2:10] - 0.1) < 1e-12)) + + # On a non-equidistant grid, weights still integrate constant 1 to the + # domain length and weights equal the average of adjacent spacings interior, + # half-spacings at the boundary. + arg <- c(0, 0.1, 0.3, 0.7, 1) + w <- trapezoid_weights(arg) + expect_equal(sum(w), 1) + expect_equal(w[1], 0.05) + expect_equal(w[5], 0.15) + expect_equal(w[2], 0.15) + expect_equal(w[3], 0.3) + expect_equal(w[4], 0.35) + + # Equivalence with the by-hand sum((dev[-n] + dev[-1])/2 * dt) formulation. + set.seed(42) + v <- runif(5) + dt <- diff(arg) + by_hand <- sum((v[-length(v)] + v[-1]) / 2 * dt) + expect_equal(sum(w * v), by_hand) +}) + +test_that("unique_id replaces empty strings after coercion", { + ids <- unique_id(factor(c("", "a", ""))) + expect_true(all(nzchar(ids))) + expect_false(anyDuplicated(ids) > 0) +}) diff --git a/tests/testthat/test-validate-tf.R b/tests/testthat/test-validate-tf.R new file mode 100644 index 00000000..b99046d3 --- /dev/null +++ b/tests/testthat/test-validate-tf.R @@ -0,0 +1,165 @@ +# Tests for the internal validate_tf() invariant checker. + +test_that("validate_tf accepts valid tfd_reg", { + set.seed(1) + x <- tf_rgp(3) + expect_true(validate_tf(x)) + expect_valid_tf(x) +}) + +test_that("validate_tf accepts valid tfd_irreg", { + set.seed(1) + x <- tf_rgp(3) |> tf_sparsify(0.5) + expect_true(validate_tf(x)) + expect_valid_tf(x) +}) + +test_that("validate_tf accepts valid tfb_spline", { + set.seed(1) + suppressMessages(x <- tfb(tf_rgp(5))) + expect_true(validate_tf(x)) + expect_valid_tf(x) +}) + +test_that("validate_tf accepts valid tfb_fpc", { + set.seed(1) + suppressMessages(x <- tfb_fpc(tf_rgp(5))) + expect_true(validate_tf(x)) + expect_valid_tf(x) +}) + +test_that("validate_tf accepts valid tfd_mv", { + set.seed(1) + x <- tfd_mv(list(x = tf_rgp(3), y = tf_rgp(3))) + expect_true(validate_tf(x)) + expect_valid_tf(x) +}) + +test_that("validate_tf accepts valid tfb_mv", { + set.seed(1) + suppressMessages(x <- tfb_mv(list(x = tf_rgp(3), y = tf_rgp(3)))) + expect_true(validate_tf(x)) + expect_valid_tf(x) +}) + +test_that("validate_tf accepts length-0 prototypes of all subclasses", { + set.seed(1) + x_reg <- tf_rgp(3) + x_irreg <- tf_sparsify(x_reg, 0.5) + suppressMessages(x_spline <- tfb(x_reg)) + suppressMessages(x_fpc <- tfb_fpc(x_reg)) + x_tfd_mv <- tfd_mv(list(a = x_reg, b = x_irreg)) + suppressMessages(x_tfb_mv <- tfb_mv(list(a = x_reg, b = x_reg))) + expect_valid_tf(vctrs::vec_ptype(x_reg)) + expect_valid_tf(vctrs::vec_ptype(x_irreg)) + expect_valid_tf(vctrs::vec_ptype(x_spline)) + expect_valid_tf(vctrs::vec_ptype(x_fpc)) + expect_valid_tf(vctrs::vec_ptype(x_tfd_mv)) + expect_valid_tf(vctrs::vec_ptype(x_tfb_mv)) + # constructor-made empties carry only minimal attributes (no basis etc.) + expect_valid_tf(tfd(numeric(0))) + expect_valid_tf(tfb(numeric(0))) +}) + +test_that("validate_tf rejects tfd_irreg with non-character evaluator_name", { + set.seed(1) + x <- tf_rgp(3) |> tf_sparsify(0.5) + attr(x, "evaluator_name") <- 1L + expect_error(validate_tf(x), "evaluator_name") +}) + +test_that("validate_tf rejects non-tf input", { + expect_error(validate_tf(1:10), "not a .* object") +}) + +test_that("validate_tf rejects bad domain", { + set.seed(1) + x <- tf_rgp(3) + attr(x, "domain") <- c(1, 1) # not distinct + expect_error(validate_tf(x), "domain") +}) + +test_that("validate_tf rejects bad domain (length != 2)", { + set.seed(1) + x <- tf_rgp(3) + attr(x, "domain") <- c(0, 0.5, 1) + expect_error(validate_tf(x), "domain") +}) + +test_that("validate_tf rejects tfd_reg with mismatched element length", { + set.seed(1) + x <- tf_rgp(3) + # truncate one element to break the length invariant + bad <- unclass(x) + attrs <- attributes(x) + bad[[1]] <- bad[[1]][-1] + attributes(bad) <- attrs + expect_error(validate_tf(bad), "length") +}) + +test_that("validate_tf rejects tfd_irreg with bad element shape", { + set.seed(1) + x <- tf_rgp(3) |> tf_sparsify(0.5) + bad <- unclass(x) + attrs <- attributes(x) + bad[[1]] <- list(arg = bad[[1]]$arg, data = bad[[1]]$value) # #234-style bug + attributes(bad) <- attrs + expect_error(validate_tf(bad), "arg.*value|value") +}) + +test_that("validate_tf rejects tfb_spline with wrong coef length", { + set.seed(1) + suppressMessages(x <- tfb(tf_rgp(5))) + bad <- unclass(x) + attrs <- attributes(x) + bad[[1]] <- bad[[1]][-1] # truncate coefficient vector + attributes(bad) <- attrs + expect_error(validate_tf(bad), "ncol\\(basis_matrix\\)|length") +}) + +test_that("validate_tf rejects tfb_spline with mismatched basis_matrix nrow", { + set.seed(1) + suppressMessages(x <- tfb(tf_rgp(5))) + attr(x, "basis_matrix") <- attr(x, "basis_matrix")[-1, ] + expect_error(validate_tf(x), "nrow\\(basis_matrix\\)|length\\(arg\\)") +}) + +test_that("validate_tf rejects tfb_fpc with wrong score_variance length", { + set.seed(1) + suppressMessages(x <- tfb_fpc(tf_rgp(5))) + # score_variance must have length ncol(basis_matrix) - 1; truncate to break it + attr(x, "score_variance") <- attr(x, "score_variance")[-1] + expect_error(validate_tf(x), "score_variance") +}) + +test_that("validate_tf catches the #234 corruption pattern", { + # Manually construct an irregular tfd whose elements have list(arg=, data=) + # instead of the correct list(arg=, value=) -- i.e. the #234 bug shape. + # The base branch still has the #234 bug, so we build the corruption by hand + # rather than relying on tf_arg<-. + set.seed(1) + xi <- tf_sparsify(tf_rgp(2)) + raw <- unclass(xi) + attrs <- attributes(xi) + raw[[1]] <- list(arg = raw[[1]]$arg, data = raw[[1]]$value) + bad <- raw + attributes(bad) <- attrs + expect_error(validate_tf(bad), "value|field|name") +}) + +test_that("validate_tf rejects tf_mv with mismatched payload/component length", { + set.seed(1) + x <- tfd_mv(list(a = tf_rgp(3), b = tf_rgp(3))) + # corrupt: swap in a shorter component + comps <- attr(x, "components") + comps$a <- comps$a[1:2] + attr(x, "components") <- comps + expect_error(validate_tf(x), "payload length") +}) + +test_that("validate_tf rejects tf_mv with mismatched comp_names", { + set.seed(1) + x <- tfd_mv(list(a = tf_rgp(3), b = tf_rgp(3))) + attr(x, "comp_names") <- c("a", "wrong") + expect_error(validate_tf(x), "comp_names") +}) diff --git a/tests/testthat/test-vec-cast.R b/tests/testthat/test-vec-cast.R index abe635c7..74d5db65 100644 --- a/tests/testthat/test-vec-cast.R +++ b/tests/testthat/test-vec-cast.R @@ -50,6 +50,7 @@ expect_cast_result <- function( attributes(unname(cast))[-ignore], attributes(unname(to))[-ignore] ) + expect_valid_tf(cast) } test_that("vec_cast for tfd to tfd works/fails as expected", { @@ -84,14 +85,77 @@ test_that("vec_cast for tfd to tfd works/fails as expected", { }) test_that("vec_cast for tfb to tfb works/fails as expected", { - # tfb -> tfb should always fail unless bases are identical - expect_cast_result(l$b, l$b) - expect_cast_result(l$fp_low, l$fp_low) + # tfb -> tfb should always fail unless bases are identical; + # when it succeeds, all attributes (including `arg`) must match `to` exactly. + expect_cast_result(l$b, l$b, ignore = integer(0)) + expect_cast_result(l$fp_low, l$fp_low, ignore = integer(0)) expect_error(vec_cast(l$b, l$b2), "precision") expect_error(vec_cast(l$b, l$fp), "precision") expect_error(vec_cast(l$fp, l$fp_low), "precision") }) +test_that("tfb_spline -> tfb_spline cast actually exercises tf_rebase (#239)", { + # Two tfb_spline vectors with *different* arg grids and different `k` so + # `tf_basis(to)(tf_arg(x))` does not match x's basis matrix, the in-method + # same_basis check at R/vctrs-cast.R:108 is FALSE, and vec_cast_tfb_tfb + # actually runs tf_rebase (not the identity short-circuit at L113). + set.seed(239239) + x_src <- tf_rgp(3, arg = seq(0, 1, length.out = 21)) + x_to <- tf_rgp(3, arg = seq(0, 1, length.out = 41)) + b_src <- tfb(x_src, k = 7, verbose = FALSE) + b_to <- tfb(x_to, k = 11, verbose = FALSE) + expect_false(tf:::same_basis(b_src, b_to)) + + cast <- vctrs::allow_lossy_cast(vec_cast(b_src, b_to)) + # invariant: result lives in to's basis (arg grid + basis_label + same_basis) + expect_identical(tf_arg(cast), tf_arg(b_to)) + expect_identical(attr(cast, "basis_label"), attr(b_to, "basis_label")) + expect_identical(attr(cast, "basis_args"), attr(b_to, "basis_args")) + expect_true(tf:::same_basis(cast, b_to)) + # vec_ptype of the cast equals vec_ptype(to) on all attributes except the + # `basis` closure (closure environments are not stable across constructors). + cast_ptype_attrs <- attributes(vec_ptype(cast)) + to_ptype_attrs <- attributes(vec_ptype(b_to)) + cast_ptype_attrs$basis <- NULL + to_ptype_attrs$basis <- NULL + expect_identical(cast_ptype_attrs, to_ptype_attrs) +}) + +test_that("#269 vec_cast_tfb_tfb honours arg in short-circuit", { + # Two tfb_spline vectors with the *same* basis spec but different stored arg + # grids -- the basis-matrix equality check is satisfied, but returning x as-is + # would violate the cast invariant that result attributes (including `arg`) + # match `to`. + set.seed(269269) + x_full <- tf_rgp(3, arg = seq(0, 1, length.out = 51)) |> suppressMessages() + b <- tfb(x_full, k = 9, verbose = FALSE) + # interpolate b onto a different arg grid; same basis spec is reused, but + # `arg` and `basis_matrix` change. + arg_new <- seq(0, 1, length.out = 31) + b_interp <- tf_interpolate(b, arg = arg_new) |> suppressMessages() + expect_false(identical(tf_arg(b), tf_arg(b_interp))) + + cast <- vctrs::allow_lossy_cast(vec_cast(b_interp, b)) + # cast invariant: result lives on to's arg grid, not x's + expect_identical(tf_arg(cast), tf_arg(b)) + expect_identical(attr(cast, "basis_matrix"), attr(b, "basis_matrix")) +}) + +test_that("tfb_fpc -> tfb_fpc cast actually exercises tf_rebase (#239)", { + # Different arg grids -> same_basis is FALSE so the rebase path runs. + set.seed(2392392) + x_src <- suppressMessages(tf_smooth(tf_rgp(8, arg = seq(0, 1, length.out = 21)))) + x_to <- suppressMessages(tf_smooth(tf_rgp(8, arg = seq(0, 1, length.out = 41)))) + f_src <- tfb_fpc(x_src, pve = 0.9) + f_to <- tfb_fpc(x_to, pve = 0.9) + expect_false(tf:::same_basis(f_src, f_to)) + + cast <- vctrs::allow_lossy_cast(vec_cast(f_src, f_to)) + expect_identical(tf_arg(cast), tf_arg(f_to)) + expect_identical(attr(cast, "basis_label"), attr(f_to, "basis_label")) + expect_true(tf:::same_basis(cast, f_to)) +}) + test_that("vec_cast for tfd to tfb fails as expected", { expect_error(vec_cast(l$x, l$b), "precision") expect_error(vec_cast(l$x_ir, l$fp) |> suppressWarnings(), "precision") diff --git a/tests/testthat/test-where.R b/tests/testthat/test-where.R index 15ba3e1a..41676fcc 100644 --- a/tests/testthat/test-where.R +++ b/tests/testthat/test-where.R @@ -30,4 +30,10 @@ test_that("tf_where basics work", { tf_where(lin, value < -2), list(numeric(0), numeric(0)) ) + # scalar conditions recycle to all args, as under the old subset() semantics + expect_equal( + tf_where(lin, mean(value) < 0.05), + list(tf_arg(lin), numeric(0)) + ) + expect_error(tf_where(lin, value + 1), "logical") })