From 1bbeee0aa487d8d434ada71f4d1b31b5bcc05f2e Mon Sep 17 00:00:00 2001 From: t-pollington Date: Sun, 29 Aug 2021 19:01:51 +0100 Subject: [PATCH 01/14] credit --- _pkgdown.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/_pkgdown.yml b/_pkgdown.yml index 22194ab..f3f2a7b 100644 --- a/_pkgdown.yml +++ b/_pkgdown.yml @@ -1,6 +1,8 @@ authors: Bradley Efron: href: https://efron.web.stanford.edu + Timothy M Pollington: + href: https://orcid.org/0000-0002-9688-5960 Balasubramanian Narasimhan: href: https://statistics.stanford.edu/people/balasubramanian-narasimhan From c9a4aed6f1ab8e0b5704f100857e15b87bb2526b Mon Sep 17 00:00:00 2001 From: t-pollington Date: Sun, 29 Aug 2021 19:02:20 +0100 Subject: [PATCH 02/14] redundant --- R/bcajack2.R | 1 - 1 file changed, 1 deletion(-) diff --git a/R/bcajack2.R b/R/bcajack2.R index 1210c23..8529635 100644 --- a/R/bcajack2.R +++ b/R/bcajack2.R @@ -90,7 +90,6 @@ bcajack2 <- function(x, B, func, ..., m = nrow(x), mr, pct = 0.333, K = 2, J = 1 qbca2 <- function(Y, tt, t0, alpha, pct) { m <- ncol(Y) B <- nrow(Y) - o1 <- rep(1, m) D <- rep(0, B) for (i in seq_len(B)) { From 9b7992ebecc4f8133f624b9bcea1c6b0bf20b5f9 Mon Sep 17 00:00:00 2001 From: t-pollington Date: Sun, 29 Aug 2021 19:12:39 +0100 Subject: [PATCH 03/14] credit --- _pkgdown.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/_pkgdown.yml b/_pkgdown.yml index f3f2a7b..7a73e36 100644 --- a/_pkgdown.yml +++ b/_pkgdown.yml @@ -1,7 +1,7 @@ authors: Bradley Efron: href: https://efron.web.stanford.edu - Timothy M Pollington: + Timothy M Pollington: # bcajack2() bug correction href: https://orcid.org/0000-0002-9688-5960 Balasubramanian Narasimhan: href: https://statistics.stanford.edu/people/balasubramanian-narasimhan From 504037f7fa7500647baec217b67afdba310fdcd1 Mon Sep 17 00:00:00 2001 From: t-pollington Date: Sun, 29 Aug 2021 19:17:51 +0100 Subject: [PATCH 04/14] adding NAs --- R/bcajack2.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R/bcajack2.R b/R/bcajack2.R index 8529635..d30c30c 100644 --- a/R/bcajack2.R +++ b/R/bcajack2.R @@ -101,8 +101,8 @@ bcajack2 <- function(x, B, func, ..., m = nrow(x), mr, pct = 0.333, K = 2, J = 1 Qd <- stats::quantile(D, pct) ip <- seq_len(B)[D <= Qd] ty. <- as.vector(m * stats::lm(tt[ip] ~ Y[ip, ] - 1)$coef) - ty. <- ty. - mean(ty.) - a <- (1/6) * sum(ty.^3)/sum(ty.^2)^1.5 + ty. <- ty. - mean(ty., na.rm = TRUE) + a <- (1/6) * sum(ty.^3, na.rm = TRUE)/sum(ty.^2, na.rm = TRUE)^1.5 ## if (sw == 3) ## return(ty.) s <- mean(tt) From 293e4ad3c7ae9fbf40d5581c3f526daffaa0b3c7 Mon Sep 17 00:00:00 2001 From: t-pollington Date: Sun, 29 Aug 2021 19:21:19 +0100 Subject: [PATCH 05/14] redundant --- R/bcajack2.R | 21 --------------------- 1 file changed, 21 deletions(-) diff --git a/R/bcajack2.R b/R/bcajack2.R index d30c30c..19d8846 100644 --- a/R/bcajack2.R +++ b/R/bcajack2.R @@ -103,15 +103,12 @@ bcajack2 <- function(x, B, func, ..., m = nrow(x), mr, pct = 0.333, K = 2, J = 1 ty. <- as.vector(m * stats::lm(tt[ip] ~ Y[ip, ] - 1)$coef) ty. <- ty. - mean(ty., na.rm = TRUE) a <- (1/6) * sum(ty.^3, na.rm = TRUE)/sum(ty.^2, na.rm = TRUE)^1.5 - ## if (sw == 3) - ## return(ty.) s <- mean(tt) B.mean <- c(B, s) zalpha <- stats::qnorm(alpha) nal <- length(alpha) ustat <- 2 * t0 - s - ##s. <- m * .v(stats::cov(tt, Y)) s. <- m * as.vector(stats::cov(tt, Y)) u. <- 2 * ty. - s. sdu <- sum(u.^2)^0.5/m @@ -127,7 +124,6 @@ bcajack2 <- function(x, B, func, ..., m = nrow(x), mr, pct = 0.333, K = 2, J = 1 ooo <- pmin(pmax(ooo, 1), B) lims <- sort(tt)[ooo] standard <- t0 + sdboot * stats::qnorm(alpha) - ##lims <- round(cbind(lims, standard), rou) lims <- cbind(lims, standard) dimnames(lims) <- list(alpha, c("bca", "std")) stats <- c(t0, sdboot, z0, a, sdjack) @@ -144,7 +140,6 @@ bcajack2 <- function(x, B, func, ..., m = nrow(x), mr, pct = 0.333, K = 2, J = 1 tt <- B$tt t0 <- B$t0 B <- length(tt) - ##vl0 <- qbca2(Y, tt, t0, alpha = alpha, pct = pct, rou = rou) vl0 <- qbca2(Y, tt, t0, alpha = alpha, pct = pct) } else { if (is.vector(x)) @@ -165,7 +160,6 @@ bcajack2 <- function(x, B, func, ..., m = nrow(x), mr, pct = 0.333, K = 2, J = 1 if (verbose) utils::setTxtProgressBar(pb, k) } if (verbose) close(pb) - ##vl0 <- qbca2(Y, tt, t0, alpha = alpha, pct = pct, rou = rou) vl0 <- qbca2(Y, tt, t0, alpha = alpha, pct = pct) } @@ -186,7 +180,6 @@ bcajack2 <- function(x, B, func, ..., m = nrow(x), mr, pct = 0.333, K = 2, J = 1 if (verbose) utils::setTxtProgressBar(pb, k) } if (verbose) close(pb) - ##vl0 <- qbca2(Y, tt, t0, alpha = alpha, pct = pct, rou = rou) vl0 <- qbca2(Y, tt, t0, alpha = alpha, pct = pct) } } @@ -198,7 +191,6 @@ bcajack2 <- function(x, B, func, ..., m = nrow(x), mr, pct = 0.333, K = 2, J = 1 nal <- length(alpha) Pct <- rep(0, nal) - ##for (i in 1:nal) Pct[i] <- round(sum(tt <= vl0$lims[i, 1])/B, rou) for (i in 1:nal) Pct[i] <- sum(tt <= vl0$lims[i, 1])/B Stand <- vl0$stats[1] + vl0$stats[2] * stats::qnorm(alpha) Limsd <- matrix(0, length(alpha), K) @@ -217,38 +209,25 @@ bcajack2 <- function(x, B, func, ..., m = nrow(x), mr, pct = 0.333, K = 2, J = 1 iij <- c(II[, -j]) Yj <- Y[iij, ] ttj <- tt[iij] - ##vlj <- qbca2(Yj, ttj, t0, alpha, pct, rou) vlj <- qbca2(Yj, ttj, t0, alpha, pct) limbc[, j] <- vlj$lims[, 1] limst[, j] <- vlj$lims[, 2] stats[, j] <- vlj$stats } - ## if (sw == 4) - ## return(list(limbc = limbc, limst = limst, stats = stats)) Limbcsd[, k] <- apply(limbc, 1, sd) * (J - 1)/sqrt(J) Statsd[, k] <- apply(stats, 1, sd) * (J - 1)/sqrt(J) - ## if (verbose) - ## cat("{", k, "}", sep = "") - ## if (sw == 6) - ## return(list(Limbcsd = Limbcsd, Statsd = Statsd)) } limsd <- rowMeans(Limbcsd, 1) statsd <- rowMeans(Statsd, 1) - ##limits <- round(cbind(vl0$lims[, 1], limsd, vl0$lims[, 2], Pct), rou) limits <- cbind(vl0$lims[, 1], limsd, vl0$lims[, 2], Pct) dimnames(limits) <- list(alpha, c("bca", "jacksd", "std", "pct")) - ##stats <- round(rbind(vl0$stats, statsd), rou) stats <- rbind(vl0$stats, statsd) - ##ustats <- round(vl0$ustats, rou) ustats <- vl0$ustats - ##B.mean <- c(B, round(mean(tt), rou)) B.mean <- c(B, mean(tt)) dimnames(stats) <- list(c("est", "jsd"), c("theta", "sdboot", "z0", "a", "sdjack")) vll <- list(call = call, lims = limits, stats = stats, B.mean = B.mean, ustats = ustats, seed = seed) - ## if (sw == 5) - ## vll$tt <- tt bcaboot.return(vll) } From 8a07b40a3b1e022bed12d876daa83932b732a7fd Mon Sep 17 00:00:00 2001 From: t-pollington Date: Sun, 29 Aug 2021 19:34:23 +0100 Subject: [PATCH 06/14] Additional naming to help reading of return object. --- R/bcajack2.R | 1 + 1 file changed, 1 insertion(+) diff --git a/R/bcajack2.R b/R/bcajack2.R index 19d8846..f694d33 100644 --- a/R/bcajack2.R +++ b/R/bcajack2.R @@ -105,6 +105,7 @@ bcajack2 <- function(x, B, func, ..., m = nrow(x), mr, pct = 0.333, K = 2, J = 1 a <- (1/6) * sum(ty.^3, na.rm = TRUE)/sum(ty.^2, na.rm = TRUE)^1.5 s <- mean(tt) B.mean <- c(B, s) + names(B.mean) <- c("B", "s") zalpha <- stats::qnorm(alpha) nal <- length(alpha) From 6dc822caf5ea168e02f9dedd59054bc58e28d042 Mon Sep 17 00:00:00 2001 From: t-pollington Date: Sun, 29 Aug 2021 19:47:04 +0100 Subject: [PATCH 07/14] Guard against partial failure in external bootstrap replication data. Better than just adding `na.rm = TRUE` to s = mean(tt) as it forces user to recheck their data. --- R/bcajack2.R | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/R/bcajack2.R b/R/bcajack2.R index f694d33..72ee067 100644 --- a/R/bcajack2.R +++ b/R/bcajack2.R @@ -76,8 +76,8 @@ #' bcajack2(x = Xy, B = 1000, func = rfun, m = 40, verbose = FALSE) #' #' @export -bcajack2 <- function(x, B, func, ..., m = nrow(x), mr, pct = 0.333, K = 2, J = 12, - alpha = c(0.025, 0.05, 0.1, 0.16), +bcajack2 <- function(x, B, func, ..., m = nrow(x), mr, pct = 0.333, K = 2, + J = 12, alpha = c(0.025, 0.05, 0.1, 0.16), verbose = TRUE) { call <- match.call() @@ -87,6 +87,9 @@ bcajack2 <- function(x, B, func, ..., m = nrow(x), mr, pct = 0.333, K = 2, J = 1 stats::runif(1) seed <- get(".Random.seed", envir = .GlobalEnv, inherits = FALSE) + stopifnot("tt & t0 must contain no missing values" = + (!is.na(tt) & !is.na(t0))) + qbca2 <- function(Y, tt, t0, alpha, pct) { m <- ncol(Y) B <- nrow(Y) From 1055240368adbd2c402e5f9ad99913e0efde2cb9 Mon Sep 17 00:00:00 2001 From: t-pollington Date: Sun, 29 Aug 2021 19:55:26 +0100 Subject: [PATCH 08/14] More unit tests --- R/bcajack2.R | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/R/bcajack2.R b/R/bcajack2.R index 72ee067..8e00114 100644 --- a/R/bcajack2.R +++ b/R/bcajack2.R @@ -87,10 +87,10 @@ bcajack2 <- function(x, B, func, ..., m = nrow(x), mr, pct = 0.333, K = 2, stats::runif(1) seed <- get(".Random.seed", envir = .GlobalEnv, inherits = FALSE) - stopifnot("tt & t0 must contain no missing values" = - (!is.na(tt) & !is.na(t0))) - qbca2 <- function(Y, tt, t0, alpha, pct) { + stopifnot("tt & t0 must contain no missing values" = + (!is.na(tt) & !is.na(t0))) + m <- ncol(Y) B <- nrow(Y) D <- rep(0, B) @@ -140,6 +140,9 @@ bcajack2 <- function(x, B, func, ..., m = nrow(x), mr, pct = 0.333, K = 2, alpha <- c(alpha, 0.5, rev(1 - alpha)) if (is.list(B)) { + stopifnot("To use the Blist format described in bcajack2() help, + please name `B` as Y, tt & t0" = + (names(B) == c("Y", "tt", "t0"))) Y <- B$Y tt <- B$tt t0 <- B$t0 From bc8ea7e66a7dc3c4779dcde32077a4fc4ae6a93d Mon Sep 17 00:00:00 2001 From: t-pollington Date: Sun, 29 Aug 2021 22:43:24 +0100 Subject: [PATCH 09/14] I was wrong. Removing the NAs from ty. may seem like a fix but the ty. is already invalid as it's trying to align a vector in a dimensional space smaller than the size of the dataset n. Instead B needs to be increased by doing more bootstrap replics (if possible). --- R/bcajack2.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R/bcajack2.R b/R/bcajack2.R index 8e00114..e162fd8 100644 --- a/R/bcajack2.R +++ b/R/bcajack2.R @@ -104,8 +104,8 @@ bcajack2 <- function(x, B, func, ..., m = nrow(x), mr, pct = 0.333, K = 2, Qd <- stats::quantile(D, pct) ip <- seq_len(B)[D <= Qd] ty. <- as.vector(m * stats::lm(tt[ip] ~ Y[ip, ] - 1)$coef) - ty. <- ty. - mean(ty., na.rm = TRUE) - a <- (1/6) * sum(ty.^3, na.rm = TRUE)/sum(ty.^2, na.rm = TRUE)^1.5 + ty. <- ty. - mean(ty.) + a <- (1/6) * sum(ty.^3)/sum(ty.^2)^1.5 s <- mean(tt) B.mean <- c(B, s) names(B.mean) <- c("B", "s") From 20368932cf976eef60b19e209e6f973a06227d08 Mon Sep 17 00:00:00 2001 From: t-pollington Date: Sun, 29 Aug 2021 22:51:09 +0100 Subject: [PATCH 10/14] Adding unit test to guard against error mentioned in repo Issue 4 https://github.com/bnaras/bcaboot/issues/4 --- R/bcajack2.R | 2 ++ 1 file changed, 2 insertions(+) diff --git a/R/bcajack2.R b/R/bcajack2.R index e162fd8..7166463 100644 --- a/R/bcajack2.R +++ b/R/bcajack2.R @@ -103,6 +103,8 @@ bcajack2 <- function(x, B, func, ..., m = nrow(x), mr, pct = 0.333, K = 2, } Qd <- stats::quantile(D, pct) ip <- seq_len(B)[D <= Qd] + stopifnot("length(ip) < ncol(Y) makes lm() produce NA-containing ty." = + (length(ip) Date: Sun, 29 Aug 2021 22:57:46 +0100 Subject: [PATCH 11/14] Includes running example of what now happens if bcaboot::bcajack2() is run with a size of Y that means B*pct (approx)< n --- R/bcajack2.R | 2 +- bcaboot.Rproj | 5 +++++ deletemeifPRaccepted.justanexample.R | 16 ++++++++++++++++ 3 files changed, 22 insertions(+), 1 deletion(-) create mode 100644 deletemeifPRaccepted.justanexample.R diff --git a/R/bcajack2.R b/R/bcajack2.R index 7166463..02ad5f3 100644 --- a/R/bcajack2.R +++ b/R/bcajack2.R @@ -104,7 +104,7 @@ bcajack2 <- function(x, B, func, ..., m = nrow(x), mr, pct = 0.333, K = 2, Qd <- stats::quantile(D, pct) ip <- seq_len(B)[D <= Qd] stopifnot("length(ip) < ncol(Y) makes lm() produce NA-containing ty." = - (length(ip)= ncol(Y))) ty. <- as.vector(m * stats::lm(tt[ip] ~ Y[ip, ] - 1)$coef) ty. <- ty. - mean(ty.) a <- (1/6) * sum(ty.^3)/sum(ty.^2)^1.5 diff --git a/bcaboot.Rproj b/bcaboot.Rproj index d848a9f..cba1b6b 100644 --- a/bcaboot.Rproj +++ b/bcaboot.Rproj @@ -5,8 +5,13 @@ SaveWorkspace: No AlwaysSaveHistory: Default EnableCodeIndexing: Yes +UseSpacesForTab: Yes +NumSpacesForTab: 2 Encoding: UTF-8 +RnwWeave: Sweave +LaTeX: pdfLaTeX + AutoAppendNewline: Yes StripTrailingWhitespace: Yes diff --git a/deletemeifPRaccepted.justanexample.R b/deletemeifPRaccepted.justanexample.R new file mode 100644 index 0000000..dbdfbaa --- /dev/null +++ b/deletemeifPRaccepted.justanexample.R @@ -0,0 +1,16 @@ +data(diabetes, package = "bcaboot") +Xy <- cbind(diabetes$x, diabetes$y) +rfun <- function(Xy) { + y <- Xy[, 11] + X <- Xy[, 1:10] + summary(lm(y~X) )$adj.r.squared +} +set.seed(1234) +inds = matrix(NA, nrow = 1000, ncol = nrow(Xy)) +inds = t(apply(inds, MARGIN = 1, function(inds) sample(1:nrow(Xy), replace = TRUE))) # get bootstrap indices to feed into estimator rfun() +tt = vector(mode = "numeric", length = 1000) +tt = apply(inds, MARGIN = 1, function(inds) rfun(Xy[inds,])) # get \hat{theta}* +Y = t(apply(inds, MARGIN = 1, tabulate, nbins = nrow(Xy))) +t0 = rfun(Xy) +bcaboot::bcajack2(B = list(Y=Y, tt=tt, t0=t0), alpha = c(0.025,0.975), + pct = 0.33)$lims[,1] # this produces NA BCa CIs, because a is NA, because B*pct=1000*0.33=330<442=n. From 6836e121340f4cb966b1748802063d210ad947e3 Mon Sep 17 00:00:00 2001 From: t-pollington Date: Thu, 2 Sep 2021 11:19:38 +0100 Subject: [PATCH 12/14] redundant --- R/bcajack.R | 18 ++---------------- 1 file changed, 2 insertions(+), 16 deletions(-) diff --git a/R/bcajack.R b/R/bcajack.R index 162daea..8d70964 100644 --- a/R/bcajack.R +++ b/R/bcajack.R @@ -119,8 +119,8 @@ #' @export bcajack <- function(x, B, func, ..., m = nrow(x), mr = 5, K = 2, J = 10, alpha = c(0.025, 0.05, 0.1, 0.16), verbose = TRUE) { - ## x is nxp data matrix, func is statistic thetahat=func(x) can enter #bootsize B - ## for bootsim vector tt (which is calculated) + ## x is nxp data matrix, func is statistic thetahat=func(x) can enter + #bootsize B for bootsim vector tt (which is calculated) call <- match.call() ## Save rng state @@ -151,12 +151,10 @@ bcajack <- function(x, B, func, ..., m = nrow(x), mr = 5, K = 2, J = 10, } if (m < n) { - ##aa <- ssj <- rep(0, mr) aa <- ssj <- numeric(mr) r <- n %% m seq_len_m <- seq_len(m) for (k in seq_len(mr)) { - ##Imat <- matrix(sample(1:n, n - r), m) Imat <- sapply(seq_len_m, sample.int, n = n, size = n - r) Iout <- setdiff(seq_len(n), Imat) for (j in seq_len_m) { @@ -209,10 +207,8 @@ bcajack <- function(x, B, func, ..., m = nrow(x), mr = 5, K = 2, J = 10, ooo <- pmin(pmax(ooo, 1), B) lims0 <- sort(tt)[ooo] standard <- t0 + sdboot0 * stats::qnorm(alpha) - ## lims0 <- round(cbind(lims0, standard), rou) lims0 <- cbind(lims0, standard) dimnames(lims0) <- list(alpha, c("bca", "std")) - ## stats0 <- round(c(t0, sdboot0, z00, a, sdjack), rou) stats0 <- c(t0, sdboot0, z00, a, sdjack) names(stats0) <- c("theta", "sdboot", "z0", "a", "sdjack") vl0 <- list(lims = lims0, stats = stats0, B.mean = B.mean, call = call, seed = seed) @@ -220,7 +216,6 @@ bcajack <- function(x, B, func, ..., m = nrow(x), mr = 5, K = 2, J = 10, bcaboot.return(vl0) pct <- rep(0, nal) - ##for (i in 1:nal) pct[i] <- round(sum(tt <= lims0[i, 1])/B, 3) for (i in 1:nal) pct[i] <- sum(tt <= lims0[i, 1])/B Stand <- vl0$stats[1] + vl0$stats[2] * stats::qnorm(alpha) Limsd <- matrix(0, length(alpha), K) @@ -243,7 +238,6 @@ bcajack <- function(x, B, func, ..., m = nrow(x), mr = 5, K = 2, J = 10, oo <- pmin(pmax(oo, 1), Bj) li <- sort(ttj)[oo] standard <- t0 + sdboot * stats::qnorm(alpha) - ##sta <- round(c(t0, sdboot, z0, a, sdjack), rou) sta <- c(t0, sdboot, z0, a, sdjack) names(sta) <- c("theta", "sdboot", "z0", "a", "sdjack") lims[, j] <- li @@ -251,24 +245,16 @@ bcajack <- function(x, B, func, ..., m = nrow(x), mr = 5, K = 2, J = 10, } Limsd[, k] <- apply(lims, 1, sd) * (J - 1)/sqrt(J) Statsd[, k] <- apply(stats, 1, sd) * (J - 1)/sqrt(J) - ##if (verbose) cat("{", k, "}", sep = "") } limsd <- rowMeans(Limsd, 1) statsd <- rowMeans(Statsd, 1) - ##limits <- round(cbind(vl0$lims[, 1], limsd, vl0$lims[, 2], pct), rou) limits <- cbind(vl0$lims[, 1], limsd, vl0$lims[, 2], pct) dimnames(limits) <- list(alpha, c("bca", "jacksd", "std", "pct")) - ##stats <- round(rbind(stats0, statsd), rou) stats <- rbind(stats0, statsd) dimnames(stats) <- list(c("est", "jsd"), c("theta", "sdboot", "z0", "a", "sdjack")) vl <- list(call = call, lims = limits, stats = stats, B.mean = B.mean, seed = seed) if (ttind == 0) { - ##vl$ustats <- round(ustats, rou) vl$ustats <- ustats } - ## if (sw == 5) { - ## vl$tt <- tt - ## return(vl) - ## } bcaboot.return(vl) } From 0506c5ec12dfb33e0d6ac9a8d2c618c2bdc5ce82 Mon Sep 17 00:00:00 2001 From: t-pollington Date: Thu, 2 Sep 2021 11:26:34 +0100 Subject: [PATCH 13/14] Code correction and some refactoring of the loop to get aa and ssj. Imat[Ij, ] seemed to be a mistake as the cols of Imat represent the m groups. --- R/bcajack.R | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/R/bcajack.R b/R/bcajack.R index 8d70964..cd14879 100644 --- a/R/bcajack.R +++ b/R/bcajack.R @@ -155,12 +155,13 @@ bcajack <- function(x, B, func, ..., m = nrow(x), mr = 5, K = 2, J = 10, r <- n %% m seq_len_m <- seq_len(m) for (k in seq_len(mr)) { - Imat <- sapply(seq_len_m, sample.int, n = n, size = n - r) - Iout <- setdiff(seq_len(n), Imat) + k.ind = sample.int(n = n, size = n - r, replace = FALSE) for (j in seq_len_m) { - Ij <- setdiff(seq_len_m, j) - ij <- c(c(Imat[Ij, ], Iout)) - u[j] <- func(x[ij, ]) + ij <- k.ind[(r*j-(r-1)):(r*j)] + u[j] <- func(x[-ij, ],...) + # note the remainder not in m groups don't appear in k.ind + # but are implicitly kept in x + stopifnot("func(x) must not produce NAs" = (!is.na(u[j]))) } t. <- (mean(u) - u) * (m - 1) aa[k] <- (1/6) * sum(t.^3)/(sum(t.^2))^1.5 From 4742078189684d7cf71e797fbd61ffb86e233a63 Mon Sep 17 00:00:00 2001 From: t-pollington Date: Thu, 2 Sep 2021 11:36:25 +0100 Subject: [PATCH 14/14] Code refactoring. Since the line: u. <- 2 * t. - s. gave a warning of different t. & s. lengths when applied to my data. Following "Estimation and Accuracy After Model Selection" by Efron, p995. --- R/bcajack.R | 44 +++++++++++++++++++++++--------------------- 1 file changed, 23 insertions(+), 21 deletions(-) diff --git a/R/bcajack.R b/R/bcajack.R index cd14879..a426e90 100644 --- a/R/bcajack.R +++ b/R/bcajack.R @@ -172,38 +172,36 @@ bcajack <- function(x, B, func, ..., m = nrow(x), mr = 5, K = 2, J = 10, } if (ttind == 0) { - tY. <- Y. <- rep(0, n) + Yj <- covj <- rep(0, n) + Yij = vector("list", B) if (verbose) pb <- utils::txtProgressBar(min = 0, max = B, style = 3) for (j in seq_len(B)) { - ij <- sample(x = n, size = n, replace = TRUE) - Yj <- table(c(ij, 1:n)) - 1 - tt[j] <- func(x[ij, ], ...) - tY. <- tY. + tt[j] * Yj - Y. <- Y. + Yj + ij <- sample.int(n, size = n, replace = TRUE) + tt[j] <- func(x[ij, ],...) # \hat{theta}* + stopifnot("func(x) must not produce NAs" = (!is.na(tt[j]))) if (verbose) utils::setTxtProgressBar(pb, j) + Yij[[j]] = table(c(ij, 1:n)) - 1 + Yj = Yj + Yij[[j]] } if (verbose) close(pb) - tt. <- mean(tt) - tY. <- tY./B - Y. <- Y./B - s. <- n * (tY. - tt. * Y.) - u. <- 2 * t. - s. - sdu <- sqrt(sum(u.^2))/n - ustat <- 2 * t0 - tt. + covj = covj/B + sdu <- sqrt(sum(covj^2)) # sampling error for ustat. + ustat <- 2 * t0 - mean(tt) # 2\hat{theta}-mean(\hat{theta}*) ustats <- c(ustat, sdu) names(ustats) <- c("ustat", "sdu") } B.mean <- c(B, mean(tt)) + names(B.mean) <- c("B", "s") alpha <- alpha[alpha < 0.5] alpha <- c(alpha, 0.5, rev(1 - alpha)) zalpha <- stats::qnorm(alpha) nal <- length(alpha) - sdboot0 <- stats::sd(tt) # sdd=stats::sd(dd) + sdboot0 <- stats::sd(tt) z00 <- stats::qnorm(sum(tt < t0)/B) - iles <- stats::pnorm(z00 + (z00 + zalpha)/(1 - a * (z00 + zalpha))) + iles <- stats::pnorm(z00 + (z00 + zalpha)/(1 - a * (z00 + zalpha))) # Phi ooo <- trunc(iles * B) ooo <- pmin(pmax(ooo, 1), B) lims0 <- sort(tt)[ooo] @@ -212,10 +210,11 @@ bcajack <- function(x, B, func, ..., m = nrow(x), mr = 5, K = 2, J = 10, dimnames(lims0) <- list(alpha, c("bca", "std")) stats0 <- c(t0, sdboot0, z00, a, sdjack) names(stats0) <- c("theta", "sdboot", "z0", "a", "sdjack") - vl0 <- list(lims = lims0, stats = stats0, B.mean = B.mean, call = call, seed = seed) - if (K == 0) + vl0 <- list(lims = lims0, stats = stats0, B.mean = B.mean, call = call, + seed = seed) + if (K == 0){ bcaboot.return(vl0) - + } pct <- rep(0, nal) for (i in 1:nal) pct[i] <- sum(tt <= lims0[i, 1])/B Stand <- vl0$stats[1] + vl0$stats[2] * stats::qnorm(alpha) @@ -223,8 +222,9 @@ bcajack <- function(x, B, func, ..., m = nrow(x), mr = 5, K = 2, J = 10, Statsd <- matrix(0, 5, K) for (k in 1:K) { - II <- sample(x = B, size = B) + II <- sample.int(B, size = B, replace = FALSE) II <- matrix(II, ncol = J) + stopifnot("J must be a multiple of B" = ((B>=J)&(B%%J==0))) lims <- matrix(0, length(alpha), J) stats <- matrix(0, 5, J) for (j in 1:J) { @@ -252,8 +252,10 @@ bcajack <- function(x, B, func, ..., m = nrow(x), mr = 5, K = 2, J = 10, limits <- cbind(vl0$lims[, 1], limsd, vl0$lims[, 2], pct) dimnames(limits) <- list(alpha, c("bca", "jacksd", "std", "pct")) stats <- rbind(stats0, statsd) - dimnames(stats) <- list(c("est", "jsd"), c("theta", "sdboot", "z0", "a", "sdjack")) - vl <- list(call = call, lims = limits, stats = stats, B.mean = B.mean, seed = seed) + dimnames(stats) <- list(c("est", "jsd"), c("theta", "sdboot", "z0", "a", + "sdjack")) + vl <- list(call = call, lims = limits, stats = stats, B.mean = B.mean, + seed = seed) if (ttind == 0) { vl$ustats <- ustats }