diff --git a/R/bcajack.R b/R/bcajack.R index 162daea..a426e90 100644 --- a/R/bcajack.R +++ b/R/bcajack.R @@ -119,8 +119,8 @@ #' @export bcajack <- function(x, B, func, ..., m = nrow(x), mr = 5, K = 2, J = 10, alpha = c(0.025, 0.05, 0.1, 0.16), verbose = TRUE) { - ## x is nxp data matrix, func is statistic thetahat=func(x) can enter #bootsize B - ## for bootsim vector tt (which is calculated) + ## x is nxp data matrix, func is statistic thetahat=func(x) can enter + #bootsize B for bootsim vector tt (which is calculated) call <- match.call() ## Save rng state @@ -151,18 +151,17 @@ bcajack <- function(x, B, func, ..., m = nrow(x), mr = 5, K = 2, J = 10, } if (m < n) { - ##aa <- ssj <- rep(0, mr) aa <- ssj <- numeric(mr) r <- n %% m seq_len_m <- seq_len(m) for (k in seq_len(mr)) { - ##Imat <- matrix(sample(1:n, n - r), m) - Imat <- sapply(seq_len_m, sample.int, n = n, size = n - r) - Iout <- setdiff(seq_len(n), Imat) + k.ind = sample.int(n = n, size = n - r, replace = FALSE) for (j in seq_len_m) { - Ij <- setdiff(seq_len_m, j) - ij <- c(c(Imat[Ij, ], Iout)) - u[j] <- func(x[ij, ]) + ij <- k.ind[(r*j-(r-1)):(r*j)] + u[j] <- func(x[-ij, ],...) + # note the remainder not in m groups don't appear in k.ind + # but are implicitly kept in x + stopifnot("func(x) must not produce NAs" = (!is.na(u[j]))) } t. <- (mean(u) - u) * (m - 1) aa[k] <- (1/6) * sum(t.^3)/(sum(t.^2))^1.5 @@ -173,62 +172,59 @@ bcajack <- function(x, B, func, ..., m = nrow(x), mr = 5, K = 2, J = 10, } if (ttind == 0) { - tY. <- Y. <- rep(0, n) + Yj <- covj <- rep(0, n) + Yij = vector("list", B) if (verbose) pb <- utils::txtProgressBar(min = 0, max = B, style = 3) for (j in seq_len(B)) { - ij <- sample(x = n, size = n, replace = TRUE) - Yj <- table(c(ij, 1:n)) - 1 - tt[j] <- func(x[ij, ], ...) - tY. <- tY. + tt[j] * Yj - Y. <- Y. + Yj + ij <- sample.int(n, size = n, replace = TRUE) + tt[j] <- func(x[ij, ],...) # \hat{theta}* + stopifnot("func(x) must not produce NAs" = (!is.na(tt[j]))) if (verbose) utils::setTxtProgressBar(pb, j) + Yij[[j]] = table(c(ij, 1:n)) - 1 + Yj = Yj + Yij[[j]] } if (verbose) close(pb) - tt. <- mean(tt) - tY. <- tY./B - Y. <- Y./B - s. <- n * (tY. - tt. * Y.) - u. <- 2 * t. - s. - sdu <- sqrt(sum(u.^2))/n - ustat <- 2 * t0 - tt. + covj = covj/B + sdu <- sqrt(sum(covj^2)) # sampling error for ustat. + ustat <- 2 * t0 - mean(tt) # 2\hat{theta}-mean(\hat{theta}*) ustats <- c(ustat, sdu) names(ustats) <- c("ustat", "sdu") } B.mean <- c(B, mean(tt)) + names(B.mean) <- c("B", "s") alpha <- alpha[alpha < 0.5] alpha <- c(alpha, 0.5, rev(1 - alpha)) zalpha <- stats::qnorm(alpha) nal <- length(alpha) - sdboot0 <- stats::sd(tt) # sdd=stats::sd(dd) + sdboot0 <- stats::sd(tt) z00 <- stats::qnorm(sum(tt < t0)/B) - iles <- stats::pnorm(z00 + (z00 + zalpha)/(1 - a * (z00 + zalpha))) + iles <- stats::pnorm(z00 + (z00 + zalpha)/(1 - a * (z00 + zalpha))) # Phi ooo <- trunc(iles * B) ooo <- pmin(pmax(ooo, 1), B) lims0 <- sort(tt)[ooo] standard <- t0 + sdboot0 * stats::qnorm(alpha) - ## lims0 <- round(cbind(lims0, standard), rou) lims0 <- cbind(lims0, standard) dimnames(lims0) <- list(alpha, c("bca", "std")) - ## stats0 <- round(c(t0, sdboot0, z00, a, sdjack), rou) stats0 <- c(t0, sdboot0, z00, a, sdjack) names(stats0) <- c("theta", "sdboot", "z0", "a", "sdjack") - vl0 <- list(lims = lims0, stats = stats0, B.mean = B.mean, call = call, seed = seed) - if (K == 0) + vl0 <- list(lims = lims0, stats = stats0, B.mean = B.mean, call = call, + seed = seed) + if (K == 0){ bcaboot.return(vl0) - + } pct <- rep(0, nal) - ##for (i in 1:nal) pct[i] <- round(sum(tt <= lims0[i, 1])/B, 3) for (i in 1:nal) pct[i] <- sum(tt <= lims0[i, 1])/B Stand <- vl0$stats[1] + vl0$stats[2] * stats::qnorm(alpha) Limsd <- matrix(0, length(alpha), K) Statsd <- matrix(0, 5, K) for (k in 1:K) { - II <- sample(x = B, size = B) + II <- sample.int(B, size = B, replace = FALSE) II <- matrix(II, ncol = J) + stopifnot("J must be a multiple of B" = ((B>=J)&(B%%J==0))) lims <- matrix(0, length(alpha), J) stats <- matrix(0, 5, J) for (j in 1:J) { @@ -243,7 +239,6 @@ bcajack <- function(x, B, func, ..., m = nrow(x), mr = 5, K = 2, J = 10, oo <- pmin(pmax(oo, 1), Bj) li <- sort(ttj)[oo] standard <- t0 + sdboot * stats::qnorm(alpha) - ##sta <- round(c(t0, sdboot, z0, a, sdjack), rou) sta <- c(t0, sdboot, z0, a, sdjack) names(sta) <- c("theta", "sdboot", "z0", "a", "sdjack") lims[, j] <- li @@ -251,24 +246,18 @@ bcajack <- function(x, B, func, ..., m = nrow(x), mr = 5, K = 2, J = 10, } Limsd[, k] <- apply(lims, 1, sd) * (J - 1)/sqrt(J) Statsd[, k] <- apply(stats, 1, sd) * (J - 1)/sqrt(J) - ##if (verbose) cat("{", k, "}", sep = "") } limsd <- rowMeans(Limsd, 1) statsd <- rowMeans(Statsd, 1) - ##limits <- round(cbind(vl0$lims[, 1], limsd, vl0$lims[, 2], pct), rou) limits <- cbind(vl0$lims[, 1], limsd, vl0$lims[, 2], pct) dimnames(limits) <- list(alpha, c("bca", "jacksd", "std", "pct")) - ##stats <- round(rbind(stats0, statsd), rou) stats <- rbind(stats0, statsd) - dimnames(stats) <- list(c("est", "jsd"), c("theta", "sdboot", "z0", "a", "sdjack")) - vl <- list(call = call, lims = limits, stats = stats, B.mean = B.mean, seed = seed) + dimnames(stats) <- list(c("est", "jsd"), c("theta", "sdboot", "z0", "a", + "sdjack")) + vl <- list(call = call, lims = limits, stats = stats, B.mean = B.mean, + seed = seed) if (ttind == 0) { - ##vl$ustats <- round(ustats, rou) vl$ustats <- ustats } - ## if (sw == 5) { - ## vl$tt <- tt - ## return(vl) - ## } bcaboot.return(vl) } diff --git a/R/bcajack2.R b/R/bcajack2.R index 1210c23..02ad5f3 100644 --- a/R/bcajack2.R +++ b/R/bcajack2.R @@ -76,8 +76,8 @@ #' bcajack2(x = Xy, B = 1000, func = rfun, m = 40, verbose = FALSE) #' #' @export -bcajack2 <- function(x, B, func, ..., m = nrow(x), mr, pct = 0.333, K = 2, J = 12, - alpha = c(0.025, 0.05, 0.1, 0.16), +bcajack2 <- function(x, B, func, ..., m = nrow(x), mr, pct = 0.333, K = 2, + J = 12, alpha = c(0.025, 0.05, 0.1, 0.16), verbose = TRUE) { call <- match.call() @@ -88,9 +88,11 @@ bcajack2 <- function(x, B, func, ..., m = nrow(x), mr, pct = 0.333, K = 2, J = 1 seed <- get(".Random.seed", envir = .GlobalEnv, inherits = FALSE) qbca2 <- function(Y, tt, t0, alpha, pct) { + stopifnot("tt & t0 must contain no missing values" = + (!is.na(tt) & !is.na(t0))) + m <- ncol(Y) B <- nrow(Y) - o1 <- rep(1, m) D <- rep(0, B) for (i in seq_len(B)) { @@ -101,18 +103,18 @@ bcajack2 <- function(x, B, func, ..., m = nrow(x), mr, pct = 0.333, K = 2, J = 1 } Qd <- stats::quantile(D, pct) ip <- seq_len(B)[D <= Qd] + stopifnot("length(ip) < ncol(Y) makes lm() produce NA-containing ty." = + (length(ip) >= ncol(Y))) ty. <- as.vector(m * stats::lm(tt[ip] ~ Y[ip, ] - 1)$coef) ty. <- ty. - mean(ty.) a <- (1/6) * sum(ty.^3)/sum(ty.^2)^1.5 - ## if (sw == 3) - ## return(ty.) s <- mean(tt) B.mean <- c(B, s) + names(B.mean) <- c("B", "s") zalpha <- stats::qnorm(alpha) nal <- length(alpha) ustat <- 2 * t0 - s - ##s. <- m * .v(stats::cov(tt, Y)) s. <- m * as.vector(stats::cov(tt, Y)) u. <- 2 * ty. - s. sdu <- sum(u.^2)^0.5/m @@ -128,7 +130,6 @@ bcajack2 <- function(x, B, func, ..., m = nrow(x), mr, pct = 0.333, K = 2, J = 1 ooo <- pmin(pmax(ooo, 1), B) lims <- sort(tt)[ooo] standard <- t0 + sdboot * stats::qnorm(alpha) - ##lims <- round(cbind(lims, standard), rou) lims <- cbind(lims, standard) dimnames(lims) <- list(alpha, c("bca", "std")) stats <- c(t0, sdboot, z0, a, sdjack) @@ -141,11 +142,13 @@ bcajack2 <- function(x, B, func, ..., m = nrow(x), mr, pct = 0.333, K = 2, J = 1 alpha <- c(alpha, 0.5, rev(1 - alpha)) if (is.list(B)) { + stopifnot("To use the Blist format described in bcajack2() help, + please name `B` as Y, tt & t0" = + (names(B) == c("Y", "tt", "t0"))) Y <- B$Y tt <- B$tt t0 <- B$t0 B <- length(tt) - ##vl0 <- qbca2(Y, tt, t0, alpha = alpha, pct = pct, rou = rou) vl0 <- qbca2(Y, tt, t0, alpha = alpha, pct = pct) } else { if (is.vector(x)) @@ -166,7 +169,6 @@ bcajack2 <- function(x, B, func, ..., m = nrow(x), mr, pct = 0.333, K = 2, J = 1 if (verbose) utils::setTxtProgressBar(pb, k) } if (verbose) close(pb) - ##vl0 <- qbca2(Y, tt, t0, alpha = alpha, pct = pct, rou = rou) vl0 <- qbca2(Y, tt, t0, alpha = alpha, pct = pct) } @@ -187,7 +189,6 @@ bcajack2 <- function(x, B, func, ..., m = nrow(x), mr, pct = 0.333, K = 2, J = 1 if (verbose) utils::setTxtProgressBar(pb, k) } if (verbose) close(pb) - ##vl0 <- qbca2(Y, tt, t0, alpha = alpha, pct = pct, rou = rou) vl0 <- qbca2(Y, tt, t0, alpha = alpha, pct = pct) } } @@ -199,7 +200,6 @@ bcajack2 <- function(x, B, func, ..., m = nrow(x), mr, pct = 0.333, K = 2, J = 1 nal <- length(alpha) Pct <- rep(0, nal) - ##for (i in 1:nal) Pct[i] <- round(sum(tt <= vl0$lims[i, 1])/B, rou) for (i in 1:nal) Pct[i] <- sum(tt <= vl0$lims[i, 1])/B Stand <- vl0$stats[1] + vl0$stats[2] * stats::qnorm(alpha) Limsd <- matrix(0, length(alpha), K) @@ -218,38 +218,25 @@ bcajack2 <- function(x, B, func, ..., m = nrow(x), mr, pct = 0.333, K = 2, J = 1 iij <- c(II[, -j]) Yj <- Y[iij, ] ttj <- tt[iij] - ##vlj <- qbca2(Yj, ttj, t0, alpha, pct, rou) vlj <- qbca2(Yj, ttj, t0, alpha, pct) limbc[, j] <- vlj$lims[, 1] limst[, j] <- vlj$lims[, 2] stats[, j] <- vlj$stats } - ## if (sw == 4) - ## return(list(limbc = limbc, limst = limst, stats = stats)) Limbcsd[, k] <- apply(limbc, 1, sd) * (J - 1)/sqrt(J) Statsd[, k] <- apply(stats, 1, sd) * (J - 1)/sqrt(J) - ## if (verbose) - ## cat("{", k, "}", sep = "") - ## if (sw == 6) - ## return(list(Limbcsd = Limbcsd, Statsd = Statsd)) } limsd <- rowMeans(Limbcsd, 1) statsd <- rowMeans(Statsd, 1) - ##limits <- round(cbind(vl0$lims[, 1], limsd, vl0$lims[, 2], Pct), rou) limits <- cbind(vl0$lims[, 1], limsd, vl0$lims[, 2], Pct) dimnames(limits) <- list(alpha, c("bca", "jacksd", "std", "pct")) - ##stats <- round(rbind(vl0$stats, statsd), rou) stats <- rbind(vl0$stats, statsd) - ##ustats <- round(vl0$ustats, rou) ustats <- vl0$ustats - ##B.mean <- c(B, round(mean(tt), rou)) B.mean <- c(B, mean(tt)) dimnames(stats) <- list(c("est", "jsd"), c("theta", "sdboot", "z0", "a", "sdjack")) vll <- list(call = call, lims = limits, stats = stats, B.mean = B.mean, ustats = ustats, seed = seed) - ## if (sw == 5) - ## vll$tt <- tt bcaboot.return(vll) } diff --git a/_pkgdown.yml b/_pkgdown.yml index 22194ab..7a73e36 100644 --- a/_pkgdown.yml +++ b/_pkgdown.yml @@ -1,6 +1,8 @@ authors: Bradley Efron: href: https://efron.web.stanford.edu + Timothy M Pollington: # bcajack2() bug correction + href: https://orcid.org/0000-0002-9688-5960 Balasubramanian Narasimhan: href: https://statistics.stanford.edu/people/balasubramanian-narasimhan diff --git a/bcaboot.Rproj b/bcaboot.Rproj index d848a9f..cba1b6b 100644 --- a/bcaboot.Rproj +++ b/bcaboot.Rproj @@ -5,8 +5,13 @@ SaveWorkspace: No AlwaysSaveHistory: Default EnableCodeIndexing: Yes +UseSpacesForTab: Yes +NumSpacesForTab: 2 Encoding: UTF-8 +RnwWeave: Sweave +LaTeX: pdfLaTeX + AutoAppendNewline: Yes StripTrailingWhitespace: Yes diff --git a/deletemeifPRaccepted.justanexample.R b/deletemeifPRaccepted.justanexample.R new file mode 100644 index 0000000..dbdfbaa --- /dev/null +++ b/deletemeifPRaccepted.justanexample.R @@ -0,0 +1,16 @@ +data(diabetes, package = "bcaboot") +Xy <- cbind(diabetes$x, diabetes$y) +rfun <- function(Xy) { + y <- Xy[, 11] + X <- Xy[, 1:10] + summary(lm(y~X) )$adj.r.squared +} +set.seed(1234) +inds = matrix(NA, nrow = 1000, ncol = nrow(Xy)) +inds = t(apply(inds, MARGIN = 1, function(inds) sample(1:nrow(Xy), replace = TRUE))) # get bootstrap indices to feed into estimator rfun() +tt = vector(mode = "numeric", length = 1000) +tt = apply(inds, MARGIN = 1, function(inds) rfun(Xy[inds,])) # get \hat{theta}* +Y = t(apply(inds, MARGIN = 1, tabulate, nbins = nrow(Xy))) +t0 = rfun(Xy) +bcaboot::bcajack2(B = list(Y=Y, tt=tt, t0=t0), alpha = c(0.025,0.975), + pct = 0.33)$lims[,1] # this produces NA BCa CIs, because a is NA, because B*pct=1000*0.33=330<442=n.