Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
73 changes: 31 additions & 42 deletions R/bcajack.R
Original file line number Diff line number Diff line change
Expand Up @@ -119,8 +119,8 @@
#' @export
bcajack <- function(x, B, func, ..., m = nrow(x), mr = 5, K = 2, J = 10,
alpha = c(0.025, 0.05, 0.1, 0.16), verbose = TRUE) {
## x is nxp data matrix, func is statistic thetahat=func(x) can enter #bootsize B
## for bootsim vector tt (which is calculated)
## x is nxp data matrix, func is statistic thetahat=func(x) can enter
#bootsize B for bootsim vector tt (which is calculated)

call <- match.call()
## Save rng state
Expand Down Expand Up @@ -151,18 +151,17 @@ bcajack <- function(x, B, func, ..., m = nrow(x), mr = 5, K = 2, J = 10,
}

if (m < n) {
##aa <- ssj <- rep(0, mr)
aa <- ssj <- numeric(mr)
r <- n %% m
seq_len_m <- seq_len(m)
for (k in seq_len(mr)) {
##Imat <- matrix(sample(1:n, n - r), m)
Imat <- sapply(seq_len_m, sample.int, n = n, size = n - r)
Iout <- setdiff(seq_len(n), Imat)
k.ind = sample.int(n = n, size = n - r, replace = FALSE)
for (j in seq_len_m) {
Ij <- setdiff(seq_len_m, j)
ij <- c(c(Imat[Ij, ], Iout))
u[j] <- func(x[ij, ])
ij <- k.ind[(r*j-(r-1)):(r*j)]
u[j] <- func(x[-ij, ],...)
# note the remainder not in m groups don't appear in k.ind
# but are implicitly kept in x
stopifnot("func(x) must not produce NAs" = (!is.na(u[j])))
}
t. <- (mean(u) - u) * (m - 1)
aa[k] <- (1/6) * sum(t.^3)/(sum(t.^2))^1.5
Expand All @@ -173,62 +172,59 @@ bcajack <- function(x, B, func, ..., m = nrow(x), mr = 5, K = 2, J = 10,
}

if (ttind == 0) {
tY. <- Y. <- rep(0, n)
Yj <- covj <- rep(0, n)
Yij = vector("list", B)
if (verbose) pb <- utils::txtProgressBar(min = 0, max = B, style = 3)
for (j in seq_len(B)) {
ij <- sample(x = n, size = n, replace = TRUE)
Yj <- table(c(ij, 1:n)) - 1
tt[j] <- func(x[ij, ], ...)
tY. <- tY. + tt[j] * Yj
Y. <- Y. + Yj
ij <- sample.int(n, size = n, replace = TRUE)
tt[j] <- func(x[ij, ],...) # \hat{theta}*
stopifnot("func(x) must not produce NAs" = (!is.na(tt[j])))
if (verbose) utils::setTxtProgressBar(pb, j)
Yij[[j]] = table(c(ij, 1:n)) - 1
Yj = Yj + Yij[[j]]
}
if (verbose) close(pb)
tt. <- mean(tt)
tY. <- tY./B
Y. <- Y./B
s. <- n * (tY. - tt. * Y.)
u. <- 2 * t. - s.
sdu <- sqrt(sum(u.^2))/n
ustat <- 2 * t0 - tt.
covj = covj/B
sdu <- sqrt(sum(covj^2)) # sampling error for ustat.
ustat <- 2 * t0 - mean(tt) # 2\hat{theta}-mean(\hat{theta}*)
ustats <- c(ustat, sdu)
names(ustats) <- c("ustat", "sdu")
}
B.mean <- c(B, mean(tt))
names(B.mean) <- c("B", "s")
alpha <- alpha[alpha < 0.5]
alpha <- c(alpha, 0.5, rev(1 - alpha))

zalpha <- stats::qnorm(alpha)
nal <- length(alpha)

sdboot0 <- stats::sd(tt) # sdd=stats::sd(dd)
sdboot0 <- stats::sd(tt)
z00 <- stats::qnorm(sum(tt < t0)/B)

iles <- stats::pnorm(z00 + (z00 + zalpha)/(1 - a * (z00 + zalpha)))
iles <- stats::pnorm(z00 + (z00 + zalpha)/(1 - a * (z00 + zalpha))) # Phi
ooo <- trunc(iles * B)
ooo <- pmin(pmax(ooo, 1), B)
lims0 <- sort(tt)[ooo]
standard <- t0 + sdboot0 * stats::qnorm(alpha)
## lims0 <- round(cbind(lims0, standard), rou)
lims0 <- cbind(lims0, standard)
dimnames(lims0) <- list(alpha, c("bca", "std"))
## stats0 <- round(c(t0, sdboot0, z00, a, sdjack), rou)
stats0 <- c(t0, sdboot0, z00, a, sdjack)
names(stats0) <- c("theta", "sdboot", "z0", "a", "sdjack")
vl0 <- list(lims = lims0, stats = stats0, B.mean = B.mean, call = call, seed = seed)
if (K == 0)
vl0 <- list(lims = lims0, stats = stats0, B.mean = B.mean, call = call,
seed = seed)
if (K == 0){
bcaboot.return(vl0)

}
pct <- rep(0, nal)
##for (i in 1:nal) pct[i] <- round(sum(tt <= lims0[i, 1])/B, 3)
for (i in 1:nal) pct[i] <- sum(tt <= lims0[i, 1])/B
Stand <- vl0$stats[1] + vl0$stats[2] * stats::qnorm(alpha)
Limsd <- matrix(0, length(alpha), K)
Statsd <- matrix(0, 5, K)

for (k in 1:K) {
II <- sample(x = B, size = B)
II <- sample.int(B, size = B, replace = FALSE)
II <- matrix(II, ncol = J)
stopifnot("J must be a multiple of B" = ((B>=J)&(B%%J==0)))
lims <- matrix(0, length(alpha), J)
stats <- matrix(0, 5, J)
for (j in 1:J) {
Expand All @@ -243,32 +239,25 @@ bcajack <- function(x, B, func, ..., m = nrow(x), mr = 5, K = 2, J = 10,
oo <- pmin(pmax(oo, 1), Bj)
li <- sort(ttj)[oo]
standard <- t0 + sdboot * stats::qnorm(alpha)
##sta <- round(c(t0, sdboot, z0, a, sdjack), rou)
sta <- c(t0, sdboot, z0, a, sdjack)
names(sta) <- c("theta", "sdboot", "z0", "a", "sdjack")
lims[, j] <- li
stats[, j] <- sta
}
Limsd[, k] <- apply(lims, 1, sd) * (J - 1)/sqrt(J)
Statsd[, k] <- apply(stats, 1, sd) * (J - 1)/sqrt(J)
##if (verbose) cat("{", k, "}", sep = "")
}
limsd <- rowMeans(Limsd, 1)
statsd <- rowMeans(Statsd, 1)
##limits <- round(cbind(vl0$lims[, 1], limsd, vl0$lims[, 2], pct), rou)
limits <- cbind(vl0$lims[, 1], limsd, vl0$lims[, 2], pct)
dimnames(limits) <- list(alpha, c("bca", "jacksd", "std", "pct"))
##stats <- round(rbind(stats0, statsd), rou)
stats <- rbind(stats0, statsd)
dimnames(stats) <- list(c("est", "jsd"), c("theta", "sdboot", "z0", "a", "sdjack"))
vl <- list(call = call, lims = limits, stats = stats, B.mean = B.mean, seed = seed)
dimnames(stats) <- list(c("est", "jsd"), c("theta", "sdboot", "z0", "a",
"sdjack"))
vl <- list(call = call, lims = limits, stats = stats, B.mean = B.mean,
seed = seed)
if (ttind == 0) {
##vl$ustats <- round(ustats, rou)
vl$ustats <- ustats
}
## if (sw == 5) {
## vl$tt <- tt
## return(vl)
## }
bcaboot.return(vl)
}
35 changes: 11 additions & 24 deletions R/bcajack2.R
Original file line number Diff line number Diff line change
Expand Up @@ -76,8 +76,8 @@
#' bcajack2(x = Xy, B = 1000, func = rfun, m = 40, verbose = FALSE)
#'
#' @export
bcajack2 <- function(x, B, func, ..., m = nrow(x), mr, pct = 0.333, K = 2, J = 12,
alpha = c(0.025, 0.05, 0.1, 0.16),
bcajack2 <- function(x, B, func, ..., m = nrow(x), mr, pct = 0.333, K = 2,
J = 12, alpha = c(0.025, 0.05, 0.1, 0.16),
verbose = TRUE) {

call <- match.call()
Expand All @@ -88,9 +88,11 @@ bcajack2 <- function(x, B, func, ..., m = nrow(x), mr, pct = 0.333, K = 2, J = 1
seed <- get(".Random.seed", envir = .GlobalEnv, inherits = FALSE)

qbca2 <- function(Y, tt, t0, alpha, pct) {
stopifnot("tt & t0 must contain no missing values" =
(!is.na(tt) & !is.na(t0)))

m <- ncol(Y)
B <- nrow(Y)
o1 <- rep(1, m)
D <- rep(0, B)

for (i in seq_len(B)) {
Expand All @@ -101,18 +103,18 @@ bcajack2 <- function(x, B, func, ..., m = nrow(x), mr, pct = 0.333, K = 2, J = 1
}
Qd <- stats::quantile(D, pct)
ip <- seq_len(B)[D <= Qd]
stopifnot("length(ip) < ncol(Y) makes lm() produce NA-containing ty." =
(length(ip) >= ncol(Y)))
ty. <- as.vector(m * stats::lm(tt[ip] ~ Y[ip, ] - 1)$coef)
ty. <- ty. - mean(ty.)
a <- (1/6) * sum(ty.^3)/sum(ty.^2)^1.5
## if (sw == 3)
## return(ty.)
s <- mean(tt)
B.mean <- c(B, s)
names(B.mean) <- c("B", "s")

zalpha <- stats::qnorm(alpha)
nal <- length(alpha)
ustat <- 2 * t0 - s
##s. <- m * .v(stats::cov(tt, Y))
s. <- m * as.vector(stats::cov(tt, Y))
u. <- 2 * ty. - s.
sdu <- sum(u.^2)^0.5/m
Expand All @@ -128,7 +130,6 @@ bcajack2 <- function(x, B, func, ..., m = nrow(x), mr, pct = 0.333, K = 2, J = 1
ooo <- pmin(pmax(ooo, 1), B)
lims <- sort(tt)[ooo]
standard <- t0 + sdboot * stats::qnorm(alpha)
##lims <- round(cbind(lims, standard), rou)
lims <- cbind(lims, standard)
dimnames(lims) <- list(alpha, c("bca", "std"))
stats <- c(t0, sdboot, z0, a, sdjack)
Expand All @@ -141,11 +142,13 @@ bcajack2 <- function(x, B, func, ..., m = nrow(x), mr, pct = 0.333, K = 2, J = 1
alpha <- c(alpha, 0.5, rev(1 - alpha))

if (is.list(B)) {
stopifnot("To use the Blist format described in bcajack2() help,
please name `B` as Y, tt & t0" =
(names(B) == c("Y", "tt", "t0")))
Y <- B$Y
tt <- B$tt
t0 <- B$t0
B <- length(tt)
##vl0 <- qbca2(Y, tt, t0, alpha = alpha, pct = pct, rou = rou)
vl0 <- qbca2(Y, tt, t0, alpha = alpha, pct = pct)
} else {
if (is.vector(x))
Expand All @@ -166,7 +169,6 @@ bcajack2 <- function(x, B, func, ..., m = nrow(x), mr, pct = 0.333, K = 2, J = 1
if (verbose) utils::setTxtProgressBar(pb, k)
}
if (verbose) close(pb)
##vl0 <- qbca2(Y, tt, t0, alpha = alpha, pct = pct, rou = rou)
vl0 <- qbca2(Y, tt, t0, alpha = alpha, pct = pct)
}

Expand All @@ -187,7 +189,6 @@ bcajack2 <- function(x, B, func, ..., m = nrow(x), mr, pct = 0.333, K = 2, J = 1
if (verbose) utils::setTxtProgressBar(pb, k)
}
if (verbose) close(pb)
##vl0 <- qbca2(Y, tt, t0, alpha = alpha, pct = pct, rou = rou)
vl0 <- qbca2(Y, tt, t0, alpha = alpha, pct = pct)
}
}
Expand All @@ -199,7 +200,6 @@ bcajack2 <- function(x, B, func, ..., m = nrow(x), mr, pct = 0.333, K = 2, J = 1

nal <- length(alpha)
Pct <- rep(0, nal)
##for (i in 1:nal) Pct[i] <- round(sum(tt <= vl0$lims[i, 1])/B, rou)
for (i in 1:nal) Pct[i] <- sum(tt <= vl0$lims[i, 1])/B
Stand <- vl0$stats[1] + vl0$stats[2] * stats::qnorm(alpha)
Limsd <- matrix(0, length(alpha), K)
Expand All @@ -218,38 +218,25 @@ bcajack2 <- function(x, B, func, ..., m = nrow(x), mr, pct = 0.333, K = 2, J = 1
iij <- c(II[, -j])
Yj <- Y[iij, ]
ttj <- tt[iij]
##vlj <- qbca2(Yj, ttj, t0, alpha, pct, rou)
vlj <- qbca2(Yj, ttj, t0, alpha, pct)
limbc[, j] <- vlj$lims[, 1]
limst[, j] <- vlj$lims[, 2]
stats[, j] <- vlj$stats
}

## if (sw == 4)
## return(list(limbc = limbc, limst = limst, stats = stats))
Limbcsd[, k] <- apply(limbc, 1, sd) * (J - 1)/sqrt(J)
Statsd[, k] <- apply(stats, 1, sd) * (J - 1)/sqrt(J)
## if (verbose)
## cat("{", k, "}", sep = "")
## if (sw == 6)
## return(list(Limbcsd = Limbcsd, Statsd = Statsd))
}
limsd <- rowMeans(Limbcsd, 1)
statsd <- rowMeans(Statsd, 1)
##limits <- round(cbind(vl0$lims[, 1], limsd, vl0$lims[, 2], Pct), rou)
limits <- cbind(vl0$lims[, 1], limsd, vl0$lims[, 2], Pct)
dimnames(limits) <- list(alpha, c("bca", "jacksd", "std", "pct"))
##stats <- round(rbind(vl0$stats, statsd), rou)
stats <- rbind(vl0$stats, statsd)
##ustats <- round(vl0$ustats, rou)
ustats <- vl0$ustats
##B.mean <- c(B, round(mean(tt), rou))
B.mean <- c(B, mean(tt))
dimnames(stats) <- list(c("est", "jsd"), c("theta", "sdboot", "z0", "a", "sdjack"))
vll <- list(call = call, lims = limits, stats = stats, B.mean = B.mean, ustats = ustats,
seed = seed)
## if (sw == 5)
## vll$tt <- tt
bcaboot.return(vll)
}

Expand Down
2 changes: 2 additions & 0 deletions _pkgdown.yml
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
authors:
Bradley Efron:
href: https://efron.web.stanford.edu
Timothy M Pollington: # bcajack2() bug correction
href: https://orcid.org/0000-0002-9688-5960
Balasubramanian Narasimhan:
href: https://statistics.stanford.edu/people/balasubramanian-narasimhan

5 changes: 5 additions & 0 deletions bcaboot.Rproj
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,13 @@ SaveWorkspace: No
AlwaysSaveHistory: Default

EnableCodeIndexing: Yes
UseSpacesForTab: Yes
NumSpacesForTab: 2
Encoding: UTF-8

RnwWeave: Sweave
LaTeX: pdfLaTeX

AutoAppendNewline: Yes
StripTrailingWhitespace: Yes

Expand Down
16 changes: 16 additions & 0 deletions deletemeifPRaccepted.justanexample.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
data(diabetes, package = "bcaboot")
Xy <- cbind(diabetes$x, diabetes$y)
rfun <- function(Xy) {
y <- Xy[, 11]
X <- Xy[, 1:10]
summary(lm(y~X) )$adj.r.squared
}
set.seed(1234)
inds = matrix(NA, nrow = 1000, ncol = nrow(Xy))
inds = t(apply(inds, MARGIN = 1, function(inds) sample(1:nrow(Xy), replace = TRUE))) # get bootstrap indices to feed into estimator rfun()
tt = vector(mode = "numeric", length = 1000)
tt = apply(inds, MARGIN = 1, function(inds) rfun(Xy[inds,])) # get \hat{theta}*
Y = t(apply(inds, MARGIN = 1, tabulate, nbins = nrow(Xy)))
t0 = rfun(Xy)
bcaboot::bcajack2(B = list(Y=Y, tt=tt, t0=t0), alpha = c(0.025,0.975),
pct = 0.33)$lims[,1] # this produces NA BCa CIs, because a is NA, because B*pct=1000*0.33=330<442=n.