From 5e6c20710b9a94beb11cbc8a1caefffa4824e3ca Mon Sep 17 00:00:00 2001
From: Markus Ulmer <markus.ulmer@stat.math.ethz.ch>
Date: Tue, 9 Dec 2025 16:48:35 +0100
Subject: [PATCH 01/15] test tree estimation helper function

---
 tests/testthat/test-tree.R | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/tests/testthat/test-tree.R b/tests/testthat/test-tree.R
index 637e5f9..520b69c 100644
--- a/tests/testthat/test-tree.R
+++ b/tests/testthat/test-tree.R
@@ -34,6 +34,14 @@ tree <- SDTree(x = X[, 1], y = Y, Q_type = 'no_deconfounding',
 expect_equal(tree$predictions, as.vector(predict(tree, data.frame(X = X[, 1]))))
 expect_equal(tree$predictions[1], predict(tree, data.frame(X = X[1, 1])))
 
+
+#test tree with bootstrap sample
+boot_index <- sample(1:50, 30, replace = T)
+estimate_tree(Y = Y, X = X, A = NULL, max_leaves = 100, cp = 0, min_sample = 1, 
+              mtry = NULL, fast = TRUE, Q_type = "trim", trim_quantile = 0.7, 
+              q_hat = 3, Qf = NULL, gamma = NULL, max_candidates = 200, 
+              Q_scale = TRUE, predictors = NULL, boot_index = boot_index)
+
              
 #### does it work with only one covariate?
 set.seed(1)
@@ -68,4 +76,4 @@ partDependence(tree, 1, X, subSample = 10)
 tree <- SDTree(x = X[, 1], y = Y, Q_type = 'no_deconfounding', 
                cp = 0, min_sample = 5)
 expect_equal(tree$predictions, as.vector(predict(tree, data.frame(X = X[, 1]))))
-expect_equal(tree$predictions[1], predict(tree, data.frame(X = X[1, 1])))
\ No newline at end of file
+expect_equal(tree$predictions[1], predict(tree, data.frame(X = X[1, 1])))

From 15aeab31e10be6092bbaf4d6d342cba7a52f9166 Mon Sep 17 00:00:00 2001
From: Markus Ulmer <markus.ulmer@stat.math.ethz.ch>
Date: Tue, 9 Dec 2025 16:49:14 +0100
Subject: [PATCH 02/15] add tree estimation helper function reduces ram usage
 in multicoring

---
 R/utility.R | 264 +++++++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 263 insertions(+), 1 deletion(-)

diff --git a/R/utility.R b/R/utility.R
index 754a820..d7ff1a5 100644
--- a/R/utility.R
+++ b/R/utility.R
@@ -82,7 +82,7 @@ split_names <- function(node, var_names = NULL, digits = 2){
 }
 
 
-# finds all the reasonable spliting points in a data matrix
+# finds all the reasonable splitting points in a data matrix
 find_s <- function(X, max_candidates = 100){
   p <- ncol(X)
   if(p == 1){
@@ -268,4 +268,266 @@ Bbasis <- function(x, breaks){
   return(Bx)
 }
 
+estimate_tree <- function(boot_index, Y, X, A, max_leaves, cp, min_sample, mtry, fast,
+                          Q_type, trim_quantile, q_hat, Qf, gamma, max_candidates, 
+                          Q_scale, predictors){
+  if(is.null(boot_index)){
+    boot_index <- 1:nrow(X)
+    tree_in_forest <- FALSE
+  }else{
+    tree_in_forest <- TRUE
+  }
+  n <- length(boot_index)
+  
+  # estimate spectral transformation
+  if(!is.null(A)){
+    if(is.null(gamma)) stop('gamma must be provided if A is provided')
+    if(is.vector(A)) A <- matrix(A)
+    if(!is.matrix(A)) stop('A must be a matrix')
+    if(nrow(A) != nrow(X)) stop('A must have n rows')
+    Wf <- get_Wf(matrix(A[boot_index, ], ncol = ncol(A)), gamma)
+  }else {
+    Wf <- function(v) v
+  }
 
+  if(is.null(Qf)){
+    if(!is.null(A)){
+      Qf <- function(v) get_Qf(Wf(X[boot_index, ]), Q_type, trim_quantile, q_hat, Q_scale)(Wf(v))
+    }else{
+      Qf <- get_Qf(X[boot_index, ], Q_type, trim_quantile, q_hat, Q_scale)
+    }
+  }else{
+    if(!is.function(Qf)) stop('Q must be a function')
+    if(length(Qf(rnorm(n))) == n) stop('Q must map from n to n')
+  }
+  
+  #selection of predictors
+  if(!is.null(predictors)){
+    if(is.character(predictors)){
+      if(!all(predictors %in% colnames(X)))
+        stop("predictors must either be numeric columne index or in colnames of X")
+      predictors <- which(colnames(X) %in% predictors)
+    }
+    if(is.numeric(predictors)){
+      if(!all(predictors > 0 & predictors <= ncol(X)))
+        stop("predictors must either be numeric columne index or in colnames of X")
+    }
+    pred_names <- colnames(X)
+    X <- matrix(X[, predictors], ncol = length(predictors))
+    if(!is.null(pred_names)){
+      colnames(X) <- pred_names[predictors]
+    }
+  }
+  
+  # number of covariates
+  p <- ncol(X)
+  if(!is.null(mtry) && mtry > p) stop('mtry must be at most p')
+  
+  # calculate first estimate
+  E <- matrix(1, n, 1)
+  E_tilde <- Qf(E)
+  Ue <- E_tilde / sqrt(sum(E_tilde ** 2))
+  Y_tilde <- Qf(Y[boot_index])
+  
+  # solve linear model
+  c_hat <- qr.coef(qr(E_tilde), Y_tilde)
+  c_hat <- as.numeric(c_hat)
+  
+  loss_start <- as.numeric(sum((Y_tilde - c_hat) ** 2) / n)
+  loss_temp <- loss_start
+  
+  # initialize tree
+  treeInfo <- c("name", "left", "right", "j", "s", "value", "dloss", 
+                "res_dloss", "cp", "n_samples", "leaf")
+  d <- length(treeInfo)
+  
+  tree <- matrix(0, ncol = d, nrow = 1, dimnames = list(NULL, treeInfo))
+  tree[1, c("name", "value", "dloss", "cp", "n_samples", "leaf")] <- 
+    c(1, c_hat, loss_start, 10, n, 1)
+  treeSize <- 1
+  
+  # memory for optimal splits
+  memory <- list()
+  potential_splits <- 1
+  
+  # variable importance
+  var_imp <- rep(0, p)
+  names(var_imp) <- colnames(X)
+  
+  after_mtry <- 0
+  
+  for(i in 1:max_leaves){
+    # iterate over all possible splits every time
+    # for slow but slightly better solution
+    if(!fast){
+      potential_splits <- 1:i
+      to_small <- sapply(potential_splits, 
+                         function(x){sum(E[, x]) < min_sample*2})
+      potential_splits <- potential_splits[!to_small]
+    }
+    
+    #iterate over new to estimate splits
+    for(branch in potential_splits){
+      # get samples in branch to evaluate
+      E_branch <- E[, branch]
+      index <- which(E_branch == 1)
+      X_branch <- matrix(X[boot_index[index], ], nrow = length(index))
+      
+      # get potential splitting candidates
+      s <- find_s(X_branch, max_candidates = max_candidates)
+      n_splits <- nrow(s)
+      
+      # remove splits resulting in to small leaves
+      if(min_sample > 1) {
+        s <- s[-c(0:(min_sample - 1), (n_splits - min_sample + 2):(n_splits+1)), ]
+      }
+      s <- matrix(s, ncol = p)
+      
+      optSplits <- lapply(1:p, function(j){
+        s_j <- unique(s[, j])
+        E_next <- lapply(s_j, function(si) {
+          E_next <- matrix(0, nrow = n, ncol = 1)
+          E_next[index[X_branch[, j] > si], ] <- 1
+          if(sum(E_next) == 0)return(NULL)
+          E_next
+        })
+        E_next <- do.call(cbind, E_next)
+        if(is.null(E_next)) return(c(-10, j, 0, branch))
+        U_next_prime <- Qf_temp(E_next, Ue, Qf)
+        U_next_size <- colSums(U_next_prime ** 2)
+        dloss <- as.numeric(crossprod(U_next_prime, Y_tilde))**2 / U_next_size
+        
+        opt <- which.max(unlist(dloss))
+        c(dloss[[opt]], j, s_j[opt], branch)
+      })
+      memory[[branch]] <- do.call(rbind, optSplits)
+    }
+    
+    if(i > after_mtry && !is.null(mtry)){
+      Losses_dec <- lapply(memory, function(branch){
+        branch[sample(1:p, mtry), ]})
+      Losses_dec <- do.call(rbind, Losses_dec)
+    }else {
+      Losses_dec <- do.call(rbind, memory)
+    }
+    
+    loc <- which.max(Losses_dec[, 1])
+    best_branch <- Losses_dec[loc, 4]
+    j <- Losses_dec[loc, 2]
+    s <- Losses_dec[loc, 3]
+    
+    if(Losses_dec[loc, 1] <= 0){
+      break
+    }
+    
+    # divide observations in leaf
+    index <- which(E[, best_branch] == 1)
+    index_n_branches <- index[X[boot_index[index], j] > s]
+    
+    # new indicator matrix
+    E <- cbind(E, matrix(0, n, 1))
+    E[index_n_branches, best_branch] <- 0
+    E[index_n_branches, i+1] <- 1
+    
+    E_tilde_branch <- E_tilde[, best_branch]
+    suppressWarnings({
+      E_tilde[, best_branch] <- Qf(E[, best_branch])
+    })
+    E_tilde <- cbind(E_tilde, matrix(E_tilde_branch - E_tilde[, best_branch]))
+    
+    c_hat <- qr.coef(qr(E_tilde), Y_tilde)
+    
+    u_next_prime <- Qf_temp(E[, i + 1], Ue, Qf)
+    Ue <- cbind(Ue, u_next_prime / sqrt(sum(u_next_prime ** 2)))
+    
+    # check if loss decrease is larger than minimum loss decrease
+    # and if linear model could be estimated
+    if(sum(is.na(as.numeric(c_hat))) > 0){
+      warning('singulaer matrix QE, tree might be to large, consider increasing cp')
+      break
+    }
+    
+    loss_dec <- as.numeric(loss_temp - loss(Y_tilde, E_tilde %*% c_hat))
+    loss_temp <- loss_temp - loss_dec
+    
+    if(loss_dec <= cp * loss_start){
+      break
+    }
+    # add loss decrease to variable importance
+    var_imp[j] <- var_imp[j] + loss_dec
+    
+    # add space for the two new leaves
+    tree <- rbind(tree, matrix(0, nrow = 2, ncol = d))
+    
+    # select leaf to split
+    leaves <- tree[, "leaf"] == 1
+    toSplit <- leaves & (tree[, "name"] == best_branch)
+    if(sum(toSplit) != 1) stop("Tries to split more than one leaf")
+    
+    # save split rule
+    tree[toSplit, c("left", "right", "j", "s", "res_dloss", "leaf")] <- 
+      c(treeSize + 1, treeSize + 2, j, s, loss_dec, 2)
+    
+    # add new leaves
+    tree[treeSize + 1, c("name", "dloss", "cp", "n_samples", "leaf")] <- 
+      c(tree[toSplit, "name"], loss_dec, loss_dec / loss_start, sum(E[, best_branch] == 1), 1)
+    tree[treeSize + 2, c("name", "dloss", "cp", "n_samples", "leaf")] <- 
+      c(i + 1, loss_dec, loss_dec / loss_start, sum(E[, i + 1] == 1), 1)
+    treeSize <- treeSize + 2
+    
+    # add estimates to tree leaves
+    c_hat <- as.numeric(c_hat)
+    # access leaf estimates by leaf names (i.e. columns of E)
+    tree[tree[, "leaf"] == 1, "value"] <- c_hat[tree[tree[, "leaf"] == 1, "name"]]
+    
+    # the two new partitions need to be checked for optimal splits in next iteration
+    potential_splits <- c(best_branch, i + 1)
+    
+    # a partition with less than min_sample observations or unique samples 
+    # are not available for further splits
+    to_small <- sapply(potential_splits, function(x){
+      new_samples <- nrow(unique(matrix(X[boot_index[as.logical(E[, x])],], nrow = sum(E[, x]))))
+      if(is.null(new_samples)) new_samples <- 0
+      (new_samples < min_sample * 2)
+    })
+    if(sum(to_small) > 0){
+      for(el in potential_splits[to_small]){
+        # to small partitions cannot decrease the loss
+        memory[[el]] <- matrix(0, p, 4)
+      }
+      potential_splits <- potential_splits[!to_small]
+    }
+  }
+  
+  if(i == max_leaves){
+    warning('maximum number of iterations was reached, consider increasing m!')
+  }
+  
+  # predict the test set
+  if(tree_in_forest){
+    f_X_hat <- NULL
+  }else{
+    f_X_hat <- traverse_tree(tree, X)
+  }
+  
+  
+  var_names <- colnames(data.frame(X))
+  names(var_imp) <- var_names
+  
+  # cp max of all splits after
+  new_cp <- getCp_max(tree)
+  tree[new_cp[[2]], "cp"] <- new_cp[[1]]
+  
+  # use max cp over siblings to ensure binary tree
+  for(i in 1:nrow(tree)){
+    if(tree[i, c("j")] != 0){
+      tree[tree[i, c("left", "right")], "cp"] <- 
+        max(tree[tree[i, c("left", "right")], "cp"])
+    }
+  }
+  
+  res <- list(predictions = f_X_hat, tree = tree, 
+              var_names = var_names, var_importance = var_imp)
+  class(res) <- 'SDTree'
+  res
+}

From ff49465e8e41558ae10127ab18e13f38e9115cd1 Mon Sep 17 00:00:00 2001
From: Markus Ulmer <markus.ulmer@stat.math.ethz.ch>
Date: Tue, 9 Dec 2025 17:24:24 +0100
Subject: [PATCH 03/15] prepare clean future usage

---
 R/predict.R | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/R/predict.R b/R/predict.R
index 033acd1..8af3fd5 100644
--- a/R/predict.R
+++ b/R/predict.R
@@ -74,6 +74,13 @@ predict.SDForest <- function(object, newdata, mc.cores = 1, ...){
     })
     preds_i
   }
+  #if(mc.cores > 1){
+  #  plan <- if (parallelly::supportsMulticore()) "multicore" else "multisession"
+  #  with(future::plan(plan, workers = mc.cores), local = TRUE)
+  #}
+  #preds_list <- future.apply::future_lapply(future.seed = TRUE, 
+  #                                          X = object$forest, worker_fun)
+  
   if(mc.cores > 1){
     if(Sys.info()[["sysname"]] == "Linux"){
       preds_list <- parallel::mclapply(object$forest, 

From dcbedd9b2f124cb154d9ef7d7b9d6e835281b7ce Mon Sep 17 00:00:00 2001
From: Markus Ulmer <markus.ulmer@stat.math.ethz.ch>
Date: Tue, 9 Dec 2025 17:24:43 +0100
Subject: [PATCH 04/15] use helper function to estimate tree

---
 R/SDTree.R | 251 +----------------------------------------------------
 1 file changed, 3 insertions(+), 248 deletions(-)

diff --git a/R/SDTree.R b/R/SDTree.R
index 4d0a54e..6c63fc7 100644
--- a/R/SDTree.R
+++ b/R/SDTree.R
@@ -138,253 +138,8 @@ SDTree <- function(formula = NULL, data = NULL, x = NULL, y = NULL, max_leaves =
   if(!is.null(mtry) && mtry < 1) stop('mtry must be larger than 0')
   if(n < 2 * min_sample) stop('n must be at least 2 * min_sample')
   if(max_candidates < 1) stop('max_candidates must be at least 1')
-
-  # estimate spectral transformation
-
-  if(!is.null(A)){
-    if(is.null(gamma)) stop('gamma must be provided if A is provided')
-    if(is.vector(A)) A <- matrix(A)
-    if(!is.matrix(A)) stop('A must be a matrix')
-    if(nrow(A) != n) stop('A must have n rows')
-    Wf <- get_Wf(A, gamma)
-  }else {
-    Wf <- function(v) v
-  }
-
-  if(is.null(Qf)){
-    if(!is.null(A)){
-      Qf <- function(v) get_Qf(Wf(X), Q_type, trim_quantile, q_hat, Q_scale)(Wf(v))
-    }else{
-      Qf <- get_Qf(X, Q_type, trim_quantile, q_hat, Q_scale)
-    }
-  }else{
-    if(!is.function(Qf)) stop('Q must be a function')
-    if(length(Qf(rnorm(n))) == n) stop('Q must map from n to n')
-  }
-  
-  #selection of predictors
-  if(!is.null(predictors)){
-    if(is.character(predictors)){
-      if(!all(predictors %in% colnames(X)))
-        stop("predictors must either be numeric columne index or in colnames of X")
-      predictors <- which(colnames(X) %in% predictors)
-    }
-    if(is.numeric(predictors)){
-      if(!all(predictors > 0 & predictors <= ncol(X)))
-        stop("predictors must either be numeric columne index or in colnames of X")
-    }
-    pred_names <- colnames(X)
-    X <- matrix(X[, predictors], ncol = length(predictors))
-    if(!is.null(pred_names)){
-      colnames(X) <- pred_names[predictors]
-    }
-  }
-  
-  # number of covariates
-  p <- ncol(X)
-  
-  if(!is.null(mtry) && mtry > p) stop('mtry must be at most p')
-
-  # calculate first estimate
-  E <- matrix(1, n, 1)
-  E_tilde <- Qf(E)
-  Ue <- E_tilde / sqrt(sum(E_tilde ** 2))
-  Y_tilde <- Qf(Y)
-
-  # solve linear model
-  c_hat <- qr.coef(qr(E_tilde), Y_tilde)
-  c_hat <- as.numeric(c_hat)
-
-  loss_start <- as.numeric(sum((Y_tilde - c_hat) ** 2) / n)
-  loss_temp <- loss_start
-
-  # initialize tree
-  treeInfo <- c("name", "left", "right", "j", "s", "value", "dloss", 
-                "res_dloss", "cp", "n_samples", "leaf")
-  d <- length(treeInfo)
-  
-  tree <- matrix(0, ncol = d, nrow = 1, dimnames = list(NULL, treeInfo))
-  tree[1, c("name", "value", "dloss", "cp", "n_samples", "leaf")] <- 
-    c(1, c_hat, loss_start, 10, n, 1)
-  treeSize <- 1
-
-  # memory for optimal splits
-  memory <- list()
-  potential_splits <- 1
-
-  # variable importance
-  var_imp <- rep(0, p)
-  names(var_imp) <- colnames(X)
-
-  after_mtry <- 0
-
-  for(i in 1:max_leaves){
-    # iterate over all possible splits every time
-    # for slow but slightly better solution
-    if(!fast){
-      potential_splits <- 1:i
-      to_small <- sapply(potential_splits, 
-                         function(x){sum(E[, x]) < min_sample*2})
-      potential_splits <- potential_splits[!to_small]
-    }
-    
-    #iterate over new to estimate splits
-    for(branch in potential_splits){
-      # get samples in branch to evaluate
-      E_branch <- E[, branch]
-      index <- which(E_branch == 1)
-      X_branch <- matrix(X[index, ], nrow = length(index))
-      
-      # get potential splitting candidates
-      s <- find_s(X_branch, max_candidates = max_candidates)
-      n_splits <- nrow(s)
-      
-      # remove splits resulting in to small leaves
-      if(min_sample > 1) {
-        s <- s[-c(0:(min_sample - 1), (n_splits - min_sample + 2):(n_splits+1)), ]
-      }
-      s <- matrix(s, ncol = p)
-      
-      optSplits <- lapply(1:p, function(j){
-        s_j <- unique(s[, j])
-        E_next <- lapply(s_j, function(si) {
-          E_next <- matrix(0, nrow = n, ncol = 1)
-          E_next[index[X_branch[, j] > si], ] <- 1
-          if(sum(E_next) == 0)return(NULL)
-          E_next
-        })
-        E_next <- do.call(cbind, E_next)
-        if(is.null(E_next)) return(c(-10, j, 0, branch))
-        U_next_prime <- Qf_temp(E_next, Ue, Qf)
-        U_next_size <- colSums(U_next_prime ** 2)
-        dloss <- as.numeric(crossprod(U_next_prime, Y_tilde))**2 / U_next_size
-        
-        opt <- which.max(unlist(dloss))
-        c(dloss[[opt]], j, s_j[opt], branch)
-      })
-      memory[[branch]] <- do.call(rbind, optSplits)
-    }
-
-    if(i > after_mtry && !is.null(mtry)){
-      Losses_dec <- lapply(memory, function(branch){
-        branch[sample(1:p, mtry), ]})
-      Losses_dec <- do.call(rbind, Losses_dec)
-    }else {
-       Losses_dec <- do.call(rbind, memory)
-    }
-
-    loc <- which.max(Losses_dec[, 1])
-    best_branch <- Losses_dec[loc, 4]
-    j <- Losses_dec[loc, 2]
-    s <- Losses_dec[loc, 3]
-
-    if(Losses_dec[loc, 1] <= 0){
-      break
-    }
-    
-    # divide observations in leaf
-    index <- which(E[, best_branch] == 1)
-    index_n_branches <- index[X[index, j] > s]
-    
-    # new indicator matrix
-    E <- cbind(E, matrix(0, n, 1))
-    E[index_n_branches, best_branch] <- 0
-    E[index_n_branches, i+1] <- 1
-
-    E_tilde_branch <- E_tilde[, best_branch]
-    suppressWarnings({
-      E_tilde[, best_branch] <- Qf(E[, best_branch])
-    })
-    E_tilde <- cbind(E_tilde, matrix(E_tilde_branch - E_tilde[, best_branch]))
-
-    c_hat <- qr.coef(qr(E_tilde), Y_tilde)
-
-    u_next_prime <- Qf_temp(E[, i + 1], Ue, Qf)
-    Ue <- cbind(Ue, u_next_prime / sqrt(sum(u_next_prime ** 2)))
-    
-    # check if loss decrease is larger than minimum loss decrease
-    # and if linear model could be estimated
-    if(sum(is.na(as.numeric(c_hat))) > 0){
-      warning('singulaer matrix QE, tree might be to large, consider increasing cp')
-      break
-    }
-
-    loss_dec <- as.numeric(loss_temp - loss(Y_tilde, E_tilde %*% c_hat))
-    loss_temp <- loss_temp - loss_dec
-
-    if(loss_dec <= cp * loss_start){
-      break
-    }
-    # add loss decrease to variable importance
-    var_imp[j] <- var_imp[j] + loss_dec
-
-    # add space for the two new leaves
-    tree <- rbind(tree, matrix(0, nrow = 2, ncol = d))
-    
-    # select leaf to split
-    leaves <- tree[, "leaf"] == 1
-    toSplit <- leaves & (tree[, "name"] == best_branch)
-    if(sum(toSplit) != 1) stop("Tries to split more than one leaf")
-    
-    # save split rule
-    tree[toSplit, c("left", "right", "j", "s", "res_dloss", "leaf")] <- 
-      c(treeSize + 1, treeSize + 2, j, s, loss_dec, 2)
-    
-    # add new leaves
-    tree[treeSize + 1, c("name", "dloss", "cp", "n_samples", "leaf")] <- 
-      c(tree[toSplit, "name"], loss_dec, loss_dec / loss_start, sum(E[, best_branch] == 1), 1)
-    tree[treeSize + 2, c("name", "dloss", "cp", "n_samples", "leaf")] <- 
-      c(i + 1, loss_dec, loss_dec / loss_start, sum(E[, i + 1] == 1), 1)
-    treeSize <- treeSize + 2
-
-    # add estimates to tree leaves
-    c_hat <- as.numeric(c_hat)
-    # access leaf estimates by leaf names (i.e. columns of E)
-    tree[tree[, "leaf"] == 1, "value"] <- c_hat[tree[tree[, "leaf"] == 1, "name"]]
-
-    # the two new partitions need to be checked for optimal splits in next iteration
-    potential_splits <- c(best_branch, i + 1)
-
-    # a partition with less than min_sample observations or unique samples 
-    # are not available for further splits
-    to_small <- sapply(potential_splits, function(x){
-      new_samples <- nrow(unique(matrix(X[as.logical(E[, x]),], nrow = sum(E[, x]))))
-      if(is.null(new_samples)) new_samples <- 0
-      (new_samples < min_sample * 2)
-      })
-    if(sum(to_small) > 0){
-      for(el in potential_splits[to_small]){
-        # to small partitions cannot decrease the loss
-        memory[[el]] <- matrix(0, p, 4)
-      }
-      potential_splits <- potential_splits[!to_small]
-    }
-  }
-
-  if(i == max_leaves){
-    warning('maximum number of iterations was reached, consider increasing m!')
-  }
-
-  # predict the test set
-  f_X_hat <- traverse_tree(tree, X)
-  
-  var_names <- colnames(data.frame(X))
-  names(var_imp) <- var_names
-
-  # cp max of all splits after
-  new_cp <- getCp_max(tree)
-  tree[new_cp[[2]], "cp"] <- new_cp[[1]]
   
-  # use max cp over siblings to ensure binary tree
-  for(i in 1:nrow(tree)){
-    if(tree[i, c("j")] != 0){
-      tree[tree[i, c("left", "right")], "cp"] <- 
-        max(tree[tree[i, c("left", "right")], "cp"])
-    }
-  }
-
-  res <- list(predictions = f_X_hat, tree = tree, 
-              var_names = var_names, var_importance = var_imp)
-  class(res) <- 'SDTree'
-  res
+  return(estimate_tree(boot_index = NULL, Y, X, A, max_leaves, cp, min_sample, mtry, fast,
+                       Q_type, trim_quantile, q_hat, Qf, gamma, max_candidates, 
+                       Q_scale, predictors))
 }

From d3665a6efc54fcd89ad3b74858e534fc3f5bb07a Mon Sep 17 00:00:00 2001
From: Markus Ulmer <markus.ulmer@stat.math.ethz.ch>
Date: Tue, 9 Dec 2025 17:25:01 +0100
Subject: [PATCH 05/15] update future usage

---
 R/SDForest.R | 47 ++++++++++++++++++++---------------------------
 1 file changed, 20 insertions(+), 27 deletions(-)

diff --git a/R/SDForest.R b/R/SDForest.R
index 7580dd6..962ef7f 100644
--- a/R/SDForest.R
+++ b/R/SDForest.R
@@ -276,27 +276,21 @@ SDForest <- function(formula = NULL, data = NULL, x = NULL, y = NULL, nTree = 10
       ind <- do.call(c, ind)
   }
   
-  #use random generater that works with multiprocessing
+  #use random generator that works with multiprocessing
   ok <- RNGkind("L'Ecuyer-CMRG")
-  
+
   # Worker wrapper for bagged trees
   worker_fun <- function(i) {
-    Xi <- matrix(X[i, ], ncol = ncol(X))
-    colnames(Xi) <- colnames(X)
-    if(!is.null(A)){
-      Ai <- matrix(A[i, ], ncol = ncol(A))
-    }else{
-      Ai <- NULL
-    }
-    
     # protect SDTree call
     res_i <- tryCatch({
-      tree_obj <- SDTree(x = Xi, y = Y[i],
-                         cp = cp, min_sample = min_sample,
+      tree_obj <- estimate_tree(X = X, Y = Y, Qf = NULL,
+                         cp = cp, min_sample = min_sample, max_leaves = n,
                          Q_type = Q_type, trim_quantile = trim_quantile,
-                         q_hat = q_hat, mtry = mtry, A = Ai, gamma = gamma, 
-                         max_candidates = max_candidates, 
-                         Q_scale = Q_scale, predictors = predictors)
+                         q_hat = q_hat, mtry = mtry, A = A, gamma = gamma, 
+                         max_candidates = max_candidates, fast = TRUE,
+                         Q_scale = Q_scale, predictors = predictors, 
+                         boot_index = i)
+      
       list(ok = TRUE, tree = tree_obj)
     }, error = function(e) {
       list(ok = FALSE, error = conditionMessage(e))
@@ -304,30 +298,28 @@ SDForest <- function(formula = NULL, data = NULL, x = NULL, y = NULL, nTree = 10
       # convert warnings to tagged results if needed
       list(ok = TRUE, tree = NULL, warning = conditionMessage(w))
     })
+    p(sprintf("i=%g", i))
+    
     res_i
   }
   
+  state <- progressr::handlers(global = NA)
+  if(verbose & !state) progressr::handlers(global = TRUE)
+  p <- progressr::progressor(along = ind)
   if(mc.cores > 1){
-    if(Sys.info()[["sysname"]] == "Linux"){
-      if(verbose) print('mclapply')
-      res_list <- parallel::mclapply(ind, worker_fun, mc.cores = mc.cores)
-    }else{
-      if(verbose) print('future')
-      future::plan('multisession', workers = mc.cores)
-      res_list <- future.apply::future_lapply(future.seed = TRUE, X = ind, worker_fun)
-    }
-  }else{
-    res_list <- pbapply::pblapply(ind, worker_fun)
+    plan <- if (parallelly::supportsMulticore()) "multicore" else "multisession"
+    with(future::plan(plan, workers = mc.cores), local = TRUE)
   }
-  RNGkind(ok[1])
+  res_list <- future.apply::future_lapply(future.seed = TRUE, X = ind, worker_fun)
   
-  #check worker statuses
+  # check worker statuses
   failed_workers <- which(vapply(res_list, function(z) !isTRUE(z$ok), logical(1)))
   if (length(failed_workers) > 0) {
     stop(sprintf("SDForest: %d worker(s) failed, first error: %s",
                  length(failed_workers), res_list[[failed_workers[1]]]$error))
   }
   res <- lapply(res_list, function(res) res$tree)
+  if(verbose & !state) progressr::handlers(global = FALSE)
   
   #selection of predictors
   if(!is.null(predictors)){
@@ -437,6 +429,7 @@ SDForest <- function(formula = NULL, data = NULL, x = NULL, y = NULL, nTree = 10
     output$ooEnv_predictions <- ooEnv_predictions
   }
 
+  RNGkind(ok[1])
   class(output) <- 'SDForest'
   output
 }

From 34fd2938830d07442d8dc08cc9d7ad9572343d0f Mon Sep 17 00:00:00 2001
From: markus <markus_ulmer@bluewin.ch>
Date: Wed, 10 Dec 2025 21:38:15 +0100
Subject: [PATCH 06/15] change verbose

---
 DESCRIPTION | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/DESCRIPTION b/DESCRIPTION
index 904ee4f..0a56aeb 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -21,7 +21,9 @@ Imports:
     tidyr,
     fda,
     grplasso,
-    rlang
+    rlang, 
+    progressr, 
+    parallelly
 Suggests: 
     plotly,
     datasets,

From 8094d7f8cb55aca965c4dec1f05874201fdfcf99 Mon Sep 17 00:00:00 2001
From: markus <markus_ulmer@bluewin.ch>
Date: Wed, 10 Dec 2025 21:39:33 +0100
Subject: [PATCH 07/15] update/fix progess reporting and parallelization

---
 R/SDForest.R                       | 17 ++++++-----
 R/partDependence.R                 | 41 ++++++++++++++-----------
 R/paths.R                          | 48 +++++++++++++++++++++++-------
 R/plot.R                           | 42 +++++++++++++++-----------
 R/predict.R                        | 37 ++++++++---------------
 man/SDForest.Rd                    |  8 +++--
 man/partDependence.Rd              | 17 +++++++++--
 man/plot.SDForest.Rd               |  5 +++-
 man/predict.SDForest.Rd            | 10 +++++--
 man/regPath.SDForest.Rd            | 18 ++++++++++-
 man/stabilitySelection.SDForest.Rd |  5 +++-
 tests/testthat/test-parallel.R     |  2 +-
 12 files changed, 159 insertions(+), 91 deletions(-)

diff --git a/R/SDForest.R b/R/SDForest.R
index 962ef7f..13578be 100644
--- a/R/SDForest.R
+++ b/R/SDForest.R
@@ -34,8 +34,9 @@
 #' @param mtry Number of randomly selected covariates to consider for a split, 
 #' if \code{NULL} half of the covariates are available for each split. 
 #' \eqn{\text{mtry} = \lfloor \frac{p}{2} \rfloor}
-#' @param mc.cores Number of cores to use for parallel processing,
-#' if \code{mc.cores > 1} the trees are estimated in parallel.
+#' @param mc.cores Number of cores to use for parallel computation `vignette("Runtime")`. 
+#' The `future` package is used for parallel processing. 
+#' To use custom processing plans mc.cores has to be <= 1, see [`future` package](https://future.futureverse.org/).
 #' @param Q_type Type of deconfounding, one of 'trim', 'pca', 'no_deconfounding'. 
 #' 'trim' corresponds to the Trim transform \insertCite{Cevid2020SpectralModels}{SDModels} 
 #' as implemented in the Doubly debiased lasso \insertCite{Guo2022DoublyConfounding}{SDModels}, 
@@ -67,7 +68,8 @@
 #' @param Q_scale Should data be scaled to estimate the spectral transformation? 
 #' Default is \code{TRUE} to not reduce the signal of high variance covariates, 
 #' and we do not know of a scenario where this hurts.
-#' @param verbose If \code{TRUE} fitting information is shown.
+#' @param verbose If \code{TRUE} progress updates are shown using the `progressr` package. 
+#' To customize the progress bar, see [`progressr` package](https://progressr.futureverse.org/articles/progressr-intro.html)
 #' @param predictors Subset of colnames(X) or numerical indices of the covariates 
 #' for which an effect on y should be estimated. All the other covariates are only
 #' used for deconfounding.
@@ -298,19 +300,19 @@ SDForest <- function(formula = NULL, data = NULL, x = NULL, y = NULL, nTree = 10
       # convert warnings to tagged results if needed
       list(ok = TRUE, tree = NULL, warning = conditionMessage(w))
     })
-    p(sprintf("i=%g", i))
+    p()
     
     res_i
   }
   
-  state <- progressr::handlers(global = NA)
-  if(verbose & !state) progressr::handlers(global = TRUE)
-  p <- progressr::progressor(along = ind)
+  progressr::with_progress({
+  p <- progressr::progressor(along = ind, enable = verbose)
   if(mc.cores > 1){
     plan <- if (parallelly::supportsMulticore()) "multicore" else "multisession"
     with(future::plan(plan, workers = mc.cores), local = TRUE)
   }
   res_list <- future.apply::future_lapply(future.seed = TRUE, X = ind, worker_fun)
+  })
   
   # check worker statuses
   failed_workers <- which(vapply(res_list, function(z) !isTRUE(z$ok), logical(1)))
@@ -319,7 +321,6 @@ SDForest <- function(formula = NULL, data = NULL, x = NULL, y = NULL, nTree = 10
                  length(failed_workers), res_list[[failed_workers[1]]]$error))
   }
   res <- lapply(res_list, function(res) res$tree)
-  if(verbose & !state) progressr::handlers(global = FALSE)
   
   #selection of predictors
   if(!is.null(predictors)){
diff --git a/R/partDependence.R b/R/partDependence.R
index c514ade..5c87a1f 100644
--- a/R/partDependence.R
+++ b/R/partDependence.R
@@ -17,8 +17,11 @@
 #' If NULL, tries to extract the dataset from the model object.
 #' @param subSample Number of samples to draw from the original data for the empirical 
 #' partial dependence. If NULL, all the observations are used.
-#' @param mc.cores Number of cores to use for parallel computation. 
-#' Parallel computing is only supported for unix.
+#' @param verbose If \code{TRUE} progress updates are shown using the `progressr` package. 
+#' To customize the progress bar, see [`progressr` package](https://progressr.futureverse.org/)
+#' @param mc.cores Number of cores to use for parallel computation `vignette("Runtime")`. 
+#' The `future` package is used for parallel processing. 
+#' To use custom processing plans mc.cores has to be <= 1, see [`future` package](https://future.futureverse.org/).
 #' @return An object of class \code{partDependence} containing
 #' \item{preds_mean}{The average prediction for each value of the variable of interest.}
 #' \item{x_seq}{The sequence of values for the variable of interest.}
@@ -34,7 +37,8 @@
 #' plot(pd)
 #' @seealso \code{\link{SDForest}}, \code{\link{SDTree}}
 #' @export
-partDependence <- function(object, j, X = NULL, subSample = NULL, mc.cores = 1){
+partDependence <- function(object, j, X = NULL, subSample = NULL, 
+                           verbose = TRUE, mc.cores = 1){
   j_name <- j
 
   if(is.null(X)){
@@ -58,21 +62,22 @@ partDependence <- function(object, j, X = NULL, subSample = NULL, mc.cores = 1){
   
   x_seq <- seq(min(X[, j]), max(X[, j]), length.out = 100)
   
-  if(mc.cores > 1){
-    preds <- parallel::mclapply(x_seq, function(x){
-      X_new <- X
-      X_new[, j] <- x
-      pred <- predict(object, newdata = X_new)
-      return(pred)
-    }, mc.cores = mc.cores)
-  }else{
-    preds <- pbapply::pblapply(x_seq, function(x){
-      X_new <- X
-      X_new[, j] <- x
-      pred <- predict(object, newdata = X_new)
-      return(pred)
-    })
-  }
+  progressr::with_progress({
+    p <- progressr::progressor(along = x_seq, enable = verbose)
+    if(mc.cores > 1){
+      plan <- if (parallelly::supportsMulticore()) "multicore" else "multisession"
+      with(future::plan(plan, workers = mc.cores), local = TRUE)
+    }
+    preds <- future.apply::future_lapply(future.seed = TRUE, 
+                                         X = x_seq, 
+            function(x){
+              X_new <- X
+              X_new[, j] <- x
+              pred <- predict(object, newdata = X_new)
+              p(sprintf("x=%g", x))
+              return(pred)
+            })
+  })
   preds <- do.call(rbind, preds)
   preds_mean <- rowMeans(preds)
   
diff --git a/R/paths.R b/R/paths.R
index 406553b..9a9ad96 100644
--- a/R/paths.R
+++ b/R/paths.R
@@ -58,6 +58,11 @@ regPath.SDTree <- function(object, cp_seq = NULL, ...){
 #' @param X The training data, if NULL the data from the forest object is used.
 #' @param Y The training response variable, if NULL the data from the forest object is used.
 #' @param Q The transformation matrix, if NULL the data from the forest object is used.
+#' @param verbose If \code{TRUE} progress updates are shown using the `progressr` package. 
+#' To customize the progress bar, see [`progressr` package](https://progressr.futureverse.org/articles/progressr-intro.html)
+#' @param mc.cores Number of cores to use for parallel computation `vignette("Runtime")`. 
+#' The `future` package is used for parallel processing. 
+#' To use custom processing plans mc.cores has to be <= 1, see [`future` package](https://future.futureverse.org/).
 #' @param ... Further arguments passed to or from other methods.
 #' @return An object of class \code{paths} containing
 #' \item{cp}{The sequence of complexity parameters.}
@@ -84,21 +89,29 @@ regPath.SDTree <- function(object, cp_seq = NULL, ...){
 #' 
 #' @export
 regPath.SDForest <- function(object, cp_seq = NULL, X = NULL, Y = NULL, Q = NULL, 
-                             ...){
+                             verbose = TRUE, mc.cores = 1, ...){
   if(is.null(cp_seq)) cp_seq <- get_cp_seq(object)
   cp_seq <- sort(cp_seq)
 
-  res <- pbapply::pblapply(cp_seq, function(cp){
-    pruned_object <- prune(object, cp, X, Y, Q, pred = FALSE)
-    return(list(var_importance = pruned_object$var_importance, 
-                oob_SDloss = pruned_object$oob_SDloss, 
-                oob_loss = pruned_object$oob_loss))})
+  progressr::with_progress({
+    p <- progressr::progressor(along = cp_seq, enable = verbose)
+    if(mc.cores > 1){
+      plan <- if (parallelly::supportsMulticore()) "multicore" else "multisession"
+      with(future::plan(plan, workers = mc.cores), local = TRUE)
+    }
+    res <- future.apply::future_lapply(future.seed = TRUE, 
+                                       X = cp_seq, 
+                    function(cp){
+                      pruned_object <- prune(object, cp, X, Y, Q, pred = FALSE)
+                      p(sprintf("cp=%g", cp))
+                      return(list(var_importance = pruned_object$var_importance, 
+                                  oob_SDloss = pruned_object$oob_SDloss, 
+                                  oob_loss = pruned_object$oob_loss))})
+  })
 
-  #varImp_path <- t(sapply(res, function(x)x$var_importance))
   varImp_path <- do.call(rbind, lapply(res, function(x)x$var_importance))
   colnames(varImp_path) <- object$var_names
 
-  #loss_path <- t(sapply(res, function(x) c(x$oob_SDloss, x$oob_loss)))
   loss_path <- do.call(rbind, lapply(res, function(x) c(x$oob_SDloss, x$oob_loss)))
   colnames(loss_path) <- c('oob SDE', 'oob MSE')
   paths <- list(cp = cp_seq, varImp_path = varImp_path, loss_path = loss_path,
@@ -125,6 +138,8 @@ stabilitySelection <- function(object, ...) UseMethod('stabilitySelection')
 #' @param object an SDForest object
 #' @param cp_seq A sequence of complexity parameters.
 #' If NULL, the sequence is calculated automatically using only relevant values.
+#' @param verbose If \code{TRUE} progress updates are shown using the `progressr` package. 
+#' To customize the progress bar, see [`progressr` package](https://progressr.futureverse.org/articles/progressr-intro.html)
 #' @param ... Further arguments passed to or from other methods.
 #' @return An object of class \code{paths} containing
 #' \item{cp}{The sequence of complexity parameters.}
@@ -145,11 +160,22 @@ stabilitySelection <- function(object, ...) UseMethod('stabilitySelection')
 #' plot(paths, plotly = TRUE)
 #' }
 #' @export
-stabilitySelection.SDForest <- function(object, cp_seq = NULL, ...){
+stabilitySelection.SDForest <- function(object, cp_seq = NULL, 
+                                        verbose = TRUE, ...){
   if(is.null(cp_seq)) cp_seq <- get_cp_seq(object)
   cp_seq <- sort(cp_seq)
-
-  imp <- pbapply::pblapply(object$forest, function(x)regPath(x, cp_seq)$varImp_path > 0)
+  
+  progressr::with_progress({
+    p <- progressr::progressor(along = 1:length(object$forest), enable = verbose)
+    imp <- lapply(1:length(object$forest), 
+                  function(i){
+                    path <- regPath(object$forest[[i]], 
+                                    cp_seq, vebose = FALSE)$varImp_path > 0
+                    p()
+                    path
+    })
+  })
+  
 
   imp <- lapply(imp, function(x)matrix(as.numeric(x), ncol = ncol(x)))
   imp <- Reduce('+', imp) / length(object$forest)
diff --git a/R/plot.R b/R/plot.R
index 1eb2f67..6f79785 100644
--- a/R/plot.R
+++ b/R/plot.R
@@ -44,7 +44,7 @@ plot.SDTree <- function(x, main = "", digits = 2, digits_decisions = 2,
   if(weighted){
     #res_dloss <- edges$res_dloss
     #re scale edge weights
-    edges$res_dloss <- (edges$res_dloss - min(edges$res_dloss)) / (max(edges$res_dloss) - min(edges$res_dloss)) * 2 + 0.5
+    edges$res_dloss <- (edges$res_dloss - min(edges$res_dloss)) / ((max(edges$res_dloss) - min(edges$res_dloss)) * 2 + 0.1) + 0.5
   }else{
     edges$res_dloss <- 0.5
   }
@@ -66,7 +66,8 @@ plot.SDTree <- function(x, main = "", digits = 2, digits_decisions = 2,
     ggplot2::annotate("segment", x = nLeaves*1.02, y = depth*0.98, xend = nLeaves*1.1, yend = depth*0.98, 
                       arrow = ggplot2::arrow(length = ggplot2::unit(0.1, "inches")), color = "black") +
     ggplot2::annotate("text", x = nLeaves * 1.05, y = depth, label = "no", size = 4) + 
-    ggplot2::ggtitle(main)
+    ggplot2::ggtitle(main) +
+    ggplot2::ylim(-0.1*depth, depth*1.1)
 }
 
 #' Plot performance of SDForest against number of trees
@@ -75,6 +76,8 @@ plot.SDTree <- function(x, main = "", digits = 2, digits_decisions = 2,
 #' not stabilize one can fit another SDForest and merge the two.
 #' @author Markus Ulmer
 #' @param x Fitted object of class \code{SDForest}.
+#' @param verbose If \code{TRUE} progress updates are shown using the `progressr` package. 
+#' To customize the progress bar, see [`progressr` package](https://progressr.futureverse.org/articles/progressr-intro.html)
 #' @param ... Further arguments passed to or from other methods.
 #' @return A ggplot object
 #' @seealso \code{\link{SDForest}}
@@ -85,26 +88,31 @@ plot.SDTree <- function(x, main = "", digits = 2, digits_decisions = 2,
 #' model <- SDForest(x = X, y = y, Q_type = 'no_deconfounding', cp = 0.5, nTree = 500)
 #' plot(model)
 #' @export
-plot.SDForest <- function(x, ...){
+plot.SDForest <- function(x, verbose = TRUE, ...){
   Y_ <- x$Q(x$Y)
   
   # iterate over observations
-  preds <- pbapply::pblapply(1:length(x$Y), function(i){
-    if(length(x$oob_ind[[i]]) == 0){
-      return(NA)
-    }
-    xi <- matrix(x$X[i, ], nrow = 1)
-    
-    # predict for each tree
-    pred <- rep(NA, length(x$forest))
-    model_idx <- x$oob_ind[[i]]
-    model_idx <- model_idx[model_idx <= length(x$forest)]
-    predictions <- sapply(model_idx, function(model){
-      traverse_tree(x$forest[[model]]$tree, xi)
+  progressr::with_progress({
+    p <- progressr::progressor(along = 1:length(x$Y), enable = verbose)
+    preds <- lapply(1:length(x$Y), function(i){
+      if(length(x$oob_ind[[i]]) == 0){
+        return(NA)
+      }
+      xi <- matrix(x$X[i, ], nrow = 1)
+      
+      # predict for each tree
+      pred <- rep(NA, length(x$forest))
+      model_idx <- x$oob_ind[[i]]
+      model_idx <- model_idx[model_idx <= length(x$forest)]
+      predictions <- sapply(model_idx, function(model){
+        traverse_tree(x$forest[[model]]$tree, xi)
+      })
+      pred[model_idx] <- predictions
+      p()
+      pred
     })
-    pred[model_idx] <- predictions
-    pred
   })
+
   
   preds <- do.call(rbind, preds)
   
diff --git a/R/predict.R b/R/predict.R
index 8af3fd5..b8c8483 100644
--- a/R/predict.R
+++ b/R/predict.R
@@ -35,8 +35,11 @@ predict.SDTree <- function(object, newdata, ...){
 #' @param object Fitted object of class \code{SDForest}.
 #' @param newdata New test data of class \code{data.frame} containing
 #' the covariates for which to predict the response.
-#' @param mc.cores Number of cores to use for parallel processing,
-#' if \code{mc.cores > 1} the trees predict in parallel.
+#' @param mc.cores Number of cores to use for parallel computation `vignette("Runtime")`. 
+#' The `future` package is used for parallel processing. 
+#' To use custom processing plans mc.cores has to be <= 1, see [`future` package](https://future.futureverse.org/).
+#' @param verbose If \code{TRUE} progress updates are shown using the `progressr` package. 
+#' To customize the progress bar, see [`progressr` package](https://progressr.futureverse.org/articles/progressr-intro.html)
 #' @param ... Further arguments passed to or from other methods.
 #' @return A vector of predictions for the new data.
 #' @examples
@@ -48,7 +51,7 @@ predict.SDTree <- function(object, newdata, ...){
 #' predict(model, newdata = data.frame(X))
 #' @seealso \code{\link{SDForest}}
 #' @export
-predict.SDForest <- function(object, newdata, mc.cores = 1, ...){
+predict.SDForest <- function(object, newdata, mc.cores = 1, verbose = FALSE, ...){
   # predict function for the spectral deconfounded random forest
   # using the mean over all trees as the prediction
   # check data type
@@ -62,9 +65,9 @@ predict.SDForest <- function(object, newdata, mc.cores = 1, ...){
   
   if(any(is.na(X))) stop('X must not contain missing values')
   
-  worker_fun <- function(tree){
+  worker_fun <- function(i){
     preds_i <- tryCatch({
-      preds <- traverse_tree(tree[["tree"]], X)
+      preds <- traverse_tree(object$forest[[i]][["tree"]], X)
       list(ok = TRUE, preds = preds)
     }, error = function(e) {
       list(ok = FALSE, error = conditionMessage(e))
@@ -74,28 +77,14 @@ predict.SDForest <- function(object, newdata, mc.cores = 1, ...){
     })
     preds_i
   }
-  #if(mc.cores > 1){
-  #  plan <- if (parallelly::supportsMulticore()) "multicore" else "multisession"
-  #  with(future::plan(plan, workers = mc.cores), local = TRUE)
-  #}
-  #preds_list <- future.apply::future_lapply(future.seed = TRUE, 
-  #                                          X = object$forest, worker_fun)
   
   if(mc.cores > 1){
-    if(Sys.info()[["sysname"]] == "Linux"){
-      preds_list <- parallel::mclapply(object$forest, 
-                                       worker_fun, 
-                                       mc.cores = mc.cores)
-    }else{
-      future::plan('multisession', workers = mc.cores)
-      preds_list <- future.apply::future_lapply(future.seed = TRUE, 
-                                                X = object$forest, 
-                                                worker_fun)
-    }
-  }else{
-    preds_list <- pbapply::pblapply(object$forest, worker_fun)
+    plan <- if (parallelly::supportsMulticore()) "multicore" else "multisession"
+    with(future::plan(plan, workers = mc.cores), local = TRUE)
   }
-  
+  preds_list <- future.apply::future_lapply(future.seed = TRUE, 
+                                            X = 1:length(object$forest), 
+                                            worker_fun)
   #check worker statuses
   failed_workers <- which(vapply(preds_list, function(z) !isTRUE(z$ok), logical(1)))
   if (length(failed_workers) > 0) {
diff --git a/man/SDForest.Rd b/man/SDForest.Rd
index ab85532..14ca259 100644
--- a/man/SDForest.Rd
+++ b/man/SDForest.Rd
@@ -57,8 +57,9 @@ A split is only performed if both resulting leaves have at least
 if \code{NULL} half of the covariates are available for each split. 
 \eqn{\text{mtry} = \lfloor \frac{p}{2} \rfloor}}
 
-\item{mc.cores}{Number of cores to use for parallel processing,
-if \code{mc.cores > 1} the trees are estimated in parallel.}
+\item{mc.cores}{Number of cores to use for parallel computation `vignette("Runtime")`. 
+The `future` package is used for parallel processing. 
+To use custom processing plans mc.cores has to be <= 1, see [`future` package](https://future.futureverse.org/).}
 
 \item{Q_type}{Type of deconfounding, one of 'trim', 'pca', 'no_deconfounding'. 
 'trim' corresponds to the Trim transform \insertCite{Cevid2020SpectralModels}{SDModels} 
@@ -105,7 +106,8 @@ proposed at each node for each covariate.}
 Default is \code{TRUE} to not reduce the signal of high variance covariates, 
 and we do not know of a scenario where this hurts.}
 
-\item{verbose}{If \code{TRUE} fitting information is shown.}
+\item{verbose}{If \code{TRUE} progress updates are shown using the `progressr` package. 
+To customize the progress bar, see [`progressr` package](https://progressr.futureverse.org/articles/progressr-intro.html)}
 
 \item{predictors}{Subset of colnames(X) or numerical indices of the covariates 
 for which an effect on y should be estimated. All the other covariates are only
diff --git a/man/partDependence.Rd b/man/partDependence.Rd
index 4af75a6..ea9b932 100644
--- a/man/partDependence.Rd
+++ b/man/partDependence.Rd
@@ -4,7 +4,14 @@
 \alias{partDependence}
 \title{Partial dependence}
 \usage{
-partDependence(object, j, X = NULL, subSample = NULL, mc.cores = 1)
+partDependence(
+  object,
+  j,
+  X = NULL,
+  subSample = NULL,
+  verbose = TRUE,
+  mc.cores = 1
+)
 }
 \arguments{
 \item{object}{A model object that has a predict method that takes newdata as argument 
@@ -20,8 +27,12 @@ If NULL, tries to extract the dataset from the model object.}
 \item{subSample}{Number of samples to draw from the original data for the empirical 
 partial dependence. If NULL, all the observations are used.}
 
-\item{mc.cores}{Number of cores to use for parallel computation. 
-Parallel computing is only supported for unix.}
+\item{verbose}{If \code{TRUE} progress updates are shown using the `progressr` package. 
+To customize the progress bar, see [`progressr` package](https://progressr.futureverse.org/)}
+
+\item{mc.cores}{Number of cores to use for parallel computation `vignette("Runtime")`. 
+The `future` package is used for parallel processing. 
+To use custom processing plans mc.cores has to be <= 1, see [`future` package](https://future.futureverse.org/).}
 }
 \value{
 An object of class \code{partDependence} containing
diff --git a/man/plot.SDForest.Rd b/man/plot.SDForest.Rd
index 2e8261d..b1f5efe 100644
--- a/man/plot.SDForest.Rd
+++ b/man/plot.SDForest.Rd
@@ -4,11 +4,14 @@
 \alias{plot.SDForest}
 \title{Plot performance of SDForest against number of trees}
 \usage{
-\method{plot}{SDForest}(x, ...)
+\method{plot}{SDForest}(x, verbose = TRUE, ...)
 }
 \arguments{
 \item{x}{Fitted object of class \code{SDForest}.}
 
+\item{verbose}{If \code{TRUE} progress updates are shown using the `progressr` package. 
+To customize the progress bar, see [`progressr` package](https://progressr.futureverse.org/articles/progressr-intro.html)}
+
 \item{...}{Further arguments passed to or from other methods.}
 }
 \value{
diff --git a/man/predict.SDForest.Rd b/man/predict.SDForest.Rd
index af78ba5..c78120a 100644
--- a/man/predict.SDForest.Rd
+++ b/man/predict.SDForest.Rd
@@ -4,7 +4,7 @@
 \alias{predict.SDForest}
 \title{Predictions for the SDForest}
 \usage{
-\method{predict}{SDForest}(object, newdata, mc.cores = 1, ...)
+\method{predict}{SDForest}(object, newdata, mc.cores = 1, verbose = FALSE, ...)
 }
 \arguments{
 \item{object}{Fitted object of class \code{SDForest}.}
@@ -12,8 +12,12 @@
 \item{newdata}{New test data of class \code{data.frame} containing
 the covariates for which to predict the response.}
 
-\item{mc.cores}{Number of cores to use for parallel processing,
-if \code{mc.cores > 1} the trees predict in parallel.}
+\item{mc.cores}{Number of cores to use for parallel computation `vignette("Runtime")`. 
+The `future` package is used for parallel processing. 
+To use custom processing plans mc.cores has to be <= 1, see [`future` package](https://future.futureverse.org/).}
+
+\item{verbose}{If \code{TRUE} progress updates are shown using the `progressr` package. 
+To customize the progress bar, see [`progressr` package](https://progressr.futureverse.org/articles/progressr-intro.html)}
 
 \item{...}{Further arguments passed to or from other methods.}
 }
diff --git a/man/regPath.SDForest.Rd b/man/regPath.SDForest.Rd
index 24f0b75..3172a02 100644
--- a/man/regPath.SDForest.Rd
+++ b/man/regPath.SDForest.Rd
@@ -5,7 +5,16 @@
 \alias{regPath}
 \title{Calculate the regularization path of an SDForest}
 \usage{
-\method{regPath}{SDForest}(object, cp_seq = NULL, X = NULL, Y = NULL, Q = NULL, ...)
+\method{regPath}{SDForest}(
+  object,
+  cp_seq = NULL,
+  X = NULL,
+  Y = NULL,
+  Q = NULL,
+  verbose = TRUE,
+  mc.cores = 1,
+  ...
+)
 }
 \arguments{
 \item{object}{an SDForest object}
@@ -19,6 +28,13 @@ If NULL, the sequence is calculated automatically using only relevant values.}
 
 \item{Q}{The transformation matrix, if NULL the data from the forest object is used.}
 
+\item{verbose}{If \code{TRUE} progress updates are shown using the `progressr` package. 
+To customize the progress bar, see [`progressr` package](https://progressr.futureverse.org/articles/progressr-intro.html)}
+
+\item{mc.cores}{Number of cores to use for parallel computation `vignette("Runtime")`. 
+The `future` package is used for parallel processing. 
+To use custom processing plans mc.cores has to be <= 1, see [`future` package](https://future.futureverse.org/).}
+
 \item{...}{Further arguments passed to or from other methods.}
 }
 \value{
diff --git a/man/stabilitySelection.SDForest.Rd b/man/stabilitySelection.SDForest.Rd
index d48f305..bf7f994 100644
--- a/man/stabilitySelection.SDForest.Rd
+++ b/man/stabilitySelection.SDForest.Rd
@@ -5,7 +5,7 @@
 \alias{stabilitySelection}
 \title{Calculate the stability selection of an SDForest}
 \usage{
-\method{stabilitySelection}{SDForest}(object, cp_seq = NULL, ...)
+\method{stabilitySelection}{SDForest}(object, cp_seq = NULL, verbose = TRUE, ...)
 }
 \arguments{
 \item{object}{an SDForest object}
@@ -13,6 +13,9 @@
 \item{cp_seq}{A sequence of complexity parameters.
 If NULL, the sequence is calculated automatically using only relevant values.}
 
+\item{verbose}{If \code{TRUE} progress updates are shown using the `progressr` package. 
+To customize the progress bar, see [`progressr` package](https://progressr.futureverse.org/articles/progressr-intro.html)}
+
 \item{...}{Further arguments passed to or from other methods.}
 }
 \value{
diff --git a/tests/testthat/test-parallel.R b/tests/testthat/test-parallel.R
index b094ca2..e5347c4 100644
--- a/tests/testthat/test-parallel.R
+++ b/tests/testthat/test-parallel.R
@@ -1,5 +1,5 @@
 set.seed(1)
-n <- 50
+n <- 20
 X <- matrix(rnorm(n * 20), nrow = n)
 Y <- rnorm(n)
 

From 30fc45693b10811c1a14fdf35da08aae8773b43d Mon Sep 17 00:00:00 2001
From: Markus Ulmer <markus.ulmer@stat.math.ethz.ch>
Date: Fri, 12 Dec 2025 11:38:11 +0100
Subject: [PATCH 08/15] remove dependence pbapply is replaced by progressr

---
 DESCRIPTION | 1 -
 1 file changed, 1 deletion(-)

diff --git a/DESCRIPTION b/DESCRIPTION
index 0a56aeb..5f56d20 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -16,7 +16,6 @@ Imports:
     ggraph,
     gridExtra,
     parallel,
-    pbapply,
     Rdpack,
     tidyr,
     fda,

From f77960b53aa2b6826f82a6d85f365bcf482cac2f Mon Sep 17 00:00:00 2001
From: Markus Ulmer <markus.ulmer@stat.math.ethz.ch>
Date: Fri, 12 Dec 2025 11:38:35 +0100
Subject: [PATCH 09/15] update parallelization and progress reporting

---
 R/SDAM.R | 85 +++++++++++++++++++++++++++++++++++++++++---------------
 1 file changed, 62 insertions(+), 23 deletions(-)

diff --git a/R/SDAM.R b/R/SDAM.R
index 527b80a..b5057dd 100644
--- a/R/SDAM.R
+++ b/R/SDAM.R
@@ -36,9 +36,11 @@
 #' Default is \code{TRUE} to not reduce the signal of high variance covariates.
 #' @param ind_lin A vector of indices specifying which covariates to model linearly (i.e. not expanded into basis function).
 #'  Default is `NULL`.
-#' @param mc.cores  Number of cores to use for parallel processing, if \code{mc.cores > 1}
-#' the cross validation is parallelized. Default is `1`. (only supported for unix)
-#' @param verbose If \code{TRUE} fitting information is shown.
+#' @param mc.cores Number of cores to use for parallel computation `vignette("Runtime")`. 
+#' The `future` package is used for parallel processing. 
+#' To use custom processing plans mc.cores has to be <= 1, see [`future` package](https://future.futureverse.org/).
+#' @param verbose If \code{TRUE} progress updates are shown using the `progressr` package. 
+#' To customize the progress bar, see [`progressr` package](https://progressr.futureverse.org/articles/progressr-intro.html)
 #' @param notRegularized A vector of indices specifying which covariates not to regularize.
 #'  Default is `NULL`.
 #' @return An object of class `SDAM` containing the following elements:
@@ -98,7 +100,8 @@
 #' # predict 
 #' predict(model, newdata = wine[42, ])
 #' 
-#' ## alternative function call
+#' ## alternative function call with customized progress bar
+#' progressr::handlers(handler_txtprogressbar(char = cli::col_red(cli::symbol$heart)))
 #' mod_none <- SDAM(x = as.matrix(wine[1:10, -c(1, 2)]), y = wine$alcohol[1:10], 
 #'                  Q_type = "no_deconfounding", nfolds = 2, n_K = 4, 
 #'                  n_lambda1 = 4, n_lambda2 = 8)
@@ -156,8 +159,15 @@ SDAM <- function(formula = NULL, data = NULL, x = NULL, y = NULL,
   n_unique_X <- apply(X, 2, function(x){length(unique(x))})
   
   # Generate the design and model parameters for every K in vK
-  lmodK <- list()
-  for (i in 1:length(vK)){
+  progressr::with_progress({
+    pr <- progressr::progressor(along = 1:(n_K), enable = verbose)
+    pr(sprintf("Design generation"), amount = 0, class = "sticky")
+    if(mc.cores > 1){
+      plan <- if (parallelly::supportsMulticore()) "multicore" else "multisession"
+      with(future::plan(plan, workers = min(mc.cores, n_K)), local = TRUE)
+    }
+    
+  lmodK <- future.apply::future_lapply(future.seed = TRUE, 1:length(vK), function(i){
     K <- vK[i]
     # effective number of basis functions for each Xj, j = 1,..., p
     # K_eff[j] can be at most equal to the number of unique values of Xj
@@ -213,9 +223,11 @@ SDAM <- function(formula = NULL, data = NULL, x = NULL, y = NULL,
       lambda <- rep(0, n_lambda1)
       index <- rep(1, length(index))
     }
-    lmodK[[i]] <- list(Rlist = Rlist, lbreaks = lbreaks, index = index, B = B, 
-                       QB = QB, lambda = lambda, K = K, K_eff = K_eff)
-  }
+    pr()
+    list(Rlist = Rlist, lbreaks = lbreaks, index = index, 
+         QB = QB, lambda = lambda, K = K, K_eff = K_eff)
+  })
+  })
   
   # generate folds for CV
   ind <- sample(rep(1:nfolds, length.out = n), replace = FALSE)
@@ -236,20 +248,34 @@ SDAM <- function(formula = NULL, data = NULL, x = NULL, y = NULL,
 
     QYpred <- predict(mod, newdata = listK$QB[test, ])
     mse <- apply(QYpred, 2, function(y){mean((y - QY[test])^2)})
+    pr()
     return(mse)
   }
   
   mse_fold <- function(l){
-    MSEl <- lapply(lmodK, function(listK){mse_fold_K(l, listK)})
+    MSEl <- future.apply::future_lapply(future.seed = TRUE, lmodK, 
+                                        mse_fold_K, 
+                                        l = l)
     return(unname(do.call(rbind, MSEl)))
   }
   
-  if(verbose) print("Initial cross-validation")
-  if(mc.cores == 1){
-    MSES <- pbapply::pblapply(1:nfolds, mse_fold) 
-  } else {
-    MSES <- parallel::mclapply(1:nfolds, mse_fold, mc.cores = mc.cores)
-  }
+  #use random generator that works with multiprocessing
+  ok <- RNGkind("L'Ecuyer-CMRG")
+  progressr::with_progress({
+    pr <- progressr::progressor(along = 1:(nfolds * n_K), enable = verbose)
+    pr(sprintf("Initial cross-validation"), amount = 0, class = "sticky")
+    if(mc.cores > 1){
+      plan <- if (parallelly::supportsMulticore()) "multicore" else "multisession"
+      with(future::plan(plan, workers = min(mc.cores, nfolds)), local = TRUE)
+    }
+    MSES <- lapply(X = 1:nfolds, mse_fold)
+  })
+  
+  #if(mc.cores == 1){
+  #  MSES <- pbapply::pblapply(1:nfolds, mse_fold) 
+  #} else {
+  #  MSES <- parallel::mclapply(1:nfolds, mse_fold, mc.cores = mc.cores)
+  #}
   
   # aggregate MSEs over folds
   MSES.agg <- Reduce("+", MSES) / nfolds
@@ -267,13 +293,25 @@ SDAM <- function(formula = NULL, data = NULL, x = NULL, y = NULL,
                                length.out = n_lambda2))
   }
   
-  if(verbose) print("Second stage cross-validation")
-  if(mc.cores == 1){
-    MSES1 <- pbapply::pblapply(1:nfolds, mse_fold_K, listK = modK.min)
-  } else {
-    MSES1 <- parallel::mclapply(1:nfolds, mse_fold_K, listK = modK.min, 
-                                mc.cores = mc.cores)
-  }
+  progressr::with_progress({
+    pr <- progressr::progressor(along = 1:nfolds, enable = verbose)
+    pr(sprintf("Second stage cross-validation"), amount = 0, class = "sticky")
+    if(mc.cores > 1){
+      plan <- if (parallelly::supportsMulticore()) "multicore" else "multisession"
+      with(future::plan(plan, workers = min(mc.cores, nfolds)), local = TRUE)
+    }
+    MSES1 <- future.apply::future_lapply(future.seed = TRUE, 
+                                         X = 1:nfolds, 
+                                         mse_fold_K, 
+                                         listK = modK.min)
+  })
+  #if(verbose) print("Second stage cross-validation")
+  #if(mc.cores == 1){
+  #  MSES1 <- pbapply::pblapply(1:nfolds, mse_fold_K, listK = modK.min)
+  #} else {
+  #  MSES1 <- parallel::mclapply(1:nfolds, mse_fold_K, listK = modK.min, 
+  #                              mc.cores = mc.cores)
+  #}
   
   MSES1 <- do.call(rbind, MSES1)
   MSE1.agg <- apply(MSES1, 2, mean)
@@ -339,6 +377,7 @@ SDAM <- function(formula = NULL, data = NULL, x = NULL, y = NULL,
   # estimated active set
   lreturn$active <- active
   class(lreturn) <- "SDAM"
+  RNGkind(ok[1])
   return(lreturn)
 }
 

From 5c811bb97c1285b6588e532e3b2d552571e5930f Mon Sep 17 00:00:00 2001
From: Markus Ulmer <markus.ulmer@stat.math.ethz.ch>
Date: Fri, 12 Dec 2025 11:39:00 +0100
Subject: [PATCH 10/15] update examples with custom progress bar

---
 R/SDForest.R | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/R/SDForest.R b/R/SDForest.R
index 13578be..ef61cc2 100644
--- a/R/SDForest.R
+++ b/R/SDForest.R
@@ -129,6 +129,8 @@
 #' # comparison to classical random forest
 #' fit_ranger <- ranger::ranger(Y ~ ., train_data, importance = 'impurity')
 #' 
+#' # you can customize the progress bar see parameter verbose
+#' progressr::handlers("cli")
 #' fit <- SDForest(x = X, y = Y, nTree = 100, Q_type = 'pca', q_hat = 2)
 #' fit <- SDForest(Y ~ ., nTree = 100, train_data)
 #' fit
@@ -139,6 +141,7 @@
 #' plot(fit)
 #' 
 #' # a few more might be helpfull
+#' progressr::handlers(handler_txtprogressbar(char = cli::col_red(cli::symbol$heart)))
 #' fit2 <- SDForest(Y ~ ., nTree = 50, train_data) 
 #' fit <- mergeForest(fit, fit2)
 #' 

From cfcf4f7cb988dea2aad609230907e783a0fafe4e Mon Sep 17 00:00:00 2001
From: Markus Ulmer <markus.ulmer@stat.math.ethz.ch>
Date: Fri, 12 Dec 2025 11:39:28 +0100
Subject: [PATCH 11/15] add reference to the future package

---
 inst/REFERENCES.bib | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/inst/REFERENCES.bib b/inst/REFERENCES.bib
index 08e990b..b2f964a 100644
--- a/inst/REFERENCES.bib
+++ b/inst/REFERENCES.bib
@@ -186,4 +186,16 @@ @misc{londschien2025domain
       archivePrefix={arXiv},
       primaryClass={stat.AP},
       url={https://arxiv.org/abs/2507.21783}, 
+}
+
+@Article{RJ-2021-048,
+  author = {Henrik Bengtsson},
+  title = {A Unifying Framework for Parallel and Distributed Processing in R using Futures},
+  year = {2021},
+  journal = {The R Journal},
+  doi = {10.32614/RJ-2021-048},
+  url = {https://doi.org/10.32614/RJ-2021-048},
+  pages = {208--227},
+  volume = {13},
+  number = {2},
 }
\ No newline at end of file

From d14ebb83836f70218a520a1a6587114b0b93290f Mon Sep 17 00:00:00 2001
From: Markus Ulmer <markus.ulmer@stat.math.ethz.ch>
Date: Fri, 12 Dec 2025 11:40:11 +0100
Subject: [PATCH 12/15] update description of parallelization and progress
 report

---
 man/SDAM.Rd           | 11 +++++++----
 man/SDForest.Rd       |  3 +++
 vignettes/Runtime.Rmd | 27 ++++++++++++++++++++-------
 3 files changed, 30 insertions(+), 11 deletions(-)

diff --git a/man/SDAM.Rd b/man/SDAM.Rd
index 703d338..7aa2df8 100644
--- a/man/SDAM.Rd
+++ b/man/SDAM.Rd
@@ -66,10 +66,12 @@ Default is \code{TRUE} to not reduce the signal of high variance covariates.}
 \item{ind_lin}{A vector of indices specifying which covariates to model linearly (i.e. not expanded into basis function).
 Default is `NULL`.}
 
-\item{mc.cores}{Number of cores to use for parallel processing, if \code{mc.cores > 1}
-the cross validation is parallelized. Default is `1`. (only supported for unix)}
+\item{mc.cores}{Number of cores to use for parallel computation `vignette("Runtime")`. 
+The `future` package is used for parallel processing. 
+To use custom processing plans mc.cores has to be <= 1, see [`future` package](https://future.futureverse.org/).}
 
-\item{verbose}{If \code{TRUE} fitting information is shown.}
+\item{verbose}{If \code{TRUE} progress updates are shown using the `progressr` package. 
+To customize the progress bar, see [`progressr` package](https://progressr.futureverse.org/articles/progressr-intro.html)}
 
 \item{notRegularized}{A vector of indices specifying which covariates not to regularize.
 Default is `NULL`.}
@@ -139,7 +141,8 @@ plot(partDependence(model, mostImp))
 # predict 
 predict(model, newdata = wine[42, ])
 
-## alternative function call
+## alternative function call with customized progress bar
+progressr::handlers(handler_txtprogressbar(char = cli::col_red(cli::symbol$heart)))
 mod_none <- SDAM(x = as.matrix(wine[1:10, -c(1, 2)]), y = wine$alcohol[1:10], 
                  Q_type = "no_deconfounding", nfolds = 2, n_K = 4, 
                  n_lambda1 = 4, n_lambda2 = 8)
diff --git a/man/SDForest.Rd b/man/SDForest.Rd
index 14ca259..0991e7b 100644
--- a/man/SDForest.Rd
+++ b/man/SDForest.Rd
@@ -185,6 +185,8 @@ sim_data$j
 # comparison to classical random forest
 fit_ranger <- ranger::ranger(Y ~ ., train_data, importance = 'impurity')
 
+# you can customize the progress bar see parameter verbose
+progressr::handlers("cli")
 fit <- SDForest(x = X, y = Y, nTree = 100, Q_type = 'pca', q_hat = 2)
 fit <- SDForest(Y ~ ., nTree = 100, train_data)
 fit
@@ -195,6 +197,7 @@ fit
 plot(fit)
 
 # a few more might be helpfull
+progressr::handlers(handler_txtprogressbar(char = cli::col_red(cli::symbol$heart)))
 fit2 <- SDForest(Y ~ ., nTree = 50, train_data) 
 fit <- mergeForest(fit, fit2)
 
diff --git a/vignettes/Runtime.Rmd b/vignettes/Runtime.Rmd
index 0e051df..29683ad 100644
--- a/vignettes/Runtime.Rmd
+++ b/vignettes/Runtime.Rmd
@@ -20,22 +20,35 @@ For this package, we have written methods to estimate regressions trees and rand
 $$\hat{f} = \text{argmin}_{f' \in \mathcal{F}} \frac{||Q(\mathbf{Y} - f'(\mathbf{X}))||_2^2}{n}$$
 The package is currently fully written in @RCoreTeam2024R:Computing for now and it gets quite slow for larger sample sizes. There might be a faster cpp version in the future, but for now, there are a few ways to increase the computations if you apply the methods to larger data sets.
 
-## Computations
+## Parallel Processing
 
-Some speedup can be achieved by taking advantage of modern hardware.
+Many of our functions support parallel processing using the parameter `mc.cores` to control the number of cores used. 
 
-### Multicore
-
-When estimating an SDForest, the most obvious way to increase the computations is to fit the individual trees on different cores in parallel. Parallel computing is supported for both Unix and Windows. Depending on how your system is set up, some linear algebra libraries might already run in parallel. In this case, the speed improvement from choosing more than one core to run on might not be that large. Be aware of potential RAM-overflows.
-
-```{r core, eval=FALSE}
+```{r core1, eval=FALSE}
 # fits the individual SDTrees in parallel on 22 cores
 fit <- SDForest(x = X, y = Y, mc.cores = 22)
 
+# predicts with the individual SDTrees in parallel
+predict(fit, newdata = data.frame(X), mc.cores = 10)
+
+# evaluates different strengths of regularization in parallel
+paths <- regPath(fit, mc.cores = 10)
+
+# predicts potential outcomes for different values of covariate one in parallel
+pd <- partDependence(model, 1, mc.cores = 10)
+
 # performs cross validation in parallel
 model <- SDAM(X, Y, cv_k = 5, mc.cores = 5)
 ```
 
+To support parallelization, we use the R package [future](https://future.futureverse.org/) @RJ-2021-048. If `mc.cores` is larger than one, `multicore` (forking of processes) is used if possible, and `multisession` otherwise. If `mc.cores` is smaller than two, we process sequentially or use a pre-specified plan. This way, a user can freely choose and set up any [backend](https://future.futureverse.org/articles/future-2b-backend.html).
+
+```{r core2, eval=FALSE}
+# predefined plan
+future::plan(multisession, workers = 2)
+# fits the individual SDTrees in parallel on 2 cores
+fit <- SDForest(x = X, y = Y)
+```
 
 ## Approximations
 

From 2fa590e1dd10b169245f0ec463c0ce5179f300f5 Mon Sep 17 00:00:00 2001
From: Markus Ulmer <markus.ulmer@stat.math.ethz.ch>
Date: Fri, 12 Dec 2025 12:01:38 +0100
Subject: [PATCH 13/15] fix details

---
 DESCRIPTION                   |  1 +
 R/SDForest.R                  |  2 +-
 README.Rmd                    |  2 +-
 README.md                     | 12 ++++--------
 man/SDForest.Rd               |  2 +-
 vignettes/articles/SDTree.Rmd |  2 +-
 6 files changed, 9 insertions(+), 12 deletions(-)

diff --git a/DESCRIPTION b/DESCRIPTION
index 5f56d20..1a0931a 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -32,6 +32,7 @@ Suggests:
     ranger,
     HDclassif,
     qpdf,
+    cli,
     testthat (>= 3.0.0)
 RdMacros: Rdpack
 Encoding: UTF-8
diff --git a/R/SDForest.R b/R/SDForest.R
index ef61cc2..d7d180b 100644
--- a/R/SDForest.R
+++ b/R/SDForest.R
@@ -141,7 +141,7 @@
 #' plot(fit)
 #' 
 #' # a few more might be helpfull
-#' progressr::handlers(handler_txtprogressbar(char = cli::col_red(cli::symbol$heart)))
+#' progressr::handlers(progressr::handler_txtprogressbar(char = cli::col_red(cli::symbol$heart)))
 #' fit2 <- SDForest(Y ~ ., nTree = 50, train_data) 
 #' fit <- mergeForest(fit, fit2)
 #' 
diff --git a/README.Rmd b/README.Rmd
index e2f3708..0ade575 100644
--- a/README.Rmd
+++ b/README.Rmd
@@ -75,7 +75,7 @@ You can also estimate just one Spectrally Deconfounded Regression Tree using the
 
 ```{r SDTree, fig.height=7}
 Tree <- SDTree(Y ~ ., train_data, cp = 0.01)
-plot(Tree)
+#plot(Tree)
 ```
 
 Or you can estimate a Spectrally Deconfounded Additive Model, with theoretical guarantees, using the `SDAM` function. See also the article [SDAM](https://www.markus-ulmer.ch/SDModels/articles/SDAM.html).
diff --git a/README.md b/README.md
index 62bc28f..ba5d343 100644
--- a/README.md
+++ b/README.md
@@ -76,8 +76,8 @@ fit
 #> 
 #> Number of trees:  100 
 #> Number of covariates:  50 
-#> OOB loss:  0.1554798 
-#> OOB spectral loss:  0.05246865
+#> OOB loss:  0.1617913 
+#> OOB spectral loss:  0.05095329
 ```
 
 You can also estimate just one Spectrally Deconfounded Regression Tree
@@ -86,24 +86,20 @@ using the `SDTree` function. See also the article
 
 ``` r
 Tree <- SDTree(Y ~ ., train_data, cp = 0.01)
-plot(Tree)
+#plot(Tree)
 ```
 
-<img src="man/figures/README-SDTree-1.png" width="100%" />
-
 Or you can estimate a Spectrally Deconfounded Additive Model, with
 theoretical guarantees, using the `SDAM` function. See also the article
 [SDAM](https://www.markus-ulmer.ch/SDModels/articles/SDAM.html).
 
 ``` r
 model <- SDAM(Y ~ ., train_data)
-#> [1] "Initial cross-validation"
-#> [1] "Second stage cross-validation"
 model
 #> SDAM result
 #> 
 #> Number of covariates:  50 
-#> Number of active covariates:  4
+#> Number of active covariates:  3
 ```
 
 <div id="refs" class="references csl-bib-body hanging-indent"
diff --git a/man/SDForest.Rd b/man/SDForest.Rd
index 0991e7b..015d33a 100644
--- a/man/SDForest.Rd
+++ b/man/SDForest.Rd
@@ -197,7 +197,7 @@ fit
 plot(fit)
 
 # a few more might be helpfull
-progressr::handlers(handler_txtprogressbar(char = cli::col_red(cli::symbol$heart)))
+progressr::handlers(progressr::handler_txtprogressbar(char = cli::col_red(cli::symbol$heart)))
 fit2 <- SDForest(Y ~ ., nTree = 50, train_data) 
 fit <- mergeForest(fit, fit2)
 
diff --git a/vignettes/articles/SDTree.Rmd b/vignettes/articles/SDTree.Rmd
index a7184a3..cfc9e08 100644
--- a/vignettes/articles/SDTree.Rmd
+++ b/vignettes/articles/SDTree.Rmd
@@ -90,7 +90,7 @@ gg_res
 
 Now, we can directly look at the estimated trees. On top is the true underlying random regression tree, and below are the two estimated trees. In the middle is the SDTree and on the bottom is the plain classical regression tree. The SDTree uses all the causal parents of $Y$ for the first few splits. In addition to a tree that results in a similar partition as the true causal function one of the leaves is split using the spurious covariate $X_{458}$. The classical regression tree on the other hand uses no causal variable at all, but only spurious covariates. This might result in a more predictive estimation, but has no causal meaning and would predict completely wrong in interventional settings.
 
-```{r plot_tree,  echo=TRUE, message=FALSE, warning=FALSE, fig.height=7}
+```{r plot_tree,  echo=TRUE, message=FALSE, warning=FALSE}
 plot(dat$tree, main = expression({f^0} (X)))
 plot(dec_tree, main = expression({f^SDTree} (X)))
 plot(plain_tree, expression({f^classicalTree} (X)))

From 4b8ee2bbda7f5e0c9541efc377e32a9284c5999e Mon Sep 17 00:00:00 2001
From: Markus Ulmer <markus.ulmer@stat.math.ethz.ch>
Date: Fri, 12 Dec 2025 12:07:04 +0100
Subject: [PATCH 14/15] fix progress example

---
 R/SDAM.R    | 2 +-
 man/SDAM.Rd | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/R/SDAM.R b/R/SDAM.R
index b5057dd..4195e90 100644
--- a/R/SDAM.R
+++ b/R/SDAM.R
@@ -101,7 +101,7 @@
 #' predict(model, newdata = wine[42, ])
 #' 
 #' ## alternative function call with customized progress bar
-#' progressr::handlers(handler_txtprogressbar(char = cli::col_red(cli::symbol$heart)))
+#' progressr::handlers(progressr::handler_txtprogressbar(char = cli::col_red(cli::symbol$heart)))
 #' mod_none <- SDAM(x = as.matrix(wine[1:10, -c(1, 2)]), y = wine$alcohol[1:10], 
 #'                  Q_type = "no_deconfounding", nfolds = 2, n_K = 4, 
 #'                  n_lambda1 = 4, n_lambda2 = 8)
diff --git a/man/SDAM.Rd b/man/SDAM.Rd
index 7aa2df8..286888b 100644
--- a/man/SDAM.Rd
+++ b/man/SDAM.Rd
@@ -142,7 +142,7 @@ plot(partDependence(model, mostImp))
 predict(model, newdata = wine[42, ])
 
 ## alternative function call with customized progress bar
-progressr::handlers(handler_txtprogressbar(char = cli::col_red(cli::symbol$heart)))
+progressr::handlers(progressr::handler_txtprogressbar(char = cli::col_red(cli::symbol$heart)))
 mod_none <- SDAM(x = as.matrix(wine[1:10, -c(1, 2)]), y = wine$alcohol[1:10], 
                  Q_type = "no_deconfounding", nfolds = 2, n_K = 4, 
                  n_lambda1 = 4, n_lambda2 = 8)

From 2b111479cdaffb8abf3589c882d7c9325d7bf11d Mon Sep 17 00:00:00 2001
From: Markus Ulmer <markus.ulmer@stat.math.ethz.ch>
Date: Fri, 12 Dec 2025 12:27:07 +0100
Subject: [PATCH 15/15] version update

---
 DESCRIPTION | 2 +-
 NEWS.md     | 6 ++++++
 2 files changed, 7 insertions(+), 1 deletion(-)

diff --git a/DESCRIPTION b/DESCRIPTION
index 1a0931a..9abb785 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,6 +1,6 @@
 Package: SDModels
 Title: Spectrally Deconfounded Models
-Version: 2.0.1
+Version: 2.0.2
 Authors@R: c(
   person("Markus", "Ulmer", email = "markus.ulmer@stat.math.ethz.ch", 
          role = c("aut", "cre", "cph"), comment = c(ORCID = "0000-0001-7783-8475")), 
diff --git a/NEWS.md b/NEWS.md
index 03671ab..00376d5 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -1,3 +1,9 @@
+# SDModels 2.0.2
+
+* Switch all the parallelization to futures. See `vignette("Runtime")`
+* Switch all the progress updates to progressr. Progress updates are now also available for parallel processing and are customizable.
+* Process are much more RAM efficient now.
+
 # SDModels 2.0.1
 
 * Fix bug in SDTree and SDForest where an error occurred, if X had columns with only one unique value.