For this, I would like to plot the ROC curve and obtain sensitivity, specificity, and confusion-matrix values, rather than only a single AUC at the end.
This is my current code in {stacks}. Any help would be greatly appreciated. Thanks!
########################################################################
# Define resampling and basic recipe
########################################################################
# Fix the RNG state so the fold assignment below is reproducible.
set.seed(1234)

# Leave one core free for the rest of the system. detectCores() may return NA
# (unknown) or 1, in which case the naive "- 1" would give NA or 0; clamp the
# result so that at least one worker thread is always requested.
cores <- max(1L, parallel::detectCores() - 1L, na.rm = TRUE)

# 10-fold cross-validation repeated 5 times, stratified on the outcome `status`.
d_folds <- vfold_cv(d, v = 10, repeats = 5, strata = status)

# Control objects that keep out-of-sample predictions (and the workflow).
# NOTE(review): `keep_pred`, `ctrl_grid` and `status_wkfl` are not referenced
# again in the visible part of this script -- confirm whether they are needed.
keep_pred <- control_resamples(save_pred = TRUE, save_workflow = TRUE)
ctrl_grid <- control_stack_grid() # use same control settings as in numeric response setting

# Shared preprocessing: model `status` on every other column of `d` after
# log-transforming all predictors.
log_recipe <- recipe(status ~ ., data = d) %>%
  step_log(all_predictors())
status_wkfl <- workflow() %>%
  add_recipe(log_recipe)
########################################################################
# Define models to predict status
########################################################################
# Every specification below is a classification model with fixed
# hyperparameter values; none of them carries a tune() placeholder.

# Logistic regression via the "glm" engine (no tuning parameters).
logistic_reg_spec_tuned <-
  logistic_reg() %>%
  set_mode("classification") %>%
  set_engine("glm")

# Gradient-boosted trees via xgboost. `stop_iter` is not set here.
boost_tree_spec_tuned <-
  boost_tree(
    mtry = 2,
    trees = 10000,
    min_n = 27,
    tree_depth = 4,
    learn_rate = 0.001,
    loss_reduction = 0.005,
    sample_size = 0.8125
  ) %>%
  set_engine("xgboost") %>%
  set_mode("classification")

# k-nearest neighbours via kknn.
nearest_neighbor_spec_tuned <-
  nearest_neighbor(
    neighbors = 14,
    weight_func = "gaussian",
    dist_power = 1.525
  ) %>%
  set_engine("kknn") %>%
  set_mode("classification")

# Linear discriminant analysis via MASS (no tuning parameters).
discrim_linear_spec_tuned <-
  discrim_linear() %>%
  set_engine("MASS") %>%
  set_mode("classification")

# Naive Bayes via the naivebayes engine.
naive_bayes_spec_tuned <-
  naive_Bayes(
    smoothness = 0.5,
    Laplace = 1.75
  ) %>%
  set_engine("naivebayes") %>%
  set_mode("classification")

# Quadratic discriminant analysis via MASS (no tuning parameters).
discrim_quad_spec_tuned <-
  discrim_quad() %>%
  set_engine("MASS") %>%
  set_mode("classification")

# Random forest via ranger. The extra arguments are handed to the engine:
# permutation importance, `cores` threads, probability output, fixed seed.
rand_forest_spec_tuned <-
  rand_forest(
    mtry = 2,
    trees = 10000,
    min_n = 7
  ) %>%
  set_engine(
    "ranger",
    importance = "permutation",
    num.threads = cores,
    probability = TRUE,
    seed = 1234
  ) %>%
  set_mode("classification")

# Linear support vector machine via kernlab.
svm_linear_spec_tuned <-
  svm_linear(
    cost = 0.002,
    margin = 0.167
  ) %>%
  set_engine("kernlab") %>%
  set_mode("classification")
########################################################################
# Create {workflowsets} object
########################################################################
# Pair the single preprocessor (named "log") with each of the eight model
# specifications. The list names become part of each workflow's id; the
# script later looks one up as "log_rand_forest".
antibody_wkfl_tuned <- workflow_set(
  preproc = list(
    log = log_recipe
  ),
  models = list(
    rand_forest      = rand_forest_spec_tuned,
    logistic_reg     = logistic_reg_spec_tuned,
    discrim_linear   = discrim_linear_spec_tuned,
    discrim_quad     = discrim_quad_spec_tuned,
    nearest_neighbor = nearest_neighbor_spec_tuned,
    boost_tree       = boost_tree_spec_tuned,
    svm_linear       = svm_linear_spec_tuned,
    naive_bayes      = naive_bayes_spec_tuned
  )
)
########################################################################
# Run resampling
########################################################################
# Keep the out-of-sample predictions and the workflow for every candidate;
# both are saved so the results can be used as stack candidates afterwards.
grid_ctrl <- control_grid(
  save_pred = TRUE,
  save_workflow = TRUE,
  parallel_over = "everything"
)

# Evaluate every workflow in the set on the same resamples, using the same
# seed for each one.
# NOTE(review): the specs in this script contain no tune() placeholders, so
# `grid = 25` presumably has no effect here -- confirm.
final_models <- workflow_map(
  antibody_wkfl_tuned,
  resamples = d_folds,
  grid = 25,
  control = grid_ctrl,
  seed = 1234
)
########################################################################
# Putting my stack together
########################################################################
# Start an empty data stack and register every resampled workflow in
# `final_models` as a candidate member.
antibody_wkfl_stack <- stacks() %>%
  add_candidates(final_models)

# Inspect the stacked candidate predictions as a plain tibble.
as_tibble(antibody_wkfl_stack)
########################################################################
# Blend the stack
########################################################################
# Learn the stacking coefficients from the (not yet fitted) data stack.
# The original piped from `antibody_wkfl_stack_nofit`, a name that is never
# defined in this script; the data stack built above is `antibody_wkfl_stack`.
antibody_wkfl_blend <-
  antibody_wkfl_stack %>%
  blend_predictions()

# Print a summary of the blended stack.
antibody_wkfl_blend

# view roc_auc for various penalty parameters:
antibody_wkfl_blend$metrics %>%
  filter(.metric == "roc_auc")
########################################################################
# Fit the stack
########################################################################
# Full pipeline in one chain, starting again from `final_models`:
#   1. stacks()            - initialise an empty stack
#   2. add_candidates()    - add the {workflowsets} results as candidates
#   3. blend_predictions() - determine how to combine the predictions
#   4. fit_members()       - fit the candidates with non-zero stacking
#                            coefficients
antibody_wkfl_fit_stack <- stacks() %>%
  add_candidates(final_models) %>%
  blend_predictions() %>%
  fit_members()

# Print a summary of the fitted stack.
antibody_wkfl_fit_stack
########################################################################
# Evaluate stack performance
########################################################################
# The plots need a *blended* model stack. The original passed the raw data
# stack (`antibody_wkfl_stack`), for which {stacks} provides no autoplot
# method, so the blended and fitted stack is used instead.

# Plot: Trade-off between minimising number of members and optimising performance
autoplot(antibody_wkfl_fit_stack)

# Plot: Weights
autoplot(antibody_wkfl_fit_stack, type = "weights")

# Collect parameters e.g., for random forest model
collect_parameters(antibody_wkfl_stack, "log_rand_forest")
Hi tidymodels/stacks team,
I love the `stacks` package but cannot find any documentation on how to obtain model performance metrics, akin to the tidymodels workflow pipeline. I am interested in comparing the performance of the stacked model with that of my single-model workflows generated using `workflowsets`.
For this, I would like to plot the ROC curve and obtain sensitivity, specificity, and confusion-matrix values, rather than only a single AUC at the end.
This is my current code in {stacks}. Any help would be greatly appreciated. Thanks!