Model performance metrics: ROC, Conf Matrix, compare to single models

Hi tidymodels/stacks team, 

I **love** the `stacks` package, but I cannot find any documentation on how to obtain model performance metrics for a stacked model the way one can in the standard tidymodels workflow pipeline. 

I am interested in comparing the model performance of the stacks model to my **single** model workflows generated using `workflowsets`. 

For this, I would like to plot ROC curves and obtain sensitivity/specificity and confusion matrix values, rather than just a single AUC at the end. 

This is my current code in {stacks}. Any help would be greatly appreciated. Thanks! 

```{r}
# Resampling scheme and shared preprocessing recipe ----
set.seed(1234)

# Leave one core free. detectCores() may return NA, and on a single-core
# machine the subtraction yields 0, so clamp the result to at least 1.
cores <- max(1L, parallel::detectCores() - 1L, na.rm = TRUE)

# 10-fold cross-validation, repeated 5 times, stratified on `status`
d_folds <- vfold_cv(d, v = 10, repeats = 5, strata = status)

# Resampling control that keeps the predictions and the workflow
keep_pred <- control_resamples(save_pred = TRUE, save_workflow = TRUE)

# Use the same control settings as in the numeric response setting
ctrl_grid <- control_stack_grid()

# Recipe: `status` is the outcome; log-transform every predictor
log_recipe <- recipe(status ~ ., data = d) %>%
  step_log(all_predictors())

# Bare workflow carrying only the recipe
status_wkfl <- workflow() %>%
  add_recipe(log_recipe)

# Model specifications used to predict status ----
# Every hyperparameter below is a fixed value; nothing is marked for tuning.

# Logistic regression (glm) -- no tuning parameters
logistic_reg_spec_tuned <- logistic_reg() %>%
  set_engine("glm") %>%
  set_mode("classification")

# Gradient-boosted trees (xgboost); stop_iter is left unset
boost_tree_spec_tuned <- boost_tree(
  tree_depth = 4,
  learn_rate = 0.001,
  loss_reduction = 0.005,
  mtry = 2,
  min_n = 27,
  sample_size = 0.8125,
  trees = 10000
) %>%
  set_mode("classification") %>%
  set_engine("xgboost")

# k-nearest neighbours (kknn)
nearest_neighbor_spec_tuned <- nearest_neighbor(
  neighbors = 14,
  dist_power = 1.525,
  weight_func = "gaussian"
) %>%
  set_mode("classification") %>%
  set_engine("kknn")

# Linear discriminant analysis (MASS) -- no tuning parameters
discrim_linear_spec_tuned <- discrim_linear() %>%
  set_mode("classification") %>%
  set_engine("MASS")

# Naive Bayes (naivebayes)
naive_bayes_spec_tuned <- naive_Bayes(smoothness = 0.5, Laplace = 1.75) %>%
  set_mode("classification") %>%
  set_engine("naivebayes")

# Quadratic discriminant analysis (MASS) -- no tuning parameters
discrim_quad_spec_tuned <- discrim_quad() %>%
  set_mode("classification") %>%
  set_engine("MASS")

# Random forest (ranger); engine options are passed through set_engine()
rand_forest_spec_tuned <- rand_forest(trees = 10000, mtry = 2, min_n = 7) %>%
  set_mode("classification") %>%
  set_engine(
    "ranger",
    importance = "permutation",
    num.threads = cores,
    probability = TRUE,
    seed = 1234
  )

# Linear support vector machine (kernlab)
svm_linear_spec_tuned <- svm_linear(cost = 0.002, margin = 0.167) %>%
  set_mode("classification") %>%
  set_engine("kernlab")

# Create the {workflowsets} object ----
# One preprocessor (the log recipe, named "log") is crossed with each of the
# eight model specifications listed below.
antibody_wkfl_tuned <- workflow_set(
  preproc = list(log = log_recipe),
  models  = list(
    rand_forest      = rand_forest_spec_tuned,
    logistic_reg     = logistic_reg_spec_tuned,
    discrim_linear   = discrim_linear_spec_tuned,
    discrim_quad     = discrim_quad_spec_tuned,
    nearest_neighbor = nearest_neighbor_spec_tuned,
    boost_tree       = boost_tree_spec_tuned,
    svm_linear       = svm_linear_spec_tuned,
    naive_bayes      = naive_bayes_spec_tuned
  )
)

# Run resampling ----
# None of the model specifications contains tune() placeholders, so there is
# nothing for a tuning grid to search over. Each workflow is therefore fitted
# across the resamples with "fit_resamples" rather than "tune_grid".
# control_stack_resamples() supplies the control settings {stacks} needs
# from each candidate (saved predictions and saved workflow).
final_models <-
  antibody_wkfl_tuned %>%
  workflow_map(
    fn = "fit_resamples",
    seed = 1234,
    resamples = d_folds,
    control = control_stack_resamples()
  )

# Put the stack together ----
# Start from an empty stack and add the resampled {workflowsets} object
# `final_models` as candidate members.
antibody_wkfl_stack <- add_candidates(stacks(), final_models)

# Show the data stack as a plain tibble
as_tibble(antibody_wkfl_stack)

# Blend the stack ----
# Blend the candidates held in `antibody_wkfl_stack`, the data stack built
# in the previous section.
antibody_wkfl_blend <-
  antibody_wkfl_stack %>%
  blend_predictions()

antibody_wkfl_blend

# View roc_auc for the various penalty parameters
antibody_wkfl_blend$metrics %>%
  filter(.metric == "roc_auc")

# Fit the stack ----
# Built from scratch in one chain:
#   1. initialise an empty stack and add the {workflowsets} candidates
#   2. determine how to combine their predictions
#   3. fit the candidates with non-zero stacking coefficients
antibody_wkfl_fit_stack <-
  add_candidates(stacks(), final_models) %>%
  blend_predictions() %>%
  fit_members()

antibody_wkfl_fit_stack

# Evaluate stack performance ----
# The plots below need a blended/fitted model stack, so they use
# `antibody_wkfl_fit_stack` rather than the unblended data stack
# `antibody_wkfl_stack`.

# Plot: trade-off between minimising the number of members and optimising
# performance
autoplot(antibody_wkfl_fit_stack)

# Plot: stacking weights of the members
autoplot(antibody_wkfl_fit_stack, type = "weights")

# Collect parameters, e.g. for the random forest candidate
collect_parameters(antibody_wkfl_stack, "log_rand_forest")

```


Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Uh oh!

Model performance metrics: ROC, Conf Matrix, compare to single models #244

Metadata

Assignees

Labels

Type

Fields

Projects

Milestone

Relationships

Development

Uh oh!

Model performance metrics: ROC, Conf Matrix, compare to single models #244

Description

Metadata

Metadata

Assignees

Labels

Type

Fields

Projects

Milestone

Relationships

Development

Issue actions