Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ export(isp)
export(normalize_full_credit)
export(read_evals)
export(rubric_mae)
export(scores_from_metadata)
export(update_scores)
export(update_scores_in_metadata)
export(validate_metadata_json)
Expand Down
82 changes: 76 additions & 6 deletions R/computing-accuracy.R
Original file line number Diff line number Diff line change
Expand Up @@ -261,20 +261,70 @@ find_differences_wrt_students <- function(experts_file, student_file,
#'
#' @param file1 file path for first grades csv
#' @param file2 file path for second grades csv
#' @param metadata_file optional path to a metadata JSON file. When supplied,
#' rubric item point values are extracted and passed as weights to
#' `rubric_mae()`, producing a point-weighted MAE.
#'
#' @return a list
#'
#' @importFrom readr read_csv
#'
#' @export
compute_mae_and_isp <- function(file1, file2){
compute_mae_and_isp <- function(file1, file2, metadata_file = NULL){
eval1 <- readr::read_csv(file1, show_col_types = FALSE)
eval2 <- readr::read_csv(file2, show_col_types = FALSE)
list(MAE = rubric_mae(eval1, eval2),
weights <- if (!is.null(metadata_file)) scores_from_metadata(metadata_file) else NULL
list(MAE = rubric_mae(eval1, eval2, weights = weights),
ISP = isp(eval1, eval2))
}


#' Extract Rubric Item Scores from a Metadata JSON File
#'
#' Reads a metadata JSON file and returns the ordered numeric vector of rubric
#' item point values. This vector can be passed as the `weights` argument to
#' `rubric_mae()` to compute a point-weighted MAE.
#'
#' @param metadata_file path to a metadata JSON file
#' @param calibrated logical; if `TRUE` (default) extract scores from
#' `rubric$calibrated$scores`, otherwise from `rubric$uncalibrated$scores`
#'
#' @return a numeric vector of point values, one per rubric item
#'
#' @importFrom jsonlite read_json
#' @export
scores_from_metadata <- function(metadata_file, calibrated = TRUE) {
meta <- jsonlite::read_json(metadata_file)
rubric_type <- if (calibrated) "calibrated" else "uncalibrated"
scores <- meta$rubric[[rubric_type]]$scores
if (is.null(scores)) {
stop(paste0("No scores found in metadata for rubric type '", rubric_type, "'"))
}
scores_unlisted <- unlist(scores, use.names = FALSE)
numeric_scores <- suppressWarnings(as.numeric(scores_unlisted))

if (any(is.na(numeric_scores) & !is.na(scores_unlisted))) {
stop(
paste0(
"Scores in metadata for rubric type '", rubric_type,
"' must be numeric and contain no non-numeric values"
)
)
}

if (any(is.na(numeric_scores))) {
stop(
paste0(
"Scores in metadata for rubric type '", rubric_type,
"' must not contain NA values"
)
)
}

numeric_scores
}


#' Calculate Proportion of Identical Scores
#'
#' This function calculates the proportion of identical scores
Expand Down Expand Up @@ -324,21 +374,36 @@ isp <- function(eval1, eval2){
#' It's recommended to `normalize_full_credit()` for `eval1` and `eval2` prior
#' to using this function.
#'
#' When `weights` is supplied, each rubric item's disagreement is scaled by its
#' point value before summing, so a mismatch on a 1-point item contributes more
#' than a mismatch on a 0.5-point item. Use `scores_from_metadata()` to extract
#' the weights vector from a metadata JSON file.
#'
#' @param eval1 first dataframe of Gradescope evaluations
#' @param eval2 second dataframe of Gradescope evaluations
#' @param weights optional numeric vector of point values, one per rubric item
#' (in the same order as the R1, R2, ... columns). When `NULL` (default),
#' all items are treated as equally weighted.
#'
#' @return double for mean absolute error
#'
#' @export
rubric_mae <- function(eval1, eval2){
rubric_mae <- function(eval1, eval2, weights = NULL){
if (!is.null(weights)) {
rubric_cols <- grep("^R[0-9]+$", names(eval1), value = TRUE)
if (length(weights) != length(rubric_cols)) {
stop(paste0("Length of weights (", length(weights), ") must match ",
"number of rubric columns (", length(rubric_cols), ")"))
}
}
# find differences in rubric toggles
error_per_student <- find_differences(eval1, eval2)$error_per_student
error_per_student <- find_differences(eval1, eval2, weights = weights)$error_per_student
# mean absolute error calculation
mean(error_per_student)
}


find_differences <- function(eval1, eval2){
find_differences <- function(eval1, eval2, weights = NULL){
if (!("SID" %in% colnames(eval1)) || !("SID" %in% colnames(eval2))){
stop("Missing SID")
}
Expand Down Expand Up @@ -369,7 +434,12 @@ find_differences <- function(eval1, eval2){
# elementwise matrix comparison
check_equal <- rubric1 != rubric2

error_per_student <- rowSums(check_equal)
if (!is.null(weights)) {
error_per_student <- as.vector(check_equal %*% weights)
names(error_per_student) <- students
} else {
error_per_student <- rowSums(check_equal)
}

return (list(error_per_student = error_per_student,
rubric1 = rubric1,
Expand Down
1 change: 1 addition & 0 deletions _pkgdown.yml
Original file line number Diff line number Diff line change
Expand Up @@ -31,3 +31,4 @@ reference:
- compute_mae_and_isp
- isp
- rubric_mae
- scores_from_metadata
4 changes: 3 additions & 1 deletion inst/extdata/metadata-calibrated.json
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,9 @@
"scoring_type": "positive",
"is_proctored": true,
"n_submissions": 0,
"mean_score": 0.0
"mean_score": 0.0,
"total_points": 2,
"rubric_type": "positive disjoint"
},
"rubric": {
"calibrated": {
Expand Down
4 changes: 3 additions & 1 deletion inst/extdata/metadata-uncalibrated.json
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,9 @@
"scoring_type": "positive",
"is_proctored": true,
"n_submissions": 0,
"mean_score": 0.0
"mean_score": 0.0,
"total_points": 2,
"rubric_type": "positive disjoint"
},
"rubric": {
"calibrated": {
Expand Down
4 changes: 3 additions & 1 deletion inst/extdata/metadata.json
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,9 @@
"scoring_type": "positive",
"is_proctored": true,
"n_submissions": 5,
"mean_score": 0.6
"mean_score": 0.6,
"total_points": 2,
"rubric_type": "positive disjoint"
},
"rubric": {
"calibrated": {
Expand Down
6 changes: 5 additions & 1 deletion man/compute_mae_and_isp.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

12 changes: 11 additions & 1 deletion man/rubric_mae.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

22 changes: 22 additions & 0 deletions man/scores_from_metadata.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

49 changes: 49 additions & 0 deletions tests/testthat/test-computing-accuracy.R
Original file line number Diff line number Diff line change
Expand Up @@ -271,6 +271,55 @@ test_that("normalize_full_credit - missing rubric items, row names", {
rubric_items = c("R2", "R4")))
})

test_that("rubric_mae - weighted, basic", {
eval1 <- data.frame(
SID = c(1111, 2222, 3333),
R1 = c(T, T, F),
R2 = c(T, F, T)
)
eval2 <- data.frame(
SID = c(1111, 2222, 3333),
R1 = c(T, F, F), # 2222 differs on R1 (weight 1.0)
R2 = c(T, F, F) # 3333 differs on R2 (weight 0.5)
)
# 1111: no diff -> 0; 2222: R1 diff -> 1.0; 3333: R2 diff -> 0.5
# MAE = (0 + 1.0 + 0.5) / 3
actual_mae <- rubric_mae(eval1, eval2, weights = c(1.0, 0.5))
expect_equal(actual_mae, (0 + 1.0 + 0.5) / 3)
})

test_that("rubric_mae - weighted equals unweighted when all weights are 1", {
eval1 <- data.frame(
SID = c(1111, 2222, 3333, 4444, 5555),
R1 = c(T, T, T, F, F),
R2 = c(T, F, T, F, T)
)
eval2 <- data.frame(
SID = c(1111, 2222, 3333, 4444, 5555),
R1 = c(T, T, F, F, F),
R2 = c(T, T, T, T, T)
)
expect_equal(rubric_mae(eval1, eval2, weights = c(1, 1)),
rubric_mae(eval1, eval2))
})

test_that("rubric_mae - wrong weights length errors", {
eval1 <- data.frame(SID = 1111, R1 = TRUE, R2 = FALSE)
eval2 <- data.frame(SID = 1111, R1 = TRUE, R2 = TRUE)
expect_error(rubric_mae(eval1, eval2, weights = c(1.0, 0.5, 0.5)))
})

test_that("scores_from_metadata - calibrated", {
path <- system.file("extdata", "metadata-calibrated.json", package = "GradingAccuracy")
scores <- scores_from_metadata(path)
expect_equal(scores, c(1.0, 0.5, 0.5, 0.0))
})

test_that("scores_from_metadata - uncalibrated returns error for null", {
path <- system.file("extdata", "metadata-calibrated.json", package = "GradingAccuracy")
expect_error(scores_from_metadata(path, calibrated = FALSE))
})

test_that("normalize_full_credit - missing rubric items, indices", {
eval_before <- data.frame(
SID = c(1111, 3333, 2222, 4444, 5555),
Expand Down
2 changes: 2 additions & 0 deletions vignettes/creating-metadata-json.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,8 @@ The `course_info` object must have the following keys (and their corresponding v
- `is_proctored` : whether the assignment was taken in a proctored environment or if it was take-home
- `n_submissions`: the number of student submissions, which can be programmatically updated with `update_scores_in_metadata()`
- `mean_score`: the mean score of experts using, which can be programmatically updated with `update_scores_in_metadata()` (which uses `mean(experts$Score/max(experts$Score))`)
- `total_points`: the total number of points the question is worth
- `rubric_type`: the type of rubric (e.g. `"positive disjoint"`)


The `rubric` object must have a `calibrated` rubric object and optionally an `uncalibrated` object that follows the same structure. A rubric object (either `calibrated` or `uncalibrated`) must have the following structure:
Expand Down
Loading