From 8341bfaadd1d4cde6138906cd9b07605552004c4 Mon Sep 17 00:00:00 2001 From: andrewpbray Date: Tue, 26 May 2026 11:49:56 -0700 Subject: [PATCH 1/4] Add weighted rubric MAE using metadata point values - Add scores_from_metadata() helper to extract rubric scores from JSON - Add weights argument to rubric_mae() and find_differences() - Update compute_mae_and_isp() to accept optional metadata_file - Add tests for weighted MAE and scores_from_metadata() --- NAMESPACE | 1 + R/computing-accuracy.R | 61 +++++++++++++++++++++--- man/compute_mae_and_isp.Rd | 6 ++- man/rubric_mae.Rd | 12 ++++- man/scores_from_metadata.Rd | 22 +++++++++ tests/testthat/test-computing-accuracy.R | 49 +++++++++++++++++++ 6 files changed, 143 insertions(+), 8 deletions(-) create mode 100644 man/scores_from_metadata.Rd diff --git a/NAMESPACE b/NAMESPACE index 358a412..8245bbf 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -16,6 +16,7 @@ export(isp) export(normalize_full_credit) export(read_evals) export(rubric_mae) +export(scores_from_metadata) export(update_scores) export(update_scores_in_metadata) export(validate_metadata_json) diff --git a/R/computing-accuracy.R b/R/computing-accuracy.R index e15171e..7d9236c 100644 --- a/R/computing-accuracy.R +++ b/R/computing-accuracy.R @@ -261,20 +261,49 @@ find_differences_wrt_students <- function(experts_file, student_file, #' #' @param file1 file path for first grades csv #' @param file2 file path for second grades csv +#' @param metadata_file optional path to a metadata JSON file. When supplied, +#' rubric item point values are extracted and passed as weights to +#' `rubric_mae()`, producing a point-weighted MAE. #' #' @return a list #' #' @importFrom readr read_csv #' #' @export -compute_mae_and_isp <- function(file1, file2){ +compute_mae_and_isp <- function(file1, file2, metadata_file = NULL){ eval1 <- readr::read_csv(file1, show_col_types = FALSE) eval2 <- readr::read_csv(file2, show_col_types = FALSE) - list(MAE = rubric_mae(eval1, eval2), + weights <- if (!is.null(metadata_file)) scores_from_metadata(metadata_file) else NULL + list(MAE = rubric_mae(eval1, eval2, weights = weights), ISP = isp(eval1, eval2)) } +#' Extract Rubric Item Scores from a Metadata JSON File +#' +#' Reads a metadata JSON file and returns the ordered numeric vector of rubric +#' item point values. This vector can be passed as the `weights` argument to +#' `rubric_mae()` to compute a point-weighted MAE. +#' +#' @param metadata_file path to a metadata JSON file +#' @param calibrated logical; if `TRUE` (default) extract scores from +#' `rubric$calibrated$scores`, otherwise from `rubric$uncalibrated$scores` +#' +#' @return a numeric vector of point values, one per rubric item +#' +#' @importFrom jsonlite read_json +#' @export +scores_from_metadata <- function(metadata_file, calibrated = TRUE) { + meta <- jsonlite::read_json(metadata_file) + rubric_type <- if (calibrated) "calibrated" else "uncalibrated" + scores <- meta$rubric[[rubric_type]]$scores + if (is.null(scores)) { + stop(paste0("No scores found in metadata for rubric type '", rubric_type, "'")) + } + as.numeric(unlist(scores)) +} + + #' Calculate Proportion of Identical Scores #' #' This function calculates the proportion of identical scores @@ -324,21 +353,36 @@ isp <- function(eval1, eval2){ #' It's recommended to `normalize_full_credit()` for `eval1` and `eval2` prior #' to using this function. #' +#' When `weights` is supplied, each rubric item's disagreement is scaled by its +#' point value before summing, so a mismatch on a 1-point item contributes more +#' than a mismatch on a 0.5-point item. Use `scores_from_metadata()` to extract +#' the weights vector from a metadata JSON file. +#' #' @param eval1 first dataframe of Gradescope evaluations #' @param eval2 second dataframe of Gradescope evaluations +#' @param weights optional numeric vector of point values, one per rubric item +#' (in the same order as the R1, R2, ... columns). When `NULL` (default), +#' all items are treated as equally weighted. #' #' @return double for mean absolute error #' #' @export -rubric_mae <- function(eval1, eval2){ +rubric_mae <- function(eval1, eval2, weights = NULL){ + if (!is.null(weights)) { + rubric_cols <- grep("^R[0-9]+$", names(eval1), value = TRUE) + if (length(weights) != length(rubric_cols)) { + stop(paste0("Length of weights (", length(weights), ") must match ", + "number of rubric columns (", length(rubric_cols), ")")) + } + } # find differences in rubric toggles - error_per_student <- find_differences(eval1, eval2)$error_per_student + error_per_student <- find_differences(eval1, eval2, weights = weights)$error_per_student # mean absolute error calculation mean(error_per_student) } -find_differences <- function(eval1, eval2){ +find_differences <- function(eval1, eval2, weights = NULL){ if (!("SID" %in% colnames(eval1)) || !("SID" %in% colnames(eval2))){ stop("Missing SID") } @@ -369,7 +413,12 @@ find_differences <- function(eval1, eval2){ # elementwise matrix comparison check_equal <- rubric1 != rubric2 - error_per_student <- rowSums(check_equal) + if (!is.null(weights)) { + error_per_student <- as.vector(check_equal %*% weights) + names(error_per_student) <- students + } else { + error_per_student <- rowSums(check_equal) + } return (list(error_per_student = error_per_student, rubric1 = rubric1, diff --git a/man/compute_mae_and_isp.Rd b/man/compute_mae_and_isp.Rd index faf46cb..5285a2e 100644 --- a/man/compute_mae_and_isp.Rd +++ b/man/compute_mae_and_isp.Rd @@ -4,12 +4,16 @@ \alias{compute_mae_and_isp} \title{Calculate MAE and ISP} \usage{ -compute_mae_and_isp(file1, file2) +compute_mae_and_isp(file1, file2, metadata_file = NULL) } \arguments{ \item{file1}{file path for first grades csv} \item{file2}{file path for second grades csv} + +\item{metadata_file}{optional path to a metadata JSON file. When supplied, +rubric item point values are extracted and passed as weights to +\code{rubric_mae()}, producing a point-weighted MAE.} } \value{ a list diff --git a/man/rubric_mae.Rd b/man/rubric_mae.Rd index 941f430..e634c01 100644 --- a/man/rubric_mae.Rd +++ b/man/rubric_mae.Rd @@ -4,12 +4,16 @@ \alias{rubric_mae} \title{Mean Absolute Error of Rubric Items} \usage{ -rubric_mae(eval1, eval2) +rubric_mae(eval1, eval2, weights = NULL) } \arguments{ \item{eval1}{first dataframe of Gradescope evaluations} \item{eval2}{second dataframe of Gradescope evaluations} + +\item{weights}{optional numeric vector of point values, one per rubric item +(in the same order as the R1, R2, ... columns). When \code{NULL} (default), +all items are treated as equally weighted.} } \value{ double for mean absolute error @@ -23,3 +27,9 @@ For expert answer \{1, 0\}, \cr It's recommended to \code{normalize_full_credit()} for \code{eval1} and \code{eval2} prior to using this function. } +\details{ +When \code{weights} is supplied, each rubric item's disagreement is scaled by its +point value before summing, so a mismatch on a 1-point item contributes more +than a mismatch on a 0.5-point item. Use \code{scores_from_metadata()} to extract +the weights vector from a metadata JSON file. +} diff --git a/man/scores_from_metadata.Rd b/man/scores_from_metadata.Rd new file mode 100644 index 0000000..58cf923 --- /dev/null +++ b/man/scores_from_metadata.Rd @@ -0,0 +1,22 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/computing-accuracy.R +\name{scores_from_metadata} +\alias{scores_from_metadata} +\title{Extract Rubric Item Scores from a Metadata JSON File} +\usage{ +scores_from_metadata(metadata_file, calibrated = TRUE) +} +\arguments{ +\item{metadata_file}{path to a metadata JSON file} + +\item{calibrated}{logical; if \code{TRUE} (default) extract scores from +\code{rubric$calibrated$scores}, otherwise from \code{rubric$uncalibrated$scores}} +} +\value{ +a numeric vector of point values, one per rubric item +} +\description{ +Reads a metadata JSON file and returns the ordered numeric vector of rubric +item point values. This vector can be passed as the \code{weights} argument to +\code{rubric_mae()} to compute a point-weighted MAE. +} diff --git a/tests/testthat/test-computing-accuracy.R b/tests/testthat/test-computing-accuracy.R index f110321..a322cea 100644 --- a/tests/testthat/test-computing-accuracy.R +++ b/tests/testthat/test-computing-accuracy.R @@ -271,6 +271,55 @@ test_that("normalize_full_credit - missing rubric items, row names", { rubric_items = c("R2", "R4"))) }) +test_that("rubric_mae - weighted, basic", { + eval1 <- data.frame( + SID = c(1111, 2222, 3333), + R1 = c(T, T, F), + R2 = c(T, F, T) + ) + eval2 <- data.frame( + SID = c(1111, 2222, 3333), + R1 = c(T, F, F), # 2222 differs on R1 (weight 1.0) + R2 = c(T, F, F) # 3333 differs on R2 (weight 0.5) + ) + # 1111: no diff -> 0; 2222: R1 diff -> 1.0; 3333: R2 diff -> 0.5 + # MAE = (0 + 1.0 + 0.5) / 3 + actual_mae <- rubric_mae(eval1, eval2, weights = c(1.0, 0.5)) + expect_equal(actual_mae, (0 + 1.0 + 0.5) / 3) +}) + +test_that("rubric_mae - weighted equals unweighted when all weights are 1", { + eval1 <- data.frame( + SID = c(1111, 2222, 3333, 4444, 5555), + R1 = c(T, T, T, F, F), + R2 = c(T, F, T, F, T) + ) + eval2 <- data.frame( + SID = c(1111, 2222, 3333, 4444, 5555), + R1 = c(T, T, F, F, F), + R2 = c(T, T, T, T, T) + ) + expect_equal(rubric_mae(eval1, eval2, weights = c(1, 1)), + rubric_mae(eval1, eval2)) +}) + +test_that("rubric_mae - wrong weights length errors", { + eval1 <- data.frame(SID = 1111, R1 = TRUE, R2 = FALSE) + eval2 <- data.frame(SID = 1111, R1 = TRUE, R2 = TRUE) + expect_error(rubric_mae(eval1, eval2, weights = c(1.0, 0.5, 0.5))) +}) + +test_that("scores_from_metadata - calibrated", { + path <- system.file("extdata", "metadata-calibrated.json", package = "GradingAccuracy") + scores <- scores_from_metadata(path) + expect_equal(scores, c(1.0, 0.5, 0.5, 0.0)) +}) + +test_that("scores_from_metadata - uncalibrated returns error for null", { + path <- system.file("extdata", "metadata-calibrated.json", package = "GradingAccuracy") + expect_error(scores_from_metadata(path, calibrated = FALSE)) +}) + test_that("normalize_full_credit - missing rubric items, indices", { eval_before <- data.frame( SID = c(1111, 3333, 2222, 4444, 5555), From ef34a2a3c8fa2d2b88774ec5a31a6929de6cef22 Mon Sep 17 00:00:00 2001 From: Andrew Bray Date: Tue, 26 May 2026 11:57:44 -0700 Subject: [PATCH 2/4] Potential fix for pull request finding Co-authored-by: Copilot Autofix powered by AI <175728472+Copilot@users.noreply.github.com> --- R/computing-accuracy.R | 23 ++++++++++++++++++++++- 1 file changed, 22 insertions(+), 1 deletion(-) diff --git a/R/computing-accuracy.R b/R/computing-accuracy.R index 7d9236c..02574ec 100644 --- a/R/computing-accuracy.R +++ b/R/computing-accuracy.R @@ -300,7 +300,28 @@ scores_from_metadata <- function(metadata_file, calibrated = TRUE) { if (is.null(scores)) { stop(paste0("No scores found in metadata for rubric type '", rubric_type, "'")) } - as.numeric(unlist(scores)) + scores_unlisted <- unlist(scores, use.names = FALSE) + numeric_scores <- suppressWarnings(as.numeric(scores_unlisted)) + + if (any(is.na(numeric_scores) & !is.na(scores_unlisted))) { + stop( + paste0( + "Scores in metadata for rubric type '", rubric_type, + "' must be numeric and contain no non-numeric values" + ) + ) + } + + if (any(is.na(numeric_scores))) { + stop( + paste0( + "Scores in metadata for rubric type '", rubric_type, + "' must not contain NA values" + ) + ) + } + + numeric_scores } From db4a36b901483f81c8896d36201ca0cd842dcaaf Mon Sep 17 00:00:00 2001 From: andrewpbray Date: Tue, 26 May 2026 12:05:53 -0700 Subject: [PATCH 3/4] Add total_points and rubric_type to example JSON files and vignette docs --- inst/extdata/metadata-calibrated.json | 4 +++- inst/extdata/metadata-uncalibrated.json | 4 +++- inst/extdata/metadata.json | 4 +++- vignettes/creating-metadata-json.Rmd | 2 ++ 4 files changed, 11 insertions(+), 3 deletions(-) diff --git a/inst/extdata/metadata-calibrated.json b/inst/extdata/metadata-calibrated.json index 676b42a..a6dbc54 100644 --- a/inst/extdata/metadata-calibrated.json +++ b/inst/extdata/metadata-calibrated.json @@ -15,7 +15,9 @@ "scoring_type": "positive", "is_proctored": true, "n_submissions": 0, - "mean_score": 0.0 + "mean_score": 0.0, + "total_points": 2, + "rubric_type": "positive disjoint" }, "rubric": { "calibrated": { diff --git a/inst/extdata/metadata-uncalibrated.json b/inst/extdata/metadata-uncalibrated.json index ed6adbc..d0cc21b 100644 --- a/inst/extdata/metadata-uncalibrated.json +++ b/inst/extdata/metadata-uncalibrated.json @@ -15,7 +15,9 @@ "scoring_type": "positive", "is_proctored": true, "n_submissions": 0, - "mean_score": 0.0 + "mean_score": 0.0, + "total_points": 2, + "rubric_type": "positive disjoint" }, "rubric": { "calibrated": { diff --git a/inst/extdata/metadata.json b/inst/extdata/metadata.json index c57bbf5..bf3aa07 100644 --- a/inst/extdata/metadata.json +++ b/inst/extdata/metadata.json @@ -15,7 +15,9 @@ "scoring_type": "positive", "is_proctored": true, "n_submissions": 5, - "mean_score": 0.6 + "mean_score": 0.6, + "total_points": 2, + "rubric_type": "positive disjoint" }, "rubric": { "calibrated": { diff --git a/vignettes/creating-metadata-json.Rmd b/vignettes/creating-metadata-json.Rmd index 2bed7f0..852632f 100644 --- a/vignettes/creating-metadata-json.Rmd +++ b/vignettes/creating-metadata-json.Rmd @@ -41,6 +41,8 @@ The `course_info` object must have the following keys (and their corresponding v - `is_proctored` : whether the assignment was taken in a proctored environment or if it was take-home - `n_submissions`: the number of student submissions, which can be programmatically updated with `update_scores_in_metadata()` - `mean_score`: the mean score of experts using, which can be programmatically updated with `update_scores_in_metadata()` (which uses `mean(experts$Score/max(experts$Score))`) +- `total_points`: the total number of points the question is worth +- `rubric_type`: the type of rubric (e.g. `"positive disjoint"`) The `rubric` object must have a `calibrated` rubric object and optionally an `uncalibrated` object that follows the same structure. A rubric object (either `calibrated` or `uncalibrated`) must have the following structure: From 49bf1ecf40ba8995df392d0faa19cd7d1da23f34 Mon Sep 17 00:00:00 2001 From: andrewpbray Date: Tue, 26 May 2026 12:08:55 -0700 Subject: [PATCH 4/4] Add scores_from_metadata to pkgdown reference index --- _pkgdown.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/_pkgdown.yml b/_pkgdown.yml index 755f830..224d045 100644 --- a/_pkgdown.yml +++ b/_pkgdown.yml @@ -31,3 +31,4 @@ reference: - compute_mae_and_isp - isp - rubric_mae + - scores_from_metadata