From 8341bfaadd1d4cde6138906cd9b07605552004c4 Mon Sep 17 00:00:00 2001
From: andrewpbray <andrewbray@berkeley.edu>
Date: Tue, 26 May 2026 11:49:56 -0700
Subject: [PATCH 1/4] Add weighted rubric MAE using metadata point values

- Add scores_from_metadata() helper to extract rubric scores from JSON
- Add weights argument to rubric_mae() and find_differences()
- Update compute_mae_and_isp() to accept optional metadata_file
- Add tests for weighted MAE and scores_from_metadata()
---
 NAMESPACE                                |  1 +
 R/computing-accuracy.R                   | 61 +++++++++++++++++++++---
 man/compute_mae_and_isp.Rd               |  6 ++-
 man/rubric_mae.Rd                        | 12 ++++-
 man/scores_from_metadata.Rd              | 22 +++++++++
 tests/testthat/test-computing-accuracy.R | 49 +++++++++++++++++++
 6 files changed, 143 insertions(+), 8 deletions(-)
 create mode 100644 man/scores_from_metadata.Rd

diff --git a/NAMESPACE b/NAMESPACE
index 358a412..8245bbf 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -16,6 +16,7 @@ export(isp)
 export(normalize_full_credit)
 export(read_evals)
 export(rubric_mae)
+export(scores_from_metadata)
 export(update_scores)
 export(update_scores_in_metadata)
 export(validate_metadata_json)
diff --git a/R/computing-accuracy.R b/R/computing-accuracy.R
index e15171e..7d9236c 100644
--- a/R/computing-accuracy.R
+++ b/R/computing-accuracy.R
@@ -261,20 +261,49 @@ find_differences_wrt_students <- function(experts_file, student_file,
 #'
 #' @param file1 file path for first grades csv
 #' @param file2 file path for second grades csv
+#' @param metadata_file optional path to a metadata JSON file. When supplied,
+#'   rubric item point values are extracted and passed as weights to
+#'   `rubric_mae()`, producing a point-weighted MAE.
 #'
 #' @return a list
 #'
 #' @importFrom readr read_csv
 #'
 #' @export
-compute_mae_and_isp <- function(file1, file2){
+compute_mae_and_isp <- function(file1, file2, metadata_file = NULL){
   eval1 <- readr::read_csv(file1, show_col_types = FALSE)
   eval2 <- readr::read_csv(file2, show_col_types = FALSE)
-  list(MAE = rubric_mae(eval1, eval2),
+  weights <- if (!is.null(metadata_file)) scores_from_metadata(metadata_file) else NULL
+  list(MAE = rubric_mae(eval1, eval2, weights = weights),
        ISP = isp(eval1, eval2))
 }
 
 
+#' Extract Rubric Item Scores from a Metadata JSON File
+#'
+#' Reads a metadata JSON file and returns the ordered numeric vector of rubric
+#' item point values. This vector can be passed as the `weights` argument to
+#' `rubric_mae()` to compute a point-weighted MAE.
+#'
+#' @param metadata_file path to a metadata JSON file
+#' @param calibrated logical; if `TRUE` (default) extract scores from
+#'   `rubric$calibrated$scores`, otherwise from `rubric$uncalibrated$scores`
+#'
+#' @return a numeric vector of point values, one per rubric item
+#'
+#' @importFrom jsonlite read_json
+#' @export
+scores_from_metadata <- function(metadata_file, calibrated = TRUE) {
+  meta <- jsonlite::read_json(metadata_file)
+  rubric_type <- if (calibrated) "calibrated" else "uncalibrated"
+  scores <- meta$rubric[[rubric_type]]$scores
+  if (is.null(scores)) {
+    stop(paste0("No scores found in metadata for rubric type '", rubric_type, "'"))
+  }
+  as.numeric(unlist(scores))
+}
+
+
 #' Calculate Proportion of Identical Scores
 #'
 #' This function calculates the proportion of identical scores
@@ -324,21 +353,36 @@ isp <- function(eval1, eval2){
 #' It's recommended to `normalize_full_credit()` for `eval1` and `eval2` prior
 #' to using this function.
 #'
+#' When `weights` is supplied, each rubric item's disagreement is scaled by its
+#' point value before summing, so a mismatch on a 1-point item contributes more
+#' than a mismatch on a 0.5-point item. Use `scores_from_metadata()` to extract
+#' the weights vector from a metadata JSON file.
+#'
 #' @param eval1 first dataframe of Gradescope evaluations
 #' @param eval2 second dataframe of Gradescope evaluations
+#' @param weights optional numeric vector of point values, one per rubric item
+#'   (in the same order as the R1, R2, ... columns). When `NULL` (default),
+#'   all items are treated as equally weighted.
 #'
 #' @return double for mean absolute error
 #'
 #' @export
-rubric_mae <- function(eval1, eval2){
+rubric_mae <- function(eval1, eval2, weights = NULL){
+  if (!is.null(weights)) {
+    rubric_cols <- grep("^R[0-9]+$", names(eval1), value = TRUE)
+    if (length(weights) != length(rubric_cols)) {
+      stop(paste0("Length of weights (", length(weights), ") must match ",
+                  "number of rubric columns (", length(rubric_cols), ")"))
+    }
+  }
   # find differences in rubric toggles
-  error_per_student <- find_differences(eval1, eval2)$error_per_student
+  error_per_student <- find_differences(eval1, eval2, weights = weights)$error_per_student
   # mean absolute error calculation
   mean(error_per_student)
 }
 
 
-find_differences <- function(eval1, eval2){
+find_differences <- function(eval1, eval2, weights = NULL){
   if (!("SID" %in% colnames(eval1)) || !("SID" %in% colnames(eval2))){
     stop("Missing SID")
   }
@@ -369,7 +413,12 @@ find_differences <- function(eval1, eval2){
   # elementwise matrix comparison
   check_equal <- rubric1 != rubric2
 
-  error_per_student <- rowSums(check_equal)
+  if (!is.null(weights)) {
+    error_per_student <- as.vector(check_equal %*% weights)
+    names(error_per_student) <- students
+  } else {
+    error_per_student <- rowSums(check_equal)
+  }
 
   return (list(error_per_student = error_per_student,
               rubric1 = rubric1,
diff --git a/man/compute_mae_and_isp.Rd b/man/compute_mae_and_isp.Rd
index faf46cb..5285a2e 100644
--- a/man/compute_mae_and_isp.Rd
+++ b/man/compute_mae_and_isp.Rd
@@ -4,12 +4,16 @@
 \alias{compute_mae_and_isp}
 \title{Calculate MAE and ISP}
 \usage{
-compute_mae_and_isp(file1, file2)
+compute_mae_and_isp(file1, file2, metadata_file = NULL)
 }
 \arguments{
 \item{file1}{file path for first grades csv}
 
 \item{file2}{file path for second grades csv}
+
+\item{metadata_file}{optional path to a metadata JSON file. When supplied,
+rubric item point values are extracted and passed as weights to
+\code{rubric_mae()}, producing a point-weighted MAE.}
 }
 \value{
 a list
diff --git a/man/rubric_mae.Rd b/man/rubric_mae.Rd
index 941f430..e634c01 100644
--- a/man/rubric_mae.Rd
+++ b/man/rubric_mae.Rd
@@ -4,12 +4,16 @@
 \alias{rubric_mae}
 \title{Mean Absolute Error of Rubric Items}
 \usage{
-rubric_mae(eval1, eval2)
+rubric_mae(eval1, eval2, weights = NULL)
 }
 \arguments{
 \item{eval1}{first dataframe of Gradescope evaluations}
 
 \item{eval2}{second dataframe of Gradescope evaluations}
+
+\item{weights}{optional numeric vector of point values, one per rubric item
+(in the same order as the R1, R2, ... columns). When \code{NULL} (default),
+all items are treated as equally weighted.}
 }
 \value{
 double for mean absolute error
@@ -23,3 +27,9 @@ For expert answer \{1, 0\}, \cr
 It's recommended to \code{normalize_full_credit()} for \code{eval1} and \code{eval2} prior
 to using this function.
 }
+\details{
+When \code{weights} is supplied, each rubric item's disagreement is scaled by its
+point value before summing, so a mismatch on a 1-point item contributes more
+than a mismatch on a 0.5-point item. Use \code{scores_from_metadata()} to extract
+the weights vector from a metadata JSON file.
+}
diff --git a/man/scores_from_metadata.Rd b/man/scores_from_metadata.Rd
new file mode 100644
index 0000000..58cf923
--- /dev/null
+++ b/man/scores_from_metadata.Rd
@@ -0,0 +1,22 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/computing-accuracy.R
+\name{scores_from_metadata}
+\alias{scores_from_metadata}
+\title{Extract Rubric Item Scores from a Metadata JSON File}
+\usage{
+scores_from_metadata(metadata_file, calibrated = TRUE)
+}
+\arguments{
+\item{metadata_file}{path to a metadata JSON file}
+
+\item{calibrated}{logical; if \code{TRUE} (default) extract scores from
+\code{rubric$calibrated$scores}, otherwise from \code{rubric$uncalibrated$scores}}
+}
+\value{
+a numeric vector of point values, one per rubric item
+}
+\description{
+Reads a metadata JSON file and returns the ordered numeric vector of rubric
+item point values. This vector can be passed as the \code{weights} argument to
+\code{rubric_mae()} to compute a point-weighted MAE.
+}
diff --git a/tests/testthat/test-computing-accuracy.R b/tests/testthat/test-computing-accuracy.R
index f110321..a322cea 100644
--- a/tests/testthat/test-computing-accuracy.R
+++ b/tests/testthat/test-computing-accuracy.R
@@ -271,6 +271,55 @@ test_that("normalize_full_credit - missing rubric items, row names", {
                                         rubric_items = c("R2", "R4")))
 })
 
+test_that("rubric_mae - weighted, basic", {
+  eval1 <- data.frame(
+    SID = c(1111, 2222, 3333),
+    R1 = c(T, T, F),
+    R2 = c(T, F, T)
+  )
+  eval2 <- data.frame(
+    SID = c(1111, 2222, 3333),
+    R1 = c(T, F, F),  # 2222 differs on R1 (weight 1.0)
+    R2 = c(T, F, F)   # 3333 differs on R2 (weight 0.5)
+  )
+  # 1111: no diff -> 0; 2222: R1 diff -> 1.0; 3333: R2 diff -> 0.5
+  # MAE = (0 + 1.0 + 0.5) / 3
+  actual_mae <- rubric_mae(eval1, eval2, weights = c(1.0, 0.5))
+  expect_equal(actual_mae, (0 + 1.0 + 0.5) / 3)
+})
+
+test_that("rubric_mae - weighted equals unweighted when all weights are 1", {
+  eval1 <- data.frame(
+    SID = c(1111, 2222, 3333, 4444, 5555),
+    R1 = c(T, T, T, F, F),
+    R2 = c(T, F, T, F, T)
+  )
+  eval2 <- data.frame(
+    SID = c(1111, 2222, 3333, 4444, 5555),
+    R1 = c(T, T, F, F, F),
+    R2 = c(T, T, T, T, T)
+  )
+  expect_equal(rubric_mae(eval1, eval2, weights = c(1, 1)),
+               rubric_mae(eval1, eval2))
+})
+
+test_that("rubric_mae - wrong weights length errors", {
+  eval1 <- data.frame(SID = 1111, R1 = TRUE, R2 = FALSE)
+  eval2 <- data.frame(SID = 1111, R1 = TRUE, R2 = TRUE)
+  expect_error(rubric_mae(eval1, eval2, weights = c(1.0, 0.5, 0.5)))
+})
+
+test_that("scores_from_metadata - calibrated", {
+  path <- system.file("extdata", "metadata-calibrated.json", package = "GradingAccuracy")
+  scores <- scores_from_metadata(path)
+  expect_equal(scores, c(1.0, 0.5, 0.5, 0.0))
+})
+
+test_that("scores_from_metadata - uncalibrated returns error for null", {
+  path <- system.file("extdata", "metadata-calibrated.json", package = "GradingAccuracy")
+  expect_error(scores_from_metadata(path, calibrated = FALSE))
+})
+
 test_that("normalize_full_credit - missing rubric items, indices", {
   eval_before <- data.frame(
     SID = c(1111, 3333, 2222, 4444, 5555),

From ef34a2a3c8fa2d2b88774ec5a31a6929de6cef22 Mon Sep 17 00:00:00 2001
From: Andrew Bray <andrewbray@berkeley.edu>
Date: Tue, 26 May 2026 11:57:44 -0700
Subject: [PATCH 2/4] Validate that metadata rubric scores are numeric and non-NA

Co-authored-by: Copilot Autofix powered by AI <175728472+Copilot@users.noreply.github.com>
---
 R/computing-accuracy.R | 23 ++++++++++++++++++++++-
 1 file changed, 22 insertions(+), 1 deletion(-)

diff --git a/R/computing-accuracy.R b/R/computing-accuracy.R
index 7d9236c..02574ec 100644
--- a/R/computing-accuracy.R
+++ b/R/computing-accuracy.R
@@ -300,7 +300,28 @@ scores_from_metadata <- function(metadata_file, calibrated = TRUE) {
   if (is.null(scores)) {
     stop(paste0("No scores found in metadata for rubric type '", rubric_type, "'"))
   }
-  as.numeric(unlist(scores))
+  scores_unlisted <- unlist(scores, use.names = FALSE)
+  numeric_scores <- suppressWarnings(as.numeric(scores_unlisted))
+
+  if (any(is.na(numeric_scores) & !is.na(scores_unlisted))) {
+    stop(
+      paste0(
+        "Scores in metadata for rubric type '", rubric_type,
+        "' must be numeric and contain no non-numeric values"
+      )
+    )
+  }
+
+  if (any(is.na(numeric_scores))) {
+    stop(
+      paste0(
+        "Scores in metadata for rubric type '", rubric_type,
+        "' must not contain NA values"
+      )
+    )
+  }
+
+  numeric_scores
 }
 
 

From db4a36b901483f81c8896d36201ca0cd842dcaaf Mon Sep 17 00:00:00 2001
From: andrewpbray <andrewbray@berkeley.edu>
Date: Tue, 26 May 2026 12:05:53 -0700
Subject: [PATCH 3/4] Add total_points and rubric_type to example JSON files
 and vignette docs

---
 inst/extdata/metadata-calibrated.json   | 4 +++-
 inst/extdata/metadata-uncalibrated.json | 4 +++-
 inst/extdata/metadata.json              | 4 +++-
 vignettes/creating-metadata-json.Rmd    | 2 ++
 4 files changed, 11 insertions(+), 3 deletions(-)

diff --git a/inst/extdata/metadata-calibrated.json b/inst/extdata/metadata-calibrated.json
index 676b42a..a6dbc54 100644
--- a/inst/extdata/metadata-calibrated.json
+++ b/inst/extdata/metadata-calibrated.json
@@ -15,7 +15,9 @@
     "scoring_type": "positive",
     "is_proctored": true,
     "n_submissions": 0,
-    "mean_score": 0.0
+    "mean_score": 0.0,
+    "total_points": 2,
+    "rubric_type": "positive disjoint"
   },
   "rubric": {
     "calibrated": {
diff --git a/inst/extdata/metadata-uncalibrated.json b/inst/extdata/metadata-uncalibrated.json
index ed6adbc..d0cc21b 100644
--- a/inst/extdata/metadata-uncalibrated.json
+++ b/inst/extdata/metadata-uncalibrated.json
@@ -15,7 +15,9 @@
     "scoring_type": "positive",
     "is_proctored": true,
     "n_submissions": 0,
-    "mean_score": 0.0
+    "mean_score": 0.0,
+    "total_points": 2,
+    "rubric_type": "positive disjoint"
   },
   "rubric": {
     "calibrated": {
diff --git a/inst/extdata/metadata.json b/inst/extdata/metadata.json
index c57bbf5..bf3aa07 100644
--- a/inst/extdata/metadata.json
+++ b/inst/extdata/metadata.json
@@ -15,7 +15,9 @@
     "scoring_type": "positive",
     "is_proctored": true,
     "n_submissions": 5,
-    "mean_score": 0.6
+    "mean_score": 0.6,
+    "total_points": 2,
+    "rubric_type": "positive disjoint"
   },
   "rubric": {
     "calibrated": {
diff --git a/vignettes/creating-metadata-json.Rmd b/vignettes/creating-metadata-json.Rmd
index 2bed7f0..852632f 100644
--- a/vignettes/creating-metadata-json.Rmd
+++ b/vignettes/creating-metadata-json.Rmd
@@ -41,6 +41,8 @@ The `course_info` object must have the following keys (and their corresponding v
 - `is_proctored` : whether the assignment was taken in a proctored environment or if it was take-home
 - `n_submissions`: the number of student submissions, which can be programmatically updated with `update_scores_in_metadata()`
 - `mean_score`: the mean score of experts using, which can be programmatically updated with `update_scores_in_metadata()` (which uses `mean(experts$Score/max(experts$Score))`)
+- `total_points`: the total number of points the question is worth
+- `rubric_type`: the type of rubric (e.g. `"positive disjoint"`)
 
 
 The `rubric` object must have a `calibrated` rubric object and optionally an `uncalibrated` object that follows the same structure. A rubric object (either `calibrated` or `uncalibrated`) must have the following structure:

From 49bf1ecf40ba8995df392d0faa19cd7d1da23f34 Mon Sep 17 00:00:00 2001
From: andrewpbray <andrewbray@berkeley.edu>
Date: Tue, 26 May 2026 12:08:55 -0700
Subject: [PATCH 4/4] Add scores_from_metadata to pkgdown reference index

---
 _pkgdown.yml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/_pkgdown.yml b/_pkgdown.yml
index 755f830..224d045 100644
--- a/_pkgdown.yml
+++ b/_pkgdown.yml
@@ -31,3 +31,4 @@ reference:
   - compute_mae_and_isp
   - isp
   - rubric_mae
+  - scores_from_metadata