Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions R/computing-accuracy.R
Original file line number Diff line number Diff line change
Expand Up @@ -274,8 +274,9 @@ compute_mae_and_isp <- function(file1, file2, metadata_file = NULL){
eval1 <- readr::read_csv(file1, show_col_types = FALSE)
eval2 <- readr::read_csv(file2, show_col_types = FALSE)
weights <- if (!is.null(metadata_file)) scores_from_metadata(metadata_file) else NULL
list(MAE = rubric_mae(eval1, eval2, weights = weights),
ISP = isp(eval1, eval2))
list(MAE = rubric_mae(eval1, eval2),
wMAE = if (!is.null(weights)) rubric_mae(eval1, eval2, weights = weights) else NA_real_,
ISP = isp(eval1, eval2))
Comment on lines 274 to +279
Comment on lines 276 to +279
Comment on lines +277 to +279
}


Expand Down
26 changes: 20 additions & 6 deletions R/generate-results-table.R
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,10 @@ generate_gt_results_table <- function(results_table){
label = "MAE",
columns = starts_with("MAE_")
) |>
gt::tab_spanner(
label = "wMAE",
columns = starts_with("wMAE_")
) |>
gt::tab_spanner(
label = "ISP",
columns = starts_with("ISP_")
Expand All @@ -50,6 +54,10 @@ generate_gt_results_table <- function(results_table){
tidyr::starts_with("MAE_"),
~ sub("^MAE_", "", .x)
) |>
gt::cols_label_with(
tidyr::starts_with("wMAE_"),
~ sub("^wMAE_", "", .x)
) |>
gt::cols_label_with(
tidyr::starts_with("ISP_"),
~ sub("^ISP_", "", .x)
Expand All @@ -75,8 +83,11 @@ generate_results_row <- function(dir) {
name = sub("-.*$", "", files)
)

mae_vals <- list()
isp_vals <- list()
metadata_file <- paste0(dir, "/metadata.json")

mae_vals <- list()
wmae_vals <- list()
isp_vals <- list()
Comment on lines +86 to +90

for (status in unique(df$status)) {
group <- df[df$status == status, ]
Expand All @@ -94,10 +105,12 @@ generate_results_row <- function(dir) {

pair_name <- paste0(n1, ".v.", n2)

metrics <- compute_mae_and_isp(paste0(dir, f1), paste0(dir, f2))
metrics <- compute_mae_and_isp(paste0(dir, f1), paste0(dir, f2),
metadata_file = metadata_file)

mae_vals[[pair_name]] <- metrics$MAE
isp_vals[[pair_name]] <- metrics$ISP
mae_vals[[pair_name]] <- metrics$MAE
wmae_vals[[pair_name]] <- metrics$wMAE
isp_vals[[pair_name]] <- metrics$ISP
Comment on lines +108 to +113
}
}

Expand All @@ -116,7 +129,8 @@ generate_results_row <- function(dir) {
)

for (nm in names(mae_vals)) {
results_row[[paste0("MAE_", nm)]] <- mae_vals[[nm]]
results_row[[paste0("MAE_", nm)]] <- mae_vals[[nm]]
results_row[[paste0("wMAE_", nm)]] <- wmae_vals[[nm]]
}

for (nm in names(isp_vals)) {
Expand Down
93 changes: 93 additions & 0 deletions tests/testthat/test-compute-mae-and-isp.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
# Shared fixtures -----------------------------------------------------------

experts_csv <- system.file("extdata", "experts-calibrated.csv",
package = "GradingAccuracy")
students_csv <- system.file("extdata", "students-calibrated.csv",
package = "GradingAccuracy")
metadata_json <- system.file("extdata", "metadata.json",
package = "GradingAccuracy")

# compute_mae_and_isp - without metadata ------------------------------------

test_that("compute_mae_and_isp - returns MAE, wMAE (NA), and ISP without metadata", {
result <- compute_mae_and_isp(experts_csv, students_csv)

expect_named(result, c("MAE", "wMAE", "ISP"))
expect_true(is.numeric(result$MAE))
expect_true(is.na(result$wMAE))
expect_true(is.numeric(result$ISP))
})
Comment on lines +10 to +19

test_that("compute_mae_and_isp - MAE is non-negative", {
result <- compute_mae_and_isp(experts_csv, students_csv)
expect_gte(result$MAE, 0)
})

test_that("compute_mae_and_isp - ISP is between 0 and 1", {
result <- compute_mae_and_isp(experts_csv, students_csv)
expect_gte(result$ISP, 0)
expect_lte(result$ISP, 1)
})

# compute_mae_and_isp - with metadata ---------------------------------------

test_that("compute_mae_and_isp - returns numeric wMAE when metadata supplied", {
result <- compute_mae_and_isp(experts_csv, students_csv,
metadata_file = metadata_json)

expect_named(result, c("MAE", "wMAE", "ISP"))
expect_true(is.numeric(result$wMAE))
expect_false(is.na(result$wMAE))
})

test_that("compute_mae_and_isp - wMAE is non-negative", {
result <- compute_mae_and_isp(experts_csv, students_csv,
metadata_file = metadata_json)
expect_gte(result$wMAE, 0)
})

test_that("compute_mae_and_isp - MAE unaffected by presence of metadata", {
result_no_meta <- compute_mae_and_isp(experts_csv, students_csv)
result_with_meta <- compute_mae_and_isp(experts_csv, students_csv,
metadata_file = metadata_json)

expect_equal(result_no_meta$MAE, result_with_meta$MAE)
expect_equal(result_no_meta$ISP, result_with_meta$ISP)
})

test_that("compute_mae_and_isp - wMAE equals MAE when all weights are equal", {
# Write a temp metadata with equal weights matching the 4-item rubric
equal_weight_meta <- tempfile(fileext = ".json")
jsonlite::write_json(
list(
rubric = list(
calibrated = list(scores = list(1, 1, 1, 1))
)
),
equal_weight_meta, auto_unbox = TRUE
)
result <- compute_mae_and_isp(experts_csv, students_csv,
metadata_file = equal_weight_meta)
expect_equal(result$wMAE, result$MAE)
})

test_that("compute_mae_and_isp - wMAE correct for known example", {
# experts vs students differ only on R3 for SID 1002 (weight 0.5)
# All other rows match. Expected wMAE = 0.5 / 5 = 0.1
weights <- c(1.0, 0.5, 0.5, 0.0)
result <- compute_mae_and_isp(experts_csv, students_csv,
metadata_file = metadata_json)
expected_wmae <- (0 + 0.5 + 0 + 0 + 0) / 5
Comment on lines +76 to +80
expect_equal(result$wMAE, expected_wmae)
})

# compute_mae_and_isp - identical files -------------------------------------

test_that("compute_mae_and_isp - identical files give MAE=0, wMAE=0, ISP=1", {
result <- compute_mae_and_isp(experts_csv, experts_csv,
metadata_file = metadata_json)

expect_equal(result$MAE, 0)
expect_equal(result$wMAE, 0)
expect_equal(result$ISP, 1)
})
Loading