diff --git a/__tests__/unit/health-baselines.test.ts b/__tests__/unit/health-baselines.test.ts
new file mode 100644
index 0000000..9a2466f
--- /dev/null
+++ b/__tests__/unit/health-baselines.test.ts
@@ -0,0 +1,509 @@
+/**
+ * Unit tests for lib/health-baselines.ts and the applyRoleBaseline() function
+ * exported from lib/health.ts.
+ *
+ * Tests are grouped into four areas:
+ *  1. medianOf() helper
+ *  2. computeRoleBaseline() — computation logic and degradation ladder
+ *  3. getRoleBaseline() — cache TTL and invalidation
+ *  4. applyRoleBaseline() — normalisation rules and guards
+ *
+ * Uses a real isolated SQLite DB (via setup.ts) so that computeRoleBaseline
+ * exercises the full DB→compute path rather than mocking internals.
+ */
+import { describe, it, expect, beforeEach, vi, afterEach } from 'vitest'
+import { randomUUID } from 'crypto'
+import { dbAddTaskRun } from '@/lib/db/repositories/taskRunRepo'
+import { makeTestTaskRun } from '../helpers/test-utils'
+import {
+  computeRoleBaseline,
+  getRoleBaseline,
+  invalidateRoleBaselines,
+  medianOf,
+  MIN_COHORT_SIZE,
+  BASELINE_TTL_MS,
+  BASELINE_WINDOW_MS,
+} from '@/lib/health-baselines'
+import {
+  applyRoleBaseline,
+  MIN_COHORT_SIZE_FOR_BASELINE,
+} from '@/lib/health'
+import type { AgentHealthMetrics } from '@/lib/types'
+
+// ---------------------------------------------------------------------------
+// Fixed reference time
+// ---------------------------------------------------------------------------
+
+/** Wednesday 2026-03-11 noon UTC — same anchor used in health.test.ts */
+const NOW = new Date('2026-03-11T12:00:00Z').getTime()
+
+// ---------------------------------------------------------------------------
+// Helpers
+// ---------------------------------------------------------------------------
+
+/**
+ * Seeds `count` task runs for the given agentId and role inside the 7-day
+ * rolling window that `computeHealthMetrics` uses, spaced 30 minutes apart.
+ *
+ * Runs are placed within the last 24 hours so they safely fall inside the
+ * rolling window regardless of which NOW anchor is used.  All `count` runs
+ * will therefore contribute to `hasEnoughData` and the per-agent metrics
+ * that `computeRoleBaseline` collects.
+ */
+function seedRunsForAgent(
+  agentId: string,
+  role: string,
+  count: number,
+  status: 'done' | 'failed' = 'done',
+  now: number = NOW,
+): void {
+  for (let i = 0; i < count; i++) {
+    // Space 30 min apart, most recent first, all within last 24 h
+    const completedAt = now - (i + 1) * 30 * 60_000
+    dbAddTaskRun(
+      makeTestTaskRun(randomUUID(), {
+        agentId,
+        role,
+        status,
+        completedAt,
+        startedAt: completedAt - 30_000,
+      }),
+    )
+  }
+}
+
+// ---------------------------------------------------------------------------
+// 1. medianOf()
+// ---------------------------------------------------------------------------
+
+describe('medianOf', () => {
+  it('returns null for an empty array', () => {
+    expect(medianOf([])).toBeNull()
+  })
+
+  it('returns the single value for a one-element array', () => {
+    expect(medianOf([42])).toBe(42)
+  })
+
+  it('returns the middle value for an odd-length array', () => {
+    expect(medianOf([3, 1, 4, 1, 5])).toBe(3) // sorted: [1,1,3,4,5]
+  })
+
+  it('returns the average of the two middle values for an even-length array', () => {
+    expect(medianOf([1, 3, 5, 7])).toBe(4) // (3+5)/2
+  })
+
+  it('does not mutate the input array', () => {
+    const input = [5, 2, 8, 1]
+    medianOf(input)
+    expect(input).toEqual([5, 2, 8, 1])
+  })
+})
+
+// ---------------------------------------------------------------------------
+// 2. computeRoleBaseline()
+// ---------------------------------------------------------------------------
+
+describe('computeRoleBaseline', () => {
+  beforeEach(() => {
+    invalidateRoleBaselines()
+  })
+
+  it('returns null-metric baseline with cohortSize=0 when no runs exist for the role', () => {
+    const result = computeRoleBaseline('tester', NOW)
+    expect(result.cohortSize).toBe(0)
+    expect(result.medianCompletionRate).toBeNull()
+    expect(result.medianErrorDensity).toBeNull()
+    expect(result.medianWeeklyThroughput).toBeNull()
+    expect(result.role).toBe('tester')
+    expect(result.computedAt).toBe(NOW)
+  })
+
+  it('returns null-metric baseline when cohort has fewer than MIN_COHORT_SIZE qualifying agents', () => {
+    // Seed 2 agents with enough runs — below the MIN_COHORT_SIZE of 3
+    for (let i = 0; i < MIN_COHORT_SIZE - 1; i++) {
+      seedRunsForAgent(randomUUID(), 'writer', 6, 'done', NOW)
+    }
+    const result = computeRoleBaseline('writer', NOW)
+    expect(result.cohortSize).toBeLessThan(MIN_COHORT_SIZE)
+    expect(result.medianCompletionRate).toBeNull()
+    expect(result.medianErrorDensity).toBeNull()
+  })
+
+  it('returns meaningful baselines when cohort meets MIN_COHORT_SIZE', () => {
+    // Seed 3 agents, each with 6 done runs → completion rate = 1, error density = 0
+    for (let i = 0; i < MIN_COHORT_SIZE; i++) {
+      seedRunsForAgent(randomUUID(), 'researcher', 6, 'done', NOW)
+    }
+    const result = computeRoleBaseline('researcher', NOW)
+    expect(result.cohortSize).toBe(MIN_COHORT_SIZE)
+    expect(result.medianCompletionRate).toBe(1)
+    expect(result.medianErrorDensity).toBe(0)
+    expect(result.medianWeeklyThroughput).not.toBeNull()
+    expect(result.medianWeeklyThroughput!).toBeGreaterThan(0)
+  })
+
+  it('only counts agents with >= MIN_RUNS_THRESHOLD runs as qualifying', () => {
+    // 2 agents with enough runs + 1 with too few — only 2 qualify, below cohort min
+    seedRunsForAgent(randomUUID(), 'coder', 6, 'done', NOW)
+    seedRunsForAgent(randomUUID(), 'coder', 6, 'done', NOW)
+    seedRunsForAgent(randomUUID(), 'coder', 2, 'done', NOW) // sparse — not qualifying
+    const result = computeRoleBaseline('coder', NOW)
+    expect(result.cohortSize).toBe(2) // only 2 qualify
+    expect(result.medianCompletionRate).toBeNull()  // below MIN_COHORT_SIZE
+  })
+
+  it('excludes runs outside the 30-day window when determining qualifying agents', () => {
+    const agentId = randomUUID()
+    // Seed 6 runs starting 36 days before the window opens (66 days before NOW) — outside BASELINE_WINDOW_MS
+    const ancient = NOW - BASELINE_WINDOW_MS - 36 * 24 * 3600_000
+    for (let i = 0; i < 6; i++) {
+      dbAddTaskRun(
+        makeTestTaskRun(randomUUID(), {
+          agentId,
+          role: 'senior-coder',
+          status: 'done',
+          completedAt: ancient + i * 60_000,
+          startedAt: ancient + i * 60_000 - 30_000,
+        }),
+      )
+    }
+    // No other agents → cohort = 0
+    const result = computeRoleBaseline('senior-coder', NOW)
+    expect(result.cohortSize).toBe(0)
+  })
+
+  it('computes correct median completionRate across a mixed cohort', () => {
+    // 3 agents: rates ~1.0, ~0.5, ~0.0 → median = 0.5
+    const agents = [randomUUID(), randomUUID(), randomUUID()]
+    // Agent 0: all done → rate 1.0
+    seedRunsForAgent(agents[0], 'tester', 6, 'done', NOW)
+    // Agent 1: half done, half failed → rate 0.5
+    seedRunsForAgent(agents[1], 'tester', 3, 'done', NOW)
+    seedRunsForAgent(agents[1], 'tester', 3, 'failed', NOW)
+    // Agent 2: all failed → rate 0.0
+    seedRunsForAgent(agents[2], 'tester', 6, 'failed', NOW)
+
+    const result = computeRoleBaseline('tester', NOW)
+    expect(result.cohortSize).toBe(3)
+    expect(result.medianCompletionRate).toBeCloseTo(0.5, 5)
+    expect(result.medianErrorDensity).toBeCloseTo(0.5, 5)
+  })
+
+  it('returns cohortSize correctly even when below threshold', () => {
+    // senior-coder is only seeded in the "excludes runs outside window" test above,
+    // and those runs are outside BASELINE_WINDOW_MS — so they produce 0 qualifying agents.
+    // Adding 1 agent with inside-window runs here means cohortSize = 1 (below MIN_COHORT_SIZE).
+    seedRunsForAgent(randomUUID(), 'senior-coder', 6, 'done', NOW)
+    const result = computeRoleBaseline('senior-coder', NOW)
+    expect(result.cohortSize).toBe(1)
+  })
+})
+
+// ---------------------------------------------------------------------------
+// 3. getRoleBaseline() — cache behaviour
+// ---------------------------------------------------------------------------
+
+describe('getRoleBaseline', () => {
+  beforeEach(() => {
+    invalidateRoleBaselines()
+    vi.useFakeTimers()
+    vi.setSystemTime(NOW)
+  })
+
+  afterEach(() => {
+    vi.useRealTimers()
+  })
+
+  it('returns a baseline without throwing even for an empty role cohort', () => {
+    const result = getRoleBaseline('researcher')
+    expect(result).toBeDefined()
+    expect(result.role).toBe('researcher')
+  })
+
+  it('returns cached baseline within TTL without recomputing', () => {
+    // Seed enough data so a real baseline exists
+    for (let i = 0; i < MIN_COHORT_SIZE; i++) {
+      seedRunsForAgent(randomUUID(), 'coder', 6, 'done', NOW)
+    }
+
+    const first = getRoleBaseline('coder')
+    // Advance time but stay within TTL
+    vi.advanceTimersByTime(BASELINE_TTL_MS - 1_000)
+    const second = getRoleBaseline('coder')
+    // Same object reference means the cache was hit
+    expect(second).toBe(first)
+  })
+
+  it('recomputes baseline after TTL expires', () => {
+    for (let i = 0; i < MIN_COHORT_SIZE; i++) {
+      seedRunsForAgent(randomUUID(), 'coder', 6, 'done', NOW)
+    }
+
+    const first = getRoleBaseline('coder')
+    // Advance past TTL
+    vi.advanceTimersByTime(BASELINE_TTL_MS + 1_000)
+    const second = getRoleBaseline('coder')
+    // Should be a fresh object (different reference)
+    expect(second).not.toBe(first)
+  })
+
+  it('invalidateRoleBaselines() for a specific role forces recompute', () => {
+    for (let i = 0; i < MIN_COHORT_SIZE; i++) {
+      seedRunsForAgent(randomUUID(), 'writer', 6, 'done', NOW)
+    }
+
+    const first = getRoleBaseline('writer')
+    invalidateRoleBaselines('writer')
+    const second = getRoleBaseline('writer')
+    expect(second).not.toBe(first)
+  })
+
+  it('invalidateRoleBaselines() without argument clears all roles', () => {
+    for (let i = 0; i < MIN_COHORT_SIZE; i++) {
+      seedRunsForAgent(randomUUID(), 'coder', 6, 'done', NOW)
+      seedRunsForAgent(randomUUID(), 'tester', 6, 'done', NOW)
+    }
+    const c1 = getRoleBaseline('coder')
+    const t1 = getRoleBaseline('tester')
+
+    invalidateRoleBaselines()
+
+    const c2 = getRoleBaseline('coder')
+    const t2 = getRoleBaseline('tester')
+
+    expect(c2).not.toBe(c1)
+    expect(t2).not.toBe(t1)
+  })
+
+  it('does not throw when called for an unknown/new agent type', () => {
+    // Force TypeScript to accept an arbitrary string via cast (simulates future role addition)
+    expect(() => getRoleBaseline('pilot' as Parameters<typeof getRoleBaseline>[0])).not.toThrow()
+  })
+})
+
+// ---------------------------------------------------------------------------
+// 4. applyRoleBaseline() — normalisation rules and guards
+// ---------------------------------------------------------------------------
+
+/** Helper: build a fully-populated AgentHealthMetrics with hasEnoughData=true */
+function makeMetrics(overrides: Partial<AgentHealthMetrics> = {}): AgentHealthMetrics {
+  return {
+    completionRate: 0.8,
+    throughputTrend: 1.5,
+    errorDensity: 0.2,
+    idleSeconds: 3600,
+    hasEnoughData: true,
+    ...overrides,
+  }
+}
+
+describe('applyRoleBaseline', () => {
+  // ── Guard conditions ────────────────────────────────────────────────────────
+
+  it('returns raw metrics unchanged when baseline is null', () => {
+    const raw = makeMetrics()
+    expect(applyRoleBaseline(raw, null)).toEqual(raw)
+  })
+
+  it('returns raw metrics unchanged when cohortSize < MIN_COHORT_SIZE_FOR_BASELINE', () => {
+    const raw = makeMetrics()
+    const baseline = {
+      medianCompletionRate: 0.9,
+      medianErrorDensity: 0.1,
+      cohortSize: MIN_COHORT_SIZE_FOR_BASELINE - 1,
+    }
+    expect(applyRoleBaseline(raw, baseline)).toEqual(raw)
+  })
+
+  it('returns raw metrics unchanged when hasEnoughData is false', () => {
+    const raw = makeMetrics({ hasEnoughData: false, completionRate: null, errorDensity: null })
+    const baseline = {
+      medianCompletionRate: 0.9,
+      medianErrorDensity: 0.1,
+      cohortSize: MIN_COHORT_SIZE_FOR_BASELINE,
+    }
+    expect(applyRoleBaseline(raw, baseline)).toEqual(raw)
+  })
+
+  // ── Normalisation — completionRate ──────────────────────────────────────────
+
+  it('normalises completionRate: raw / median, capped at 1', () => {
+    const raw = makeMetrics({ completionRate: 0.72 })
+    const baseline = {
+      medianCompletionRate: 0.70,
+      medianErrorDensity: 0.30,
+      cohortSize: MIN_COHORT_SIZE_FOR_BASELINE,
+    }
+    const result = applyRoleBaseline(raw, baseline)
+    // 0.72 / 0.70 = 1.028... → capped at 1.0
+    expect(result.completionRate).toBe(1)
+  })
+
+  it('normalises completionRate below 1 when agent underperforms role', () => {
+    const raw = makeMetrics({ completionRate: 0.72 })
+    const baseline = {
+      medianCompletionRate: 0.92,
+      medianErrorDensity: 0.08,
+      cohortSize: MIN_COHORT_SIZE_FOR_BASELINE,
+    }
+    const result = applyRoleBaseline(raw, baseline)
+    // 0.72 / 0.92 ≈ 0.782
+    expect(result.completionRate).toBeCloseTo(0.72 / 0.92, 10)
+    expect(result.completionRate!).toBeLessThan(1)
+  })
+
+  it('skips completionRate normalisation when raw completionRate is null', () => {
+    const raw = makeMetrics({ completionRate: null })
+    const baseline = {
+      medianCompletionRate: 0.9,
+      medianErrorDensity: 0.1,
+      cohortSize: MIN_COHORT_SIZE_FOR_BASELINE,
+    }
+    const result = applyRoleBaseline(raw, baseline)
+    expect(result.completionRate).toBeNull()
+  })
+
+  it('skips completionRate normalisation when baseline median is null', () => {
+    const raw = makeMetrics({ completionRate: 0.8 })
+    const baseline = {
+      medianCompletionRate: null,
+      medianErrorDensity: 0.1,
+      cohortSize: MIN_COHORT_SIZE_FOR_BASELINE,
+    }
+    const result = applyRoleBaseline(raw, baseline)
+    expect(result.completionRate).toBe(0.8) // unchanged
+  })
+
+  it('skips completionRate normalisation when baseline median is zero (division guard)', () => {
+    const raw = makeMetrics({ completionRate: 0.8 })
+    const baseline = {
+      medianCompletionRate: 0,  // would cause division by zero
+      medianErrorDensity: 0.1,
+      cohortSize: MIN_COHORT_SIZE_FOR_BASELINE,
+    }
+    const result = applyRoleBaseline(raw, baseline)
+    expect(result.completionRate).toBe(0.8) // unchanged
+  })
+
+  // ── Normalisation — errorDensity ────────────────────────────────────────────
+
+  it('normalises errorDensity: raw / median (higher ratio = worse than norm)', () => {
+    const raw = makeMetrics({ errorDensity: 0.3 })
+    const baseline = {
+      medianCompletionRate: 0.7,
+      medianErrorDensity: 0.3,
+      cohortSize: MIN_COHORT_SIZE_FOR_BASELINE,
+    }
+    const result = applyRoleBaseline(raw, baseline)
+    // 0.3 / 0.3 = 1.0 — exactly at role norm
+    expect(result.errorDensity).toBeCloseTo(1.0, 10)
+  })
+
+  it('normalises errorDensity below 1 when agent has fewer errors than role norm', () => {
+    const raw = makeMetrics({ errorDensity: 0.1 })
+    const baseline = {
+      medianCompletionRate: 0.7,
+      medianErrorDensity: 0.3,
+      cohortSize: MIN_COHORT_SIZE_FOR_BASELINE,
+    }
+    const result = applyRoleBaseline(raw, baseline)
+    // 0.1 / 0.3 ≈ 0.333
+    expect(result.errorDensity).toBeCloseTo(0.1 / 0.3, 10)
+  })
+
+  it('normalises errorDensity above 1 when agent has more errors than role norm', () => {
+    const raw = makeMetrics({ errorDensity: 0.6 })
+    const baseline = {
+      medianCompletionRate: 0.4,
+      medianErrorDensity: 0.3,
+      cohortSize: MIN_COHORT_SIZE_FOR_BASELINE,
+    }
+    const result = applyRoleBaseline(raw, baseline)
+    // 0.6 / 0.3 = 2.0 — twice the role error rate
+    expect(result.errorDensity).toBeCloseTo(2.0, 10)
+  })
+
+  it('skips errorDensity normalisation when raw errorDensity is null', () => {
+    const raw = makeMetrics({ errorDensity: null })
+    const baseline = {
+      medianCompletionRate: 0.9,
+      medianErrorDensity: 0.1,
+      cohortSize: MIN_COHORT_SIZE_FOR_BASELINE,
+    }
+    const result = applyRoleBaseline(raw, baseline)
+    expect(result.errorDensity).toBeNull()
+  })
+
+  it('skips errorDensity normalisation when baseline median is null', () => {
+    const raw = makeMetrics({ errorDensity: 0.2 })
+    const baseline = {
+      medianCompletionRate: 0.8,
+      medianErrorDensity: null,
+      cohortSize: MIN_COHORT_SIZE_FOR_BASELINE,
+    }
+    const result = applyRoleBaseline(raw, baseline)
+    expect(result.errorDensity).toBe(0.2) // unchanged
+  })
+
+  it('skips errorDensity normalisation when baseline median is zero (division guard)', () => {
+    const raw = makeMetrics({ errorDensity: 0.2 })
+    const baseline = {
+      medianCompletionRate: 0.8,
+      medianErrorDensity: 0,  // would cause division by zero
+      cohortSize: MIN_COHORT_SIZE_FOR_BASELINE,
+    }
+    const result = applyRoleBaseline(raw, baseline)
+    expect(result.errorDensity).toBe(0.2) // unchanged
+  })
+
+  // ── Unchanged fields ─────────────────────────────────────────────────────────
+
+  it('leaves throughputTrend unchanged (slope value, not role-dependent)', () => {
+    const raw = makeMetrics({ throughputTrend: -2.5 })
+    const baseline = {
+      medianCompletionRate: 0.9,
+      medianErrorDensity: 0.1,
+      cohortSize: MIN_COHORT_SIZE_FOR_BASELINE,
+    }
+    const result = applyRoleBaseline(raw, baseline)
+    expect(result.throughputTrend).toBe(-2.5)
+  })
+
+  it('leaves idleSeconds unchanged (absolute wall-clock metric)', () => {
+    const raw = makeMetrics({ idleSeconds: 86400 })
+    const baseline = {
+      medianCompletionRate: 0.9,
+      medianErrorDensity: 0.1,
+      cohortSize: MIN_COHORT_SIZE_FOR_BASELINE,
+    }
+    const result = applyRoleBaseline(raw, baseline)
+    expect(result.idleSeconds).toBe(86400)
+  })
+
+  it('leaves hasEnoughData unchanged', () => {
+    const raw = makeMetrics({ hasEnoughData: true })
+    const baseline = {
+      medianCompletionRate: 0.9,
+      medianErrorDensity: 0.1,
+      cohortSize: MIN_COHORT_SIZE_FOR_BASELINE,
+    }
+    const result = applyRoleBaseline(raw, baseline)
+    expect(result.hasEnoughData).toBe(true)
+  })
+
+  // ── New role / unknown type graceful degradation ─────────────────────────────
+
+  it('degrades to flat thresholds for a brand-new role with zero baseline data', () => {
+    // Simulate a freshly added 6th role with no run history
+    const raw = makeMetrics({ completionRate: 0.6, errorDensity: 0.4 })
+    const emptyBaseline = {
+      medianCompletionRate: null,
+      medianErrorDensity: null,
+      cohortSize: 0,  // zero — below MIN_COHORT_SIZE_FOR_BASELINE
+    }
+    const result = applyRoleBaseline(raw, emptyBaseline)
+    // Should pass through unchanged — flat thresholds apply
+    expect(result).toEqual(raw)
+  })
+})
diff --git a/__tests__/unit/health-cache.test.ts b/__tests__/unit/health-cache.test.ts
index 601189a..6da31ae 100644
--- a/__tests__/unit/health-cache.test.ts
+++ b/__tests__/unit/health-cache.test.ts
@@ -3,16 +3,65 @@
  *
  * These tests use a real DB (isolated per test file via setup.ts) to verify
  * that computeAndCacheHealthMetrics correctly reads from DB and caches results.
+ *
+ * The second describe block ("with ROLE_BASELINES_ENABLED") covers the
+ * role-relative baseline integration path, seeding both agent records and
+ * task-run history to exercise the full normalization pipeline.
  */
-import { describe, it, expect, beforeEach } from 'vitest'
+import { describe, it, expect, beforeEach, afterEach } from 'vitest'
 import {
   getCachedHealthMetrics,
   computeAndCacheHealthMetrics,
   invalidateHealthCache,
 } from '@/lib/health-cache'
 import { dbAddTaskRun } from '@/lib/db/repositories/taskRunRepo'
+import { dbAddAgent } from '@/lib/db/repositories/agentRepo'
+import { invalidateRoleBaselines } from '@/lib/health-baselines'
 import { makeTestTaskRun } from '../helpers/test-utils'
 import { randomUUID } from 'crypto'
+import type { Agent } from '@/lib/types'
+
+// ---------------------------------------------------------------------------
+// Helpers
+// ---------------------------------------------------------------------------
+
+/** Creates a minimal Agent record and persists it to the DB. */
+function seedAgent(agentId: string, type: Agent['type'] = 'coder'): Agent {
+  const agent: Agent = {
+    id: agentId,
+    type,
+    prompt: '',
+    status: 'idle',
+    events: [],
+    createdAt: Date.now(),
+  }
+  dbAddAgent(agent)
+  return agent
+}
+
+/**
+ * Seeds `count` task runs for the given agentId, one minute apart and ending at
+ * Date.now() (inside the 7-day rolling window); hasEnoughData depends on `count`.
+ */
+function seedRunsForAgent(
+  agentId: string,
+  role: Agent['type'],
+  count: number,
+  status: 'done' | 'failed' = 'done',
+): void {
+  const now = Date.now()
+  for (let i = 0; i < count; i++) {
+    dbAddTaskRun(
+      makeTestTaskRun(randomUUID(), {
+        agentId,
+        role,
+        status,
+        completedAt: now - i * 60_000,
+        startedAt: now - i * 60_000 - 30_000,
+      }),
+    )
+  }
+}
 
 beforeEach(() => {
   // Clear cache between tests to avoid cross-test contamination
@@ -110,3 +159,82 @@ describe('invalidateHealthCache', () => {
     expect(() => invalidateHealthCache('ghost-agent')).not.toThrow()
   })
 })
+
+// ---------------------------------------------------------------------------
+// Role-baseline integration (ROLE_BASELINES_ENABLED=true)
+// ---------------------------------------------------------------------------
+
+describe('computeAndCacheHealthMetrics with ROLE_BASELINES_ENABLED', () => {
+  beforeEach(() => {
+    invalidateHealthCache()
+    invalidateRoleBaselines()
+    process.env.ROLE_BASELINES_ENABLED = 'true'
+  })
+
+  afterEach(() => {
+    delete process.env.ROLE_BASELINES_ENABLED
+    invalidateRoleBaselines()
+  })
+
+  it('returns raw metrics (no normalisation) when feature flag is off', () => {
+    delete process.env.ROLE_BASELINES_ENABLED // ensure flag is off
+    const agentId = randomUUID()
+    seedAgent(agentId, 'coder')
+    seedRunsForAgent(agentId, 'coder', 6)
+
+    const metrics = computeAndCacheHealthMetrics(agentId)
+    // Without baseline, completionRate should be the raw value (1.0 = all done)
+    expect(metrics.completionRate).toBe(1)
+    expect(metrics.errorDensity).toBe(0)
+  })
+
+  it('applies role-relative normalisation when feature flag is enabled', () => {
+    // Set up: role median completionRate = 0.5 (mixed cohort)
+    // Build a cohort of 3 agents for the 'researcher' role with 50% completion
+    const cohortAgents = [randomUUID(), randomUUID(), randomUUID()]
+    for (const id of cohortAgents) {
+      seedAgent(id, 'researcher')
+      // 3 done + 3 failed = 50% completion rate per agent
+      seedRunsForAgent(id, 'researcher', 3, 'done')
+      seedRunsForAgent(id, 'researcher', 3, 'failed')
+    }
+
+    // Agent under test sits BELOW the role norm: 2 done + 4 failed → raw rate 1/3
+    const agentId = randomUUID()
+    seedAgent(agentId, 'researcher')
+    seedRunsForAgent(agentId, 'researcher', 2, 'done')
+    seedRunsForAgent(agentId, 'researcher', 4, 'failed')
+
+    // Invalidate baseline cache so it recomputes from the seeded cohort
+    invalidateRoleBaselines()
+    const metrics = computeAndCacheHealthMetrics(agentId)
+
+    // Cohort rates are [1/3, 0.5, 0.5, 0.5] → median 0.5, so the normalised
+    // value is (1/3) / 0.5 = 2/3. Asserting on an uncapped ratio is what
+    // distinguishes normalised output from the raw 1/3.
+    expect(metrics.hasEnoughData).toBe(true)
+    expect(metrics.completionRate).toBeCloseTo(2 / 3, 5)
+  })
+
+  it('degrades to raw metrics when agent is not found in DB', () => {
+    // agentId with no DB record — getRoleBaseline should not be called
+    const ghostAgentId = randomUUID()
+    // Seed runs directly without creating the agent record
+    seedRunsForAgent(ghostAgentId, 'coder', 6)
+
+    const metrics = computeAndCacheHealthMetrics(ghostAgentId)
+    // Should succeed without throwing; returns raw metrics
+    expect(metrics).toBeDefined()
+    expect(metrics.hasEnoughData).toBe(true)
+  })
+
+  it('stores normalised metrics in cache and returns the same value on hit', () => {
+    const agentId = randomUUID()
+    seedAgent(agentId, 'coder')
+    seedRunsForAgent(agentId, 'coder', 6)
+
+    const computed = computeAndCacheHealthMetrics(agentId)
+    const cached = getCachedHealthMetrics(agentId, 60_000)
+    expect(cached).toEqual(computed)
+  })
+})
diff --git a/context/current-sprint.md b/context/current-sprint.md
index 1075775..18ffd31 100644
--- a/context/current-sprint.md
+++ b/context/current-sprint.md
@@ -1,12 +1,13 @@
 # Current Sprint
 
-> Last updated: 2026-03-17
+> Last updated: 2026-03-18
 
 ## Active Worktrees
 
-*(none — all branches merged, worktrees cleaned up)*
+- `feat/role-aware-health-baselines` — PR #29 open, awaiting review (depends on #26, #27)
 
 ## In Progress
+- [ ] Role-aware health baselines — PR #29 open (awaiting #26 + #27 as prerequisites)
 - [ ] Authentication & role-based access (protect dashboard + API routes)
 - [ ] Agent log search & filtering
 
diff --git a/context/decisions.md b/context/decisions.md
index 5ac2abe..9070d72 100644
--- a/context/decisions.md
+++ b/context/decisions.md
@@ -2,6 +2,14 @@
 
 > Append new entries at the top. Keep each entry ≤ 10 lines.
 
+## ADR-040 — Role-aware health baselines (2026-03-18)
+**Decision:** Add `lib/health-baselines.ts` — a dedicated module that computes and caches 30-day median baselines (completionRate, errorDensity, weeklyThroughput) per agent role. `applyRoleBaseline()` in `health.ts` normalises raw sub-metrics relative to the role's median before badge thresholds are applied, so a tester at 72% completion looks healthy if testers average 70% but alarming if they average 92%.
+**Architecture:** `BaselineNorms` interface defined in `health.ts` (not `health-baselines.ts`) to break the potential circular import. `health-baselines.ts` satisfies it structurally. 5-minute TTL cache; `MIN_COHORT_SIZE = 3` guard falls back to flat thresholds when cohort is too thin. Feature-flagged via `ROLE_BASELINES_ENABLED` env var (default off) for safe A/B rollout.
+**Degradation contract:** Cohort < 3 → flat thresholds. Agent < 5 runs → skip (hasEnoughData unchanged). Baseline metric null or zero → skip that sub-metric. DB error → return stale cache or null-metrics. New role → cohort = 0 → flat thresholds until data accumulates (≈2–3 weeks).
+**New role warning:** Adding a new AgentType requires a baseline recalibration period; documented in `docs/agent-types.md`.
+**Affects:** `lib/health-baselines.ts` (new), `lib/health.ts` (+`applyRoleBaseline`, `BaselineNorms`, `MIN_COHORT_SIZE_FOR_BASELINE`), `lib/health-cache.ts` (+role lookup, feature flag), `lib/db/repositories/taskRunRepo.ts` (+`dbGetTaskRunsByRole`), `__tests__/unit/health-baselines.test.ts` (new), `__tests__/unit/health-cache.test.ts` (extended), `docs/agent-types.md`.
+**PR:** #29 (`feat/role-aware-health-baselines`) — depends on #26, #27.
+
 ## ADR-037 — Meetings toggle in HistoryList + global meetings API (2026-03-14)
 **Decision:** Added `mode` state (`'runs' | 'meetings'`) to `HistoryList`. Meetings view shows a filterable table (date, topic, project, status, agents, tokens, cost) via a new global `GET /api/meetings` endpoint. Endpoint enriches data from `dbGetAllMeetings()` + per-meeting message aggregations (agentCount, totalTokens, totalCostUsd). Supports `from`/`to` epoch-ms and `status` query params.
 **Why:** History tab covers all runs; logical to extend it to meetings. Global endpoint needed because /history has no projectId.
diff --git a/docs/agent-types.md b/docs/agent-types.md
index ad1d1e2..18cbdcb 100644
--- a/docs/agent-types.md
+++ b/docs/agent-types.md
@@ -74,3 +74,31 @@ tester      → 'testing'
 ```
 
 Cards are automatically moved to the matching column when an agent of that role starts.
+
+## Health Baselines and New Role Addition
+
+When `ROLE_BASELINES_ENABLED=true`, each agent's health sub-metrics are
+normalised against the median performance of its role cohort (see
+`lib/health-baselines.ts` and ADR-040).
+
+**Adding a new AgentType requires a baseline recalibration period.**
+
+When a sixth (or later) role is added to the `AgentType` union:
+
+1. At deploy time, the new role has zero historical runs → `cohortSize = 0`
+   → `MIN_COHORT_SIZE` guard fires → health scores fall back to flat thresholds
+   automatically. No crash, no code change needed.
+2. After approximately **2–3 weeks** of production traffic, once the cohort
+   accumulates ≥ 3 agents each with ≥ 5 runs per week, the baseline will
+   self-populate on the next 5-minute cache refresh.
+3. During the recalibration window, health badges for the new role display raw
+   absolute values (same as before `ROLE_BASELINES_ENABLED` was set).
+
+**Action required when adding a new role:**
+- Update `AgentType` in `lib/types.ts`
+- Update `ROLE_COLORS` / `ROLE_HEX` in `lib/constants.ts`
+- Update pipeline logic in `lib/services/agentService.ts` if it joins the pipeline
+- Note the recalibration period in the PR description so the team knows to
+  monitor badge accuracy for the new role for 2–3 weeks post-deploy
+- Call `invalidateRoleBaselines()` in any migration script that back-fills
+  historical runs for the new role (so the cache doesn't serve stale data)
diff --git a/lib/db/repositories/taskRunRepo.ts b/lib/db/repositories/taskRunRepo.ts
index 10888e2..bec47af 100644
--- a/lib/db/repositories/taskRunRepo.ts
+++ b/lib/db/repositories/taskRunRepo.ts
@@ -178,3 +178,30 @@ export function dbGetRecentTaskRunsByAgent(agentId: string, since: number): Task
     .all()
     .map(rowToTaskRun)
 }
+
+/**
+ * Returns every task run whose `role` column matches and whose `completedAt` is
+ * at or after `now - windowMs` (no upper bound), ordered by completedAt DESC.
+ *
+ * Used by `lib/health-baselines.ts` to compute per-role aggregate baselines.
+ * Fetching all runs for the role in one query is more efficient than issuing N
+ * per-agent queries and merging in JS.
+ *
+ * @param role     - The agent role string (e.g. 'coder', 'researcher').
+ * @param windowMs - Look-back window in ms (default: 30 days).
+ * @param now      - Injectable epoch ms for deterministic tests.
+ */
+export function dbGetTaskRunsByRole(
+  role: string,
+  windowMs: number = 30 * 24 * 60 * 60 * 1_000,
+  now: number = Date.now(),
+): TaskRun[] {
+  const since = now - windowMs
+  return db
+    .select()
+    .from(taskRuns)
+    .where(and(eq(taskRuns.role, role), gte(taskRuns.completedAt, since)))
+    .orderBy(desc(taskRuns.completedAt))
+    .all()
+    .map(rowToTaskRun)
+}
diff --git a/lib/health-baselines.ts b/lib/health-baselines.ts
new file mode 100644
index 0000000..0602ec5
--- /dev/null
+++ b/lib/health-baselines.ts
@@ -0,0 +1,243 @@
+/**
+ * Per-role aggregate health baselines.
+ *
+ * Computes and caches median performance metrics per agent role so that
+ * `lib/health-cache.ts` can normalise per-agent health scores against the
+ * role's own norms rather than flat absolute thresholds.
+ *
+ * Intended evolution: the in-memory TTL cache here is a stepping stone. Once
+ * data volume justifies it, replace `computeRoleBaseline` with a query backed
+ * by a persistent materialised view, keeping the `getRoleBaseline` API stable.
+ *
+ * Usage:
+ *   import { getRoleBaseline, invalidateRoleBaselines } from '@/lib/health-baselines'
+ */
+import { dbGetTaskRunsByRole } from '@/lib/db/repositories/taskRunRepo'
+import { computeHealthMetrics, MIN_RUNS_THRESHOLD } from '@/lib/health'
+import type { AgentType, TaskRun } from '@/lib/types'
+
+// ---------------------------------------------------------------------------
+// Constants
+// ---------------------------------------------------------------------------
+
+/**
+ * Minimum number of agents of a given role, each with at least
+ * MIN_RUNS_THRESHOLD runs within the baseline window, required to form a
+ * statistically meaningful cohort.  Below this we return null-metric
+ * baselines and fall back to flat thresholds.
+ */
+export const MIN_COHORT_SIZE = 3
+
+/**
+ * TTL for cached role baselines (5 minutes).
+ * Baselines change slowly, so this is far longer than the per-agent 30-second TTL.
+ */
+export const BASELINE_TTL_MS = 300_000 // 5 minutes
+
+/**
+ * Look-back window for baseline computation: 30 days.
+ * Wider than the per-agent 7-day rolling window to establish stable role
+ * norms rather than sensitive short-term trends.
+ */
+export const BASELINE_WINDOW_MS = 30 * 24 * 60 * 60 * 1_000 // 30 days
+
+// ---------------------------------------------------------------------------
+// Types
+// ---------------------------------------------------------------------------
+
+/**
+ * Aggregate baseline for a single agent role derived from the trailing
+ * BASELINE_WINDOW_MS of task run history.
+ *
+ * All three medians are null when fewer than MIN_COHORT_SIZE agents qualify; a
+ * single median is also null when no qualifying agent reported that metric.
+ * Callers must treat null as "baseline unavailable for this metric".
+ */
+export interface RoleBaseline {
+  role: AgentType // the role this baseline was computed for
+  /** Median completion rate across all qualifying agents of this role. */
+  medianCompletionRate: number | null
+  /** Median error density across all qualifying agents of this role. */
+  medianErrorDensity: number | null
+  /** Median of per-agent (in-window run count / weeks in the baseline window). */
+  medianWeeklyThroughput: number | null
+  /** Size of the agent cohort behind this baseline (0 when the DB read failed). */
+  cohortSize: number
+  /** Epoch ms when this baseline was last computed. */
+  computedAt: number
+}
+
+interface CachedBaseline {
+  baseline: RoleBaseline // the value handed back to getRoleBaseline callers
+  /** Epoch ms when the entry was stored; getRoleBaseline compares it to BASELINE_TTL_MS. */
+  computedAt: number
+}
+
+// ---------------------------------------------------------------------------
+// Cache store
+// ---------------------------------------------------------------------------
+
+/** In-process cache keyed by AgentType; staleness is checked on read, entries are only removed by invalidateRoleBaselines. */
+const baselineCache = new Map<AgentType, CachedBaseline>()
+
+// ---------------------------------------------------------------------------
+// Internal helpers
+// ---------------------------------------------------------------------------
+
+/**
+ * Returns the median of `values` (mean of the two middle values when even-length),
+ * sorting a copy so the input is untouched; empty input yields null rather than NaN.
+ */
+export function medianOf(values: number[]): number | null {
+  if (values.length === 0) return null
+  const sorted = [...values].sort((a, b) => a - b) // numeric comparator; default sort is lexicographic
+  const mid = Math.floor(sorted.length / 2)
+  return sorted.length % 2 === 0
+    ? (sorted[mid - 1] + sorted[mid]) / 2
+    : sorted[mid]
+}
+
+// ---------------------------------------------------------------------------
+// Core computation
+// ---------------------------------------------------------------------------
+
+/**
+ * Computes the aggregate health baseline for one role from the trailing
+ * BASELINE_WINDOW_MS of task runs. A DB failure yields the null-metric baseline.
+ *
+ * Steps:
+ *  1. Fetch all runs for this role within the window via DB.
+ *  2. Group by agentId and keep only agents with ≥ MIN_RUNS_THRESHOLD runs.
+ *  3. Compute per-agent metrics, skipping agents whose metrics lack enough data.
+ *  4. Return null medians if < MIN_COHORT_SIZE agents remain, else per-metric medians.
+ *
+ * @param role - The AgentType to compute baselines for.
+ * @param now  - Injectable epoch ms for deterministic testing.
+ */
+export function computeRoleBaseline(
+  role: AgentType,
+  now: number = Date.now(),
+): RoleBaseline {
+  const nullBaseline: RoleBaseline = {
+    role,
+    medianCompletionRate: null,
+    medianErrorDensity: null,
+    medianWeeklyThroughput: null,
+    cohortSize: 0,
+    computedAt: now,
+  }
+
+  let runs: TaskRun[]
+  try {
+    runs = dbGetTaskRunsByRole(role, BASELINE_WINDOW_MS, now)
+  } catch {
+    // DB error — return null-metrics baseline; caller falls back to flat thresholds
+    return nullBaseline
+  }
+
+  // Group runs by agentId
+  const byAgent = new Map<string, TaskRun[]>()
+  for (const run of runs) {
+    const existing = byAgent.get(run.agentId) ?? []
+    existing.push(run)
+    byAgent.set(run.agentId, existing)
+  }
+
+  // Agents need ≥ MIN_RUNS_THRESHOLD in-window runs to qualify; the count is kept for throughput
+  const windowStart = now - BASELINE_WINDOW_MS
+  const qualifying = new Map<string, number>()
+  for (const [agentId, agentRuns] of byAgent) {
+    const windowCount = agentRuns.filter(
+      (r) => r.completedAt >= windowStart && r.completedAt <= now,
+    ).length
+    if (windowCount >= MIN_RUNS_THRESHOLD) {
+      qualifying.set(agentId, windowCount)
+    }
+  }
+
+  if (qualifying.size < MIN_COHORT_SIZE) {
+    // Cohort too small — medians would be statistically meaningless
+    return { ...nullBaseline, cohortSize: qualifying.size }
+  }
+
+  // Collect per-agent metric values for median calculation
+  const completionRates: number[] = []
+  const errorDensities: number[] = []
+  const weeklyThroughputs: number[] = []
+  const weeksInWindow = BASELINE_WINDOW_MS / (7 * 24 * 60 * 60 * 1_000)
+
+  for (const [agentId, windowCount] of qualifying) {
+    const metrics = computeHealthMetrics(byAgent.get(agentId) ?? [], now)
+    // Agents skipped here add no sample, so they must not count toward cohortSize
+    if (!metrics.hasEnoughData) continue
+
+    if (metrics.completionRate !== null) completionRates.push(metrics.completionRate)
+    if (metrics.errorDensity !== null) errorDensities.push(metrics.errorDensity)
+    // Average weekly throughput = total window runs / weeks in window
+    weeklyThroughputs.push(windowCount / weeksInWindow)
+  }
+
+  // Exactly one throughput sample is pushed per contributing agent. If skips left
+  // fewer than MIN_COHORT_SIZE contributors, report null medians with that count.
+  const contributing = weeklyThroughputs.length
+  if (contributing < MIN_COHORT_SIZE) return { ...nullBaseline, cohortSize: contributing }
+
+  return {
+    ...nullBaseline,
+    medianCompletionRate: medianOf(completionRates),
+    medianErrorDensity: medianOf(errorDensities),
+    medianWeeklyThroughput: medianOf(weeklyThroughputs),
+    cohortSize: contributing,
+  }
+}
+
+// ---------------------------------------------------------------------------
+// Public API
+// ---------------------------------------------------------------------------
+
+/**
+ * Returns a cached role baseline, recomputing if stale or absent.
+ *
+ * This function never throws.  If recomputation throws, it returns the stale
+ * cached value if one exists, otherwise a safe null-metrics baseline.
+ * NOTE(review): computeRoleBaseline swallows DB errors, so those results get cached instead.
+ */
+export function getRoleBaseline(role: AgentType): RoleBaseline {
+  const cached = baselineCache.get(role)
+  if (cached && Date.now() - cached.computedAt < BASELINE_TTL_MS) {
+    return cached.baseline
+  }
+
+  try {
+    const baseline = computeRoleBaseline(role)
+    baselineCache.set(role, { baseline, computedAt: Date.now() })
+    return baseline
+  } catch {
+    // Recomputation threw: prefer the stale entry read above over a null-metrics baseline
+    if (cached) return cached.baseline
+    return {
+      role,
+      medianCompletionRate: null,
+      medianErrorDensity: null,
+      medianWeeklyThroughput: null,
+      cohortSize: 0,
+      computedAt: Date.now(),
+    }
+  }
+}
+
+/**
+ * Drops cached baselines so the next getRoleBaseline call recomputes them.
+ * - If `role` is provided, clears only that role's cached entry.
+ * - If omitted, clears all roles.
+ *
+ * Call this when an agent is created or deleted (cohort size may shift), or
+ * when a new AgentType is added to the system.
+ */
+export function invalidateRoleBaselines(role?: AgentType): void {
+  if (role !== undefined) {
+    baselineCache.delete(role)
+  } else {
+    baselineCache.clear()
+  }
+}
diff --git a/lib/health-cache.ts b/lib/health-cache.ts
index 6096eb1..7526a3a 100644
--- a/lib/health-cache.ts
+++ b/lib/health-cache.ts
@@ -1,5 +1,7 @@
-import { computeHealthMetrics, ROLLING_WINDOW_MS } from '@/lib/health'
+import { computeHealthMetrics, applyRoleBaseline } from '@/lib/health'
 import { dbGetTaskRunsByAgent } from '@/lib/db/repositories/taskRunRepo'
+import { dbGetAgent } from '@/lib/db/repositories/agentRepo'
+import { getRoleBaseline } from '@/lib/health-baselines'
 import type { AgentHealthMetrics } from '@/lib/types'
 
 // ---------------------------------------------------------------------------
@@ -37,11 +39,31 @@ export function getCachedHealthMetrics(
 
 /**
  * Queries the DB for all task runs belonging to this agent, computes health
- * metrics, stores the result in the cache, and returns it.
+ * metrics, applies role-relative baseline normalisation when the
+ * `ROLE_BASELINES_ENABLED` environment variable is set to `"true"`, stores
+ * the result in the cache, and returns it.
+ *
+ * Role-baseline normalisation is gated behind the feature flag so the code
+ * can be deployed without activating it, enabling an A/B comparison of badge
+ * behaviour and an emergency rollback without a code revert.
  */
 export function computeAndCacheHealthMetrics(agentId: string): AgentHealthMetrics {
   const runs = dbGetTaskRunsByAgent(agentId)
-  const metrics = computeHealthMetrics(runs)
+  const raw = computeHealthMetrics(runs)
+
+  let metrics = raw
+
+  if (process.env.ROLE_BASELINES_ENABLED === 'true') {
+    // Look up the agent's role so we can fetch the correct cohort baseline.
+    // If dbGetAgent finds no agent for this id, normalisation is skipped entirely
+    // and the raw (flat-threshold) metrics are cached and returned unchanged.
+    const agent = dbGetAgent(agentId)
+    if (agent) {
+      const baseline = getRoleBaseline(agent.type)
+      metrics = applyRoleBaseline(raw, baseline)
+    }
+  }
+
   cache.set(agentId, { metrics, computedAt: Date.now() })
   return metrics
 }
diff --git a/lib/health.ts b/lib/health.ts
index 958e3de..1d5f528 100644
--- a/lib/health.ts
+++ b/lib/health.ts
@@ -1,5 +1,24 @@
 import type { TaskRun, AgentHealthMetrics } from '@/lib/types'
 
+// ---------------------------------------------------------------------------
+// Role-baseline normalisation types
+// ---------------------------------------------------------------------------
+
+/**
+ * Structural subset of RoleBaseline used by applyRoleBaseline.
+ *
+ * Defining it here (rather than importing from health-baselines.ts) avoids a
+ * circular dependency: health-baselines.ts → health.ts → health-baselines.ts.
+ * Any object that satisfies this shape — including RoleBaseline — is accepted
+ * by applyRoleBaseline thanks to TypeScript's structural typing.
+ */
+export interface BaselineNorms {
+  medianCompletionRate: number | null // null or ≤ 0 → completionRate is left un-normalised
+  medianErrorDensity: number | null // null or ≤ 0 → errorDensity is left un-normalised
+  /** Number of agents that contributed; guards against thin cohorts. */
+  cohortSize: number
+}
+
 // ---------------------------------------------------------------------------
 // Constants
 // ---------------------------------------------------------------------------
@@ -117,3 +136,75 @@ export function computeHealthMetrics(
     hasEnoughData: true,
   }
 }
+
+// ---------------------------------------------------------------------------
+// Role-baseline normalisation
+// ---------------------------------------------------------------------------
+
+/**
+ * The minimum cohort size required to treat a baseline as meaningful.
+ * Mirrors MIN_COHORT_SIZE in health-baselines.ts — keep the two values in sync.
+ * Duplicated so that health.ts needs no import from health-baselines.ts.
+ */
+export const MIN_COHORT_SIZE_FOR_BASELINE = 3
+
+/**
+ * Adjusts raw health metrics relative to the role's aggregate norms.
+ *
+ * Returns `raw` itself, **unchanged**, when:
+ *  - `baseline` is null (caller signals "no baseline available")
+ *  - `baseline.cohortSize < MIN_COHORT_SIZE_FOR_BASELINE` (cohort too thin)
+ *  - `raw.hasEnoughData` is false (not enough per-agent data to compare)
+ *
+ * Normalisation rules (only for metrics where both raw value and baseline
+ * median are non-null and the baseline median is > 0):
+ *  - `completionRate`  → raw / medianCompletionRate, capped above at 1
+ *    (1.0 = at or above role norm; <1.0 = below norm; no lower clamp is applied)
+ *  - `errorDensity`    → raw / medianErrorDensity, uncapped (higher ratio = worse)
+ *    (1.0 = at role norm; >1.0 = worse than norm; <1.0 = better than norm)
+ *  - `throughputTrend` → unchanged (already a slope/delta, not an absolute)
+ *  - `idleSeconds`     → unchanged (wall-clock metric, not role-dependent)
+ *
+ * The AgentHealthMetrics shape is **not** changed and `raw` is never mutated: past
+ * the gates a copy is returned with only the two values above replaced.
+ *
+ * @internal — exported for unit testing; consume via health-cache.ts
+ */
+export function applyRoleBaseline(
+  raw: AgentHealthMetrics,
+  baseline: BaselineNorms | null,
+): AgentHealthMetrics {
+  // Gate 1: no baseline or cohort too small → flat thresholds apply unchanged
+  if (!baseline || baseline.cohortSize < MIN_COHORT_SIZE_FOR_BASELINE) return raw
+
+  // Gate 2: agent lacks sufficient data → nothing to normalise
+  if (!raw.hasEnoughData) return raw
+
+  let { completionRate, errorDensity } = raw
+
+  // Normalise completionRate to a fraction of the role median, capped at 1
+  // Guard: skip if either value is null or the baseline median is not > 0
+  if (
+    completionRate !== null &&
+    baseline.medianCompletionRate !== null &&
+    baseline.medianCompletionRate > 0
+  ) {
+    completionRate = Math.min(completionRate / baseline.medianCompletionRate, 1)
+  }
+
+  // Normalise errorDensity to a multiple of the role median (uncapped; >1 = worse)
+  // Guard: skip if either value is null or the baseline median is not > 0
+  if (
+    errorDensity !== null &&
+    baseline.medianErrorDensity !== null &&
+    baseline.medianErrorDensity > 0
+  ) {
+    errorDensity = errorDensity / baseline.medianErrorDensity
+  }
+
+  return {
+    ...raw,
+    completionRate,
+    errorDensity,
+  }
+}