diff --git a/.STATUS b/.STATUS index a7401ad0..d4903d04 100644 --- a/.STATUS +++ b/.STATUS @@ -1,14 +1,50 @@ status: Active progress: 100 -next: Start v2.18.0 planning — post-release maintenance or next feature -target: v2.17.0 — Canvas Enhancements & Bug Fixes (released) -milestone: v2.17.0 released 2026-03-04 | 33 commands | 3,340 tests -last_session: 2026-03-04 - v2.17.0 release (Session 88) -complexity: Low +next: Create release PR (dev → main) for v2.18.0 +target: v2.18.0 — Rich Exam LaTeX Format (merged to dev, ready for release) +milestone: v2.18.0 merged to dev 2026-05-12 | 33 commands | 3,400 tests +last_session: 2026-05-12 - PR #103 review + 6 fixes + merge to dev (Session 90) +complexity: Medium risk_level: Low -dependencies: Claude API, js-yaml, ajv, ajv-keywords, ajv-formats, Quarto, flow-cli, R/Rscript (runtime), examark (Canvas QTI) +dependencies: Claude API, js-yaml, ajv, ajv-keywords, ajv-formats, Quarto, flow-cli, R/Rscript (runtime), examark (Canvas QTI), jstat (F-distribution) worktree: none +# Session 90 Summary (2026-05-12) + +**Branch:** dev (PR #103 merged at 686bc93) | **Tests:** 3,400 (3,263 Jest + 137 node:test), 143 suites + +**Completed (PR #103 code review + fixes + merge):** +- Code review of PR #103 (rich exam LaTeX, +2322/-44, 5 milestones) — identified 6 issues +- Fix 1: Replaced stateful /g-regex .test() with startsWith() in interaction-aware-pass.js +- Fix 2: Added input validation to bracket-table-generator.js (throws on invalid df pairs) +- Fix 3: Extended interaction-aware reframe to include question.rubric (essay/short-answer) +- Fix 4: YAML title escaping via JSON.stringify() instead of shallow quote-only replace +- Fix 5: Added PromptLoader.checkMinScholarVersion() with correct min-version semantics; + bridge now uses it instead of checkVersion() (which has prompt_version semantics). + Fixes pre-existing stat-545-prompt-discovery integration test failure. 
+- Fix 6: REFCARD ASCII version v2.16.0 → v2.18.0; mkdocs.yml counts refreshed to 3,400/143 +- Added 11 new tests (6 checkMinScholarVersion + 2 rubric/YAML + 1 bracket validation + 2 misc) +- All 3,400 tests pass (3,263 Jest + 137 node:test, 8 skipped, 0 failures) +- Squash-merged to dev (commit 686bc93); remote branch deletion required manual cleanup + +# Session 89 Summary (2026-05-12) + +**Branch:** feature/exam-rich-latex | **Tests:** 3,360 (3,223 Jest + 137 node:test), 141 suites + +**Completed (v2.18.0 Wave 5 — pre-PR release prep):** +- Bumped version 2.17.0 → 2.18.0 in package.json + .claude-plugin/plugin.json +- Updated mkdocs.yml extra.scholar block (version, prev_version, release_date 2026-05-12, test_count 3,360, jest_count 3,223, suite_count 141) +- Ran version-sync.js: 16 changes / 6 skips / 0 errors across docs and SCHOLAR_VERSION constant +- CHANGELOG.md: added [2.18.0] — 2026-05-12 section + link reference at bottom +- Created docs/WHATS-NEW-v2.18.0.md +- Added v2.18.0 entry to mkdocs.yml Release Notes nav +- Final test sweep confirmed counts (1 pre-existing unrelated failure in stat-545-prompt-discovery) + +**v2.18.0 highlights (Waves 1-4, already committed):** +- New `--format exam-rich-latex` for /teaching:exam (LaTeX exam class, evidence-based policy) +- F-critical bracket table (jstat), s-value calibration table, interaction-aware language pass +- New exam template options: policy, aids_level, textbook_citation_prefix, interaction_aware_language + # Session 87 Summary (2026-03-04) **Commits:** 6 on feature/canvas-enhancements | **PRs:** none yet | **Worktree:** created + ORCHESTRATE committed @@ -286,6 +322,7 @@ worktree: none | Version | Date | Highlights | |---------|------|-----------| +| v2.18.0 | 2026-05-12 | Rich exam LaTeX format: --format exam-rich-latex, F-critical bracket table (jstat), s-value calibration, interaction-aware language, 3,400 tests | | v2.17.0 | 2026-03-04 | Canvas enhancements: pre-flight validation, shared 
canvas-preflight.js helper, exam --format canvas, SCHOLAR_VERSION fix, 3,340 tests | | v2.16.0 | 2026-02-27 | Canvas QTI pipeline: /teaching:canvas, QMD parser, 10 question types, image bundling, examark integration, 3,302 tests | | v2.15.0 | 2026-02-23 | Insights-driven enhancements: /teaching:preflight, validate-pipeline, send-output, CI merge-conflict guard, 3,092 tests | diff --git a/.claude-plugin/plugin.json b/.claude-plugin/plugin.json index e163212c..97560ab4 100644 --- a/.claude-plugin/plugin.json +++ b/.claude-plugin/plugin.json @@ -1,6 +1,6 @@ { "name": "scholar", - "version": "2.17.0", + "version": "2.18.0", "description": "Academic workflows for research and teaching - literature management, manuscript writing, simulation design, and course material generation", "author": { "name": "Data-Wise" diff --git a/.gitignore b/.gitignore index c1a28abd..d11c8a6f 100644 --- a/.gitignore +++ b/.gitignore @@ -42,3 +42,6 @@ src/discovery/cache.json .claude/ test-output-lecture-e2e/ site/ + +# M4 rich-LaTeX validation artifacts (generated, do not commit) +out/m4-validation/ diff --git a/CHANGELOG.md b/CHANGELOG.md index e7ce7634..bdfd1b37 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,22 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +## [2.18.0] — 2026-05-12 + +### Added + +- **Rich exam LaTeX format** — new `--format exam-rich-latex` for `/teaching:exam`. Emits a Quarto `.qmd` using the LaTeX `exam` document class with evidence-based reporting policy ($s$-values, compatibility intervals, no significance language), F-critical bracket table, $s$-value calibration table, Dean (2017) citation pattern, and interaction-aware language (reframes "main effects" to "marginal mean differences" in factorial designs containing interaction terms). 
+- `policy`, `aids_level`, `textbook_citation_prefix`, and `interaction_aware_language` options on the exam template (`src/teaching/templates/exam.json`) plus matching `teach-config.yml` `scholar.teaching_style.exam` override block. +- `jstat` runtime dependency (`^1.9.6`) for F-distribution critical-value computation in the bracket-table generator. +- New formatter helpers — `formatters/helpers/bracket-table-generator.js` (jstat-backed F* values, 2-decimal precision), `formatters/helpers/s-calibration-table.js` (static Greenland/Rafi $s$-value calibration), `formatters/helpers/interaction-aware-pass.js` (regex + AI-stub interaction-aware reframer). +- Validation harness — `tests/teaching/formatters/fixtures/stat-545-final-spring-2026.fixture.json` + `scripts/m4-validate-rich-format.js` for structural integrity and PDF compilation checks. + +### Fixed + +- `exam-rich-latex` formatter now correctly places LaTeX preamble in Quarto's `format.pdf.include-in-header.text:` YAML field rather than the document body, so `lualatex` compilation succeeds end-to-end. + +--- + ## [2.17.0] — 2026-03-04 ### Fixed @@ -1349,6 +1365,7 @@ text.replace(/\$([^$]+)\$/g, (match, content) => { --- +[2.18.0]: https://github.com/Data-Wise/scholar/compare/v2.17.0...v2.18.0 [2.17.0]: https://github.com/Data-Wise/scholar/compare/v2.16.0...v2.17.0 [2.16.0]: https://github.com/Data-Wise/scholar/compare/v2.15.0...v2.16.0 [2.15.0]: https://github.com/Data-Wise/scholar/compare/v2.14.0...v2.15.0 diff --git a/CLAUDE.md b/CLAUDE.md index 1ffa3276..2c500477 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -54,14 +54,14 @@ This file contains project-specific instructions for Claude Code when working on ### Overview Scholar is a Claude Code plugin for academic workflows with research + teaching capabilities. 
-### Current State (v2.17.0 released; v2.17.0 in progress on feature/canvas-enhancements) +### Current State (v2.18.0 released) - **Research:** 14 commands, 17 skills - **Teaching:** 18 commands (`/teaching:exam`, `/teaching:quiz`, `/teaching:slides`, `/teaching:assignment`, `/teaching:solution`, `/teaching:syllabus`, `/teaching:rubric`, `/teaching:feedback`, `/teaching:demo`, `/teaching:lecture`, `/teaching:validate`, `/teaching:validate-r`, `/teaching:diff`, `/teaching:sync`, `/teaching:migrate`, `/teaching:config`, `/teaching:preflight`, `/teaching:canvas`) - **Hub:** 1 command (`/scholar:hub` — command discovery and navigation, with flag discovery) - **Tests:** ~3,302 tests - **Docs:** MkDocs site on GitHub Pages - **Integration:** Works with flow-cli for workflow automation -- **Release:** https://github.com/Data-Wise/scholar/releases/tag/v2.17.0 +- **Release:** https://github.com/Data-Wise/scholar/releases/tag/v2.18.0 ### Key Directories diff --git a/README.md b/README.md index fdcc4228..d10ac688 100644 --- a/README.md +++ b/README.md @@ -5,14 +5,14 @@ A comprehensive Claude Code plugin for academic workflows combining research and teaching. Features unified Plugin + MCP architecture with 33 slash commands and research skills. 
[![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT) -[![Version](https://img.shields.io/badge/version-2.17.0-blue.svg)](https://github.com/Data-Wise/scholar/releases/tag/v2.17.0) -[![Tests](https://img.shields.io/badge/tests-3340%20passing-brightgreen.svg)](https://github.com/Data-Wise/scholar) +[![Version](https://img.shields.io/badge/version-2.18.0-blue.svg)](https://github.com/Data-Wise/scholar/releases/tag/v2.18.0) +[![Tests](https://img.shields.io/badge/tests-3400%20passing-brightgreen.svg)](https://github.com/Data-Wise/scholar) --- ## Features -### 📚 31 Slash Commands +### 📚 33 Slash Commands **Literature Management (4 commands)** - `/arxiv ` - Search arXiv for papers (top-level command) @@ -36,11 +36,11 @@ A comprehensive Claude Code plugin for academic workflows combining research and - `/scholar:analysis-plan` - Create statistical analysis plans - `/scholar:method-scout ` - Scout statistical methods for research problems -**Teaching (15 commands)** +**Teaching (18 commands)** - `/teaching:quiz ` - Generate quiz questions with answer keys ✅ -- `/teaching:exam ` - Create comprehensive exams with rubrics ✅ +- `/teaching:exam ` - Create comprehensive exams with rubrics. 
NEW in v2.18.0: `--format exam-rich-latex` for evidence-based statistics exams ✅ - `/teaching:assignment ` - Create homework assignments with solutions ✅ -- `/teaching:solution ` - Generate standalone solution keys from assignment files ✅ NEW +- `/teaching:solution ` - Generate standalone solution keys from assignment files ✅ - `/teaching:syllabus ` - Generate comprehensive course syllabus ✅ - `/teaching:slides ` - Create lecture slides with examples ✅ - `/teaching:rubric ` - Generate detailed grading rubrics ✅ @@ -48,10 +48,13 @@ A comprehensive Claude Code plugin for academic workflows combining research and - `/teaching:demo [path]` - Create demo course environment with sample materials ✅ - `/teaching:lecture ` - Generate comprehensive lecture notes ✅ - `/teaching:validate ` - Validate YAML configuration files (multi-level) ✅ +- `/teaching:validate-r ` - Validate R code chunks in `.qmd` files ✅ - `/teaching:diff ` - Compare YAML and JSON sync status ✅ - `/teaching:sync [options]` - Synchronize YAML to JSON ✅ - `/teaching:migrate` - Batch migrate YAML configs from v1 to v2 schema ✅ -- `/teaching:config ` - Manage prompts, config, and provenance ✅ NEW +- `/teaching:config ` - Manage prompts, config, and provenance ✅ +- `/teaching:preflight` - Pre-release health checks for Scholar projects ✅ +- `/teaching:canvas ` - Convert exam files to Canvas QTI format ✅ **Command Discovery (1 command)** - `/scholar:hub [argument]` - Browse all commands, drill into categories, get usage details ✅ NEW @@ -108,7 +111,7 @@ This architecture eliminates IPC overhead by sharing core logic directly between - `src/teaching/config/` - Configuration management - `src/teaching/validators/` - Multi-layer validation (Schema + LaTeX + Completeness) - `src/teaching/ai/` - AI content generation with retry logic -- `tests/teaching/` - 3,340 unit tests (100% passing) +- `tests/teaching/` - 3,400 unit tests (100% passing) See [Phase 0 Architecture](docs/architecture/PHASE-0-FOUNDATION.md) for 
detailed documentation. @@ -137,9 +140,9 @@ The Homebrew formula automatically: - Makes it available in Claude Code CLI and Claude Desktop - No additional configuration needed -**Latest version:** v2.17.0 (released 2026-02-09) +**Latest version:** v2.18.0 (released 2026-05-12) - 33 commands (18 teaching + 14 research + 1 hub) -- 3,340 tests with 100% pass rate +- 3,400 tests with 100% pass rate - Comprehensive documentation (95% coverage) ### Option 2: Manual Installation (Local Development) @@ -193,7 +196,7 @@ cd ~/projects/dev-tools/scholar - 33 commands present (18 teaching + 14 research + 1 hub) - 17 skills present - No hardcoded paths -- v2.17.0 verified +- v2.18.0 verified ``` ### Using in Claude Code CLI @@ -506,6 +509,7 @@ Create comprehensive exams with rubrics. ```bash /teaching:exam midterm --questions 20 --duration 90 /teaching:exam final --topics "regression,ANOVA,hypothesis testing" +/teaching:exam stat-545-final --format exam-rich-latex --topics "two-way ANOVA" ``` **Options:** @@ -513,8 +517,9 @@ Create comprehensive exams with rubrics. - `--questions N` - Number of questions - `--duration N` - Duration in minutes - `--topics "t1,t2"` - Specific topics to cover +- `--format FORMAT` - md (default), tex, qmd, canvas, json, exam-rich-latex *(v2.18.0)* -**Output:** JSON with questions, answer key, grading rubric +**Output:** JSON with questions, answer key, grading rubric. `exam-rich-latex` emits a Quarto `.qmd` using the LaTeX `exam` class with an evidence-based reporting policy, F-critical bracket table, and $s$-value calibration — see the [Rich Exam LaTeX tutorial](docs/tutorials/teaching/rich-exam-format.md). 
--- diff --git a/docs/API-REFERENCE.md b/docs/API-REFERENCE.md index e5fdb719..264077c2 100644 --- a/docs/API-REFERENCE.md +++ b/docs/API-REFERENCE.md @@ -4,9 +4,19 @@ render_macros: false # Scholar Plugin - API Reference -> **Version:** 2.17.0 +> **Version:** 2.18.0 > **Last Updated:** 2026-03-04 > **Audience:** Plugin developers and contributors +> +> **📌 TL;DR - 30 Second Version** +> +> **What:** Full developer API for Scholar's internal modules — Generators, Formatters, Parsers, Validators, Discovery, and Prompt helpers. +> +> **Find a module:** Jump via the Table of Contents below. Each section follows the same shape: module path, exported functions, parameter tables, return types, usage example. +> +> **New in v2.18.0:** [PromptLoader Version Helpers](#promptloader-version-helpers-v2180) (the `checkVersion` vs `checkMinScholarVersion` semantic distinction). +> +> **Conventions:** All paths are relative to `src/teaching/` unless stated otherwise. JavaScript code blocks use ESM (`import`/`export`). Type annotations follow JSDoc `{Type}` syntax. This document provides a comprehensive API reference for developers contributing to or extending the Scholar plugin. @@ -1334,6 +1344,56 @@ Formats content as LaTeX document using exam class. --- +### Exam Rich-LaTeX Formatter *(v2.18.0)* + +**Location:** `src/teaching/formatters/exam-rich-latex.js` + +#### `ExamRichLatexFormatter` + +Extends `LaTeXFormatter`. Produces a Quarto `.qmd` using the LaTeX `exam` document class with an evidence-based reporting policy block, an F-critical bracket table, an $s$-value calibration table, and an interaction-aware language pass for factorial designs. + +**File extension:** `.qmd` (overrides parent's `.tex`) + +#### `format(content, options)` + +Emit the complete `.qmd` document. 
Calls (in order): `addExamConfig()`, `addReportingPolicy()`, `addBracketTable()`, `addSCalibrationTable()`, `addInstructions()` (parent), `addExamHeader()` (parent), then the question environment via `formatQuestion()`. + +**Parameters:** + +| Option | Type | Default | Description | +|--------|------|---------|-------------| +| `answers` | boolean | `true` | Include answer key in question rendering (parent behavior) | + +#### `formatQuestion(question, answerKey, includeAnswers, output)` *(override)* + +Reframes the question via `reframeQuestionLanguage()` before delegating to `super.formatQuestion()`. The reframe runs only when `question.designModel?.hasInteractionTerm === true`. + +#### `reframeQuestionLanguage(question)` *(private)* + +Returns a shallow copy of the question with the interaction-aware pass applied to: + +- `question.text` +- `question.parts[].prompt` (for multi-part questions) +- `question.rubric` (essay/short-answer) + +The pass leaves `answer_key` content, top-level `instructions`, `formula_sheet`, and `solution_hint` untouched. Math regions (`$…$` and `\(…\)`) are preserved byte-for-byte. + +#### Helpers + +| Helper | Location | Purpose | +|--------|----------|---------| +| `generateBracketTable(dfPairs, options?)` | `helpers/bracket-table-generator.js` | F-critical values via `jstat.centralF.inv`. Validates that `df1, df2` are finite integers ≥ 1; dedupes and sorts. | +| `generateSCalibrationTable(options?)` | `helpers/s-calibration-table.js` | Static Greenland/Rafi $s$-value tabular (0–13.3 bits with $p$-value anchors). | +| `applyInteractionAwarePass(text, questionMeta, options?)` | `helpers/interaction-aware-pass.js` | Regex reframer with math-region masking. Idempotent. `options.aiProvider` is a stub for future AI re-prompting. | + +**Format gate:** the legacy `buildExamPrompt()` path in `generators/exam.js` is byte-identical for all other formats. 
Only `--format exam-rich-latex` (or `--format rich-latex`) triggers `buildRichExamPrompt()`, which loads the `exam-rich.md` prompt via `PromptLoader` and substitutes `{{topic}}`, `{{aids_level}}`, `{{textbook_citation_prefix}}`, `{{interaction_aware_language}}`, `{{df_pairs}}` template variables. + +**Required runtime dep:** `jstat ^1.9.6` (added in v2.18.0). + +→ End-to-end flow: [Architecture Diagrams §10](ARCHITECTURE-DIAGRAMS.md#10-rich-exam-latex-pipeline-v2180) · [Rich Exam tutorial](tutorials/teaching/rich-exam-format.md) + +--- + ### Examark Formatter **Location:** `src/teaching/formatters/examark.js` @@ -2656,3 +2716,95 @@ Generate a formatted auto-tip string for first-use suggestion. **Returns:** `string` — Formatted tip. Teaching tips now mention `-i` flag. --- + +## PromptLoader Version Helpers (v2.18.0) + +The `PromptLoader` class exposes two version-comparison helpers with **different semantics**. Plugin authors must pick the right one for the field they're checking — they are not interchangeable. + +**Module:** `src/teaching/ai/prompt-loader.js` + +### Semantic distinction + +| Field in prompt frontmatter | Meaning | Use this helper | +|-----------------------------|---------|-----------------| +| `prompt_version: "3.3"` | The prompt document's own revision number (e.g. 3rd major revision, 3rd minor edit). Unrelated to Scholar versioning. | `checkVersion()` | +| `min_scholar_version: "2.16.0"` | Minimum Scholar runtime required to use this prompt. | `checkMinScholarVersion()` | + +Passing `min_scholar_version` to `checkVersion()` is a **bug**: `checkVersion` warns whenever Scholar is newer than the supplied version, which is wrong for a minimum-requirement field (a prompt declaring `min_scholar_version: "2.16.0"` is fully compatible with Scholar 2.18 — not stale). This was the root cause of the pre-existing `stat-545-prompt-discovery` integration failure fixed in v2.18.0. 
+ +### `PromptLoader.checkVersion(promptVersion, scholarVersion)` + +Compares a prompt's own revision number (`prompt_version`) against Scholar. Used by `/teaching:preflight` and the legacy upgrade-check path. + +**Parameters:** + +| Name | Type | Description | +|------|------|-------------| +| `promptVersion` | `string` | Prompt revision, e.g. `"2.0"` or `"3.3"` | +| `scholarVersion` | `string` | Current Scholar version, e.g. `"2.18.0"` | + +**Returns:** `{ compatible: boolean, severity: 'none' | 'warning' | 'error', message: string, suggestion?: string }` + +| Severity | When | +|----------|------| +| `error` | Prompt major > Scholar major (prompt requires newer Scholar) | +| `warning` | Prompt major < Scholar major, OR same major but prompt minor < Scholar minor (prompt is "stale") | +| `none` | Same major.minor | + +**Example:** + +```javascript +import { PromptLoader } from './ai/prompt-loader.js'; + +const result = PromptLoader.checkVersion('2.0', '2.18.0'); +// → { compatible: true, severity: 'warning', +// message: 'Prompt v2.0 is older than Scholar v2.18.0', +// suggestion: "Run 'teach templates update' to get the latest prompts" } +``` + +### `PromptLoader.checkMinScholarVersion(minScholarVersion, scholarVersion)` *(NEW in v2.18.0)* + +Checks whether the current Scholar runtime satisfies a prompt's `min_scholar_version` declaration. Used by `PromptConfigBridge` when loading configured prompts. + +**Parameters:** + +| Name | Type | Description | +|------|------|-------------| +| `minScholarVersion` | `string` | Minimum Scholar version declared by the prompt, e.g. `"2.16.0"` | +| `scholarVersion` | `string` | Current Scholar runtime, e.g. 
`"2.18.0"` | + +**Returns:** `{ compatible: boolean, severity: 'none' | 'error', message: string }` + +| Severity | When | +|----------|------| +| `error` | Scholar runtime is **below** the declared minimum (any component: major, minor, or patch) | +| `none` | Scholar runtime is at or above the declared minimum | + +**Crucially:** this helper **never emits warnings**. A prompt declaring `min_scholar_version: "1.0.0"` running on Scholar 2.18 is fully compatible — not stale. Use `checkVersion()` if you want stale-prompt warnings. + +**Example:** + +```javascript +import { PromptLoader } from './ai/prompt-loader.js'; + +const ok = PromptLoader.checkMinScholarVersion('2.16.0', '2.18.0'); +// → { compatible: true, severity: 'none', +// message: 'Scholar runtime satisfies prompt requirement' } + +const tooOld = PromptLoader.checkMinScholarVersion('3.0.0', '2.18.0'); +// → { compatible: false, severity: 'error', +// message: 'Prompt requires Scholar >= v3.0.0, but you have v2.18.0' } +``` + +**Comparison rule:** patch-level differences matter. `checkMinScholarVersion('2.16.1', '2.16.0')` returns `severity: 'error'` because `2.16.0 < 2.16.1`. + +### Picking the right helper — decision table + +| You want to know… | Check this field | Use this helper | +|-------------------|------------------|-----------------| +| "Is this prompt template stale relative to the latest Scholar?" | `prompt_version` | `checkVersion()` | +| "Can this prompt even run on this Scholar version?" | `min_scholar_version` | `checkMinScholarVersion()` | +| "Should I show a 'newer Scholar available' upgrade tip?" | `prompt_version` | `checkVersion()` (warning severity) | +| "Should I refuse to load the prompt and tell the user to upgrade?" 
| `min_scholar_version` | `checkMinScholarVersion()` (error severity) | + +--- diff --git a/docs/ARCHITECTURE-DIAGRAMS.md b/docs/ARCHITECTURE-DIAGRAMS.md index 46f9554a..a8f02050 100644 --- a/docs/ARCHITECTURE-DIAGRAMS.md +++ b/docs/ARCHITECTURE-DIAGRAMS.md @@ -2,6 +2,14 @@ Comprehensive visual documentation of the Scholar plugin's internal architecture, including data flow, configuration hierarchy, and multi-stage generation pipelines. +> **📌 TL;DR - 30 Second Version** +> +> **What:** 10 Mermaid pipelines covering the full Scholar runtime — from CLI invocation through generator/formatter chains, AI prompt assembly, configuration resolution, and validation. +> +> **For visual learners:** every section opens with a diagram; supporting prose explains intent and design tradeoffs. +> +> **Start here:** Section 1 (Generator → Formatter → Output) if you're new; Section 5 (AI Prompt Flow) if you're debugging prompt rendering; [Section 10 (Rich Exam LaTeX Pipeline)](#10-rich-exam-latex-pipeline-v2180) for the newest v2.18.0 feature. + --- ## 1. Generator → Formatter → Output Flow @@ -1157,3 +1165,125 @@ sequenceDiagram SO-->>CMD: Preview + send instructions CMD->>HIM: compose_email(to, subject, body) ``` + +--- + +## 10. Rich Exam LaTeX Pipeline (v2.18.0) + +End-to-end flow for `/teaching:exam --format exam-rich-latex`. Two phases: **(a)** prompt assembly + AI generation, **(b)** formatter composition into a Quarto `.qmd` with the `exam` document class. 
+ +### 10.1 Component View + +```mermaid +flowchart TD + CMD[/teaching:exam --format exam-rich-latex/] --> GEN[generateExam] + GEN -->|format gate| BRP[buildRichExamPrompt] + GEN -->|legacy formats| LBP[buildExamPrompt] + BRP --> PL[PromptLoader.load 'exam-rich'] + PL -->|project override| PROJ[.flow/templates/prompts/exam-rich.md] + PL -->|fallback| DEF[plugin default: ai/prompts/default/exam-rich.md] + BRP -->|merge vars| AI[AIProvider.generate] + AI -->|exam JSON| FMT[ExamRichLatexFormatter.format] + + FMT --> PRE[YAML preamble: exam class, lualatex, \\printanswers toggle] + FMT --> POL[Reporting-policy quote block] + FMT --> BT[addBracketTable] + FMT --> SC[addSCalibrationTable] + FMT --> Q[\\begin{questions}] + + BT -->|df_pairs| BTG[bracket-table-generator.js] + BTG -->|jstat.centralF.inv| JST([jstat ^1.9.6]) + + SC --> SCG[s-calibration-table.js — static Greenland/Rafi] + + Q --> FQ[formatQuestion — override] + FQ --> IAP[applyInteractionAwarePass] + IAP -->|hasInteractionTerm=true| MASK[mask $...$ math regions] + MASK --> RFR[reframe 'main effects' → 'marginal mean differences'] + FQ --> SUP[super.formatQuestion — LaTeXFormatter] + + FMT -->|Quarto .qmd| OUT[stat-545-midterm.qmd] + OUT -->|optional| QR[quarto render --to pdf] + QR --> PDF[stat-545-midterm.pdf] +``` + +### 10.2 Sequence: Prompt Assembly (Phase A) + +```mermaid +sequenceDiagram + participant CMD as /teaching:exam + participant GEN as generateExam + participant BRP as buildRichExamPrompt + participant PL as PromptLoader + participant CFG as teach-config.yml + participant AI as AIProvider + + CMD->>GEN: options { format: 'exam-rich-latex', topics, ... 
} + GEN->>GEN: format gate: matches 'exam-rich-latex' | 'rich-latex' + GEN->>BRP: buildRichExamPrompt(examOptions, config, options) + BRP->>CFG: read teaching_style.exam.{policy, aids_level, citation, interaction} + BRP->>BRP: resolve vars (caller > config > defaults) + BRP->>PL: load('exam-rich', cwd) + PL-->>BRP: { body, metadata } + BRP->>BRP: substitute {{topic}}, {{aids_level}}, {{df_pairs}}, ... + BRP-->>GEN: composed prompt + GEN->>AI: generate(prompt, { format: 'json' }) + AI-->>GEN: exam JSON (questions, answer_key, designModel per Q) +``` + +### 10.3 Sequence: Formatter Composition (Phase B) + +```mermaid +sequenceDiagram + participant GEN as generateExam result + participant FMT as ExamRichLatexFormatter + participant IAP as interaction-aware-pass + participant BTG as bracket-table-generator + participant SCG as s-calibration-table + participant SUP as LaTeXFormatter (parent) + + GEN->>FMT: format(content) + FMT->>FMT: emit YAML preamble (exam class, lualatex, \\printanswers commented) + FMT->>FMT: emit reporting-policy quote + alt content.exam_metadata.df_pairs present + FMT->>BTG: generateBracketTable(df_pairs) + BTG->>BTG: validate (df ≥ 1, finite, integer) + BTG->>BTG: dedupe + sort by (df1, df2) + BTG-->>FMT: LaTeX tabular + else df_pairs missing + FMT->>FMT: emit "% bracket table omitted" comment + end + FMT->>SCG: generateSCalibrationTable() + SCG-->>FMT: static tabular (0–13.3 bits) + loop for each question + FMT->>FMT: reframeQuestionLanguage(question) + alt designModel.hasInteractionTerm === true + FMT->>IAP: applyInteractionAwarePass(text, meta) + IAP->>IAP: split on math regex, preserve $...$ / \\(...\\) + IAP->>IAP: reframe text + parts[].prompt + rubric + IAP-->>FMT: reframed question (shallow copy) + end + FMT->>SUP: super.formatQuestion(reframed, answer_key, ...) 
+ SUP-->>FMT: \\question … with point box + end + FMT-->>GEN: complete .qmd string +``` + +### 10.4 Module Map + +| Module | Path | Responsibility | +|--------|------|----------------| +| `ExamRichLatexFormatter` | `src/teaching/formatters/exam-rich-latex.js` | Compose preamble + policy + tables + questions; override `formatQuestion()` to reframe | +| `bracket-table-generator` | `src/teaching/formatters/helpers/bracket-table-generator.js` | F-critical values via `jstat.centralF.inv`; dedup + sort + validate | +| `s-calibration-table` | `src/teaching/formatters/helpers/s-calibration-table.js` | Static Greenland/Rafi $s$-value calibration tabular | +| `interaction-aware-pass` | `src/teaching/formatters/helpers/interaction-aware-pass.js` | Math-region-preserving regex reframe of "main effects" → "marginal mean differences" | +| `buildRichExamPrompt` | `src/teaching/generators/exam.js` | Gate on `options.format`; load `exam-rich.md` via `PromptLoader`; substitute template vars | +| Prompt template | `src/teaching/ai/prompts/default/exam-rich.md` (project override path: `.flow/templates/prompts/exam-rich.md`) | Evidence-based reporting policy + Dean (2017) citation + interaction-aware language | + +### 10.5 Design Notes + +- **Format gate is non-invasive.** The legacy `buildExamPrompt()` path is byte-identical for `--format md|tex|qmd|canvas|json`. Only `exam-rich-latex` / `rich-latex` enters the new prompt path. +- **Inheritance over duplication.** `ExamRichLatexFormatter` adds 5 scaffolding sections in `format()` and one `formatQuestion()` override; question rendering stays in `LaTeXFormatter`. +- **Non-mutating reframe.** `reframeQuestionLanguage()` returns a shallow copy; `parts[]` are mapped to fresh objects. The original `question` argument is never mutated, which protects the answer-key path that runs in parallel. 
+- **Math-region preservation.** The interaction-aware pass splits on `(\$[^$]*\$|\\\([^)]*\\\))` and detects math segments by leading delimiter (stateless `startsWith` check, not stateful `.test()`). +- **Idempotency.** The reframe is idempotent: running it twice on the same text produces the same output as running it once. See the [rich-exam-format tutorial](tutorials/teaching/rich-exam-format.md) for the rule set. diff --git a/docs/CONFIGURATION.md b/docs/CONFIGURATION.md index 9d61e6d9..8e875abf 100644 --- a/docs/CONFIGURATION.md +++ b/docs/CONFIGURATION.md @@ -605,6 +605,48 @@ Include real-world applications and contexts. --- +### `teaching_style.exam` Section *(NEW in v2.18.0)* + +Rich-LaTeX exam format overrides. These settings drive the on-disk +`exam-rich.md` prompt loaded when `/teaching:exam` is called with +`--format exam-rich-latex` (or `--format rich-latex`). They control the +evidence-based reporting policy, citation prefix, computational-aid density, +and interaction-aware question wording. + +```yaml +scholar: + teaching_style: + exam: + policy: + report_s_values: true # Require Shannon-information ($s$-value) reporting + avoid_significance_language: true # Ban "statistically significant", "reject H0", etc. + use_compatibility_intervals: true # Frame intervals as compatibility, not accept/reject + aids_level: moderate # minimal | moderate | full — scaffolding density + textbook_citation_prefix: "Dean (2017)" # Cited in question stems for chapter refs + interaction_aware_language: true # "marginal mean differences" vs "main effects" +``` + +#### Field reference + +| Field | Type | Default | Effect | +|-------|------|---------|--------| +| `policy.report_s_values` | bool | `true` | When true, the AI is instructed to report $s$-values (bits of information) rather than $p$-values in rubrics and example interpretations. 
| +| `policy.avoid_significance_language` | bool | `true` | Bans "statistically significant", "reject H0", "fail to reject" language from the generated exam. | +| `policy.use_compatibility_intervals` | bool | `true` | Frames intervals as compatibility intervals (with $s$-value interpretation) rather than confidence-interval accept/reject. | +| `aids_level` | enum | `"moderate"` | `minimal` / `moderate` / `full`. Controls how much computational scaffolding (formula reminders, code hints, calculation steps) appears in question stems and rubrics. | +| `textbook_citation_prefix` | string | `""` | Prepended to chapter/section references in question stems (e.g. `"Dean (2017)"` → "Recall from Dean (2017, Ch. 4) that..."). Leave empty to disable citations. | +| `interaction_aware_language` | bool | `true` | When true and a question's `designModel.hasInteractionTerm` is true, the formatter reframes "main effects" → "marginal mean differences" in the prompt and rubric. Math regions (`$...$`, `\(...\)`) are preserved byte-for-byte. | + +#### Precedence + +Caller CLI flags > `teaching_style.exam.*` > built-in defaults. The legacy +`/teaching:exam --format md|tex|qmd|canvas|json` paths are byte-identical +and do not read this block. 
+ +→ Full walkthrough: [Rich Exam LaTeX Format tutorial](tutorials/teaching/rich-exam-format.md) + +--- + ## AI Generation Settings ### `ai_generation` Section diff --git a/docs/OUTPUT-FORMATS-GUIDE.md b/docs/OUTPUT-FORMATS-GUIDE.md index da3922be..d459538e 100644 --- a/docs/OUTPUT-FORMATS-GUIDE.md +++ b/docs/OUTPUT-FORMATS-GUIDE.md @@ -51,6 +51,7 @@ graph TD | **Quarto Notes** | `.qmd` | Long-form lecture notes | quarto | (lecture command) | Renderable doc | | **LaTeX** | `.tex` | Academic typesetting | texlive | `--format latex` | Compilable doc | | **LaTeX Export** | `.tex` | Multi-version exports (student/key) | texlive | (advanced) | Compilable doc | +| **Rich-LaTeX** *(v2.18.0)* | `.qmd` | Evidence-based statistics exams (exam class, $s$-values, bracket table) | quarto + lualatex | `--format exam-rich-latex` | Quarto + LaTeX | | **Canvas QTI** | `.qti.zip` | Canvas LMS import | examark (npm) | `--format canvas` | ZIP archive | | **Examark** | `.md` | Canvas QTI via examark | examark (npm) | (intermediate) | Plain text | | **Diff** | Terminal | Sync preview (YAML↔JSON) | None | `--dry-run` | Colored text | @@ -573,6 +574,79 @@ Q1 & What is the purpose of hypothesis testing? & 10 \\ --- +### Rich-LaTeX (`.qmd`) - Evidence-Based Statistics Exams *(NEW in v2.18.0)* + +A Quarto `.qmd` using the LaTeX `exam` document class with auto-generated F-critical bracket tables, $s$-value calibration tables, an evidence-based reporting-policy quote block, and interaction-aware language reframing for factorial designs. 
+ +### Best for + +- Statistics exams that follow an evidence-based reporting policy ($s$-values, compatibility intervals, no significance language) +- Factorial / two-way ANOVA exams that should auto-reframe "main effects" → "marginal mean differences" +- Print-ready PDFs with point boxes in the right margin and an answer-key toggle (`\printanswers`) +- Courses citing a single textbook prefix (e.g., "Dean (2017)") + +### Characteristics + +- Wraps `LaTeXFormatter` and adds 5 scaffolding sections in `format()` +- Bracket table is jstat-backed (`jstat ^1.9.6` runtime dep), F* values at 2-decimal precision +- $s$-value calibration is static (Greenland/Rafi; 0–13.3 bits with $p$-value anchors) +- Interaction-aware pass: preserves `$…$` and `\(…\)` math regions byte-for-byte; idempotent +- Generator gate: only `--format exam-rich-latex` (or `--format rich-latex`) enters the new prompt path; legacy formats unchanged + +### Command + +```bash +/teaching:exam stat-545-final \ + --format exam-rich-latex \ + --topics "two-way ANOVA, factorial designs" +``` + +Optional metadata to enable the bracket table: + +```bash +/teaching:exam stat-545-final \ + --format exam-rich-latex \ + --metadata '{"df_pairs":[{"df1":1,"df2":20},{"df1":2,"df2":30}]}' +``` + +### Configuration + +Override defaults in `.flow/teach-config.yml`: + +```yaml +scholar: + teaching_style: + exam: + policy: + report_s_values: true + avoid_significance_language: true + use_compatibility_intervals: true + aids_level: moderate # minimal | moderate | full + textbook_citation_prefix: "Dean (2017)" + interaction_aware_language: true +``` + +### Rendering + +```bash +quarto render exam.qmd --to pdf # student PDF +# uncomment `\printanswers` in the preamble → answer-key PDF +quarto render exam-key.qmd --to pdf +``` + +### Common Issues + +| Error | Fix | +|-------|-----| +| `! Package fontspec error` | Ensure `pdf-engine: lualatex` in YAML (default) | +| `! 
Class exam Error` | Install `texlive-latex-extra` (Linux) / `mactex-extra` (macOS) | +| `Invalid df pair` | `df1` and `df2` must be finite integers ≥ 1 | +| Answer-key PDF byte-identical to student PDF | Known limitation for short-answer-only exams in v2.18.0 (parent `LaTeXFormatter.formatShortAnswer()` does not wrap in `\begin{solution}`) | + +→ Full walkthrough: [Rich Exam LaTeX Format tutorial](tutorials/teaching/rich-exam-format.md). Architecture: [Section 10 of ARCHITECTURE-DIAGRAMS](ARCHITECTURE-DIAGRAMS.md#10-rich-exam-latex-pipeline-v2180). + +--- + ### Canvas QTI (`.qti.zip`) - LMS Import Package ### Best for - 3. Special chars diff --git a/docs/QUICK-START.md b/docs/QUICK-START.md index 23120de5..5290fef7 100644 --- a/docs/QUICK-START.md +++ b/docs/QUICK-START.md @@ -2,6 +2,16 @@ > **Get up and running in 5 minutes** > **Latest:** v{{ scholar.version }} ({{ scholar.command_count }} commands, {{ scholar.test_count }} tests) +> +> **📌 TL;DR - 30 Second Version** +> +> **Install:** `brew install scholar` ⏱️ 2 min +> +> **Restart Claude Code** (loads the plugin) ⏱️ 30 sec +> +> **First command:** `/teaching:lecture "Your Topic"` or `/teaching:exam midterm` ⏱️ 30 sec +> +> **Verify:** `/scholar:hub` shows all {{ scholar.command_count }} commands grouped by namespace --- diff --git a/docs/REFCARD.md b/docs/REFCARD.md index 2bd27f00..15d3a5fa 100644 --- a/docs/REFCARD.md +++ b/docs/REFCARD.md @@ -1,10 +1,14 @@ # Scholar Plugin - Reference Card > **Version:** {{ scholar.version }} | **Last Updated:** 2026-02-27 +> +> **📌 TL;DR - What's on this card** +> +> One-page command index for all {{ scholar.command_count }} Scholar commands grouped by namespace (teaching, research, literature, hub). Scan the ASCII box below for the full grid, or use the Quick Command Reference table further down for a sortable view. 
``` ┌─────────────────────────────────────────────────────────────────────────────┐ -│ SCHOLAR PLUGIN REFERENCE v2.16.0 │ +│ SCHOLAR PLUGIN REFERENCE v2.18.0 │ ├─────────────────────────────────────────────────────────────────────────────┤ │ │ │ TEACHING (18 commands) │ LITERATURE (4 commands) │ diff --git a/docs/TEACHING-COMMANDS-API.md b/docs/TEACHING-COMMANDS-API.md index 48676e59..8da48239 100644 --- a/docs/TEACHING-COMMANDS-API.md +++ b/docs/TEACHING-COMMANDS-API.md @@ -67,6 +67,8 @@ Generate comprehensive exams with AI-powered questions and automatic answer keys | `--json` | boolean | No | false | Output dry-run as JSON (requires --dry-run) | | `--config` | string | No | - | Explicit config file path | | `--debug` | boolean | No | false | Enable debug logging | +| `--format` | enum | No | `md` | Output format: `md`, `qmd`, `tex`, `canvas`, `json`, `exam-rich-latex` *(v2.18.0)* | +| `--metadata` | string | No | - | JSON metadata fragment. For `exam-rich-latex`, supply `df_pairs` to enable the F-critical bracket table. 
| #### Request Schema (/teachingexam) diff --git a/docs/TEACHING-COMMANDS-REFERENCE.md b/docs/TEACHING-COMMANDS-REFERENCE.md index 1973a2b4..f5811c96 100644 --- a/docs/TEACHING-COMMANDS-REFERENCE.md +++ b/docs/TEACHING-COMMANDS-REFERENCE.md @@ -1231,8 +1231,23 @@ All commands support multiple output formats: # Generate multiple formats /teaching:exam midterm --formats "md,qmd,tex,canvas" + +# Rich-LaTeX (NEW in v2.18.0) — evidence-based statistics exams +/teaching:exam stat-545-final --format exam-rich-latex \ + --topics "two-way ANOVA, factorial designs" ``` +**Supported `--format` values for `/teaching:exam`:** + +| Value | File | Description | +|-------|------|-------------| +| `md` (default) | `.md` | Plain markdown | +| `qmd` / `quarto` | `.qmd` | Quarto literate document | +| `tex` / `latex` | `.tex` | Standalone LaTeX | +| `canvas` / `qti` | `.qti.zip` | Canvas LMS import package | +| `json` | `.json` | Structured data | +| `exam-rich-latex` / `rich-latex` *(v2.18.0)* | `.qmd` | LaTeX `exam` class with evidence-based reporting policy, F-critical bracket table, $s$-value calibration, interaction-aware language reframing | + --- ## Configuration diff --git a/docs/TEACHING-WORKFLOWS.md b/docs/TEACHING-WORKFLOWS.md index dcc6f754..245ce478 100644 --- a/docs/TEACHING-WORKFLOWS.md +++ b/docs/TEACHING-WORKFLOWS.md @@ -4,7 +4,7 @@ render_macros: false # Scholar Teaching Workflows -> **Version:** 2.17.0 +> **Version:** 2.18.0 > **Last Updated:** 2026-02-09 > **Audience:** Educators using Scholar for course material generation > @@ -250,6 +250,12 @@ graph TD /teaching:exam midterm --questions 15 --difficulty medium ``` + > 💡 **NEW in v2.18.0** — For statistics exams with an evidence-based + > reporting policy ($s$-values, compatibility intervals, no significance + > language), add `--format exam-rich-latex` to produce a Quarto/LaTeX + > exam with auto-generated F-critical bracket tables and $s$-value + > calibration. 
See the [Rich Exam LaTeX Format tutorial](tutorials/teaching/rich-exam-format.md). + 3. **Review generated questions** > **⚠️ ⚠️ Critical - Never Skip Human Review** @@ -952,5 +958,5 @@ Regenerate with new settings. --- **Last updated:** 2026-02-09 -**Version:** 2.17.0 +**Version:** 2.18.0 **Contribute your workflows:** [GitHub Issues](https://github.com/Data-Wise/scholar/issues) diff --git a/docs/USER-GUIDE.md b/docs/USER-GUIDE.md index 7925dfd8..a9864b07 100644 --- a/docs/USER-GUIDE.md +++ b/docs/USER-GUIDE.md @@ -135,6 +135,7 @@ Scholar provides {{ scholar.teaching_commands }} teaching commands to automate c - Markdown (default) - Quarto (.qmd for rendering) - LaTeX (.tex for printing) +- **Rich-LaTeX (`exam-rich-latex`)** — NEW in v2.18.0: evidence-based statistics exams using the LaTeX `exam` class, with auto-generated F-critical bracket tables, $s$-value calibration, and interaction-aware language. See the [Rich Exam LaTeX Format tutorial](tutorials/teaching/rich-exam-format.md). - Canvas QTI (for LMS upload) - JSON (for programmatic use) diff --git a/docs/WHATS-NEW-v2.18.0.md b/docs/WHATS-NEW-v2.18.0.md new file mode 100644 index 00000000..8185eb5a --- /dev/null +++ b/docs/WHATS-NEW-v2.18.0.md @@ -0,0 +1,50 @@ +# What's New in v2.18.0 + +**Release:** v2.18.0 (2026-05-12) +**Theme:** Rich exam LaTeX format with evidence-based reporting policy + +--- + +## Overview + +v2.18.0 introduces a new `--format exam-rich-latex` for `/teaching:exam`, producing a polished Quarto/LaTeX exam built on the `exam` document class. 
The format is designed for statistics instructors who want professional, print-ready exams that follow an evidence-based reporting policy ($s$-values, compatibility intervals, no significance language) — with auto-generated F-critical bracket tables, $s$-value calibration tables, Dean (2017) citation patterns, and interaction-aware language that automatically reframes "main effects" to "marginal mean differences" in factorial designs containing interaction terms. + +## Highlights + +- **`--format exam-rich-latex`** — emits a Quarto `.qmd` using the LaTeX `exam` document class with `\printanswers` solution toggling, header layout, and per-question point boxes. +- **F-critical bracket table** — jstat-backed (`^1.9.6`) auto-generated F* values at 2-decimal precision for $\alpha$ levels and degrees of freedom referenced by the exam. +- **$s$-value calibration table** — static Greenland/Rafi $s$-value to $p$-value conversion table embedded in the front matter. +- **Dean (2017) citation pattern** — standardized textbook citation prefix configurable per course. +- **Interaction-aware language pass** — regex + AI-stub reframer that detects interaction terms in question stems and rewrites "main effect of X" to "marginal mean differences across X" automatically. +- **New template options** — `policy`, `aids_level`, `textbook_citation_prefix`, and `interaction_aware_language` exposed in `src/teaching/templates/exam.json` and the `teach-config.yml` `scholar.teaching_style.exam` override block. + +## How to use + +```bash +/teaching:exam stat-545-final --format exam-rich-latex --topics "two-way ANOVA, factorial designs" +``` + +Override defaults in `.flow/teach-config.yml`: + +```yaml +scholar: + teaching_style: + exam: + policy: { report_s_values: true, avoid_significance_language: true, use_compatibility_intervals: true } + aids_level: moderate # minimal | moderate | full + textbook_citation_prefix: "Dean (2017)" + interaction_aware_language: true +``` + +The generated `.qmd` compiles with `quarto render exam.qmd --to pdf` using `lualatex`.
+ +## Limitations / what's next + +- Short-answer `\printanswers` visibility relies on parent `\begin{solution}` wrapping at the class level — exposing per-question solution visibility is future work. +- The interaction-aware reframer ships with a regex pass and a stubbed AI re-prompt; full AI roundtrip will land in a follow-up release. + +## Related + +- [Spec: Rich Exam Format](specs/SPEC-2026-05-12-rich-exam-format.md) +- [ORCHESTRATE: exam-rich-latex](https://github.com/Data-Wise/scholar/blob/feature/exam-rich-latex/ORCHESTRATE-exam-rich-latex.md) +- [CHANGELOG v2.18.0](https://github.com/Data-Wise/scholar/blob/main/CHANGELOG.md#2180--2026-05-12) diff --git a/docs/commands.md b/docs/commands.md index 27d7667d..dddd657c 100644 --- a/docs/commands.md +++ b/docs/commands.md @@ -231,7 +231,7 @@ All teaching commands support these options: | -------------- | ---------------------------------------------------- | | `--dry-run` | Preview what would be generated without API calls | | `--json` | Output dry-run as JSON (requires `--dry-run`) | -| `--format FMT` | Output format: markdown, json, latex, quarto, canvas | +| `--format FMT` | Output format: markdown, json, latex, quarto, canvas, exam-rich-latex *(v2.18.0, /teaching:exam only)* | **Dry-Run Example:** diff --git a/docs/help/COMMON-ISSUES.md b/docs/help/COMMON-ISSUES.md index a0235ebe..f7e72ecd 100644 --- a/docs/help/COMMON-ISSUES.md +++ b/docs/help/COMMON-ISSUES.md @@ -1,7 +1,11 @@ -# Common Issues Index +# Common Issues Index - Scholar v2.18.0 > **Quick navigation to common problems and solutions** +Last Updated: 2026-05-12 + +--- + This index helps you quickly find solutions to frequent issues. For comprehensive troubleshooting, see the full guides linked below. 
## Installation & Setup diff --git a/docs/help/FAQ-research.md b/docs/help/FAQ-research.md index f419300d..675edce5 100644 --- a/docs/help/FAQ-research.md +++ b/docs/help/FAQ-research.md @@ -2,7 +2,13 @@ render_macros: false --- -# Research FAQ +# Research FAQ - Scholar v2.18.0 + +> **Quick answers to common questions about Scholar's research commands** + +Last Updated: 2026-05-12 + +--- Frequently asked questions about Scholar's research commands, organized by topic. @@ -73,7 +79,7 @@ Scholar provides 14 specialized research commands across 4 categories: ### How do I discover all available commands? -Use `/scholar:hub` to browse all {{ scholar.command_count }} commands (research + teaching) from one place: +Use `/scholar:hub` to browse all 33 commands (research + teaching) from one place: ```bash /scholar:hub # Full overview diff --git a/docs/help/FAQ-teaching.md b/docs/help/FAQ-teaching.md index fb4be7c2..7d0edc42 100644 --- a/docs/help/FAQ-teaching.md +++ b/docs/help/FAQ-teaching.md @@ -2,11 +2,11 @@ render_macros: false --- -# Teaching FAQ - Scholar v2.17.0 +# Teaching FAQ - Scholar v2.18.0 > **Quick answers to common questions about Scholar's teaching commands** -Last Updated: 2026-02-09 +Last Updated: 2026-05-12 --- @@ -27,7 +27,7 @@ Last Updated: 2026-02-09 ### What is Scholar's teaching namespace? -Scholar's teaching namespace provides AI-powered commands for creating course materials. It includes {{ scholar.teaching_commands }} commands for generating exams, quizzes, assignments, solutions, lectures, slides, rubrics, syllabi, feedback, demos, R code validation, Canvas QTI export, and config management (validate, diff, sync, migrate, config, preflight). +Scholar's teaching namespace provides AI-powered commands for creating course materials. 
It includes 18 commands for generating exams, quizzes, assignments, solutions, lectures, slides, rubrics, syllabi, feedback, demos, R code validation, Canvas QTI export, and config management (validate, diff, sync, migrate, config, preflight). All teaching commands start with `/teaching:` prefix. Use `/scholar:hub teaching` to see all commands with descriptions. @@ -62,7 +62,7 @@ See [Teaching Commands Refcard](../refcards/teaching-commands.md) for quick refe ### How do I discover all available commands? -Use the Scholar Hub to browse all {{ scholar.command_count }} commands from a single entry point: +Use the Scholar Hub to browse all 33 commands from a single entry point: ```bash /scholar:hub # Overview of all commands @@ -750,6 +750,111 @@ Use the `--variations` flag: --- +### What is `--format exam-rich-latex` and when should I use it? (NEW in v2.18.0) + +`--format exam-rich-latex` produces a Quarto `.qmd` exam using the LaTeX `exam` document class, with auto-generated F-critical bracket tables, $s$-value calibration tables, an evidence-based reporting-policy block, and interaction-aware language for factorial designs. + +**Use it when:** + +- Teaching statistics with an evidence-based reporting policy ($s$-values, compatibility intervals — no significance language) +- You want print-ready PDF exams with point boxes in the right margin +- Your exam includes ANOVA / regression / factorial designs that need F-critical reference tables +- You want a separate answer-key PDF rendered from the same source + +**Use the default `--format md|tex|qmd` when:** + +- You're producing a quick informal exam +- You don't need the reporting-policy scaffolding +- Your students aren't expected to use $s$-values + +**Quick example:** + +```bash +/teaching:exam stat-545-final \ + --format exam-rich-latex \ + --topics "two-way ANOVA, factorial designs, contrasts" +``` + +See the [Rich Exam LaTeX Format tutorial](../tutorials/teaching/rich-exam-format.md) for the full walkthrough. 
+ +--- + +### How do I get the F-critical bracket table in my rich-LaTeX exam? (v2.18.0) + +The bracket table is rendered **only when** you supply `exam_metadata.df_pairs` either via `--metadata` or a metadata JSON file: + +```bash +/teaching:exam stat-545-final \ + --format exam-rich-latex \ + --metadata '{"df_pairs":[{"df1":1,"df2":20},{"df1":2,"df2":30}]}' +``` + +Without `df_pairs`, you'll see a `% bracket table omitted: no df_pairs in metadata` comment in the generated `.qmd`. Both `df1` and `df2` must be finite integers $\geq 1$. + +--- + +### What does the interaction-aware language pass do? (v2.18.0) + +When a question's design model contains an interaction term (`designModel.hasInteractionTerm: true`), Scholar reframes "main effects" language to "marginal mean differences" in both the student-facing prompt and the rubric. + +**Before reframing (factorial $A \times B$ with interaction):** + +> "Test the main effects of factors A and B at $\alpha = 0.05$." + +**After reframing:** + +> "Compare the marginal mean differences of factors A and B at $\alpha = 0.05$." + +LaTeX math regions (`$…$` and `\(…\)`) are preserved byte-for-byte; the pass is idempotent. Disable it per course with: + +```yaml +scholar: + teaching_style: + exam: + interaction_aware_language: false +``` + +--- + +### How do I render the answer-key PDF separately? (v2.18.0) + +The generated `.qmd` ships with `\printanswers` commented out in the preamble: + +```latex +% \printanswers % uncomment to render answer-key PDF +``` + +To produce an answer-key PDF: + +1. Copy the source: `cp exam.qmd exam-key.qmd` +2. Open the copy and remove the `%` before `\printanswers` +3. Re-render: `quarto render exam-key.qmd --to pdf` + +> ⚠️ **Known limitation (v2.18.0):** the parent `LaTeXFormatter` does not yet wrap short-answer solutions in `\begin{solution}…\end{solution}`, so short-answer-only exams produce byte-identical PDFs with or without `\printanswers`. 
Multiple-choice and essay questions toggle correctly. Per-question solution visibility is on the roadmap. + +--- + +### How do I override the rich-LaTeX policy, aids, and citation prefix? (v2.18.0) + +The on-disk `exam-rich.md` prompt reads from `teaching_style.exam` in `.flow/teach-config.yml`: + +```yaml +scholar: + teaching_style: + exam: + policy: + report_s_values: true + avoid_significance_language: true + use_compatibility_intervals: true + aids_level: moderate # minimal | moderate | full + textbook_citation_prefix: "Dean (2017)" + interaction_aware_language: true +``` + +Precedence: caller CLI flags > `teaching_style.exam.*` > built-in defaults. See the [Course Configuration tutorial — Rich Exam Format Overrides](../tutorials/teaching/configuration.md#rich-exam-format-overrides) for the full reference. + +--- + ## Lectures & Slides ### How do I generate lecture notes? @@ -1193,10 +1298,10 @@ See [Teaching Commands Reference](../TEACHING-COMMANDS-REFERENCE.md). ### How do I fix configuration errors? -**Use the Auto-Fixer:** +**Use the validator's auto-fix mode:** ```bash -/teaching:fix +/teaching:validate --fix ``` This automatically detects and repairs: @@ -1663,4 +1768,4 @@ See [Migration Guide](../MIGRATION-v2.2.0.md). 
--- -**Last Updated:** 2026-02-09 for Scholar v2.17.0 +**Last Updated:** 2026-05-12 for Scholar v2.18.0 diff --git a/docs/help/TROUBLESHOOTING-teaching.md b/docs/help/TROUBLESHOOTING-teaching.md index 008fcf72..c257ba18 100644 --- a/docs/help/TROUBLESHOOTING-teaching.md +++ b/docs/help/TROUBLESHOOTING-teaching.md @@ -5,7 +5,7 @@ render_macros: false # Troubleshooting Guide - Scholar Teaching Commands > **Quick Fixes for Common Issues** -> Version: 2.17.0 | Last Updated: 2026-02-09 +> Version: 2.18.0 | Last Updated: 2026-05-12 --- @@ -38,7 +38,7 @@ scholar --version **Expected output:** ``` -Scholar v2.17.0 +Scholar v2.18.0 ``` **If not found:** @@ -574,7 +574,7 @@ claude plugin install scholar ```bash # Check version -scholar --version # Should be 2.17.0+ +scholar --version # Should be 2.18.0+ # Update if older brew upgrade scholar @@ -1111,7 +1111,7 @@ title: "Regression: Analysis & Theory" **Step 4: Use auto-fixer** ```bash -/teaching:fix +/teaching:validate --fix # Automatically detects and repairs common errors ``` @@ -2614,6 +2614,82 @@ After import: --- +### Problem: Rich-LaTeX exam fails to compile (v2.18.0) + +**Symptoms:** + +- `/teaching:exam --format exam-rich-latex` produces a `.qmd` file but `quarto render exam.qmd --to pdf` errors out +- Error mentions `! Package fontspec`, `! Class exam`, or `! LaTeX Error` +- Mermaid renders fine but PDF compile fails + +**Diagnostic:** + +```bash +# Check Quarto + engine +quarto --version # Should be 1.4 or later +which lualatex # Should resolve + +# Inspect the rendered .tex (Quarto's keep-tex: true is on by default for rich-latex) +ls exam.tex +head -40 exam.tex # YAML preamble should match what's in the .qmd +``` + +**Common causes and fixes:** + +| Error | Cause | Fix | +|-------|-------|-----| +| `! Package fontspec error` | Using `pdflatex` instead of `lualatex` | Ensure `pdf-engine: lualatex` in the `.qmd` YAML (Scholar emits this by default) | +| `! 
Class exam Error` | `exam` class not installed | Install `texlive-latex-extra` (Linux) or `mactex-extra` (macOS) | +| `! Undefined control sequence \printanswers` | Trying to render answer key without `exam` class | Same fix — install `texlive-latex-extra` | +| `Quarto not found` | Quarto CLI missing | Install Quarto 1.4+ from https://quarto.org/docs/get-started/ | +| Bracket table shows `NaN` rows | Invalid df pair supplied (`df1: 0`, etc.) | Both `df1` and `df2` must be finite integers ≥ 1; Scholar v2.18.0+ throws on invalid input | + +**Step 1: Verify the YAML preamble** + +The `.qmd` should start with: + +```yaml +format: + pdf: + documentclass: exam + classoption: [12pt, addpoints] + pdf-engine: lualatex +``` + +If `pdf-engine` is missing, regenerate the exam — earlier dev versions of the formatter had a different preamble layout. + +**Step 2: Render with verbose output** + +```bash +quarto render exam.qmd --to pdf --debug 2>&1 | tail -30 +``` + +The last 30 lines usually show the underlying LuaLaTeX error verbatim. + +**Step 3: Validate that the bracket table data was valid** + +If the table generator threw on invalid df pairs, you'll see: + +``` +Error: Invalid df pair: {"df1":0,"df2":5} — both df1 and df2 must be finite integers >= 1 +``` + +Re-run with valid df pairs (every value ≥ 1, both integers). + +--- + +### Problem: Rich-LaTeX answer-key PDF looks identical to student PDF (v2.18.0) + +**Symptom:** Removing `%` before `\printanswers` and re-rendering produces a byte-identical PDF for short-answer-only exams. + +**Cause:** The parent `LaTeXFormatter.formatShortAnswer()` does not wrap answers in `\begin{solution}…\end{solution}` blocks, so the `\printanswers` toggle has nothing to reveal for short-answer questions. + +**Workaround:** Multiple-choice and essay questions render the toggle correctly. For short-answer-only exams, manually add a "Sample Answer" rubric to each question and include it visually for the answer key. + +**Status:** Known limitation.
Per-question solution visibility is on the v2.19.0 roadmap. + +--- + ## Configuration Problems ### Problem: YAML syntax errors @@ -2693,7 +2769,7 @@ course_info: **Step 5: Use auto-fixer** ```bash -/teaching:fix +/teaching:validate --fix # Automatically repairs common YAML errors ``` @@ -3303,7 +3379,7 @@ If custom prompts are complex, use refine: **Solution:** -1. File was generated before provenance tracking (requires Scholar v2.17.0+) +1. File was generated before provenance tracking (requires Scholar v2.18.0+) 2. File was not generated by Scholar (e.g., manually created) 3. Frontmatter was modified and metadata comments were removed 4. Re-generate the file with current Scholar version to add provenance @@ -4239,4 +4315,4 @@ Visit: https://console.anthropic.com/account/billing/usage **Questions? Spotted an issue?** Open a GitHub issue: https://github.com/Data-Wise/scholar/issues -**Last Updated:** 2026-02-09 for Scholar v2.17.0 +**Last Updated:** 2026-05-12 for Scholar v2.18.0 diff --git a/docs/index.md b/docs/index.md index bb79dc0d..db551c9e 100644 --- a/docs/index.md +++ b/docs/index.md @@ -1,6 +1,18 @@ # Scholar Plugin > **Academic workflows for research and teaching** - Literature management, manuscript writing, simulation studies, course material generation, and 17 A-grade research skills +> +> **📌 TL;DR - 30 Second Version** +> +> **What:** A Claude Code plugin for academic workflows — {{ scholar.command_count }} commands across teaching (exams, slides, syllabi, Canvas QTI) and research (arXiv, BibTeX, DOI, manuscript drafting). 
+> +> **Install:** `brew install scholar` ⏱️ 2 min · then restart Claude Code +> +> **Try:** `/teaching:exam midterm` · `/arxiv "your topic"` · `/scholar:hub` ⏱️ 30 sec +> +> **Configure (optional):** Create `.flow/teach-config.yml` per the [Configuration Tutorial](tutorials/teaching/configuration.md) ⏱️ 5 min +> +> **New in v{{ scholar.version }}:** `--format exam-rich-latex` for evidence-based statistics exams — see [What's New v{{ scholar.version }}](WHATS-NEW-v2.18.0.md) A comprehensive Claude Code plugin for academic workflows combining research and teaching. Features unified Plugin + MCP architecture with {{ scholar.command_count }} slash commands and research skills. diff --git a/docs/specs/SPEC-2026-05-12-rich-exam-format.md b/docs/specs/SPEC-2026-05-12-rich-exam-format.md new file mode 100644 index 00000000..00d7d92d --- /dev/null +++ b/docs/specs/SPEC-2026-05-12-rich-exam-format.md @@ -0,0 +1,461 @@ +--- +title: "Rich Exam Format → Scholar Generator + Examark Tooling Integration" +status: approved +created: 2026-05-12 +decisions_locked: 2026-05-12 +from_brainstorm: "STAT 445/545 Final Exam Spring 2026 Revised-1.qmd" +target_repos: + - "~/projects/dev-tools/scholar" # v1 + - "~/projects/apps/examark" # v2 (deferred) +target_release: + - "scholar v2.18.x" # v1 + - "examark v0.6.x" # v2 (deferred) +v1_scope: "Scholar formatter only (Examark deferred to v2)" +v1_kickoff: "Week of 2026-05-12, post-final-exam" +--- + +## Decisions Locked (2026-05-12) + +| # | Decision area | Choice | +| --- | --- | --- | +| 1 | V1 scope | **Scholar formatter only** — Examark deferred to v2 | +| 2 | Conventions storage | **Hybrid** — Scholar prompt defaults + `teach-config.yml` per-course override | +| 3 | Interaction-aware language pass | **Hybrid** — regex for known phrases (e.g. 
"main effects" → "marginal mean differences") + AI re-prompt for surrounding context | +| 4 | Examark QTI default (v2) | **Essay-default**, per-question `numeric` override via metadata tag | +| 5 | Bracket-table df detection | **Pre-declared in question metadata** + post-hoc scan warning fallback | +| 6 | Computational-aid level | **Exam-wide knob** (`--aids=moderate` default) with per-question override | +| 7 | Implementation timing | **This week, post-exam** (week of 2026-05-12) | + +## Overview + +Codify the **STAT 445/545 rich exam format** developed for the Spring 2026 final (`exams/STAT 445_545 Final Exam Spring 2026 Revised-1.qmd`) into reusable tooling so future exams across courses can be generated, rendered, and converted programmatically. + +The format combines: +- LaTeX `exam` document class with `\printanswers` single-source-two-outputs toggle +- Evidence-based reporting front-matter (no "significant" / "reject", uses $s$-values and $\hat\eta^2_p$) +- Auto-generated F-quantile bracket table and $s$-value calibration table +- Pre-computed computational aids (correction term $C$, $\sum T_i^2$, etc.) to reduce hand-arithmetic +- Question conventions: don't ask for "main effects" when interactions are present; use Dean's unrestricted EMS for mixed models; cite Dean (2017) chapters + +Currently, Scholar's `latex.js` formatter produces a bare-bones `exam`-class skeleton without any of these rich features. This spec extends Scholar with a new `exam-rich-latex` format and adds Examark support for the resulting QMD. 
+ +--- + +## Primary User Story + +> **As** an instructor (Davood) teaching STAT 545 and similar quantitative courses, +> **I want** to run a single command like `/scholar:teaching:exam final --format exam-rich-latex --policy evidence-based --course stat-545` +> **And get** a fully-scaffolded LaTeX-`exam`-class Quarto file with bracket tables, $s$-calibration, computational aids, and the evidence-based reporting policy baked in, +> **So that** I don't manually re-author the ~80-line front matter each semester and so the answer-key and student versions stay perfectly in sync. + +### Acceptance Criteria + +- [ ] Running `/scholar:teaching:exam final --format exam-rich-latex` produces a `.qmd` file that compiles to PDF with both a student version (`\printanswers` commented) and an instructor version (`\printanswers` active) via a single toggle. +- [ ] Generated front matter includes the F-quantile bracket table for every df pair detected in the generated questions. +- [ ] Generated front matter includes the $s$-value calibration table and the evidence-based reporting policy block. +- [ ] No question prompt contains the phrases "statistically significant" or "reject / fail-to-reject"; no solution does either. +- [ ] For factorial / mixed designs with interaction terms, question prompts use "marginal mean differences" instead of "main effects" in computation-request parts. +- [ ] Examark CLI can read the resulting QMD (LaTeX `exam` class) and convert open-response questions to Canvas QTI where possible (essay/short-answer types). +- [ ] Memory file `~/.claude/projects/-Users-dt-projects-teaching-stat-545/memory/exam-workflow.md` is updated to reference the new Scholar/Examark commands. + +--- + +## Secondary User Stories + +### Story 2 — TA Grading + +> **As** a TA grading paper exams, +> **I want** the answer key to print with `\printanswers` showing per-part grading rubrics inline, +> **So that** I can quickly assign partial credit without flipping between documents. 
+ +**Acceptance**: every `\solution` block contains a `\textit{Grading: ...}` line that prints only when `\printanswers` is active. + +### Story 3 — Cross-course Reuse + +> **As** an instructor teaching a related course (STAT 446, STAT 547), +> **I want** to swap the textbook citations and topic list via `teach-config.yml` and regenerate, +> **So that** I can reuse the format with minimal manual editing. + +**Acceptance**: course-specific options (textbook citation prefix, df-pairs to pre-populate, computational-aid level) are configurable per course via `teach-config.yml` under `teaching_style.command_overrides.exam`. + +### Story 4 — Canvas Export + +> **As** an instructor who also runs online makeup exams, +> **I want** to export the same exam content to Canvas QTI for online delivery, +> **So that** content authoring is single-source for both paper and online formats. + +**Acceptance**: Examark `examark convert exam.qmd --to qti` produces a Canvas-importable `.qti.zip`; open-response questions become "essay" type, computational questions with numeric answers become "numeric" type. + +--- + +## Architecture + +```mermaid +graph TD + A[teach-config.yml] -->|reads| B[Scholar /scholar:teaching:exam] + B -->|uses| C[exam-rich-latex formatter] + C -->|reads| D[prompts/exam-rich.md template] + C -->|generates| E[bracket-table-generator] + C -->|generates| F[s-calibration-table] + C -->|emits| G[output.qmd<br/>LaTeX exam class] + G -->|quarto render --to pdf| H1[student PDF] + G -->|quarto render --to pdf<br/>with printanswers| H2[answer-key PDF] + G -->|examark convert --to qti| I[Canvas QTI .zip] + G -->|examark convert --to md| J[Markdown for archival] + + style C fill:#e1f5ff + style G fill:#fff4e1 + style I fill:#e1ffe1 +``` + +--- + +## Technical Requirements + +### Scholar-side changes + +#### New formatter: `src/teaching/formatters/exam-rich-latex.js` + +Extends the existing `LaTeXFormatter` (`latex.js`) with: + +| Capability | Description | +| --- | --- | +| **Front-matter scaffolding** | Auto-emit the YAML header with `documentclass: exam`, `classoption: [12pt, addpoints]`, `keep-tex: true`, `\printanswers` toggle, and standard packages. | +| **Reporting-policy block** | Inject a "Reporting policy" itemize block with: never use "significant"/"reject"; report $F(df_1, df_2)$, $p$, $s$, $\hat\eta^2_p$; use the calibration table. | +| **Bracket table** | Call `bracket-table-generator.js` (new helper) to produce the F-quantile lookup table for the df pairs detected in the exam's questions. | +| **$s$-value calibration table** | Static table mapping $p \to s \to$ evidence-strength label. | +| **`\printanswers` toggle** | Render with the toggle commented out (student) or active (answer key) based on `options.answers` flag. | +| **Computational-aid injection** | When `options.computational_aids: true`, AI prompt instructs the generator to compute and provide $C$, $\sum T_i^2$, $\sum B_j^2$, intermediate SS values, etc., in each question's prompt. | +| **Interaction-aware language** | Post-processing pass: when the question's `model` field includes an interaction term, replace "main effects" with "marginal mean differences" in part-(a)-type computation requests. | + +#### New helper: `src/teaching/formatters/helpers/bracket-table-generator.js` + +```javascript +// Scans the exam's questions, extracts every F-test's (df1, df2), +// and returns a LaTeX tabular environment with critical values from +// the F-distribution at alpha = 0.10, 0.05, 0.025, 0.01, 0.005, 0.001.
+// +// Uses jstat or a precomputed table to avoid an R dependency. + +export function generateBracketTable(dfPairs) { + // ... +} +``` + +Critical values must match R's `qf(alpha, df1, df2, lower.tail=FALSE)` to 2 decimals. + +#### New AI prompt: `src/teaching/prompts/exam-rich.md` + +Authored prompt template that instructs the AI to: +- Use evidence-based reporting language throughout solutions +- Add `\textit{Grading: ...}` lines inside every `\solution` block +- Pre-compute and include computational aids in each question's prompt +- Cite Dean (2017) chapters per question +- Apply Dean's unrestricted EMS convention for mixed models +- Avoid "main effects" framing in question prompts where interactions are modeled + +#### Schema additions: `src/teaching/schemas/exam.json` + +```json +{ + "options": { + "format": "exam-rich-latex", + "policy": "evidence-based", + "include_bracket_table": true, + "include_s_calibration": true, + "include_computational_aids": true, + "textbook_citation_prefix": "Dean, Voss & Draguljić (2017)", + "interaction_aware_language": true + } +} +``` + +#### CLI / command updates + +| Command | New flag | Behavior | +| --- | --- | --- | +| `/scholar:teaching:exam` | `--format exam-rich-latex` | Emit rich LaTeX exam class with all scaffolding | +| `/scholar:teaching:exam` | `--policy evidence-based` | Inject reporting policy block in front matter | +| `/scholar:teaching:exam` | `--with-aids` | Include pre-computed components in question prompts | +| `flow teach exam` (zsh dispatcher) | same flags | Pass through to Scholar | + +### Examark-side changes + +#### New parser: `parsers/latex-exam-class.js` + +```javascript +// Parses a Quarto/Markdown file whose YAML declares documentclass: exam. +// Extracts: +// - \question blocks +// - \part[N] blocks within each question +// - \begin{solution}...\end{solution} blocks +// - Per-question point totals +// Maps to Examark's internal Question object schema. 
+``` + +#### Conversion path + +```text +input.qmd (exam class) + ├── examark convert --to md → output.md (Examark markdown) + ├── examark convert --to qti → output.qti.zip (Canvas import) + └── examark convert --to pdf → output.pdf (passes through Quarto) +``` + +#### Question-type mapping for Canvas QTI + +| LaTeX exam class | Examark / Canvas QTI | +| --- | --- | +| `\question` with `\parts` (numeric short-answer) | `numeric_question` | +| `\question` with no parts (essay) | `essay_question` | +| `\question` with `\choices` (if present) | `multiple_choice_question` | +| `\solution` content | Sample answer / rubric attachment | + +#### Examark CLI update + +```bash +# New subcommands +examark convert input.qmd --to qti # LaTeX exam class → Canvas QTI +examark convert input.qmd --to md # LaTeX exam class → Examark markdown +examark validate input.qmd --schema exam-rich # Lint the rich format +``` + +--- + +## Data Models + +### Internal exam representation (Scholar) + +```typescript +interface RichExam { + metadata: { + title: string; + subtitle: string; + courseCode: string; // e.g., "STAT 445/545" + date: string; // e.g., "May 12, 2026" + duration: number; // minutes + totalPoints: number; + textbookCitationPrefix: string; + }; + reportingPolicy: { + type: "evidence-based" | "classical"; + excludedPhrases: string[]; // ["statistically significant", "reject", "fail to reject"] + requiredMetrics: string[]; // ["F", "p", "s", "eta_squared_partial"] + }; + computationalAids: { + enabled: boolean; + level: "minimal" | "moderate" | "heavy"; + }; + questions: RichQuestion[]; + frontMatterTables: { + bracketTable: BracketTable; // auto-generated + sCalibration: SCalibrationTable; // static + }; +} + +interface RichQuestion { + number: number; + topic: string; + textbookChapter: string; // e.g., "Ch. 
6" + points: number; + designModel: ANOVADesign; // includes interaction flag + parts: RichPart[]; + preComputedComponents: PreComputedComponents; + solution: { + body: string; + gradingRubric: string; + }; +} + +interface ANOVADesign { + type: "factorial" | "ancova" | "rcbd" | "latin-square" | "random-effects" + | "mixed" | "rcbd-random-block" | "split-plot"; + hasInteractionTerm: boolean; + emsConvention?: "unrestricted" | "restricted"; // Dean uses unrestricted +} +``` + +### Bracket-table data model + +```typescript +interface BracketTable { + dfPairs: Array<[number, number]>; // [(1,8), (2,26), ...] + alphaLevels: number[]; // [0.10, 0.05, 0.025, 0.01, 0.005, 0.001] + criticalValues: number[][]; // [df_pair_idx][alpha_idx] +} +``` + +--- + +## Dependencies + +| Component | Dependency | Notes | +| --- | --- | --- | +| Scholar new formatter | Existing `LaTeXFormatter` class | Extend, don't replace | +| Scholar bracket-table generator | `jstat` npm package or precomputed table | Avoid R dependency at generation time | +| Generated `.qmd` | LuaLaTeX + `exam` LaTeX class + standard packages (amsmath, amssymb, multicol, graphicx, enumitem) | Already in `tex/macros.tex` for STAT 545 | +| Examark parser | Existing Examark markdown parser | Extend to handle LaTeX-exam-class syntax | +| Examark QTI export | `examark` npm package (current) | Already produces Canvas QTI | + +--- + +## UI/UX Specifications + +### Scholar interactive prompt flow + +```text +$ flow teach exam final --week 15 +🎓 Generating final exam... + +? Format: (Use arrow keys) +❯ exam-rich-latex (NEW - full STAT 545 paper exam scaffolding) + exam-latex (legacy bare-bones) + examark (markdown for Canvas) + qti (direct Canvas QTI) + +? Reporting policy: +❯ evidence-based (recommended for stats courses) + classical + none + +? Include computational aids (pre-computed C, sums of squares, etc.)? 
+❯ Yes (moderate) + Yes (heavy - skip most arithmetic) + No (full hand calculation) + +✓ Generated: exams/final-spring-2026.qmd (8 questions, 136 pts) +✓ Front matter includes: + - F-quantile bracket table covering df pairs: (1,8), (1,11), (2,6), (2,26), (3,6), (3,8), (3,9), (4,15) + - s-value calibration table + - Evidence-based reporting policy block +✓ Two-output ready: comment/uncomment \printanswers in YAML +✓ Next: quarto render exams/final-spring-2026.qmd +``` + +### Examark CLI + +```bash +$ examark convert exams/final-spring-2026.qmd --to qti +ℹ Detected: LaTeX exam class +ℹ Parsing 8 questions, 24 parts +⚠ Q1 part (a): "Calculate the marginal mean differences" → essay (no numeric answer in solution) +⚠ Q1 part (c): ANOVA table construction → essay +✓ Q3 part (b): Relative efficiency = 1.38 → numeric_question +✓ Wrote final-spring-2026.qti.zip (8 questions, 6 essay + 2 numeric) +✓ Import: Canvas → Settings → Import Course Content → QTI .zip +``` + +--- + +## Resolved Decisions + +All open questions were resolved during the 2026-05-12 brainstorm review session. See the "Decisions Locked" table at the top of this spec. Summary of the chosen design: + +1. **Bracket-table df-pair detection** — pre-declared in question metadata with post-hoc scan warning fallback. Each generated question carries its `(df1, df2)` in metadata; Scholar also parses generated solutions for F-tests and warns if any pair isn't in the table. + +2. **Computational-aid level granularity** — exam-wide CLI knob (`--aids=moderate` default) with per-question override via metadata. Matches the STAT 545 Spring 2026 final's pattern where most questions had moderate aids and Q3 had heavier (full $\sum B_j^2$ and $\sum T_i^2$ given). + +3. **Interaction-aware language post-processing** — hybrid: deterministic regex for well-known substitutions (e.g. 
"main effects" → "marginal mean differences" in part-(a)-type prompts when `model.hasInteractionTerm == true`), AI re-prompt for the surrounding interpretation paragraphs in part-(d)-type sections. + +4. **Examark Canvas QTI fidelity** (v2) — essay-default for all open-response questions; per-question `numeric` override available via a metadata tag in the solution block. Preserves TA ability to give partial credit on intermediate work. + +5. **Cross-course reuse** — `teach-config.yml` declares `teaching_style.exam.textbook_citation_prefix` for the default; per-question metadata can override (e.g. when a single exam draws from multiple textbooks). + +6. **Conventions storage** — hybrid: Scholar's `prompts/exam-rich.md` declares the global defaults (evidence-based reporting, Dean unrestricted EMS, etc.); `teach-config.yml` under `teaching_style.exam` allows per-course override. This matches how lecture/slides/assignment configuration already works in Scholar. + +## Out of v1 Scope (Deferred to v2) + +\emph{Cross-reference:} The Scholar repo already has a related draft spec at `docs/specs/SPEC-2026-02-26-examark-qti-integration.md` (`/teaching:canvas` converter + `--format canvas`). Our v2 work below would EXTEND examark itself with a LaTeX-exam-class parser, which the Feb 2026 spec's converter would then consume. Coordinate when v2 kicks off. + +These items are explicitly deferred to a follow-up release (`examark v0.6.x`, post-summer): + +- Examark `parsers/latex-exam-class.js` (read LaTeX exam class) +- Examark `examark convert ... --to qti` (Canvas export) +- Examark `examark convert ... --to md` (Markdown archival) +- Examark schema validator for the rich format +- Cross-tool integration tests + +Rationale: validate the Scholar-side generator end-to-end with the next semester's STAT 545 midterm before committing to a downstream consumer. The QMD format itself is already Quarto/LuaLaTeX-renderable without Examark. 
+ +--- + +## Implementation Plan (v1 — Scholar only) + +Total estimate: **~3 days of focused work** in the Scholar repo. Kicks off this week (post-final-exam). Examark work (v2) deferred. + +### Milestone 1 — Scholar formatter (1-2 days) + +- [ ] Create `src/teaching/formatters/exam-rich-latex.js` extending `LaTeXFormatter` +- [ ] Create `src/teaching/formatters/helpers/bracket-table-generator.js` with precomputed critical values (verify against R `qf()` to 2 decimals during build) +- [ ] Create `src/teaching/formatters/helpers/s-calibration.js` (static table) +- [ ] Register new format in `src/teaching/formatters/index.js` +- [ ] Add `--format exam-rich-latex` CLI option in `src/plugin-api/commands/teaching/exam.md` + +### Milestone 2 — Scholar prompts & schema (1 day) + +- [ ] Create `src/teaching/prompts/exam-rich.md` (AI generation prompt with evidence-based reporting policy, Dean unrestricted EMS convention, citation pattern) +- [ ] Extend `src/teaching/schemas/exam.json` with new options (`policy`, `include_bracket_table`, `aids_level`, `textbook_citation_prefix`, `interaction_aware_language`) +- [ ] Document the new `teaching_style.exam` override block in `teach-config.yml` schema/docs + +### Milestone 3 — Interaction-aware post-processing (0.5 day) + +- [ ] Add deterministic regex substitutions for known phrases (`main effects → marginal mean differences`) when `question.designModel.hasInteractionTerm == true` +- [ ] Add AI re-prompt pass for surrounding interpretation paragraphs in part-(d)-type sections +- [ ] Unit tests covering both factorial (interaction present) and additive (no interaction) models + +### Milestone 4 — Validation & demo (0.5 day) + +- [ ] Regenerate the STAT 545 Spring 2026 final using the new `--format exam-rich-latex` and diff against the hand-authored version (should be ≥95% structurally identical) +- [ ] Update Scholar's `/scholar:teaching:exam` command docs with the new flags +- [ ] Cross-link from 
`~/.claude/projects/-Users-dt-projects-teaching-stat-545/memory/exam-workflow.md` memory once Scholar v2.18 ships + +### v2 (deferred) — Examark integration + +- See "Out of v1 Scope" section. Pick up after the Scholar v1 has been validated on a second exam (STAT 545 midterm fall 2026 or equivalent). + +--- + +## Review Checklist + +Before merging: + +- [ ] Generated exam compiles to PDF on macOS (LuaLaTeX via TinyTeX); in CI, where no PDF is built, HTML-only renders are out of scope but must not break +- [ ] All F-quantile critical values in the generated bracket table match R's `qf()` to 2 decimals +- [ ] No phrase from `reportingPolicy.excludedPhrases` appears in any generated question or solution +- [ ] When a generated question's model has `hasInteractionTerm: true`, no part-(a)-type prompt uses the phrase "main effects" +- [ ] Two-output toggle works: rendering with `\printanswers` commented out vs. active produces visibly different PDFs +- [ ] (v2 only — not a v1 merge gate) Examark QTI export passes Canvas import validation +- [ ] STAT 545 final regenerates with the new format and is ≥95% structurally identical to the hand-authored Spring 2026 version (the Milestone 4 criterion; AI-generated text will not be bitwise identical) +- [ ] Memory file updated to reference the new commands + +--- + +## Implementation Notes + +### Why a new formatter (subclassing `LaTeXFormatter`) rather than changing `latex.js` itself? + +Two reasons: +1. **Backward compatibility**: existing exams using `latex.js` (bare exam class) should not change. +2. **Separation of concerns**: the rich format embeds policy decisions (evidence-based reporting, no main-effect questions when interactions present) that don't apply to all LaTeX exams generally. + +### Why pre-compute the bracket table at generation time? + +Rendering bracket tables from R at PDF compile time would require R in the rendering environment. Pre-computing in the Scholar generator (using `jstat` or a hardcoded critical-value table) keeps the rendered `.qmd` self-contained and Quarto-only. + +### Why store course conventions in `teach-config.yml`? 
+ +Per the existing project architecture (`.flow/teach-config.yml` is the single source of truth for the STAT 545 site), adding `teaching_style.command_overrides.exam` extends the established pattern. The Scholar plugin already reads from `teach-config.yml` for lecture/slides/assignment generation; extending this to exams is the natural step. + +### Risk: AI hallucination of computational aids + +If the AI is asked to pre-compute $C = G^2/N$ and gets the arithmetic wrong, the prompt becomes internally inconsistent. **Mitigation**: post-generation validation pass that re-computes claimed aids and warns on mismatch. (This is what `validate-r.js` does for R chunks; extend to numeric assertions.) + +### Risk: Examark QTI fidelity for open-response numeric questions + +Canvas QTI's `numeric_question` type expects an exact value or range. Some STAT 545 questions ask for "the F-statistic" but partial credit is given for the setup even if the final number is wrong. Mapping these to Canvas `numeric_question` loses partial credit. **Mitigation**: default to `essay_question` and let the instructor flip per-question via metadata. + +--- + +## History + +- **2026-05-12** — Initial draft from the Spring 2026 STAT 545 final exam project. The format was developed in `/Users/dt/projects/teaching/stat-545/exams/STAT 445_545 Final Exam Spring 2026 Revised-1.qmd` through an iterative review process (Q-by-Q audit, answer-leak hygiene, bracket-table verification, evidence-based reporting policy, hand-arithmetic easing, interaction-aware language). diff --git a/docs/stylesheets/extra.css b/docs/stylesheets/extra.css new file mode 100644 index 00000000..2b7c017f --- /dev/null +++ b/docs/stylesheets/extra.css @@ -0,0 +1,45 @@ +/* Scholar — ADHD-friendly stylesheet overrides. + * Loaded via mkdocs.yml `extra_css:`. + * + * v2.18.0: introduced for mobile mermaid overflow + readable touch targets. 
+ */ + +/* --------------------------------------------------------------------------- + * Mermaid diagrams: prevent horizontal page overflow on mobile. + * Makes the .mermaid container itself scroll on overflow without breaking + * desktop centering. + * ------------------------------------------------------------------------ */ +.mermaid { + max-width: 100%; + overflow-x: auto; +} + +.mermaid svg { + max-width: 100%; + height: auto; +} + +/* --------------------------------------------------------------------------- + * Blockquotes (including the 📌 TL;DR callouts): add a 5rem scroll margin so + * anchor jumps do not land flush against the top of the viewport. + * Applies to every blockquote; CSS cannot select on the leading emoji. + * ------------------------------------------------------------------------ */ +.md-typeset blockquote { + scroll-margin-top: 5rem; +} + +/* --------------------------------------------------------------------------- + * Touch targets on mobile: ensure nav links + tab links are >= 44px tall. + * ------------------------------------------------------------------------ */ +@media (max-width: 768px) { + .md-nav__link, + .md-tabs__link { + min-height: 44px; + line-height: 1.4; + } + + /* Prevent code blocks from forcing horizontal scroll of the page. 
*/ + .md-typeset pre { + max-width: 100%; + overflow-x: auto; + } +} diff --git a/docs/tutorials/advanced/lms-integration.md b/docs/tutorials/advanced/lms-integration.md index 18f11b0c..2b052868 100644 --- a/docs/tutorials/advanced/lms-integration.md +++ b/docs/tutorials/advanced/lms-integration.md @@ -6,7 +6,7 @@ A comprehensive guide to integrating Scholar-generated assessments with Canvas a Before starting this tutorial, you should have: -- Scholar v2.5.0 or later installed +- Scholar v2.17.0 or later installed - Node.js >= 20.19.0 - Basic familiarity with Scholar's teaching commands - Canvas LMS instructor access (or equivalent for other LMS) @@ -16,7 +16,8 @@ Before starting this tutorial, you should have: **Learning Objectives**: By the end of this tutorial, you will be able to: - Install and configure examark for QTI conversion -- Generate Canvas-compatible assessments from Scholar +- Generate Canvas-compatible assessments with `canvasSafe` mode +- Validate questions with pre-flight checks before conversion (v2.17.0) - Export question banks to Canvas with proper formatting - Organize and manage question banks efficiently - Troubleshoot common import issues @@ -32,7 +33,8 @@ Canvas uses the QTI (Question & Test Interoperability) standard for importing as ```mermaid graph LR A[Scholar
/teaching:exam] --> B[JSON Output] - B --> C[examark
Converter] + B --> PF[Pre-flight
Validation] + PF --> C[examark
Converter] C --> D[QTI Package
.zip] D --> E[Canvas
Quiz Bank] @@ -40,6 +42,7 @@ graph LR G[Custom Prompts] -.-> A style A fill:#e1f5ff + style PF fill:#fff3cd style C fill:#ffe1f5 style E fill:#e1ffe1 ``` @@ -244,6 +247,153 @@ jq '.questions[] | select(.type == "multiple-choice") | .correct' exam-*.json jq '.questions[] | {id, type, question, correct}' exam-*.json ``` +## Part 2.5: Pre-flight Validation (NEW in v2.17.0) + +Before converting to QTI, Scholar automatically validates your exam questions against Canvas requirements. This **pre-flight check** catches problems that would cause silent failures or malformed imports — before you ever touch examark. + +### Why Pre-flight Validation Matters + +Canvas has stricter requirements than Scholar's default JSON schema: + +| Requirement | Canvas | Scholar Default | +|------------|--------|-----------------| +| MC/TF must have exactly 1 correct answer | Required | Not enforced | +| Multiple Answer must have 2+ correct answers | Required | Not enforced | +| Fill-in-Multiple-Blanks must define all blanks | Required | Not enforced | +| Short-answer without sample answer | Allowed (manual grade) | Warning | + +Without pre-flight, an exam with a missing correct answer would convert to QTI successfully — but import into Canvas silently broken, with no correct answer marked. + +### Step 1: How Pre-flight Runs Automatically + +When you use `--format canvas`, Scholar runs pre-flight automatically: + +```bash +/teaching:exam "Linear Regression" --format canvas +``` + +You will see pre-flight output before conversion: + +``` +[Scholar] Pre-flight Canvas validation... + ✓ Q1 [MC] correct answer: "B" + ✓ Q2 [MC] correct answer: "A" + ✓ Q3 [MA] 3 correct answers + ⚠ Q4 [Short]: no sample answer — will be manually graded in Canvas + ✓ Q5 [TF] correct answer: "True" + ✓ Q6 [MC] correct answer: "C" + +Pre-flight passed (0 errors, 1 warning) +Proceeding to QTI conversion... +``` + +Warnings are advisory — the conversion continues. **Errors block conversion** until fixed. 
+ +### Step 2: Understanding Error vs Warning + +| Severity | Condition | Action Required | +|----------|-----------|----------------| +| **Error** | MC/TF has no correct answer | Fix before conversion | +| **Error** | Multiple Answer has < 2 correct answers | Add correct answers | +| **Error** | Fill-in-Multiple-Blanks has undefined blanks | Define all blank answers | +| **Warning** | Short-answer has no sample answer | Optional: add sample for feedback | + +Example of a blocking error: + +``` +[Scholar] Pre-flight Canvas validation... + ✗ Q3 [MC]: no correct answer marked + ✗ Q7 [MA]: 1 correct answer — Canvas requires 2+ + +Pre-flight failed: 2 errors +Fix the errors above before generating QTI. +``` + +### Step 3: Using canvasSafe Mode + +The `canvasSafe` mode is enabled automatically with `--format canvas`. It restricts the AI to Canvas-compatible question types, preventing unsupported types from being generated in the first place: + +```bash +# Allowed in canvasSafe mode: +# multiple-choice, true-false, multiple-answers, +# short-answer, essay, numerical +# +# Excluded (not Canvas-native): +# matching, fill-in-multiple-blanks (complex), file-upload +``` + +**Tip:** If you need matching or other non-native types, generate without `--format canvas` first, then convert manually via examark with custom type mapping. + +### Step 4: Pre-flight on Existing QMD Files + +When using `/teaching:canvas` to convert an existing QMD exam file, pre-flight runs as Step 1.5 of the pipeline: + +```bash +/teaching:canvas exam.qmd +``` + +``` +[Scholar] Step 1: Parsing QMD exam... + → 15 questions found + +[Scholar] Step 1.5: Pre-flight Canvas validation... + ✓ Q1-Q12 [MC/TF]: all correct answers defined + ⚠ Q13 [Short]: no sample answer + ⚠ Q14 [Short]: no sample answer + ✓ Q15 [Essay]: no correct answer expected + +Pre-flight passed (0 errors, 2 warnings) + +[Scholar] Step 2: Converting to QTI... 
+``` + +### Step 5: Fix Common Pre-flight Errors + +**Missing correct answer (MC)** — mark one option with `^` in your QMD: + +```markdown +Which function fits a linear model in R? + +A) glm() +B) lm() ^ +C) fit() +D) model() +``` + +**Multiple Answer needs 2+ correct options:** + +```markdown +Which are valid assumptions of linear regression? (Select all that apply) + +A) Linearity ^ +B) Independence ^ +C) Zero autocorrelation +D) Homoscedasticity ^ +``` + +**Fill-in-Multiple-Blanks needs all blanks defined:** + +```markdown +The [blank1] function estimates [blank2] using ordinary [blank3] squares. + +Answer: + blank1: lm + blank2: coefficients + blank3: least +``` + +### Summary + +Pre-flight validation catches Canvas incompatibilities before conversion. It runs automatically with `--format canvas` and `/teaching:canvas` — no configuration required. + +| Step | What Happens | +|------|-------------| +| Generate exam | Scholar creates JSON with question types and answers | +| Pre-flight | Validates all questions against Canvas requirements | +| Fix errors | Address any blocking validation errors | +| Convert | examark converts valid JSON to QTI | +| Import | Canvas imports clean, well-formed QTI package | + ## Part 3: Converting to Canvas QTI Format ### Step 1: Basic Conversion @@ -1177,7 +1327,8 @@ This enables: You've learned how to: - Install and configure examark for QTI conversion -- Generate Canvas-compatible assessments with Scholar +- Generate Canvas-compatible assessments with `canvasSafe` mode +- Validate questions with pre-flight checks before conversion (v2.17.0) - Convert JSON to QTI and import to Canvas - Organize question banks efficiently - Batch process multiple assessments diff --git a/docs/tutorials/index.md b/docs/tutorials/index.md new file mode 100644 index 00000000..3c46b5fe --- /dev/null +++ b/docs/tutorials/index.md @@ -0,0 +1,110 @@ +# Tutorials + +> **Choose Your Path** — 30 hands-on tutorials covering teaching workflows, research 
workflows, and administrative tools across Scholar's 33 commands. +> +> **📌 TL;DR - 30 Second Version** +> +> **New to Scholar?** Start with [Your First Exam](teaching/first-exam.md) (10 min) or [Your First Literature Search](research/first-literature-search.md) (15 min). +> +> **Returning user?** Jump to a topic below — every tutorial declares ⏱️ time and 🟢/🔵/🔴 difficulty. +> +> **Want the visual learning path?** See [Teaching learning path](teaching/learning-path.md) for the full progression across 18 teaching commands. + +--- + +## Choose Your Path + +### 🆕 I'm new to Scholar + +Start here. These two tutorials get you productive in under 30 minutes and don't assume prior knowledge. + +| Tutorial | Time | Level | What you'll do | +|----------|------|-------|----------------| +| [Your First Exam](teaching/first-exam.md) | 10 min | 🟢 Beginner | Generate, customize, and export a midterm | +| [Your First Literature Search](research/first-literature-search.md) | 15 min | 🟢 Beginner | Search arXiv, look up DOIs, build a `.bib` file | +| [Demo Course Setup](teaching/demo-course.md) | 20 min | 🟢 Beginner | Scaffold a complete course with config + lesson plans | + +After these, jump to the audience-specific paths below. 
+ +--- + +### 🎓 I'm a teaching instructor + +| Stage | Tutorial | Time | Level | +|-------|----------|------|-------| +| **Foundation** | [Course Configuration](teaching/configuration.md) | 45 min | 🟢 Beginner | +| | [Semester Setup](teaching/semester-setup.md) | 30 min | 🟢 Beginner | +| | [Lesson Plans & Manifests](teaching/lesson-plans-manifest.md) | 25 min | 🔵 Intermediate | +| **Assessments** | [Generating a Course Syllabus](teaching/syllabus.md) | 5–7 min | 🟢 Beginner | +| | [Assignments, Solutions & Rubrics](teaching/assignments-solutions-rubrics.md) | 45 min | 🟢 Beginner | +| | [Creating Quizzes](teaching/quiz.md) | 15 min | 🟢 Beginner | +| | [Rich Exam LaTeX Format](teaching/rich-exam-format.md) (v2.18.0) | 25 min | 🔵 Intermediate | +| **Lectures** | [Lecture Notes Production](teaching/lecture-notes.md) | 40 min | 🔵 Intermediate | +| | [Slide Revision & Validation](teaching/slide-revision-validation.md) (v2.8.0) | 30 min | 🔵 Intermediate | +| | [Weekly Content Creation](teaching/weekly-content.md) | 30 min | 🔵 Intermediate | +| **Quality** | [Student Feedback](teaching/feedback.md) | 20 min | 🔵 Intermediate | +| | [R Code Validation](teaching/r-code-validation.md) | 30 min | 🔵 Intermediate | +| | [R Validation Pipeline](teaching/validate-pipeline.md) | 20 min | 🔵 Intermediate | +| | [Preflight Checks](teaching/preflight-checks.md) | 10 min | 🔵 Intermediate | +| **Workflows** | [Custom Instructions](teaching/custom-instructions.md) (v2.11.0) | 20 min | 🔵 Intermediate | +| | [Validate & Auto-Fix](teaching/validate-and-fix.md) | 25 min | 🔵 Intermediate | +| | [Sync & Diff Workflows](teaching/sync-and-diff.md) | 25 min | 🔵 Intermediate | +| | [Config Management](teaching/config-management.md) | 30 min | 🔵 Intermediate | +| | [Email Integration](teaching/email-integration.md) | 25 min | 🔵 Intermediate | +| | [Schema Migration](teaching/migration.md) | 30 min | 🔵 Intermediate | + +→ For the visual progression, see the [Teaching Learning 
Path](teaching/learning-path.md). + +--- + +### 🔬 I'm a researcher + +| Stage | Tutorial | Time | Level | +|-------|----------|------|-------| +| **Literature** | [Your First Literature Search](research/first-literature-search.md) | 15 min | 🟢 Beginner | +| | [BibTeX Management](research/bibtex-management.md) | 20 min | 🟢 Beginner | +| | [DOI Lookup](research/doi-lookup.md) | 15 min | 🟢 Beginner | +| | [Finding Research Gaps](research/finding-research-gaps.md) | 25 min | 🔵 Intermediate | +| **Design** | [Hypothesis Generation](research/hypothesis-generation.md) | 20 min | 🔵 Intermediate | +| | [Statistical Method Discovery](research/statistical-method-discovery.md) | 20 min | 🔵 Intermediate | +| | [Analysis Planning](research/analysis-planning.md) | 60 min | 🔴 Advanced | +| | [Simulation Design](research/simulation-design.md) | 45 min | 🔵 Intermediate | +| | [Simulation Study Guide](research/simulation-study.md) | 90 min | 🔴 Advanced | +| **Writing** | [Writing Methods Sections](research/methods-section.md) | 60 min | 🔴 Advanced | +| | [Writing Results Sections](research/results-section.md) | 60 min | 🔴 Advanced | +| | [Reviewing Mathematical Proofs](research/proof-review.md) | 45 min | 🔴 Advanced | +| | [Manuscript Writing](research/manuscript-writing.md) | 60–90 min | 🔴 Advanced | +| | [Reviewer Response](research/reviewer-response.md) | 60 min | 🔴 Advanced | +| **Sim. 
analysis** | [Analyzing Simulation Results](research/simulation-analysis.md) | 35 min | 🔵 Intermediate | + +--- + +## By Difficulty + +### 🟢 Beginner (start here, most ≤ 30 min) + +[Your First Exam](teaching/first-exam.md) · [Your First Literature Search](research/first-literature-search.md) · [Demo Course Setup](teaching/demo-course.md) · [Course Configuration](teaching/configuration.md) · [Semester Setup](teaching/semester-setup.md) · [Generating a Syllabus](teaching/syllabus.md) · [Assignments, Solutions & Rubrics](teaching/assignments-solutions-rubrics.md) · [Creating Quizzes](teaching/quiz.md) · [BibTeX Management](research/bibtex-management.md) · [DOI Lookup](research/doi-lookup.md) + +### 🔵 Intermediate (you've used Scholar before) + +[Rich Exam LaTeX Format](teaching/rich-exam-format.md) (v2.18.0) · [Lecture Notes Production](teaching/lecture-notes.md) · [Slide Revision & Validation](teaching/slide-revision-validation.md) · [Weekly Content Creation](teaching/weekly-content.md) · [Custom Instructions](teaching/custom-instructions.md) · [Lesson Plans & Manifests](teaching/lesson-plans-manifest.md) · [Student Feedback](teaching/feedback.md) · [R Code Validation](teaching/r-code-validation.md) · [R Validation Pipeline](teaching/validate-pipeline.md) · [Preflight Checks](teaching/preflight-checks.md) · [Validate & Auto-Fix](teaching/validate-and-fix.md) · [Sync & Diff Workflows](teaching/sync-and-diff.md) · [Config Management](teaching/config-management.md) · [Email Integration](teaching/email-integration.md) · [Schema Migration](teaching/migration.md) · [Simulation Design](research/simulation-design.md) · [Finding Research Gaps](research/finding-research-gaps.md) · [Hypothesis Generation](research/hypothesis-generation.md) · [Statistical Method Discovery](research/statistical-method-discovery.md) · [Analyzing Simulation Results](research/simulation-analysis.md) + +### 🔴 Advanced (power users, multi-step research workflows) + +[Analysis Planning](research/analysis-planning.md) · [Simulation Study 
Guide](research/simulation-study.md) · [Writing Methods Sections](research/methods-section.md) · [Manuscript Writing](research/manuscript-writing.md) · [Reviewer Response](research/reviewer-response.md) + +--- + +## What's New in v2.18.0 + +- [Rich Exam LaTeX Format](teaching/rich-exam-format.md) — evidence-based statistics exams with `--format exam-rich-latex`, F-critical bracket tables, $s$-value calibration, and interaction-aware language + +→ Full release notes: [What's New v2.18.0](../WHATS-NEW-v2.18.0.md) + +--- + +## Can't find what you need? + +- **Help docs:** [Common Issues](../help/COMMON-ISSUES.md) · [FAQ (Teaching)](../help/FAQ-teaching.md) · [FAQ (Research)](../help/FAQ-research.md) +- **Reference:** [API Reference](../API-REFERENCE.md) · [REFCARD](../REFCARD.md) · [Architecture Diagrams](../ARCHITECTURE-DIAGRAMS.md) +- **Command discovery:** Run `/scholar:hub` in Claude Code for an interactive index of all 33 commands. diff --git a/docs/tutorials/research/analysis-planning.md b/docs/tutorials/research/analysis-planning.md index 75dddfff..fc354853 100644 --- a/docs/tutorials/research/analysis-planning.md +++ b/docs/tutorials/research/analysis-planning.md @@ -4,6 +4,8 @@ render_macros: false # Creating Statistical Analysis Plans +**Level:** 🔴 Advanced + A comprehensive tutorial for developing rigorous, pre-registered analysis plans for empirical research. 
**Learning Objectives**: By the end of this tutorial, you will be able to: diff --git a/docs/tutorials/research/bibtex-management.md b/docs/tutorials/research/bibtex-management.md new file mode 100644 index 00000000..bb614930 --- /dev/null +++ b/docs/tutorials/research/bibtex-management.md @@ -0,0 +1,258 @@ +# Tutorial: BibTeX Management + +**Target Audience:** Researchers maintaining a bibliography in `.bib` files +**Time:** 20 minutes +**Level:** 🟢 Beginner + +## What You'll Learn + +By the end of this tutorial, you'll be able to: + +- Add new BibTeX entries to a `.bib` file with `/research:bib:add` +- Search across one or more `.bib` files with `/research:bib:search` +- Chain `/research:doi` → `/research:bib:add` for one-shot DOI-to-bibliography +- Search across the default Scholar locations (Zotero / Documents / CWD) +- Pipe search output to a clean exported `.bib` subset +- Avoid duplicate entries and malformed keys + +## Prerequisites + +Before starting, make sure you have: + +- [ ] Scholar installed (`brew install data-wise/tap/scholar`) +- [ ] Claude Code running +- [ ] At least one `.bib` file (or willingness to create one) +- [ ] Internet connection (only for the DOI-chain step) + +**Installation Check:** + +```bash +scholar --version +# Should show: scholar v2.18.0 or later +``` + +--- + +## Step 1: Set Up a Bibliography File ⏱️ 2 minutes + +### What You'll Do + +Pick where your `.bib` file lives. Scholar searches three default locations when you omit the file path: + +1. `$HOME/Zotero/bibtex/` (Zotero auto-export) +2. `$HOME/Documents/references/` +3. Current working directory + +For this tutorial, we'll create a `.bib` in the second default location (`~/Documents/references/`): + +```bash +mkdir -p ~/Documents/references +touch ~/Documents/references/mediation.bib +``` + +> 💡 **Pro tip:** If you use Zotero, set up [Better BibTeX](https://retorque.re/zotero-better-bibtex/) and auto-export to `~/Zotero/bibtex/library.bib`. Scholar's default search will find it automatically.
+ +--- + +## Step 2: Add Your First Entry ⏱️ 3 minutes + +### What You'll Do + +Use `/research:bib:add` to write a BibTeX entry to your file: + +```text +/research:bib:add ~/Documents/references/mediation.bib +``` + +Scholar will prompt you for the entry. Paste a BibTeX block like this: + +```bibtex +@article{baron1986, + title = {The moderator-mediator variable distinction in social psychological research}, + author = {Baron, Reuben M. and Kenny, David A.}, + journal = {Journal of Personality and Social Psychology}, + volume = {51}, + number = {6}, + pages = {1173--1182}, + year = {1986}, + doi = {10.1037/0022-3514.51.6.1173} +} +``` + +Scholar will: + +1. Validate the BibTeX syntax +2. Check for an existing entry with the same cite key (refuses duplicates) +3. Append the entry to the target `.bib` file with a trailing newline +4. Confirm the cite key it wrote + +**Expected output:** + +```text +✅ Added @article{baron1986, ...} to mediation.bib + Total entries in file: 1 +``` + +> ⚠️ **Cite-key collisions:** If `baron1986` already exists in the file, Scholar will warn and refuse. Either rename the new entry (`baron1986a`) or update the existing one manually. + +--- + +## Step 3: Chain DOI → Bibliography ⏱️ 3 minutes + +### What You'll Do + +Skip hand-typing the entry — let Scholar fetch it from a DOI, then add it in a short two-command flow: + +```text +/research:doi 10.1037/a0020761 +``` + +This calls the DOI metadata endpoint and returns a populated BibTeX block. Then: + +```text +/research:bib:add ~/Documents/references/mediation.bib +``` + +Paste the BibTeX from the previous step's output. Scholar handles the rest. + +**Why two steps and not one?** `/research:doi` is conversational — it gives you a chance to verify the metadata (title, author, year) before committing it to your bibliography. For batch DOI-to-bib workflows, import the DOIs into your reference manager and export a clean subset as described in Step 6 (Export a Clean Subset) below.
+ +> 💡 **Pro tip:** Common DOI sources include the paper's PDF metadata, Google Scholar's "Cite → BibTeX", and the publisher's "Export Citation" button. + +--- + +## Step 4: Search a Single `.bib` File ⏱️ 3 minutes + +### What You'll Do + +Find entries by free-text query against any field (title, author, journal, abstract, year): + +```text +/research:bib:search "MacKinnon" ~/Documents/references/mediation.bib +``` + +**Expected output (truncated):** + +```text +Found 3 entries matching "MacKinnon" in mediation.bib: + +1. @article{mackinnon2007, MacKinnon, D. P. (2007) + Mediation analysis (Annual Review of Psychology) + +2. @book{mackinnon2008, MacKinnon, D. P. (2008) + Introduction to Statistical Mediation Analysis (Lawrence Erlbaum) + +3. @article{mackinnon2002, MacKinnon, D. P., et al. (2002) + A comparison of methods to test mediation (Psychological Methods) +``` + +The search is case-insensitive and matches across multiple fields. Quotes are required if your query has spaces. + +--- + +## Step 5: Search Across Default Locations ⏱️ 3 minutes + +### What You'll Do + +Omit the file path to search the three default locations simultaneously: + +```text +/research:bib:search "indirect effect" +``` + +Scholar walks each location, opens every `.bib` file it finds, and aggregates matches with the source file noted next to each result: + +```text +Found 5 entries matching "indirect effect" across 2 files: + +From mediation.bib: + 1. @article{preacher2008, ...} + 2. @article{hayes2009, ...} + +From thesis.bib: + 3. @article{tofighi2011, ...} + 4. @article{tofighi2014, ...} + 5. @inbook{mackinnon2012, ...} +``` + +This is the fastest way to find a citation when you don't remember which file it lives in. 
+ +--- + +## Step 6: Export a Clean Subset ⏱️ 4 minutes + +### What You'll Do + +Pipe a query's full BibTeX entries to a new file — useful for assembling a paper-specific bibliography from your master library: + +```text +/research:bib:search "*" ~/Documents/references/mediation.bib > paper-refs.bib +``` + +The `"*"` wildcard returns every entry. For a topic-scoped export: + +```text +/research:bib:search "mediation" ~/Documents/references/master.bib > paper-refs.bib +``` + +> ⚠️ **Output format note:** Search results are formatted for human reading by default. When the output is redirected (not a terminal), Scholar emits full BibTeX entries instead of the summary view. Always check the resulting `paper-refs.bib` opens cleanly in your LaTeX editor before submitting. + +--- + +## Step 7: Quoted Filenames with Spaces ⏱️ 2 minutes + +### What You'll Do + +If your `.bib` file path has spaces, quote it: + +```text +/research:bib:search "query" "My Bibliography.bib" +``` + +Without quotes, Scholar will interpret `My`, `Bibliography.bib` as separate args and error out. This is a common gotcha when bibliographies are exported from reference managers with default filenames. + +> 💡 **Pro tip:** Symlink frequently-used `.bib` files into a path without spaces (`ln -s "/path/with spaces/master.bib" ~/refs.bib`) to avoid quoting hassle in shell commands and Makefiles. + +--- + +## Cite-Key Conventions + +Scholar accepts any cite key that's valid BibTeX (letters, digits, hyphen, colon, underscore). 
A few conventions that play well across LaTeX, Quarto, and Pandoc: + +| Style | Example | Best for | +|-------|---------|----------| +| `author-year` | `baron-1986`, `mackinnon-2008` | Single-author papers; readable in `\cite{}` | +| `authorYEAR` | `baron1986`, `mackinnon2008` | Compact, hard to mistype | +| `firstauthor:year:keyword` | `baron:1986:mediation` | Disambiguates when you cite the same author often | + +Avoid: spaces, dots, `&`, `%`, `$`, `#` (LaTeX special characters break `\cite{}`). + +--- + +## Common Issues + +| Problem | Cause | Fix | +|---------|-------|-----| +| `Error: bib file not found` | Path typo or relative path issue | Use absolute path or `cd` to the directory first | +| Duplicate-key warning, refuses add | Cite key already in target file | Rename the new entry with a disambiguating suffix (`baron1986a`) | +| Garbled output from search | Non-UTF-8 encoded `.bib` file | Re-export from your reference manager with UTF-8 encoding | +| Search returns 0 matches when entry visibly exists | Query matches across fields case-insensitively but ignores LaTeX escape sequences (`{Bayesian}`) | Search for the de-braced form (`Bayesian`, not `{Bayesian}`) | + +For deeper troubleshooting, see the [Research Troubleshooting Guide](../../help/TROUBLESHOOTING-research.md). 
+ +--- + +## What's Next + +- **[Your First Literature Search](first-literature-search.md)** — pair this with `/research:arxiv` and `/research:doi` for end-to-end literature workflows +- **[Manuscript Writing](manuscript-writing.md)** — once you have a solid `.bib`, the manuscript commands cite from it automatically +- **[Analysis Planning](analysis-planning.md)** — uses your bibliography to ground methodological choices in literature + +--- + +## See Also + +- `/research:doi` — fetch BibTeX metadata from a DOI +- `/research:arxiv` — search arXiv and emit citation-ready entries +- `/research:lit-gap` — identify gaps in your assembled literature +- [BibTeX field reference](https://en.wikipedia.org/wiki/BibTeX) — full list of supported entry types and fields diff --git a/docs/tutorials/research/doi-lookup.md b/docs/tutorials/research/doi-lookup.md new file mode 100644 index 00000000..617f93bf --- /dev/null +++ b/docs/tutorials/research/doi-lookup.md @@ -0,0 +1,263 @@ +# Tutorial: Looking Up Papers by DOI + +**Target Audience:** Researchers fetching paper metadata from DOIs +**Time:** 15 minutes +**Level:** 🟢 Beginner + +## What You'll Learn + +By the end of this tutorial, you'll be able to: + +- Fetch complete paper metadata from any valid DOI with `/research:doi` +- Read the formatted output (title, authors, journal, year, volume, pages) +- Generate a ready-to-paste BibTeX entry from a DOI +- Chain `/research:doi` → `/research:bib:add` for one-shot DOI-to-bibliography +- Handle different DOI input formats (bare, full URL, URL-encoded) +- Recover from common errors (not found, malformed DOI, network failure) + +## Prerequisites + +Before starting, make sure you have: + +- [ ] Scholar installed (`brew install data-wise/tap/scholar`) +- [ ] Claude Code running +- [ ] Internet connection (Crossref API access required) +- [ ] At least one DOI in hand — pick one from a paper PDF, Google Scholar, or your inbox + +**Installation Check:** + +```bash +scholar --version +# 
Should show: scholar v2.18.0 or later +``` + +--- + +## Step 1: Run Your First DOI Lookup ⏱️ 2 minutes + +### What You'll Do + +Pass a bare DOI to `/research:doi`. Scholar validates the format (must start with `10.`), then queries Crossref for the metadata. + +```text +/research:doi 10.1037/met0000310 +``` + +**Expected output:** + +```text +Title: A Monte Carlo Confidence Interval Method for Testing Mediation +Authors: Tofighi, Davood; Kelley, Ken +Journal: Psychological Methods +Year: 2020 +Volume: 25, Issue: 2 +Pages: 188-202 +DOI: 10.1037/met0000310 +``` + +Scholar normalizes whitespace and joins multi-author lists with semicolons. Order matches the published byline. + +> 💡 **Pro tip:** DOIs are case-insensitive — `10.1037/MET0000310` and `10.1037/met0000310` resolve to the same paper. Scholar lowercases the suffix before querying. + +--- + +## Step 2: Understand the Returned Fields ⏱️ 2 minutes + +### What You'll Do + +The Crossref response is richer than the summary view. Scholar surfaces seven fields by default: + +| Field | Source | Notes | +|-------|--------|-------| +| Title | Crossref `title[0]` | First entry of the title array | +| Authors | Crossref `author[]` | Full list in `Family, Given` format, joined with semicolons | +| Journal / Container | Crossref `container-title[0]` | Falls back to `publisher` for books | +| Year | Crossref `issued.date-parts[0][0]` | Print year preferred over online | +| Volume / Issue | Crossref `volume`, `issue` | Shown as `—` if absent | +| Pages | Crossref `page` | Hyphenated range as published | +| DOI | Echo of input | Normalized to lowercase | + +> ⚠️ **Missing fields:** Preprints and book chapters often lack volume, issue, or page numbers. Scholar prints `—` for unavailable fields rather than fabricating values. Always sanity-check before citing. + +--- + +## Step 3: Generate a BibTeX Entry ⏱️ 2 minutes + +### What You'll Do + +After the metadata view, Scholar offers follow-up actions.
Ask for BibTeX: + +```text +Generate BibTeX for that paper +``` + +**Expected output:** + +```bibtex +@article{tofighi2020monte, + title = {A Monte Carlo Confidence Interval Method for Testing Mediation}, + author = {Tofighi, Davood and Kelley, Ken}, + journal = {Psychological Methods}, + volume = {25}, + number = {2}, + pages = {188--202}, + year = {2020}, + doi = {10.1037/met0000310} +} +``` + +The cite key follows the `firstauthor + year + first-content-word` convention. You can rename it before saving — see [bibtex-management.md](bibtex-management.md) for cite-key conventions. + +> 💡 **Pro tip:** The hyphenated page range uses LaTeX's en-dash (`--`), not a single hyphen. This renders correctly in both LaTeX and Pandoc-Markdown bibliographies. + +--- + +## Step 4: Chain DOI → Bibliography ⏱️ 3 minutes + +### What You'll Do + +Combine `/research:doi` with `/research:bib:add` to fetch and file an entry in one short flow: + +```text +/research:doi 10.1080/00273171.2017.1354758 +``` + +Review the metadata, then copy the BibTeX block from the response and add it: + +```text +/research:bib:add ~/Documents/references/mediation.bib +``` + +Paste the BibTeX when prompted. Scholar will: + +1. Validate the syntax +2. Refuse the entry if the cite key already exists +3. Append to the target `.bib` with a trailing newline +4. Confirm the key it wrote + +**Expected output:** + +```text +✅ Added @article{tofighi2017indirect, ...} to mediation.bib + Total entries in file: 18 +``` + +**Why two commands and not one?** The intermediate step lets you verify the title, author order, and year before committing the entry. Crossref occasionally has stale or duplicated records — the human checkpoint catches them. + +--- + +## Step 5: Handle Different DOI Input Formats ⏱️ 2 minutes + +### What You'll Do + +Scholar accepts a bare DOI, but real-world copy-paste sources often include a URL prefix. 
The command validates the format and rejects URL-wrapped input: + +| Input form | Example | Works? | +|------------|---------|--------| +| Bare DOI | `10.1037/met0000310` | ✅ Yes | +| `doi:` prefix | `doi:10.1037/met0000310` | ❌ No — strip the prefix | +| Full URL | `https://doi.org/10.1037/met0000310` | ❌ No — strip everything up to and including `doi.org/` | +| URL-encoded | `10.1037%2Fmet0000310` | ❌ No — decode the `%2F` to `/` | + +**Strip URL wrappers with one command:** + +```bash +# In your terminal, before pasting: +echo "https://doi.org/10.1037/met0000310" | sed 's|^.*doi.org/||' +# → 10.1037/met0000310 +``` + +> ⚠️ **DOI validation:** Scholar checks that input matches `^10\.` (DOIs always start with the `10.` registrant prefix). Anything else returns `Error: Invalid DOI format. DOI should start with '10.'` without hitting the network. + +--- + +## Step 6: Recover from Lookup Errors ⏱️ 2 minutes + +### What You'll Do + +Crossref returns errors for several reasons. Most are recoverable with a small input change. + +**Case A — DOI not in Crossref:** + +```text +/research:doi 10.99999/fake-doi-test +``` + +```text +Error: DOI not found in Crossref index. + Verify the DOI on https://doi.org/10.99999/fake-doi-test +``` + +Some publishers (small society journals, datasets) register DOIs with DataCite rather than Crossref. If the DOI resolves at `https://doi.org/` in a browser but `/research:doi` can't find it, this is the cause — use the publisher's "Export Citation" button as a fallback. + +**Case B — Network timeout:** + +```text +Error: Crossref API request timed out after 10s +``` + +Retry once. If it fails again, check `ping api.crossref.org`. Crossref has occasional regional outages — `status.crossref.org` reports them. + +**Case C — Rate limited:** + +Crossref enforces a soft limit of ~50 requests/second per IP (much higher than typical interactive use). If you see `429 Too Many Requests`, wait 60 seconds and retry. Batch DOI lookups should sleep 1s between calls.
+ +--- + +## Step 7: Batch Lookups (Power User) ⏱️ 2 minutes + +### What You'll Do + +For more than a handful of DOIs, drive the command from a shell loop instead of pasting one at a time: + +```bash +DOIS=( + "10.1037/met0000310" + "10.1080/00273171.2017.1354758" + "10.1080/01621459.2020.1765785" +) + +for doi in "${DOIS[@]}"; do + echo "=== $doi ===" + # In a Claude Code session, call: /research:doi $doi + sleep 1 +done +``` + +For each result, review and then chain through `/research:bib:add`. Batches above ~20 DOIs are better served by direct Crossref scripting — `/research:doi` is optimized for interactive verification, not bulk import. + +> 💡 **Pro tip:** If you already have a list of DOIs in a `.csv` or paper RIS export, most reference managers (Zotero, EndNote) can ingest the list directly and export a clean `.bib` file. Use Scholar for the last-mile verification and gap-filling. + +--- + +## Common Issues + +| Problem | Cause | Fix | +|---------|-------|-----| +| `Error: Invalid DOI format` | Input has `https://doi.org/` prefix, `doi:` prefix, or starts with something other than `10.` | Strip the wrapper (see Step 5) and re-run | +| `DOI not found in Crossref index` | DOI is registered with DataCite, not Crossref (common for datasets, small-society journals) | Verify on `https://doi.org/` in browser; fall back to publisher's citation export | +| Missing volume / issue / pages | Preprint, book chapter, or recently-accepted paper not yet finalized in Crossref | Wait for publication, or fill in manually before adding to `.bib` | +| BibTeX cite key collides on add | Same key already in `.bib` (e.g., two papers by same author/year) | Rename with a disambiguating suffix (`tofighi2020a`, `tofighi2020b`) | +| `429 Too Many Requests` | Hit Crossref rate limit during batch lookup | Sleep 60s, then space subsequent calls 1s apart | +| LaTeX special characters in author name | Crossref returned non-ASCII names (e.g., `Müller`) without escaping | Quote the field in the 
`.bib` (`{Müller, Hans}`) — modern LaTeX (`\usepackage[utf8]{inputenc}`) renders directly | + +For deeper troubleshooting, see the [Research Troubleshooting Guide](../../help/TROUBLESHOOTING-research.md). + +--- + +## What's Next + +- **[BibTeX Management](bibtex-management.md)** — full workflow for adding, searching, and exporting BibTeX entries (DOI lookup is the upstream step) +- **[Your First Literature Search](first-literature-search.md)** — pair `/research:doi` with `/research:arxiv` for end-to-end discovery → citation flow +- **[Manuscript Writing](manuscript-writing.md)** — once your `.bib` is populated, the manuscript commands cite from it automatically + +--- + +## See Also + +- `/research:bib:add` — append a BibTeX entry to a `.bib` file (the natural next step after DOI lookup) +- `/research:bib:search` — search across one or more `.bib` files before adding to avoid duplicates +- `/research:arxiv` — search arXiv for preprints; each result includes a DOI you can feed to `/research:doi` +- `/research:lit-gap` — once you have a working bibliography, identify methodological gaps in the literature +- [API Reference](../../API-REFERENCE.md) — full command reference, flags, and exit codes +- [Crossref REST API docs](https://api.crossref.org/swagger-ui/index.html) — underlying service powering `/research:doi` diff --git a/docs/tutorials/research/finding-research-gaps.md b/docs/tutorials/research/finding-research-gaps.md new file mode 100644 index 00000000..8c8d6176 --- /dev/null +++ b/docs/tutorials/research/finding-research-gaps.md @@ -0,0 +1,252 @@ +# Tutorial: Finding Research Gaps + +**Target Audience:** Researchers identifying under-explored areas in an established literature +**Time:** 25 minutes +**Level:** 🔵 Intermediate + +## What You'll Learn + +By the end of this tutorial, you'll be able to: + +- Run `/research:lit-gap` against a topic, a `.bib` corpus, or an arXiv search history +- Interpret the four-dimensional gap report (methodological, 
application, theoretical, computational) +- Validate proposed gaps against your domain knowledge before acting on them +- Pair `/research:lit-gap` with `/research:hypothesis` to operationalize gaps into testable claims +- Use gap output to scaffold a dissertation lit-review chapter or grant background section +- Distinguish "real gap" from "search artifact" — when the command simply wasn't fed enough of the literature as context +- Iterate productively: refine the topic prompt, re-run, and converge on a defensible research question + +## Prerequisites + +Before starting, make sure you have: + +- [ ] Scholar installed (`brew install data-wise/tap/scholar`) +- [ ] Claude Code running +- [ ] A research domain in mind (broad is fine; "causal mediation analysis" beats "statistics") +- [ ] **Recommended:** an assembled `.bib` file or a recent `/research:arxiv` search log +- [ ] 20–30 minutes of uninterrupted attention (gap reports reward careful reading) + +**Installation Check:** + +```bash +scholar --version +# Should show: scholar v2.18.0 or later +``` + +--- + +## Step 1: Frame the Topic Precisely ⏱️ 3 minutes + +### What You'll Do + +`/research:lit-gap` is only as good as the topic you hand it. Spend three minutes narrowing your domain into a single sentence that names the **method family** plus a **scope constraint**. + +| Vague (avoid) | Sharpened (use) | +|---------------|-----------------| +| "machine learning" | "transformer-based survival analysis for clinical trials" | +| "mediation" | "causal mediation analysis with time-varying confounders" | +| "Bayesian methods" | "Bayesian variable selection in high-dimensional GLMs" | + +> 💡 **Pro tip:** If you can't articulate the topic in one sentence, run `/research:arxiv "your broad topic"` first to surface the vocabulary that active researchers actually use, then sharpen.
+ +--- + +## Step 2: Run Your First Gap Analysis ⏱️ 3 minutes + +### What You'll Do + +Invoke `/research:lit-gap` with your sharpened topic: + +```text +/research:lit-gap "causal mediation analysis with time-varying confounders" +``` + +Scholar engages three skills under the hood — `literature-gap-finder`, `cross-disciplinary-ideation`, and `method-transfer-engine` — and produces a structured Markdown report. + +**Expected output skeleton:** + +```text +# Literature Gaps: Causal Mediation Analysis with Time-Varying Confounders + +## Summary +[1-2 sentence overview] + +## Current State of Literature +### Well-Developed Areas +### Active Research Areas + +## Identified Gaps +### Gap 1: [Title] +**Type:** Methodological | Applied | Theoretical | Computational +**Feasibility:** High | Medium | Low +**Potential Impact:** High | Medium | Low +... + +## Promising Research Directions +## Cross-Disciplinary Opportunities +## Recommended Next Steps +``` + +Read the **Summary** and **Identified Gaps** sections first. Skim the rest on a second pass. + +> ⚠️ **Don't act on the first run.** Treat the initial output as a draft. The next steps refine it. + +--- + +## Step 3: Ground the Analysis in a Corpus ⏱️ 4 minutes + +### What You'll Do + +A topic-only run draws from Scholar's general training. To anchor the gap report in *your* literature, point the command at an assembled corpus. + +**Option A — feed an existing `.bib` file:** + +```text +/research:lit-gap "causal mediation analysis with time-varying confounders" +``` + +When Scholar prompts for context, paste the output of: + +```text +/research:bib:search "mediation" ~/Documents/references/mediation.bib +``` + +**Option B — feed a recent arXiv search:** + +```text +/research:arxiv "causal mediation time-varying" --max 20 +/research:lit-gap "causal mediation analysis with time-varying confounders" +``` + +Scholar will read the prior arXiv results from session context and incorporate them. 
+ +**Why this matters:** corpus-grounded gap reports cite specific papers and surface gaps the *current* literature explicitly flags ("future work should..."), instead of generic statistical-methodology gaps. + +> 💡 **Pro tip:** 15–30 papers is the sweet spot. Fewer than 10 produces thin reports; more than 50 dilutes specificity. + +--- + +## Step 4: Read the Four Gap Dimensions ⏱️ 4 minutes + +### What You'll Do + +Every gap is tagged with a **Type**. Understand what each dimension means before you decide which gaps to pursue. + +| Type | What it surfaces | Typical signal in your output | +|------|------------------|-------------------------------| +| **Methodological** | Missing estimators, untested assumptions, robustness | "No method exists for…", "Existing methods assume…" | +| **Application** | Underserved domains, modern data structures | "Has not been applied to…", "Streaming data variant…" | +| **Theoretical** | Missing asymptotics, finite-sample results, optimality | "Consistency under… remains open", "Asymptotic normality requires…" | +| **Computational** | Speed, scalability, software availability | "No open-source implementation", "Does not scale to n > …" | + +Most reports lean methodological. If yours is heavily methodological and you wanted application-focused gaps, **re-run with a more applied framing** (Step 6). + +### ✅ Self-check + +- [ ] Every gap in your report has a Type label +- [ ] You can name at least one gap from each of the four dimensions (or you understand why some are absent) +- [ ] At least one gap cites a specific paper from your corpus (if you fed one in Step 3) + +--- + +## Step 5: Validate Each Gap Against Your Knowledge ⏱️ 4 minutes + +### What You'll Do + +`/research:lit-gap` is a **starting point, not authoritative.** A gap report can hallucinate gaps that were closed years ago, or miss obvious open problems. Run each candidate gap through a three-question filter: + +1. 
**Is it real?** Search the proposed gap title on Google Scholar with date filter "last 2 years". If you find 5+ papers solving it, the gap is closed (or never existed). +2. **Is it valuable?** Would solving it produce a finding worth a paper? Or is it an incremental tweak nobody would cite? +3. **Is it tractable for *you*?** Match feasibility against your toolkit, data access, and timeline. + +**Mark each gap:** + +| Marker | Meaning | +|--------|---------| +| ✅ | Real, valuable, tractable — pursue | +| 🤔 | Real and valuable but stretch — keep on radar | +| ❌ | Closed, trivial, or off-domain — discard | + +Expect to discard 40–60% of gaps on first pass. That's normal and the filter is doing its job. + +> ⚠️ **Watch for stale citations.** Scholar may cite the most recent paper *it knows about*. If the gap analysis cites work from 2022 and earlier, sanity-check with `/research:arxiv "your gap topic" --since 2024` before concluding the gap is open. + +--- + +## Step 6: Iterate — Refine and Re-Run ⏱️ 3 minutes + +### What You'll Do + +Use what you learned in Steps 4–5 to refine the prompt and re-run. Common iteration moves: + +- **Too methodological?** Add an application constraint: `"...for electronic health records"`. +- **Too generic?** Add a specific assumption to relax: `"...under non-ignorable missingness"`. +- **Too narrow?** Drop a constraint and broaden one term. +- **Stale results?** Add a recency cue: `"recent advances in ..."` followed by your topic. + +```text +/research:lit-gap "causal mediation analysis with time-varying confounders in electronic health records" +``` + +Two to three iterations typically converge on a usable gap report. If you're still unhappy after three iterations, the issue is usually the corpus, not the prompt — go back to Step 3 and feed in different papers. + +--- + +## Step 7: Operationalize Gaps into Hypotheses ⏱️ 3 minutes + +### What You'll Do + +A gap is a *space*. A hypothesis is a *claim you can test*.
Bridge them with `/research:hypothesis`: + +```text +/research:hypothesis "Sequential ignorability can be relaxed via a sensitivity parameter that bounds unmeasured time-varying confounding in EHR-based mediation studies" +``` + +Feed the hypothesis command the **gap description** and **possible approaches** sections from your validated ✅ gap. Scholar returns a structured hypothesis with predictions, alternative explanations, and study-design implications. + +> 💡 **Pro tip:** Run `/research:hypothesis` once per ✅ gap. Three to five hypotheses gives you a research program; one hypothesis gives you a paper. + +--- + +## Step 8: Hand Off to an Analysis Plan ⏱️ 1 minute + +### What You'll Do + +Once a hypothesis crystallizes, scaffold the empirical work: + +```text +/research:analysis-plan "Sensitivity bounds for time-varying confounding in EHR mediation" +``` + +`/research:analysis-plan` consumes the hypothesis and emits design, estimands, estimator choice, simulation plan, and reporting checklist. Save the output alongside your gap report — together they form the literature-review backbone of a dissertation chapter, grant background, or scoping review. 
+ +--- + +## Common Issues + +| Issue | Cause | Fix | +|-------|-------|-----| +| Report is generic, reads like a textbook | Topic too broad, no corpus fed | Sharpen topic per Step 1; feed `.bib` or arXiv results per Step 3 | +| All gaps tagged methodological, you wanted applied | Default skill weighting favors methodology | Add application constraint to the prompt (e.g., "...in genomics") | +| Cited gap was actually closed in 2024 | Scholar's training data is stale for very recent work | Validate with `/research:arxiv "" --since 2024`; mark stale gaps ❌ | +| Same gap appears in every iteration | The prompt isn't actually changing the search space | Drop the iteration term and try a perpendicular constraint instead | +| Output is one long wall of text | Scholar ran without the structured-output skill engaged | Re-run; if it persists, prepend "Use the structured Research Gap Report format" | +| Cross-disciplinary section is empty | Topic is narrow and self-contained | Expected for niche methods; not a failure | + +For deeper troubleshooting, see the [Research Troubleshooting Guide](../../help/TROUBLESHOOTING-research.md). 
+ +--- + +## What's Next + +- **[Hypothesis Generation](hypothesis-generation.md)** — operationalize each validated gap into a testable hypothesis with predictions and alternatives +- **[Analysis Planning](analysis-planning.md)** — convert a hypothesis into a concrete simulation/empirical study design +- **[Manuscript Writing](manuscript-writing.md)** — gap reports and validated hypotheses are the natural skeleton for a literature-review or background section + +--- + +## See Also + +- `/research:hypothesis` — generate testable hypotheses from a validated gap +- `/research:method-scout` — find candidate methods that could fill a methodological gap +- `/research:arxiv` — pull recent papers to validate (or invalidate) a proposed gap before pursuing it diff --git a/docs/tutorials/research/first-literature-search.md b/docs/tutorials/research/first-literature-search.md index 0c0501ac..3bc6fb86 100644 --- a/docs/tutorials/research/first-literature-search.md +++ b/docs/tutorials/research/first-literature-search.md @@ -2,6 +2,7 @@ **Target Audience:** Researchers new to Scholar's literature tools **Time:** 15 minutes +**Level:** 🟢 Beginner **Difficulty:** Beginner ## What You'll Learn diff --git a/docs/tutorials/research/hypothesis-generation.md b/docs/tutorials/research/hypothesis-generation.md new file mode 100644 index 00000000..e976411e --- /dev/null +++ b/docs/tutorials/research/hypothesis-generation.md @@ -0,0 +1,292 @@ +# Tutorial: Generating Statistical Hypotheses + +**Target Audience:** Researchers translating a research question into testable hypotheses +**Time:** 20 minutes +**Level:** 🔵 Intermediate + +## What You'll Learn + +By the end of this tutorial, you'll be able to: + +- Turn a vague research question into a precise, falsifiable hypothesis pair (H0 / H1) with `/research:hypothesis` +- Specify operational definitions, populations, and estimands that survive a pre-registration review +- Choose between directional, two-sided, equivalence, and 
non-inferiority framings — and explain why +- Generate multiple competing hypotheses for the same question and rank them by informativeness +- Chain `/research:lit-gap` → `/research:hypothesis` → `/research:method-scout` → `/research:analysis-plan` +- Avoid the four classic pitfalls: vagueness, p-hacking-friendly framings, unfalsifiable claims, and underspecified estimands +- Align hypothesis statements with Scholar v2.18.0's evidence-based reporting policy (compatibility intervals, $s$-values) + +## Prerequisites + +Before starting, make sure you have: + +- [ ] Scholar installed (`brew install data-wise/tap/scholar`) +- [ ] Claude Code running +- [ ] A research question or methodological gap in mind (even one sentence is fine) +- [ ] Optionally: a `.bib` file with key references, or recent output from `/research:lit-gap` + +**Installation Check:** + +```bash +scholar --version +# Should show: scholar v2.18.0 or later +``` + +--- + +## Step 1: Frame Your Research Question ⏱️ 3 minutes + +### What You'll Do + +Write your research question in one sentence before invoking the command. `/research:hypothesis` is only as sharp as the question you give it. + +A useful question has three parts: + +1. **Estimator or method** under investigation (e.g., percentile bootstrap, robust M-estimator) +2. **Property** you care about (coverage, bias, power, MSE) +3. **Conditions** that make the question non-trivial (non-normality, contamination, small n) + +**Weak prompt:** + +> "Does the bootstrap work for mediation?" + +**Strong prompt:** + +> "Does the percentile bootstrap achieve nominal 95% coverage for the indirect effect when the mediator distribution is skewed (skewness > 1) and sample sizes are small (n < 100)?" + +The strong prompt names the estimator, the property, the condition, and a measurable threshold. + +> 💡 **Pro tip:** If you've just finished `/research:lit-gap`, paste its identified gap directly as your question. 
Scholar will pick up the methodological context automatically. + +--- + +## Step 2: Invoke `/research:hypothesis` ⏱️ 2 minutes + +### What You'll Do + +Run the command with your topic in quotes: + +```text +/research:hypothesis "percentile bootstrap coverage for indirect effects under skewness" +``` + +Or run it without an argument to enter conversational mode: + +```text +/research:hypothesis +``` + +Scholar will prompt you for: + +1. **The methodological question** — what's being asked +2. **Expected finding** — what you predict (this guides direction) +3. **Why it matters** — what changes if H1 is supported + +The command activates three skills automatically: + +- `mathematical-foundations` — for precise notation +- `asymptotic-theory` — for convergence-rate language +- `cross-disciplinary-ideation` — for borrowing framings from adjacent fields + +> ⚠️ **Don't skip the "why it matters" prompt.** Hypotheses that don't change downstream decisions are the most common output failure mode. If you can't name a decision the result would change, refine the question before continuing. + +--- + +## Step 3: Read the H0 / H1 Pair Carefully ⏱️ 3 minutes + +### What You'll Do + +Scholar returns a structured hypothesis document. The core is the H0 / H1 pair: + +```text +H0: The percentile bootstrap CI for the indirect effect ab has coverage + probability equal to the nominal 1 - α level when the mediator + skewness exceeds 1. + +H1: The percentile bootstrap CI for the indirect effect ab has coverage + probability less than the nominal 1 - α level (undercoverage) when + the mediator skewness exceeds 1.
+ +Operational definitions: +- ab: product-of-coefficients indirect effect in a single-mediator model +- Coverage: Pr(L ≤ ab_true ≤ U) over 10,000 simulation replications +- Skewness: third standardized moment of the mediator's residual distribution +- Nominal level: 1 - α = 0.95 +``` + +Check each block: + +- [ ] **H0 is the null you'd reject** — not just "no effect" +- [ ] **H1 is directional if your theory predicts a direction** — otherwise two-sided +- [ ] **Every term has an operational definition** — coverage, effect, "small n" +- [ ] **The estimand is named** — population coverage, not sample coverage +- [ ] **Conditions are testable** — "skewness > 1" not "skewed-ish" + +> 💡 **Pro tip:** If H0 reads like a tautology ("the estimator estimates the parameter"), ask Scholar to re-derive the pair with a more substantive null (e.g., "coverage equals nominal"). Tautologies can't be tested. + +--- + +## Step 4: Choose Your Framing (Directional / Two-Sided / Equivalence) ⏱️ 3 minutes + +### What You'll Do + +Scholar offers four framings. Pick the one that matches your decision. + +| Framing | When to use | H0 form | H1 form | +|---------|-------------|---------|---------| +| **Two-sided** | You'll be surprised by either direction | $\theta = \theta_0$ | $\theta \ne \theta_0$ | +| **Directional** | Theory predicts a sign; opposite sign is uninteresting | $\theta \le \theta_0$ | $\theta > \theta_0$ | +| **Equivalence** | You want to claim "no meaningful difference" | $\|\theta - \theta_0\| \ge \delta$ | $\|\theta - \theta_0\| < \delta$ | +| **Non-inferiority** | New method must be "no worse than" reference | $\theta \le \theta_0 - \delta$ | $\theta > \theta_0 - \delta$ | + +Tell Scholar which framing fits: + +```text +Use equivalence framing with δ = 0.01 coverage units. +The decision changes only if undercoverage exceeds 1 percentage point. 
+``` + +Scholar regenerates the pair with the requested framing and shows you the explicit $\delta$ (margin), the rejection region, and the implied sample size sensitivity. + +> ⚠️ **The most common mistake here is reaching for two-sided by default.** Two-sided is the right answer when you genuinely don't know the direction. If your title says "improves coverage", you're directional — say so. + +--- + +## Step 5: Generate Competing Hypotheses ⏱️ 3 minutes + +### What You'll Do + +For non-trivial questions, ask Scholar to produce 2–4 competing hypotheses that would explain the same observable phenomenon. This prevents tunnel vision and clarifies what your study can actually distinguish. + +```text +Generate three competing hypotheses for the same research question. +Mark them as primary, alternative-mechanism, and null-equivalence. +``` + +**Expected output (compressed):** + +```text +H1 (primary): Percentile bootstrap undercovers due to asymmetry of the + sampling distribution of ab when M is skewed. + +H1-alt (alternative mechanism): Undercoverage is driven by small-sample + bias in the standard error estimate, not asymmetry per se. + Distinguishing test: holds even when M is symmetric but n is small. + +H1-equiv (null-equivalence): Coverage is within ±0.01 of nominal for all + skewness values in [0, 2]. Practically, percentile bootstrap is + "good enough" and the published warnings are overstated. +``` + +The value isn't picking the "right" one — it's seeing what your study can and cannot distinguish. Pair this with `/research:simulation:design` to build a study that separates them. + +> 💡 **Pro tip:** A simulation study that can't distinguish H1 from H1-alt has a hidden confound. Use the competing-hypothesis output as a checklist when designing simulation conditions. + +--- + +## Step 6: Validate with the Quality Checklist ⏱️ 2 minutes + +### What You'll Do + +Scholar emits a four-axis quality checklist with each hypothesis. 
Score honestly: + +| Axis | Pass criteria | Common failure | +|------|---------------|----------------| +| **Clear** | All notation defined; conditions stated; no ambiguous words ("better", "robust") | "Method A works well" | +| **Testable** | You can name the study, sample size, and decision rule | "True under all conditions" | +| **Novel** | Not already proven (cite refs); not obvious | "$\bar{x} \to \mu$ as $n \to \infty$" | +| **Motivated** | Connects to literature; addresses a real problem | "Because no one has looked" | + +Fail any axis → return to Step 2 with a revised question. The cycle takes <5 minutes and saves weeks downstream. + +--- + +## Step 7: Align with Scholar's Evidence-Based Reporting Policy ⏱️ 2 minutes + +### What You'll Do + +Scholar v2.18.0 frames inference around **compatibility intervals** and **$s$-values** rather than $p < 0.05$ dichotomies. Rewrite your hypothesis statements to match — this is what `/research:analysis-plan` will expect in the next step. + +**Old framing (avoid):** + +> H1: We will find a statistically significant difference in coverage (p < 0.05). + +**Scholar-aligned framing:** + +> H1: The 95% compatibility interval for the coverage difference will exclude zero +> (equivalently, $s$-value $> 4.32$ bits of evidence against H0), and the point +> estimate will fall outside the equivalence margin $\delta = 0.01$. + +The shift matters because: + +- $p$-values mix evidence and decision; $s$-values are a pure information scale +- Compatibility intervals report a range of values the data are compatible with, not a binary verdict +- Pre-registering "significant difference" invites p-hacking; pre-registering an interval + $\delta$ does not + +> 💡 **Pro tip:** Run your draft hypothesis past the question: "If $p = 0.06$, would my conclusion change?" If yes, the framing is still significance-tied — rewrite around the interval. 
+ +--- + +## Step 8: Chain to the Next Command ⏱️ 2 minutes + +### What You'll Do + +Save the hypothesis document and pass it forward. The natural chain is: + +```text +/research:lit-gap "topic" + ↓ (identifies open question) +/research:hypothesis "topic" + ↓ (produces H0/H1 + estimand + δ) +/research:method-scout "estimand from hypothesis" + ↓ (returns candidate methods + assumption table) +/research:analysis-plan + ↓ (pre-registration-ready SAP) +/research:simulation:design + ↓ (study to test the hypothesis) +``` + +Each command consumes the previous one's output. Scholar will reference the saved hypothesis document by name when you launch the next command in the same session. + +**Save the output:** + +```text +Save the hypothesis document to ~/Documents/research/hypotheses/2026-bootstrap-coverage.md +``` + +Then launch the next step: + +```text +/research:method-scout "estimating coverage of percentile bootstrap CI for indirect effects" +``` + +--- + +## Common Issues + +| Issue | Cause | Fix | +|-------|-------|-----| +| H1 reads like a tautology | Question was a definition, not a claim | Restate as "[method] has [property] under [conditions]" | +| Both H0 and H1 feel unfalsifiable | "All possible scenarios" language; no operational defs | Constrain conditions (e.g., specific skewness range) and define metrics | +| Scholar keeps returning two-sided when you want directional | Direction not stated in the "expected finding" prompt | Re-run and explicitly say "I expect undercoverage" or similar | +| Competing hypotheses are nearly identical | Original question is too narrow | Broaden the mechanism scope; ask Scholar to vary the causal pathway | +| Output mentions $p < 0.05$ as the decision rule | Default fallback when no $\delta$ or compatibility interval was requested | Re-prompt with "use compatibility intervals and an equivalence margin" | +| Operational definition for an estimand is missing | Question used a vague term ("better", "robust") | Replace with a 
measurable quantity (MSE, coverage probability, breakdown point) | + +For deeper troubleshooting on chaining, see the [Research Troubleshooting Guide](../../help/TROUBLESHOOTING-research.md). + +--- + +## What's Next + +- **[Creating Statistical Analysis Plans](analysis-planning.md)** — turn your H0 / H1 into a pre-registration-ready SAP with primary model, sensitivity analyses, and missing-data plan +- **[Designing Monte Carlo Simulations](simulation-design.md)** — build the simulation study that will actually test the hypothesis, including factor structure and replication count +- **[Finding Research Gaps](finding-research-gaps.md)** — circle back to `/research:lit-gap` when your hypothesis exposes a deeper question + +--- + +## See Also + +- `/research:lit-gap` — identify the open question that motivates the hypothesis +- `/research:method-scout` — find candidate methods that can estimate the quantity in H1 +- `/research:analysis-plan` — convert the hypothesis into a full, pre-registration-ready statistical analysis plan diff --git a/docs/tutorials/research/manuscript-writing.md b/docs/tutorials/research/manuscript-writing.md index 27125d58..77f48762 100644 --- a/docs/tutorials/research/manuscript-writing.md +++ b/docs/tutorials/research/manuscript-writing.md @@ -6,6 +6,7 @@ render_macros: false **Target Audience:** Researchers writing methods, results, or responding to reviews **Time:** 60-90 minutes (broken into 6 focused sessions) +**Level:** 🔴 Advanced **Difficulty:** Intermediate ## What You'll Learn diff --git a/docs/tutorials/research/methods-section.md b/docs/tutorials/research/methods-section.md index 0bab7768..ae0e7c83 100644 --- a/docs/tutorials/research/methods-section.md +++ b/docs/tutorials/research/methods-section.md @@ -2,6 +2,7 @@ **Target Audience:** Researchers writing their first statistical methods section **Time:** 60-90 minutes +**Level:** 🔴 Advanced **Difficulty:** Intermediate ## What You'll Learn diff --git 
a/docs/tutorials/research/proof-review.md b/docs/tutorials/research/proof-review.md new file mode 100644 index 00000000..cf8ffdd2 --- /dev/null +++ b/docs/tutorials/research/proof-review.md @@ -0,0 +1,295 @@ +# Tutorial: Reviewing Mathematical Proofs + +**Target Audience:** Researchers writing or revising mathematical proofs in statistical manuscripts +**Time:** 45 minutes +**Level:** 🔴 Advanced + +## What You'll Learn + +By the end of this tutorial, you'll be able to: + +- Prepare a proof in LaTeX for review with `/research:manuscript:proof` +- Interpret structured feedback on logical gaps, unjustified steps, and notation +- Apply review patterns to induction, contradiction, direct, and probabilistic proofs +- Synchronize proof notation with your methods section +- Iterate revisions through a second review pass +- Use the command for pre-submission, reviewer rebuttals, or defense prep + +## Prerequisites + +Before starting, make sure you have: + +- [ ] Scholar installed (`brew install data-wise/tap/scholar` or npm global install) +- [ ] Claude Code running +- [ ] A draft proof in LaTeX (theorem statement + proof body) +- [ ] Familiarity with the theorem/lemma you are proving +- [ ] Reference for any standard results you cite +- [ ] Optional: a methods section using the same notation + +**Installation Check:** + +```bash +scholar --version +# Should show: scholar v2.18.0 or later +``` + +--- + +## Step 1: Prepare Your Proof File ⏱️ 5 minutes + +Isolate the theorem and proof into a single `.tex` file. The command works best with a self-contained snippet, not a full manuscript. + +### Minimum Required Structure + +```latex +% theorem1.tex — keep \newcommand macros at the top of the file +\begin{theorem}[Consistency of the M-estimator] +Let $\hat{\theta}_n$ minimize $Q_n(\theta) = n^{-1} \sum_i m(X_i, \theta)$ +over compact $\Theta$. 
Assume (i) $\theta_0$ uniquely minimizes +$Q_0(\theta) = E[m(X, \theta)]$; (ii) $m(X, \cdot)$ continuous a.s.; +(iii) $E[\sup_\theta |m(X, \theta)|] < \infty$. Then $\hat{\theta}_n \xrightarrow{p} \theta_0$. +\end{theorem} + +\begin{proof} +[Your proof body here] +\end{proof} +``` + +> 💡 Custom macros must be in the same file — otherwise they render as text. + +### ✅ Checkpoint 1 + +- [ ] Theorem and proof in one `.tex` file +- [ ] All assumptions listed explicitly +- [ ] Notation defined or macros included + +--- + +## Step 2: Run Your First Review ⏱️ 5 minutes + +Invoke `/research:manuscript:proof` with your file. The command activates the `proof-architect` and `mathematical-foundations` skills automatically. + +```bash +/research:manuscript:proof ~/manuscripts/proofs/theorem1.tex +``` + +### What Scholar Returns + +A structured review covering: + +1. **Logical Structure** — assumptions, flow, circularity, case coverage +2. **Mathematical Rigor** — justified claims, technical conditions, inequalities +3. **Completeness** — missing steps, defined notation, edge cases +4. **Clarity** — strategy, intuition, transitions +5. **Issue List** — each issue tagged Critical / Important / Minor / Suggestion + +> ⚠️ The review does not rewrite your proof. It identifies what to fix and +> suggests how. You remain the author of the mathematics. + +### Example Output (excerpt) + +```markdown +### Issue 3 — Important +**Line:** Second sentence of proof +**Issue:** "Uniform law of large numbers" invoked without verifying +preconditions. Cite a standard reference (Newey & McFadden 1994, Lemma 2.4). +**Fix:** Add the citation inline. +``` + +### ✅ Checkpoint 2 + +- [ ] Command ran without errors +- [ ] You received a structured review with all five sections +- [ ] Each issue has a severity label + +--- + +## Step 3: Triage by Severity ⏱️ 5 minutes + +Spend your time on what matters.
+ +| Severity | Meaning | Action | +|----------|---------|--------| +| Critical | Logical gap or false claim that invalidates the result | Fix before any submission | +| Important | Step needs justification; result probably stands | Add citation, lemma, or inequality | +| Minor | Notation or exposition issue | Fix during polish pass | +| Suggestion | Optional improvement (intuition, roadmap) | Adopt if space allows | + +> ⚠️ Address every Critical issue before any Important one. Suggestions come last. + +### ✅ Checkpoint 3 + +- [ ] You can list all Critical issues +- [ ] You have a fix in mind for each Critical issue + +--- + +## Step 4: Apply Pattern-Specific Checks ⏱️ 8 minutes + +Different proof styles have different failure modes. Re-read your review with the right lens. + +### Induction Proofs + +> ⚠️ Common failure: the inductive step assumes the conclusion implicitly, +> or the base case is non-trivially false. + +Watch for "circular reasoning" or "inductive hypothesis unused". + +### Contradiction Proofs + +> ⚠️ Common failure: the negation of the conclusion is stated imprecisely, +> leaving an ambiguous contradiction target. + +Look for "negation not stated" or "contradiction not explicit". + +### Direct Proofs + +> ⚠️ Common failure: chains of "it follows that" or "clearly" steps that +> hide a non-trivial argument. + +Look for "unjustified step" and "expand chain" suggestions. + +### Probabilistic Arguments + +> ⚠️ Common failure: convergence mode unspecified (a.s. vs. in probability +> vs. in distribution), or measurability asserted without proof. + +Look for "convergence mode unclear" and "dominated convergence preconditions". + +### ✅ Checkpoint 4 + +- [ ] You identified your proof's style +- [ ] You re-read the review with that style's failure modes in mind + +--- + +## Step 5: Synchronize Notation with the Methods Section ⏱️ 5 minutes + +Reviewers compare your proof's notation to the methods section. 
The command flags internal inconsistency but not inconsistency with the rest of the paper. Cross-check manually. + +### Workflow + +1. Open your methods draft (or run `/research:manuscript:methods` to generate one) +2. List every symbol in the proof: $X$, $\theta$, $Q_n$, $\hat{\theta}_n$ +3. Verify each appears in the methods section with the same meaning +4. Reconcile differences by editing the proof, not the methods + +### Common Drifts + +| In Methods | In Proof | Fix | +|------------|----------|-----| +| $X_i$ (observation) | $x_i$ (lowercase) | Use $X_i$ in both | +| $\theta_0$ (truth) | $\theta^*$ | Use $\theta_0$ in both | +| $\hat{\theta}_n$ | $\hat{\theta}$ | Add $n$ subscript in proof | + +> 💡 Tip: After fixing notation, re-run `/research:manuscript:proof` to +> confirm no new issues were introduced. + +### ✅ Checkpoint 5 + +- [ ] Every symbol in the proof appears in the methods section +- [ ] Meanings and decorations match exactly + +--- + +## Step 6: Revise and Run a Second Pass ⏱️ 10 minutes + +After addressing Critical and Important issues, run the command again. The second pass catches issues introduced by your revisions and confirms the fixes worked. + +```bash +# Save the first review +cp review-round1.md review-round1-archive.md + +# Edit the proof, then re-run +/research:manuscript:proof ~/manuscripts/proofs/theorem1.tex > review-round2.md + +# Diff the two reviews +diff review-round1-archive.md review-round2.md +``` + +### What to Expect + +- Resolved issues should disappear +- New issues may appear (revisions can introduce gaps) +- Severity distribution should shift toward Minor and Suggestion + +> ⚠️ If a Critical issue persists, the fix did not address the root cause. +> Re-read Scholar's "Explanation" field carefully before trying again. + +Two passes are usually enough. If you need a third, the proof likely needs restructuring (e.g., extract a lemma) rather than incremental edits. 
+ +### ✅ Checkpoint 6 + +- [ ] Round-2 review run and saved +- [ ] All Round-1 Critical issues resolved +- [ ] No new Critical issues introduced + +--- + +## Step 7: Use Cases Beyond Pre-Submission ⏱️ 5 minutes + +Recognize when to invoke `/research:manuscript:proof` outside drafting. + +- **Reviewer rebuttal:** A reviewer wrote "the proof of Theorem 2 is unclear at line 14." Run the command to identify the step, then cite the fix in your response letter (see `reviewer-response.md`). +- **Dissertation defense prep:** Run on every theorem. The Issue List becomes a study guide — be ready to answer each Critical and Important question. +- **Co-author handoff:** Resolve easy issues before sharing — it respects your co-author's time. + +### ✅ Checkpoint 7 + +- [ ] You can identify which use case applies to your current work + +--- + +## Step 8: Archive the Final Review ⏱️ 2 minutes + +Save the final review alongside the proof. + +```bash +mkdir -p ~/manuscripts/proofs/reviews +mv review-round2.md ~/manuscripts/proofs/reviews/theorem1-round2.md +git add ~/manuscripts/proofs/ && git commit -m "Add theorem1 with Scholar proof reviews" +``` + +> 💡 When you submit, attach the final review as a private appendix. +> Some journals appreciate evidence of self-review. 
+ +### ✅ Checkpoint 8 + +- [ ] Proof and reviews committed to version control + +--- + +## Common Issues + +| Issue | Cause | Fix | +|-------|-------|-----| +| Review is too generic | Theorem statement is vague or assumptions implicit | List every assumption explicitly in the theorem block | +| Custom macros render as text | Macros not included in the file | Paste `\newcommand` definitions at top of the `.tex` file | +| Notation flagged in proof only | Proof uses local symbols not in methods | Sync via Step 5 | +| Critical issue persists after fix | Fix addresses symptom, not root cause | Re-read the "Explanation" field; extract a lemma | +| Review missed a known gap | Gap is conceptual, not local to a line | Add a `% REVIEW: check this step` marker and re-run | +| Second pass adds more issues than it resolves | Revision was too aggressive | Revert; revise one issue at a time | + +--- + +## What's Next + +- [Writing Statistical Methods Sections](methods-section.md) — keep your proof notation in lockstep with the methods you reference +- [Your First Reviewer Response](reviewer-response.md) — use proof reviews to back up rebuttals to reviewer concerns +- [Manuscript Writing](manuscript-writing.md) — fold reviewed proofs into the full manuscript workflow + +--- + +## See Also + +- `/research:manuscript:methods` — draft methods sections that match your proof notation +- `/research:manuscript:results` — write results sections that reference theorem conclusions +- `/research:manuscript:reviewer` — respond to reviewer comments, including those about proofs + +--- + +**Tutorial created:** 2026-05-12 +**Scholar version:** 2.18.0 +**Maintainer:** Data-Wise team + +**Feedback:** Found an issue or have suggestions? 
[Open an issue](https://github.com/Data-Wise/scholar/issues) diff --git a/docs/tutorials/research/results-section.md b/docs/tutorials/research/results-section.md new file mode 100644 index 00000000..a1aab90c --- /dev/null +++ b/docs/tutorials/research/results-section.md @@ -0,0 +1,393 @@ +# Tutorial: Writing the Results Section + +**Target Audience:** Researchers drafting the Results section of a quantitative manuscript +**Time:** 60 minutes +**Level:** 🔴 Advanced + +## What You'll Learn + +By the end of this tutorial, you'll be able to: + +- Feed raw analysis output (R, Python, Stata) into `/research:manuscript:results` and get a publishable draft +- Structure results as descriptive stats → primary analysis → secondary/sensitivity → robustness checks +- Apply v2.18.0 evidence-based reporting style ($s$-values, compatibility intervals, no dichotomous significance language) +- Generate clean in-text references to numbered tables and figures +- Keep notation consistent with the Methods section +- Iterate on a draft to address reviewer comments and add sensitivity analyses +- Validate the final draft against your analysis artifacts before submission + +## Prerequisites + +Before starting, make sure you have: + +- [ ] Scholar installed (`brew install data-wise/tap/scholar`) +- [ ] Claude Code running in the manuscript directory +- [ ] Statistical analysis already run (R, Python, Stata, or SAS output saved) +- [ ] Output tables (CSV, Markdown, or LaTeX) and figures (PDF/PNG) on disk +- [ ] A drafted Methods section (or a plan to draft one with `/research:manuscript:methods`) +- [ ] (Optional) A `.bib` file for in-text citations of cited estimators + +**Installation Check:** + +```bash +scholar --version +# Should show: scholar v2.18.0 or later +``` + +--- + +## Step 1: Inventory Your Analysis Outputs ⏱️ 7 minutes + +Before invoking the command, gather every artifact the Results section will reference. 
Scholar drafts from what you give it — vague inputs produce vague prose. + +### Build a Results Inventory + +Create `results-inventory.md` next to your analysis with one block per finding: + +```markdown +# Results Inventory + +## Sample +- N analyzed: 343 (after listwise deletion from 350) +- Missingness: sleep 2%, exercise 1%, other vars 0% + +## Primary Analysis (Table 2, Figure 1) +- Estimator: causal mediation (mediation::mediate), B = 5,000, seed = 20250201 +- NIE = 0.42, 95% CI [0.18, 0.71] +- NDE = 1.18, 95% CI [0.74, 1.65] +- Proportion mediated = 26% + +## Secondary (Figure 2) +- Subgroup by sex: NIE_F = 0.51, NIE_M = 0.29 + +## Sensitivity +- ρ-sensitivity threshold (Imai): R² = 0.20 +- Multiple imputation re-run: NIE = 0.43 [0.19, 0.72] +``` + +> 💡 **Pro tip:** Paste raw output from R (`summary()`, `broom::tidy()`) or Python (`statsmodels` summary tables) directly into the follow-up dialogue. Scholar can parse those formats. + +### ✅ Checkpoint 1 + +- [ ] Every numeric estimate that will appear in prose is on disk +- [ ] Each table and figure has a stable filename and number +- [ ] Sample size and exclusions are explicit + +--- + +## Step 2: Generate the First Draft ⏱️ 10 minutes + +Invoke `/research:manuscript:results` with a focused topic string. The command supports two forms: + +- `/research:manuscript:results` — Scholar prompts for the topic +- `/research:manuscript:results "<topic>"` — pass the topic inline + +### Run the Command + +```text +/research:manuscript:results "causal mediation of stress on depression via sleep and exercise" +``` + +Scholar asks follow-up questions matching its internal process — what analyses ran, what the key findings are, and what tables/figures exist. Paste your inventory from Step 1.
+ +**Scholar drafts content covering:** + +- Descriptive statistics (Table 1 reference) +- Primary analysis (point estimates with uncertainty) +- Model assessment and assumption diagnostics +- Sensitivity analyses +- In-text references to all tables and figures + +### Expected Draft Skeleton + +```markdown +## Results + +### Sample Characteristics +Of 350 students enrolled, 343 (98%) were retained. Table 1 summarizes +characteristics. The analytic sample was 62% female (M_age = 19.8, SD = 1.4). + +### Mediation of Stress on Depression +Table 2 decomposes the total effect of perceived stress on depression. +The natural indirect effect through sleep and exercise was 0.42 (95% CI +[0.18, 0.71]), about 26% of the total effect (Figure 1). + +### Sensitivity to Unmeasured Confounding +Following Imai et al. (2010), an unmeasured confounder would need to +explain ≥20% of residual variance in both mediators and outcome to +attenuate the indirect effect to zero... +``` + +> ⚠️ **First drafts are not final drafts.** Every number must be checked against the inventory before submission. + +### ✅ Checkpoint 2 + +- [ ] Draft has clear subsections (descriptive → primary → secondary → sensitivity) +- [ ] Every numeric value matches your inventory exactly +- [ ] Each table and figure is referenced at least once + +--- + +## Step 3: Align Notation with the Methods Section ⏱️ 7 minutes + +The Results section must use the same symbols as Methods. If Methods calls perceived stress $X$ and sleep $M_1$, Results must do the same — never re-label mid-paper. + +### Pair the Two Commands + +If you haven't drafted Methods yet, do it first or in parallel: + +```text +/research:manuscript:methods "causal mediation with bootstrap inference" +``` + +Then re-invoke the results command with matching topic phrasing so Scholar reuses notation: + +```text +/research:manuscript:results "causal mediation with bootstrap inference" +``` + +### Notation Consistency Check + +Scan both drafts side by side. 
Every symbol introduced in Methods (e.g., $X$ for exposure, $M_1, M_2$ for mediators, NIE/NDE/TE for estimands, $B$ for bootstrap reps) must appear unchanged in Results. + +> 💡 **Pro tip:** If you tweak notation in Methods, search-and-replace the same symbols in Results. The two sections evolve together. + +### ✅ Checkpoint 3 + +- [ ] Every symbol used in Results was introduced in Methods +- [ ] Estimand names (NIE/NDE/TE) match across sections +- [ ] Subscripts and indexing are consistent + +--- + +## Step 4: Apply v2.18.0 Evidence-Based Reporting Style ⏱️ 8 minutes + +Scholar v2.18.0 introduced evidence-based reporting as a first-class style. When your journal or statistical philosophy aligns with the $s$-value / compatibility-interval movement (Rafi & Greenland, Amrhein & Greenland), tell Scholar in the follow-up dialogue: + +```text +Use evidence-based reporting style: +- compatibility intervals (not "confidence intervals") where appropriate +- s-values alongside p-values +- no dichotomous "significant" / "non-significant" language +- describe estimates and interval ranges, not threshold tests +``` + +### Before (Traditional NHST) vs After (Evidence-Based) + +> **Before:** The indirect effect was statistically significant ($b = 0.42$, +> $p < .001$), indicating that sleep significantly mediated the stress–depression +> relationship. +> +> **After:** The estimated natural indirect effect was 0.42 (95% compatibility +> interval [0.18, 0.71]; $p = 0.0008$, $s = 10.3$ bits). The interval is +> compatible with indirect effects ranging from modest (0.18) to substantial +> (0.71) increases in depression symptoms per unit of stress. 
+ +### Common Replacements + +| Don't say | Say instead | +|-----------|-------------| +| "statistically significant" | "the interval excludes zero" or "the data are incompatible with the null" | +| "marginally significant" | report the estimate and interval; let the reader judge | +| "non-significant" | "the interval includes zero" or "the data are compatible with no effect" | +| "trend toward significance" | (delete; report point estimate and interval) | +| "confidence interval" | "compatibility interval" (when matching the style guide) | + +### ✅ Checkpoint 4 + +- [ ] No dichotomous significance language remains +- [ ] Each key finding has a point estimate AND an interval +- [ ] Exact p-values are reported (not "p < .05") +- [ ] Where used, $s$-values are computed from $p$ via $s = -\log_2(p)$ + +--- + +## Step 5: Generate Clean Table and Figure Callouts ⏱️ 6 minutes + +A clean Results section threads through tables and figures rather than reciting their contents. Use Scholar to generate the prose, then verify every callout resolves. + +### Ask for Explicit Callouts + +In your follow-up to the command, specify your numbering scheme: + +```text +Use these table/figure references: +- Table 1: Descriptive statistics +- Table 2: Mediation decomposition (TE, NDE, NIE, proportion mediated) +- Figure 1: Path diagram with point estimates +- Figure 2: Forest plot of subgroup NIEs + +Reference each at least once. Don't restate every number from the tables — +highlight the headline finding and point readers to the table for detail. +``` + +### Good Callout Pattern + +> Table 1 summarizes sample characteristics. Of the 343 students retained, +> 62% identified as female, and mean perceived stress was elevated +> ($M = 24.3$, $SD = 6.2$) relative to published norms. +> +> The mediation decomposition appears in Table 2 and is illustrated by the +> path diagram in Figure 1. 
The natural indirect effect through sleep +> ($\widehat{\text{NIE}}_{M_1} = 0.28$, 95% CI [0.11, 0.49]) was larger +> than the indirect effect through exercise ($\widehat{\text{NIE}}_{M_2} = 0.14$, +> 95% CI [0.02, 0.31]). + +### Verify Every Callout Resolves + +Grep your draft for table and figure references: + +```bash +grep -nE "(Table|Figure) [0-9]" results.md +``` + +Cross-check against your inventory. Every number must point at an artifact you actually have. + +### ✅ Checkpoint 5 + +- [ ] Every table and figure listed in your inventory is cited +- [ ] Callouts highlight the finding, not restate the whole table +- [ ] No orphan "Table 3" references to artifacts that don't exist + +--- + +## Step 6: Add Secondary, Sensitivity, and Robustness Subsections ⏱️ 8 minutes + +Strong Results sections show that the headline finding survives reasonable perturbations. Have Scholar draft a dedicated subsection for each robustness check. + +### Iterate on the Draft + +Re-invoke the command once per robustness analysis, then splice the subsections under a `### Sensitivity and Robustness` heading: + +```text +/research:manuscript:results "subgroup analysis of mediation by sex" +/research:manuscript:results "rho-sensitivity for unmeasured confounding" +/research:manuscript:results "multiple imputation re-analysis" +``` + +### Recommended Subsection Order + +1. Subgroup / effect modification (pre-specified subgroups) +2. Alternative model specification (different covariates, link, estimator) +3. Missing data handling (listwise vs. multiple imputation) +4. Unmeasured confounding (Imai's ρ, E-value) +5. Influential observations (leverage, Cook's distance) + +### Honest Reporting Pattern + +If a robustness check changes the conclusion, say so plainly: + +> The headline indirect effect attenuated by 35% when we excluded participants +> with extreme stress scores (NIE = 0.27, 95% CI [0.06, 0.51]). 
The +> direction is preserved but the interval now includes values close to zero, +> tempering inferential claims about magnitude. + +### ✅ Checkpoint 6 + +- [ ] At least one sensitivity analysis is reported +- [ ] Robustness checks are described even when they change the conclusion +- [ ] Subgroup analyses note pre-specification status + +--- + +## Step 7: Revise After Reviewer Comments ⏱️ 7 minutes + +When reviewers request additional analyses (the most common comment on a Results section), use Scholar to draft the additions and `/research:manuscript:reviewer` to draft the response letter. + +### Pattern: Reviewer Asks for a New Analysis + +Reviewer 2 says: *"Please clarify whether the mediation holds in participants with clinically elevated PHQ-9 (≥ 10)."* + +1. Run the analysis, save artifacts, and update `results-inventory.md` +2. Invoke Scholar for the new subsection: + + ```text + /research:manuscript:results "mediation in subgroup with PHQ-9 >= 10" + ``` + +3. Splice the new subsection into the Results draft +4. Draft the reviewer reply with `/research:manuscript:reviewer`, which can cite the new Results subsection by section number + +### Pattern: Reviewer Questions a Specific Number + +If a reviewer flags a number as inconsistent with another section, re-run the inventory check: + +```bash +grep -nE "0\.42" results.md methods.md tables/table2.csv +``` + +Fix at the source (the table), then re-invoke `/research:manuscript:results` for just that paragraph if needed. + +### ✅ Checkpoint 7 + +- [ ] Every reviewer-requested analysis is in the Results section +- [ ] The corresponding response letter cites the new subsection +- [ ] Numbers are consistent across Results, Methods, tables, and figures + +--- + +## Step 8: Final Validation Before Submission ⏱️ 7 minutes + +Before declaring the section done, walk three checks end-to-end. 
+ +### Number-by-Number Audit + +Open `results-inventory.md` next to `results.md` and verify each estimate matches digit-for-digit, including precision (don't report 0.42 in prose and 0.418 in the table). + +### Notation and Estimand Audit + +Skim Methods and Results in one sitting. Every symbol, every estimand name, every model term should match. If Methods says "two-stage regression," Results should not say "structural equation model." + +### Reproducibility Check + +The Results section should pair with reproducible artifacts. Confirm: + +- Random seed reported (matches Methods) +- Software version reported (matches Methods) +- Code link is live (if shared) +- Tables and figures regenerate from the same code + +> 💡 **Pro tip:** If your analysis lives in a Quarto or R Markdown file, regenerate the document end-to-end before submission. Inline values (`r round(nie, 2)`) catch drift that hand-edited prose hides. + +### ✅ Checkpoint 8 + +- [ ] Every numeric value in prose matches its source table +- [ ] Methods and Results share consistent notation and estimand names +- [ ] Random seed and software versions are documented and identical across sections +- [ ] Tables and figures regenerate from current code + +--- + +## Common Issues + +| Issue | Likely cause | Fix | +|-------|--------------|-----| +| Numbers in draft don't match my output | Scholar fabricated plausible values when inventory was vague | Paste raw R/Python output verbatim into the follow-up dialogue | +| Notation drifts between Methods and Results | The two drafts were generated without shared topic phrasing | Use the same topic string for both commands; reconcile symbol-by-symbol | +| Draft uses "statistically significant" despite evidence-based intent | Style preference not communicated to Scholar | Explicitly request evidence-based reporting in the follow-up prompt | +| Reference to "Table 3" that doesn't exist | Scholar invented a numbering scheme | Provide explicit table/figure list in the prompt; 
grep the draft for orphan callouts | +| Mediation proportion doesn't match TE = NDE + NIE | Rounding or copy-paste error from inventory | Recompute from unrounded values; report to consistent precision | +| Sensitivity subsection contradicts primary finding without comment | Iterative drafting lost narrative thread | Re-read the full section end-to-end; add explicit reconciling sentences | +| Reviewer says results are buried in supplementary tables | Too much offloading to tables, too little prose | Highlight the headline number in prose; let tables hold the rest | + +--- + +## What's Next + +- **Pair with Methods:** [Tutorial: Writing Statistical Methods Sections](methods-section.md) — keep notation and software details synchronized +- **Full workflow:** [Tutorial: Manuscript Writing with Scholar](manuscript-writing.md) — Results in context of the entire paper +- **Address reviews:** [Tutorial: Responding to Reviewers](reviewer-response.md) — once your Results draft is reviewed, draft the response letter + +## See Also + +- `/research:manuscript:methods` — draft the companion Methods section with matching notation +- `/research:manuscript:reviewer` — draft point-by-point reviewer responses citing the Results +- `/research:manuscript:proof` — validate any mathematical derivations referenced in Results + +--- + +**Tutorial created:** 2026-05-12 +**Scholar version:** 2.18.0 +**Maintainer:** Data-Wise team + +**Feedback:** Found an issue or have suggestions? 
[Open an issue](https://github.com/Data-Wise/scholar/issues) diff --git a/docs/tutorials/research/reviewer-response.md b/docs/tutorials/research/reviewer-response.md index c02b31f9..55c38dd7 100644 --- a/docs/tutorials/research/reviewer-response.md +++ b/docs/tutorials/research/reviewer-response.md @@ -2,6 +2,7 @@ **Target Audience:** Researchers responding to peer review for the first time **Time:** 45-60 minutes +**Level:** 🔴 Advanced **Difficulty:** Intermediate ## What You'll Learn diff --git a/docs/tutorials/research/simulation-analysis.md b/docs/tutorials/research/simulation-analysis.md new file mode 100644 index 00000000..a050ba52 --- /dev/null +++ b/docs/tutorials/research/simulation-analysis.md @@ -0,0 +1,275 @@ +# Tutorial: Analyzing Simulation Results + +**Target Audience:** Researchers analyzing the output of a Monte Carlo simulation study +**Time:** 35 minutes +**Level:** 🔵 Intermediate + +## What You'll Learn + +By the end of this tutorial, you'll be able to: + +- Feed simulation output (long-format CSV or `.RData`) into `/research:simulation:analysis` +- Compute the standard performance metrics: bias, MSE, coverage, Type I error, power, interval width +- Produce condition-by-method summary tables ready for a manuscript +- Generate publication-quality figures showing how metrics vary across factors +- Pair the analysis step with `/research:simulation:design` so metrics match the planned conditions +- Report results in line with Scholar v2.18.0's evidence-based reporting policy ($s$-values, compatibility intervals) +- Diagnose convergence failures and outliers before they distort summaries + +## Prerequisites + +Before starting, make sure you have: + +- [ ] Scholar installed (`brew install data-wise/tap/scholar`) +- [ ] Claude Code running +- [ ] A finished simulation with results saved as CSV or `.RData` +- [ ] Long-format output (one row per replication × condition × method) +- [ ] A clear list of conditions varied (e.g., `n`, `effect_size`, `dist`, 
`method`) +- [ ] The true parameter value (or null hypothesis value) for each condition + +**Installation check:** + +```bash +scholar --version +# Should show: scholar v2.18.0 or later +``` + +> 💡 **Don't have results yet?** Run `/research:simulation:design` first to plan the study, execute it in R, then return here to analyze the output. + +--- + +## Step 1: Inspect Your Results File ⏱️ 4 minutes + +### What You'll Do + +Confirm your results file is in **long-format** (one row per replication) with the columns Scholar expects. + +**Required:** `condition_id`, `method`, `rep_id`, `estimate`, `true_value`. + +**Common optional columns:** `se`, `ci_lower`, `ci_upper`, `p_value`, `converged`, `runtime_sec`. + +Quick inspection in R: + +```r +results <- read.csv("results.csv") +str(results) +table(results$condition_id, results$method) # cells should be balanced +``` + +> ⚠️ **Wide-format trap:** If you have one row per condition with `estimate_method1`, `estimate_method2` as columns, pivot to long format (`tidyr::pivot_longer`) before invoking the analyzer. + +--- + +## Step 2: Invoke the Analysis Command ⏱️ 3 minutes + +### What You'll Do + +Run `/research:simulation:analysis` with a path to your results file: + +```text +/research:simulation:analysis ~/projects/simulations/mediation-bootstrap/results.csv +``` + +You can also invoke it with no argument and Scholar will prompt for the path: + +```text +/research:simulation:analysis +``` + +Scholar will: + +1. Read the file and report row counts per condition × method +2. Engage the `simulation-architect`, `computational-inference`, and `statistical-software-qa` skills +3. Ask you to confirm which columns map to estimate / SE / CI / p-value / true value +4. Ask which factors (e.g., `n`, `effect_size`, `dist`) should become rows vs. columns vs. panels +5. 
Generate metrics, tables, and figures + +**Expected first output:** + +```text +=== SIMULATION RESULTS === +File: results.csv +Rows: 60,000 +Conditions: 12 (n × dist combinations) +Methods: 2 (bootstrap, delta) +Replications per cell: 2,500 (balanced) +``` + +--- + +## Step 3: Compute Performance Metrics ⏱️ 6 minutes + +### What You'll Do + +Scholar computes the standard battery of performance metrics for each (condition, method) cell. You don't need to write R — but understanding the formulas helps you interpret the tables. + +**Point estimates:** + +```r +bias <- mean(estimates) - true_value +mc_se_bias <- sd(estimates) / sqrt(n_reps) +rmse <- sqrt(mean((estimates - true_value)^2)) +``` + +**Intervals:** + +```r +coverage <- mean(ci_lower <= true_value & ci_upper >= true_value) +mc_se_cov <- sqrt(coverage * (1 - coverage) / n_reps) +avg_width <- mean(ci_upper - ci_lower) +``` + +**Tests:** Type I error = `mean(p_value < 0.05)` under H0; power = the same under H1. + +> 💡 **Always report Monte Carlo SE.** A coverage of 0.93 with MC SE = 0.005 is a meaningful deviation from 0.95; the same point with MC SE = 0.015 is not. Scholar attaches MC SE to every metric. + +--- + +## Step 4: Build Summary Tables ⏱️ 6 minutes + +### What You'll Do + +Scholar produces condition × method tables suitable for direct inclusion in a manuscript. 
You'll be asked to pick the table layout — typically: + +- **Rows:** methods being compared +- **Columns:** performance metrics (bias, RMSE, coverage, width) +- **Panels:** factor combinations (sample size × effect size) + +**Example (Table 1 — Bias and RMSE):** + +```text +| Method | n | Effect | Bias | MC SE | RMSE | +|------------|-----|--------|--------|--------|-------| +| Bootstrap | 50 | 0.0 | -0.003 | 0.0011 | 0.054 | +| Delta | 50 | 0.0 | 0.018 | 0.0010 | 0.051 | +| Bootstrap | 100 | 0.0 | -0.001 | 0.0008 | 0.038 | +| Delta | 100 | 0.0 | 0.009 | 0.0007 | 0.036 | +``` + +Scholar saves tables as both Markdown (for review) and LaTeX (for the manuscript), and produces a parallel Coverage/Width table for any condition with CI columns. + +--- + +## Step 5: Generate Figures ⏱️ 5 minutes + +### What You'll Do + +Scholar produces four publication-ready figures by default, each with MC error bands, clear axis labels, and a method-keyed legend: + +1. **`figure1_bias.pdf`** — Bias vs. sample size, faceted by effect size, with a reference line at zero. +2. **`figure2_coverage.pdf`** — Coverage vs. sample size, dashed line at 0.95 and dotted Bradley liberal bounds (0.925, 0.975). +3. **`figure3_power.pdf`** — Power vs. effect size, separate lines per sample size, reference at α. +4. **`figure4_rmse.pdf`** — RMSE vs. sample size on a log-y scale, one line per method. + +> 💡 **Custom plots:** Tell Scholar what to facet on. Example: *"Facet figure 1 by `dist` instead of `effect_size`."* It regenerates just that figure. + +--- + +## Step 6: Diagnose Convergence and Outliers ⏱️ 4 minutes + +### What You'll Do + +Before trusting your tables, audit the replication-level data. Scholar runs three diagnostic checks and flags anomalies: + +**Check 1: Balance** + +```text +condition_id method n_reps expected +1 bootstrap 2500 2500 OK +1 delta 2487 2500 13 missing +2 bootstrap 2500 2500 OK +... 
+``` + +**Check 2: Convergence failures** — counts and percentages per cell (e.g., `(n=50, dist=chisq, delta): 47 / 2500 (1.9%)`). + +**Check 3: Outliers in `estimate`** — replications flagged with `|estimate - median| > 6 * MAD`. + +Scholar will ask whether to: + +1. Drop failed reps and report the convergence rate alongside metrics +2. Keep them and use complete-case summaries (NAs propagate) +3. Re-run those conditions with a higher iteration cap + +> ⚠️ **Don't silently drop.** Manuscript reviewers expect a "convergence rate" column when failure rates exceed ~0.5%. Scholar adds this column automatically when any cell's convergence rate falls below 99.5%. + +--- + +## Step 7: Apply the Evidence-Based Reporting Policy ⏱️ 4 minutes + +### What You'll Do + +Scholar v2.18.0 follows an evidence-based reporting policy: when the analysis involves p-values or confidence intervals, Scholar surfaces $s$-values and compatibility-interval language alongside the conventional metrics. + +**What this changes in your output:** + +- Coverage tables include both nominal "95% CI" and "95% compatibility interval" labels +- Test-based cells report both p-values and $s$-values ($s = -\log_2 p$) +- Recommendation text avoids "significant"/"non-significant" dichotomies in favor of effect-size + interval-width language + +**Example narrative output:** + +```text +At n = 100 under chi-square errors, the bootstrap percentile CI achieved +0.947 coverage (MC SE = 0.0045), compatible with the nominal 0.95 level. +The delta method undercovered at 0.922 (MC SE = 0.0054), a deviation of +roughly 5 MC SEs — strong evidence of systematic undercoverage in this +condition. (s-value for the deviation from 0.95: about 22 bits.) +``` + +> 💡 **Opt out per-section.** If a target journal requires conventional p-value language, tell Scholar: *"Use conventional p-value language in the test-based section only."* The point-estimate and coverage sections will still use the evidence-based style.
+ +--- + +## Step 8: Save and Hand Off to Manuscript ⏱️ 3 minutes + +### What You'll Do + +When the run finishes, Scholar writes a self-contained directory: + +```text +simulation-analysis/ +├── README.md # Run metadata, software versions, seeds +├── tables/ # *.md and *.tex per table +├── figures/ # figure1_bias.pdf, figure2_coverage.pdf, ... +├── diagnostics/ # convergence-report.md, outliers.csv +└── interpretation.md # Narrative summary and recommendations +``` + +Hand `interpretation.md` to `/research:manuscript:results` to draft the Results section, and the `tables/` + `figures/` to `/research:manuscript:methods` to describe how performance was measured. + +--- + +## Common Issues + +| Issue | Cause | Fix | +|-------|-------|-----| +| "No `true_value` column found" | Long-format file omits the population parameter | Add a `true_value` column joined from the design matrix before invoking the command | +| Coverage reported as `NA` | Missing `ci_lower`/`ci_upper` (or `se`) | Add CI columns or supply `se` so Scholar can construct a Wald CI | +| Unbalanced cells in Step 6 | Convergence failures dropped rows mid-run | Re-run the failing conditions; or accept and let Scholar report a convergence rate column | +| Bias and RMSE both implausibly large in some conditions | `true_value` mismatched per condition (e.g., one global value used) | Verify `true_value` varies by condition — join from the condition matrix, don't hardcode | +| Figure 3 (power) is flat at 1.0 | Effect sizes too large for the sample sizes tested | Re-design with smaller effects via `/research:simulation:design` and re-run the simulation | +| Type I error rates outside [0.04, 0.06] across the board | Test statistic miscoded, or null condition mis-specified | Audit the analysis function on a single replication with known truth before re-running | +| "Wide-format detected" warning | Each row holds multiple methods in separate columns | Pivot to long format: one row per (rep × condition × method) | +| Outlier flag
fires on every cell | MAD threshold too tight for heavy-tailed estimator | Tell Scholar to use a robust trimmed mean for the summary, or raise the outlier threshold | + +--- + +## What's Next + +- **Plan the next study:** [Designing Monte Carlo Simulation Studies](simulation-design.md) — use lessons learned here to refine the next design (e.g., add a sample size, drop a redundant condition). +- **End-to-end walkthrough:** [Running a Full Simulation Study](simulation-study.md) — see how design, execution, and analysis fit together for a complete project. +- **Write up the findings:** [Writing the Manuscript](manuscript-writing.md) — feed Scholar's `interpretation.md` and `tables/` into a Results-then-Methods drafting workflow. + +## See Also + +- `/research:simulation:design` — plan the conditions, replications, and metrics before running the study +- `/research:simulation:analysis` — (this command) compute metrics, tables, and figures from finished simulation output +- `/research:manuscript:methods` — draft the Methods section describing how performance was measured +- `/research:manuscript:results` — draft the Results section using the analysis artifacts + +--- + +**Last Updated:** 2026-05-12 +**Scholar Version:** 2.18.0 +**Maintainer:** Data-Wise Team diff --git a/docs/tutorials/research/simulation-design.md b/docs/tutorials/research/simulation-design.md index 4d69443c..d7feec03 100644 --- a/docs/tutorials/research/simulation-design.md +++ b/docs/tutorials/research/simulation-design.md @@ -4,6 +4,8 @@ render_macros: false # Designing Monte Carlo Simulation Studies +**Level:** 🔵 Intermediate + A comprehensive tutorial for planning and executing simulation studies in statistical research. 
**Learning Objectives**: By the end of this tutorial, you will be able to: diff --git a/docs/tutorials/research/simulation-study.md b/docs/tutorials/research/simulation-study.md index 004a544c..9c886826 100644 --- a/docs/tutorials/research/simulation-study.md +++ b/docs/tutorials/research/simulation-study.md @@ -2,6 +2,7 @@ **Target Audience:** Researchers designing Monte Carlo simulation studies **Time:** 25 minutes +**Level:** 🔴 Advanced **Difficulty:** Intermediate ## What You'll Learn diff --git a/docs/tutorials/research/statistical-method-discovery.md b/docs/tutorials/research/statistical-method-discovery.md new file mode 100644 index 00000000..799f78e6 --- /dev/null +++ b/docs/tutorials/research/statistical-method-discovery.md @@ -0,0 +1,264 @@ +# Tutorial: Discovering Statistical Methods + +**Target Audience:** Researchers choosing methods for a new study or revising methodology under reviewer pressure +**Time:** 20 minutes +**Level:** 🔵 Intermediate + +## What You'll Learn + +By the end of this tutorial, you'll be able to: + +- Frame a methodological problem so `/research:method-scout` returns useful candidates +- Read a Method Scout Report and identify the tradeoffs across candidates +- Compare 2-3 candidate methods on assumptions, sample-size requirements, and robustness +- Distinguish within-discipline, cross-discipline, recent, and classic candidates +- Decide when method-scout output is enough versus when to escalate to `/research:analysis-plan` +- Chain `/research:method-scout` → `/research:analysis-plan` → `/research:simulation:design` for a full design pipeline +- Respond to reviewer methodology pushback with a defensible alternative + +## Prerequisites + +Before starting, make sure you have: + +- [ ] Scholar installed (`brew install data-wise/tap/scholar`) +- [ ] Claude Code running +- [ ] A research question or methodological problem in mind +- [ ] A rough sense of your data structure (sample size, variable types, clustering) +- [ ] (Optional) A `.bib`
file with domain literature for context + +**Installation Check:** + +```bash +scholar --version +# Should show: scholar v2.18.0 or later +``` + +--- + +## Step 1: Frame Your Methodological Problem ⏱️ 3 minutes + +### What You'll Do + +Method-scout needs four inputs to return useful candidates. Sketch each on paper or in a scratch file before invoking the command: + +1. **Research problem** — what you're trying to analyze or test (one sentence) +2. **Data characteristics** — sample size, variable types, structure (clustering, longitudinal, missingness) +3. **Constraints** — software available, audience familiarity, deadline +4. **Goal** — estimation, inference, prediction, or description + +**Worked example:** + +```text +Problem: Test whether peer-coaching mediates the effect of a financial-literacy + curriculum on credit-score change, with students nested within schools. +Data: N ≈ 800 students across 24 schools; binary treatment, continuous mediator + and outcome; ~10% attrition at follow-up. +Constraints: R-only shop; reviewers expect a method published before 2020. +Goal: Inference on the indirect effect with valid CIs. +``` + +> 💡 **Pro tip:** Vague inputs produce vague outputs. "Find methods for mediation" returns a textbook list; the framing above returns 3-4 specific candidates with tradeoffs you can actually compare. + +--- + +## Step 2: Invoke `/research:method-scout` ⏱️ 2 minutes + +### What You'll Do + +Launch the command and paste your framed problem when prompted: + +```text +/research:method-scout +``` + +Scholar will ask follow-ups if any of the four inputs are missing or ambiguous. Answer each one — don't skip. The interactive loop is what separates a generic survey from a targeted recommendation. + +**Expected flow:** + +```text +Q: What is the research problem? +A: [your one-sentence problem] + +Q: Describe the data structure. +A: N, variable types, clustering, missingness + +Q: Any constraints on software, audience, or recency? 
+A: [R-only, reviewers want pre-2020 methods, etc.] + +Q: What's the inferential goal? +A: estimation / inference / prediction / description +``` + +After the last answer, method-scout searches across disciplines and returns a structured report. + +--- + +## Step 3: Read the Method Scout Report ⏱️ 3 minutes + +### What You'll Do + +The output follows a fixed structure. Read top-to-bottom — each section answers a specific decision question: + +- **Research Problem** — restated. Verify Scholar understood you. If off, re-run with sharper framing. +- **Data Characteristics** — your inputs from Step 1, normalized. +- **Candidate Methods** — usually 2-4 entries spanning within-discipline and cross-discipline options. Each entry lists Description, Assumptions, Strengths, Limitations, Software, and a Key reference. +- **Comparison table** — the decision-driver. Rows are criteria (Assumptions, Complexity, Interpretability, Power/Efficiency); columns are methods. Scan rows, not columns. +- **Recommendation** — Scholar's pick with justification. Treat it as a strong default, not a mandate. +- **Next Steps** — concrete follow-up actions (often pointing at `/research:analysis-plan` or `/research:simulation:design`). + +--- + +## Step 4: Compare Candidates on Three Axes ⏱️ 4 minutes + +### What You'll Do + +Even with the comparison table, narrow to your top 2-3 candidates and evaluate them on three axes that the table compresses: + +| Axis | Question | Where to look | +|------|----------|---------------| +| Assumptions | Which assumptions are testable? Which are untestable? | Method N → Assumptions | +| Sample-size requirements | Does my N support this method at typical effect sizes? | Method N → Limitations + Strengths | +| Robustness | What happens when an assumption is mildly violated?
| Method N → Limitations | + +**Worked example (continuing from Step 1):** + +```text +Candidate A: Multilevel mediation via lavaan + cluster bootstrap + - Assumptions: linearity, MAR missingness, exchangeability across schools + - N: 800/24 clusters borderline; 24 clusters at the lower bound for level-2 inference + - Robustness: bootstrap CIs handle non-normal indirect effect well + +Candidate B: Bayesian multilevel SEM (brms / blavaan) + - Assumptions: as above, plus prior specification + - N: 24 clusters acceptable with weakly informative priors + - Robustness: posterior intervals robust to mild misspecification + +Candidate C: MLmed macro (Rockwood) + - Assumptions: linearity, normality of indirect effect + - N: documented down to 20 clusters + - Robustness: known liberal coverage under skewed mediators +``` + +> ⚠️ **Watch for**: "Recommended by Scholar" + "fails my constraints" is common. The recommendation field doesn't always re-check your constraints from Step 1 — verify before committing. + +--- + +## Step 5: Decide — Stop or Escalate? ⏱️ 2 minutes + +### What You'll Do + +After the report, choose one of three paths: + +| Path | When | What to do | +|------|------|------------| +| Stop | Single clear winner, no design questions remaining | Cite the recommendation, implement | +| Escalate to analysis plan | Method clear but specification details (covariates, sensitivity tests, missing-data handling) still open | Run `/research:analysis-plan` | +| Escalate to simulation | Multiple plausible methods, performance under your conditions is unknown | Run `/research:simulation:design` to compare them empirically | + +**Decision heuristics:** + +- If your reviewer asked "why this method and not that one?"
→ analysis-plan is enough +- If you need to defend operating characteristics (Type I error, power, coverage) → simulation +- If method-scout returned exactly one candidate that meets your constraints → stop + +> 💡 **Pro tip:** Method-scout's output is a methodological survey, not a design document. Don't try to make it serve double-duty — chain to the right downstream command instead. + +--- + +## Step 6: Chain to `/research:analysis-plan` ⏱️ 3 minutes + +### What You'll Do + +If you're escalating to a full analysis plan, paste the chosen method and its citation into the analysis-plan intake: + +```text +/research:analysis-plan +``` + +When Scholar asks "what statistical model will you use?", reference the method-scout recommendation directly: + +```text +A: Multilevel mediation per Candidate A from prior /research:method-scout run. + Model: random-intercept-at-school for both a-path and b-path, cluster bootstrap + for the indirect effect, MAR missingness handled via FIML in lavaan. + Key reference: [citation from method-scout report] +``` + +Analysis-plan picks up the method choice and walks you through estimands, covariates, sensitivity analyses, and pre-registration. See the [Analysis Planning tutorial](analysis-planning.md) for the full workflow. + +--- + +## Step 7: Chain to `/research:simulation:design` for Method Comparison ⏱️ 3 minutes + +### What You'll Do + +When you need empirical evidence to choose between candidates — for example, a reviewer asks "would your conclusions change under Method B?" — use simulation: + +```text +/research:simulation:design +``` + +Feed in two or three candidates from the method-scout report and ask Scholar to design a study comparing them on coverage, Type I error, or power under your data conditions. See the [Simulation Design tutorial](simulation-design.md).
+ +**Typical comparison setup:** + +```text +Methods to compare: Candidate A (cluster bootstrap), Candidate B (Bayesian SEM), + Candidate C (MLmed macro) +Data conditions: N ∈ {400, 800, 1600}; clusters ∈ {15, 24, 50}; + indirect effect ∈ {0, 0.1, 0.2} +Metrics: Coverage of 95% CI, Type I error rate, power +Reps: 5,000 per condition +``` + +--- + +## Step 8: Apply It to a Reviewer Response ⏱️ 2 minutes + +### What You'll Do + +Method-scout shines in revision contexts. When a reviewer challenges your method: + +1. Frame their critique as a methodological problem (Step 1) +2. Run `/research:method-scout` — but include the reviewer's preferred method by name in the constraints +3. Read the comparison table — the reviewer's method is now one column among several +4. Either adopt their suggestion (with citations) or write a justification using the tradeoff table + +**Example reviewer reply template:** + +> We thank the reviewer for suggesting [Method X]. We considered three alternatives, summarized below. We retain our original approach because [reason from the comparison table], but report [Method X] as a sensitivity analysis in the supplement. + +This pattern turns a defensive response into a transparent one — and the comparison table is the evidence.
+ +--- + +## Common Issues + +| Issue | Cause | Fix | +|-------|-------|-----| +| Report is generic, lists textbook methods | Vague problem statement, no constraints given | Re-run with specific N, variable types, and at least one constraint | +| Recommendation conflicts with your stated constraint | Constraint not surfaced during intake | Re-run, repeat the constraint in the problem statement itself | +| Only one candidate returned | Problem is highly specialized, or framing is over-constrained | Loosen one constraint (e.g., drop the "pre-2020" requirement) and re-run | +| Candidates all sound similar | Search stayed within one discipline | Add to constraints: "include methods from epidemiology / econometrics / psychometrics" | +| Citations look outdated | Search prioritized seminal papers | Ask for "recent advances from the last 5 years" in a follow-up turn | +| Recommendation lacks software detail | Software constraint not stated | Add explicit software constraint (e.g., "R-only" or "Stata-only") and re-run | + +For deeper troubleshooting, see the [Research Troubleshooting Guide](../../help/TROUBLESHOOTING-research.md). 
+ +--- + +## What's Next + +- **[Creating Statistical Analysis Plans](analysis-planning.md)** — turn the chosen method into a pre-registrable analysis plan with estimands, covariates, and sensitivity tests +- **[Designing Monte Carlo Simulation Studies](simulation-design.md)** — empirically compare candidate methods on coverage, power, and Type I error +- **[BibTeX Management](bibtex-management.md)** — capture the key references from the Method Scout Report into your bibliography for later citation + +--- + +## See Also + +- `/research:hypothesis` — sharpen your research question before scouting methods +- `/research:lit-gap` — find methodological gaps that motivate scouting +- `/research:analysis-plan` — full pre-registration-ready analysis plan +- `/research:simulation:design` — Monte Carlo design to compare candidates empirically +- `/research:doi` and `/research:bib:add` — capture the citations method-scout returns diff --git a/docs/tutorials/teaching/assignments-solutions-rubrics.md b/docs/tutorials/teaching/assignments-solutions-rubrics.md index 7acf3f75..f64f3c3b 100644 --- a/docs/tutorials/teaching/assignments-solutions-rubrics.md +++ b/docs/tutorials/teaching/assignments-solutions-rubrics.md @@ -1,7 +1,7 @@ # Tutorial: Creating Assignments with Solutions and Rubrics -**Time Estimate:** 45 minutes -**Difficulty:** Beginner +**Time:** 45 minutes +**Level:** 🟢 Beginner **Prerequisites:** - Scholar plugin installed (v2.5.0+) - Claude Code running with ANTHROPIC_API_KEY configured diff --git a/docs/tutorials/teaching/canvas-lms-export.md b/docs/tutorials/teaching/canvas-lms-export.md new file mode 100644 index 00000000..d8e4e074 --- /dev/null +++ b/docs/tutorials/teaching/canvas-lms-export.md @@ -0,0 +1,307 @@ +# Tutorial: Canvas LMS Export (QTI) + +**Target Audience:** Instructors exporting Scholar-generated exams to Canvas LMS +**Time:** 30 minutes +**Level:** 🔵 Intermediate + +Convert a Scholar exam (`.qmd` or `.json`) into a Canvas-ready QTI package (`.qti.zip`)
using `/teaching:canvas`. This tutorial walks you from a parsed exam through pre-flight validation, conversion, optional emulation, and the actual Canvas import. + +--- + +## What You'll Learn + +- Convert a `.qmd` or `.json` exam to a Canvas QTI `.zip` package with one command +- Read pre-flight validation output and fix the issues that block Canvas import +- Understand the question-type mapping between Scholar and Canvas (multiple-choice, true-false, short-answer, fill-in-blank, fill-in-multiple-blanks, numerical, essay, matching) +- Use `--dry-run`, `--intermediate`, `--validate`, and `--emulate` to inspect the pipeline at each stage +- Import the generated `.qti.zip` into a Canvas course +- Iterate: fix pre-flight errors, regenerate, and re-import without losing your exam structure + +--- + +## Prerequisites + +- Scholar plugin v2.18.0 or later installed +- `examark` CLI installed (`npm install -g examark` or `brew tap data-wise/tap && brew install examark`) +- Canvas LMS account with **Manage course content** permission (to import QTI packages) +- A `.qmd` or `.json` exam file generated by `/teaching:exam`, `/teaching:quiz`, or hand-authored + +> 💡 If you don't yet have an exam to convert, start with [Tutorial: Your First Exam](first-exam.md) and use the `--format qmd` flag. + +--- + +## Step 1: Inspect Your Exam with `--dry-run` ⏱️ 3 minutes + +Before converting, run a dry-run to confirm Scholar parses your file correctly. This step makes no API calls and writes no files. + +```bash +/teaching:canvas midterm.qmd --dry-run +``` + +**What you'll see:** + +```text +📄 Parsing: midterm.qmd + Title: Midterm Exam — STAT 440 + Questions: 12 + Total points: 100 + Types: multiple-choice(7), short-answer(3), essay(2) + +--- DRY RUN: Parsed Questions --- + +1. [multiple-choice] In linear regression, the residual is defined as... [10pts] + a) y - x + b) y - ŷ ✓ + c) ŷ - ȳ + d) x - x̄ +2. [short-answer] Define homoscedasticity in one sentence. [5pts] +... 
+--- End Dry Run --- +Remove --dry-run to convert to QTI. +``` + +**✅ Checkpoint:** + +- The question count matches what you authored +- Question types use **hyphen-separated** names (`multiple-choice`, not `multiple_choice`) +- The correct option is marked with `✓` for multiple-choice items + +--- + +## Step 2: Run the Canvas Pre-flight Validator ⏱️ 4 minutes + +Drop `--dry-run` to trigger the Canvas pre-flight validator. It catches questions Canvas will silently drop or import as broken (essay items with no rubric, fill-in-multiple-blanks missing answers, multiple-choice with no correct option, etc.). + +```bash +/teaching:canvas midterm.qmd +``` + +**Sample failing output:** + +```text +🔍 Pre-flight Canvas validation... +❌ Pre-flight errors (fix before converting): + - Q4 (fill-in-multiple-blanks): no answer keys for blanks [city, year] + - Q9 (multiple-choice): no correct answer marked + - Q11 (numerical): answer "approximately 0.05" is not a number + +Fix these issues and re-run /teaching:canvas. +``` + +**✅ Checkpoint:** + +- If you see `✅ All questions valid for Canvas import`, skip to Step 3 +- If you see errors, edit the source `.qmd` (add `[x]` markers, fill in `answer_key`, replace prose with a real number) and re-run +- ⚠️ Warnings (preceded by `⚠️`) are advisory only — they do not block conversion + +> 💡 The pre-flight checks are the same ones `runCanvasPreflightValidation` exports — see [Tutorial: Preflight Checks](preflight-checks.md) for an overview of related health checks. + +--- + +## Step 3: Generate the QTI Package ⏱️ 3 minutes + +Once pre-flight passes, the same command produces `midterm.qti.zip` next to the input file. + +```bash +/teaching:canvas midterm.qmd +``` + +**What you'll see:** + +```text +📄 Parsing: midterm.qmd +🔍 Pre-flight Canvas validation... + ✅ All questions valid for Canvas import +🔄 Converting to examark format... +📦 Running examark CLI... 
+✅ QTI package created: midterm.qti.zip + +📊 Conversion Summary: + Input: midterm.qmd + Output: midterm.qti.zip + Questions: 12 + Points: 100 + +💡 Import into Canvas: Settings → Import Course Content → QTI .zip +``` + +**✅ Checkpoint:** + +- `midterm.qti.zip` exists in the input directory +- `unzip -l midterm.qti.zip` lists at least `imsmanifest.xml` and one assessment XML file + +To choose a custom output path, pass `--output`: + +```bash +/teaching:canvas midterm.qmd --output ~/canvas-imports/stat440-midterm.qti.zip +``` + +--- + +## Step 4: Understand the Question-Type Mapping ⏱️ 4 minutes + +Scholar emits Canvas-compatible types via the QMD parser's hyphen-separated names. The table below shows what each Scholar type becomes inside Canvas. + +| Scholar type (`.qmd`) | Canvas type | Notes | +|---|---|---| +| `multiple-choice` | Multiple Choice | Exactly one correct option in `answer_key` | +| `multiple-answers` | Multiple Answers | Two or more correct options accepted | +| `true-false` | True/False | Answer must be `True` or `False` | +| `short-answer` | Fill In The Blank | Canvas auto-grades against `answer_key` strings | +| `fill-in-blank` | Fill In The Blank | Single blank, single answer | +| `fill-in-multiple-blanks` | Fill In Multiple Blanks | Each `[blank-name]` token needs an entry in `answer_key` | +| `numerical` | Numerical Answer | Answer must be a number (tolerance supported by examark) | +| `essay` | Essay Question | Manually graded in Canvas; `answer_key` rubric is optional | +| `matching` | Matching | `answer_key` is a `left → right` map | +| `file-upload` | Essay Question | Canvas QTI has no native file-upload — degrades to Essay | + +> ⚠️ Type names **must use hyphens**. `multiple_choice` (underscores) silently parses as the default `essay` fallback and you'll lose the answer key on import. 
+ +**Override the fallback type** when Scholar can't infer a type: + +```bash +/teaching:canvas midterm.qmd --default-type Short +``` + +Valid values: `MC`, `MA`, `TF`, `Short`, `Numeric`, `Essay`, `Match`, `FMB`. The default is `Essay`. + +--- + +## Step 5: Inspect the Intermediate examark Markdown ⏱️ 3 minutes + +When something looks wrong after import, the fastest debugging path is to inspect the examark-format markdown that sits between Scholar and the QTI zip. Pass `--intermediate` to keep it on disk. + +```bash +/teaching:canvas midterm.qmd --intermediate +``` + +Open `midterm.examark.md` and you should see each question prefixed with its examark type marker: + +```markdown +## Question 1 [MC] [10 pts] + +In linear regression, the residual is defined as... + +a) y - x +b) y - ŷ [x] +c) ŷ - ȳ +``` + +**✅ Checkpoint:** + +- The `[MC]`/`[Short]`/`[Essay]` marker on each `##` heading matches Step 1's dry-run output +- Correct options are tagged with `[x]` and point values appear in `[N pts]` brackets + +> 💡 To hand-edit before final export, edit `midterm.examark.md` and run `examark canvas midterm.examark.md` directly — Scholar's pipeline mirrors what the CLI does. + +--- + +## Step 6: Validate and Emulate ⏱️ 4 minutes + +After conversion, two optional flags wrap the examark CLI's verification tools: + +```bash +/teaching:canvas midterm.qmd --validate --emulate +``` + +**What you'll see:** + +```text +✅ QTI package created: midterm.qti.zip + +🔍 Validating QTI package... + ✅ QTI package is valid + +🖥️ Simulating Canvas import... + ✅ Canvas import simulation passed +``` + +| Flag | What it runs | When to use | +|---|---|---| +| `--validate` | `examark verify` — structural QTI/XML check | Always before sharing a `.qti.zip` | +| `--emulate` | `examark emulate-canvas` — simulates the Canvas import path | Spot-check trickier types (matching, FMB) | + +> ⚠️ `examark emulate-canvas` exits non-zero when an essay or short-answer question has no predefined answer. 
That is expected — Canvas accepts those for **manual grading**. Trust `--validate` as the pass/fail gate; treat `--emulate` output as a hint, not a verdict. + +--- + +## Step 7: Import into Canvas ⏱️ 5 minutes + +You now have a `midterm.qti.zip`. Import it into Canvas: + +1. Open your Canvas course → **Settings** → **Import Course Content** +2. **Content Type:** select **QTI .zip file** +3. **Source:** click **Choose File** and pick `midterm.qti.zip` +4. Leave **All content** selected (or use selective import for a single quiz) +5. Click **Import** and wait for the job to finish +6. Click **Quizzes** in the course sidebar — your exam appears as a draft quiz + +**✅ Checkpoint:** + +- Question count and point values match Step 1's dry-run total +- Multiple-choice questions have the correct answer pre-selected (open one → **Show Question Details**) +- Essay/short-answer questions are marked for manual grading + +> 💡 The imported quiz is in **draft** state — students cannot see it until you click **Publish** in the quiz settings. + +--- + +## Step 8: Iterate — Fix, Regenerate, Re-import ⏱️ 4 minutes + +When you spot a problem in Canvas (typo, wrong answer, missing distractor), fix the source `.qmd`, regenerate the `.qti.zip`, and re-import. Scholar does **not** edit the imported Canvas quiz in place. + +```bash +$EDITOR midterm.qmd # 1. Fix in source +/teaching:canvas midterm.qmd --validate # 2. Regenerate +# 3. Delete previous draft in Canvas (Quizzes → ⋯ → Delete) +# 4. Re-import (Step 7) +``` + +> ⚠️ Canvas does **not** offer a "replace existing quiz" import. Always delete the previous draft before re-importing to avoid duplicates like `midterm (1)`. 
+ +**End-of-semester migration tip:** convert exams in a batch for blueprint courses or multi-section deployments: + +```bash +for exam in exams/*.qmd; do + /teaching:canvas "$exam" --validate --output "canvas-imports/$(basename "$exam" .qmd).qti.zip" +done +``` + +--- + +## Common Issues + +| Issue | Cause | Fix | +|---|---|---| +| `examark not installed` | examark CLI missing from `$PATH` | `npm install -g examark` or `brew tap data-wise/tap && brew install examark` | +| Pre-flight reports `no correct answer marked` | Source `.qmd` has options but no `[x]` marker or `answer_key` entry | Add `[x]` next to the correct option, or set `answer_key.Q3 = "b"` | +| `fill-in-multiple-blanks: no answer keys for blanks` | Question text has `[blank-name]` tokens with no matching entry | Add `answer_key.Q4 = { "city": "Paris", "year": "1789" }` | +| Canvas import says "Invalid QTI package" | Corrupted or empty `.zip` | Re-run with `--intermediate --validate` to confirm examark output | +| Questions imported with no answers | `q.id` already starts with `Q` but code wrapped it again | Update Scholar to v2.17.0+ (fixed in `qmd-exam.js`) | +| Type name silently treated as Essay | Used `multiple_choice` (underscores) instead of `multiple-choice` | Switch to hyphens; see Step 4's type table | +| `examark emulate-canvas` fails for essay items | Essay/short-answer with no predefined answer — expected behavior | Ignore the emulate exit code; trust `--validate` | +| LaTeX math renders as raw `$...$` in Canvas | Canvas does not parse all LaTeX | Edit math in Canvas's equation editor after import, or pre-render to images | + +> 📖 For deeper Canvas debugging (formatting loss, version-specific QTI quirks, ZIP integrity checks), see [TROUBLESHOOTING-teaching.md → Canvas QTI upload problems](../../help/TROUBLESHOOTING-teaching.md#problem-canvas-qti-upload-problems). 
+ +--- + +## What's Next + +- **[Your First Exam](first-exam.md)** — Generate exam content before exporting (start here if you don't yet have a `.qmd`) +- **[Rich Exam LaTeX Format](rich-exam-format.md)** — Author publication-quality exams in Quarto/LaTeX, then export the same source to Canvas +- **[Preflight Checks](preflight-checks.md)** — Run release-wide health checks (the Canvas pre-flight validator is a focused subset) + +--- + +## See Also + +- **`/teaching:exam --format canvas`** — Generate an exam and emit `.qti.zip` directly (skips the intermediate `.qmd` step) +- **`/teaching:preflight`** — Pre-release health checks for the whole Scholar project (version sync, conflict markers, CHANGELOG, `.STATUS`) +- **`/teaching:validate`** — Validate YAML configuration files against Scholar schemas +- **`examark verify midterm.qti.zip`** — Standalone QTI structural validator (wrapped by `--validate`) +- **`examark emulate-canvas midterm.qti.zip`** — Standalone Canvas-import simulator (wrapped by `--emulate`) + +--- + +**You've learned how to convert Scholar exams into Canvas QTI packages, validate them, and import cleanly.** Use `/teaching:canvas` as the final step in your exam pipeline: author in `.qmd`, validate, convert, and deliver to Canvas in under 5 minutes. 
diff --git a/docs/tutorials/teaching/config-management.md b/docs/tutorials/teaching/config-management.md index ad6d508f..2d364a97 100644 --- a/docs/tutorials/teaching/config-management.md +++ b/docs/tutorials/teaching/config-management.md @@ -1,6 +1,7 @@ # Tutorial: Managing Config & Prompts with `/teaching:config` **Time:** 15 minutes +**Level:** 🔵 Intermediate **Prerequisites:** - Scholar plugin installed (`brew install scholar` or manual installation) diff --git a/docs/tutorials/teaching/configuration.md b/docs/tutorials/teaching/configuration.md index 1d6f5cd8..955af94b 100644 --- a/docs/tutorials/teaching/configuration.md +++ b/docs/tutorials/teaching/configuration.md @@ -631,6 +631,36 @@ style: theorem_style: "theorem-proof" # or "informal" ``` +### Rich Exam Format Overrides + +When using `--format exam-rich-latex`, you can override generation behavior +via the `teaching_style.exam` block in `.flow/teach-config.yml`. These +overrides drive the on-disk `exam-rich.md` prompt — they control +evidence-based reporting language, textbook citations, computational-aid density, and +interaction-aware question wording. + +```yaml +scholar: + teaching_style: + exam: + policy: + report_s_values: true # Require Shannon-information reporting + avoid_significance_language: true # Ban "statistically significant", "reject H0", etc. 
+ use_compatibility_intervals: true # Frame intervals as compatibility, not accept/reject + aids_level: moderate # minimal | moderate | full — scaffolding density + textbook_citation_prefix: "Dean (2017)" # Cited in question stems for chapter refs + interaction_aware_language: true # "marginal mean differences" vs "main effects" +``` + +**Effect:** every exam question generated with `--format exam-rich-latex` +will respect the policy block (no significance language, $s$-value +interpretation in rubrics), cite the textbook prefix where applicable, +include the requested level of computational scaffolding, and use +marginal-mean-difference language for factorial designs with interaction terms. + +Caller-supplied options (`--aids-level`, `--textbook-citation-prefix`) +override the config-file values. + ### Full Advanced Example ```yaml diff --git a/docs/tutorials/teaching/demo-course.md b/docs/tutorials/teaching/demo-course.md index 9551dd35..008f267d 100644 --- a/docs/tutorials/teaching/demo-course.md +++ b/docs/tutorials/teaching/demo-course.md @@ -1,6 +1,7 @@ # Tutorial: Creating a Demo Course **Time:** 5 minutes +**Level:** 🟢 Beginner **Prerequisites:** - Scholar plugin installed (`brew install scholar` or manual installation) diff --git a/docs/tutorials/teaching/email-integration.md b/docs/tutorials/teaching/email-integration.md index 2c31eb53..d8cfd8ce 100644 --- a/docs/tutorials/teaching/email-integration.md +++ b/docs/tutorials/teaching/email-integration.md @@ -1,6 +1,7 @@ # Tutorial: Email Integration **Time:** 10 minutes +**Level:** 🔵 Intermediate **Prerequisites:** - Scholar plugin installed ({{ scholar.version }}+) diff --git a/docs/tutorials/teaching/feedback.md b/docs/tutorials/teaching/feedback.md index 3f5fe9b5..1443f2c4 100644 --- a/docs/tutorials/teaching/feedback.md +++ b/docs/tutorials/teaching/feedback.md @@ -1,6 +1,7 @@ # Tutorial: Generating Student Feedback **Time:** 5-7 minutes +**Level:** 🔵 Intermediate **Prerequisites:** - Scholar plugin 
installed (`brew install scholar` or manual installation) diff --git a/docs/tutorials/teaching/first-exam.md b/docs/tutorials/teaching/first-exam.md index da7297c2..8586741f 100644 --- a/docs/tutorials/teaching/first-exam.md +++ b/docs/tutorials/teaching/first-exam.md @@ -1,6 +1,7 @@ # Tutorial: Your First Exam **Time:** 10 minutes +**Level:** 🟢 Beginner **Prerequisites:** - Scholar plugin installed (`brew install scholar` or manual installation) diff --git a/docs/tutorials/teaching/learning-path.md b/docs/tutorials/teaching/learning-path.md index 1c3546ae..624e4c46 100644 --- a/docs/tutorials/teaching/learning-path.md +++ b/docs/tutorials/teaching/learning-path.md @@ -204,6 +204,7 @@ Integration with external tools and custom workflows. **Goal:** Get up and running quickly with basic teaching materials. **Time:** 1-2 hours +**Level:** 🟢 Beginner **Steps:** 1. First Exam (10 min) diff --git a/docs/tutorials/teaching/lesson-plans-manifest.md b/docs/tutorials/teaching/lesson-plans-manifest.md index b0bc2bc4..45e09203 100644 --- a/docs/tutorials/teaching/lesson-plans-manifest.md +++ b/docs/tutorials/teaching/lesson-plans-manifest.md @@ -1,6 +1,7 @@ # Tutorial: Working with Lesson Plans Manifests **Time:** 5-7 minutes +**Level:** 🔵 Intermediate **Prerequisites:** - Scholar plugin installed (`brew install scholar` or manual installation) diff --git a/docs/tutorials/teaching/migration.md b/docs/tutorials/teaching/migration.md index b2840f22..91d6457b 100644 --- a/docs/tutorials/teaching/migration.md +++ b/docs/tutorials/teaching/migration.md @@ -1,6 +1,7 @@ # Tutorial: Migrating to Manifest Format **Time:** 7 minutes +**Level:** 🔵 Intermediate **Prerequisites:** - Scholar plugin installed (`brew install scholar` or manual installation) diff --git a/docs/tutorials/teaching/preflight-checks.md b/docs/tutorials/teaching/preflight-checks.md index 2d0d4c3c..27d531a6 100644 --- a/docs/tutorials/teaching/preflight-checks.md +++ b/docs/tutorials/teaching/preflight-checks.md @@ 
-1,6 +1,7 @@ # Tutorial: Preflight Checks **Time:** 10 minutes +**Level:** 🔵 Intermediate **Prerequisites:** - Scholar plugin installed ({{ scholar.version }}+) diff --git a/docs/tutorials/teaching/quiz.md b/docs/tutorials/teaching/quiz.md index 43e20a69..5f537d07 100644 --- a/docs/tutorials/teaching/quiz.md +++ b/docs/tutorials/teaching/quiz.md @@ -1,6 +1,7 @@ # Tutorial: Creating Quizzes **Time:** 5-7 minutes +**Level:** 🟢 Beginner **Prerequisites:** - Scholar plugin installed (`brew install scholar` or manual installation) diff --git a/docs/tutorials/teaching/r-code-validation.md b/docs/tutorials/teaching/r-code-validation.md index 04f14574..fa4d40e2 100644 --- a/docs/tutorials/teaching/r-code-validation.md +++ b/docs/tutorials/teaching/r-code-validation.md @@ -1,6 +1,7 @@ # Tutorial: R Code Validation **Time:** 15 minutes +**Level:** 🔵 Intermediate **Prerequisites:** - Scholar plugin installed ({{ scholar.version }}+) diff --git a/docs/tutorials/teaching/rich-exam-format.md b/docs/tutorials/teaching/rich-exam-format.md new file mode 100644 index 00000000..eb9ef50e --- /dev/null +++ b/docs/tutorials/teaching/rich-exam-format.md @@ -0,0 +1,237 @@ +# Tutorial: Rich Exam LaTeX Format + +> **⏱️ Time to Complete:** 25 minutes +> **Level:** 🔵 Intermediate +> **Prerequisites:** Scholar v2.18.0+, Quarto, LuaLaTeX, basic familiarity with `/teaching:exam` + +Generate a publication-quality statistics exam using `/teaching:exam --format exam-rich-latex` — a Quarto/LaTeX format that builds on the LaTeX `exam` document class and embeds an evidence-based reporting policy ($s$-values, compatibility intervals), an F-critical bracket table, an $s$-value calibration table, and interaction-aware question language for factorial designs. 
+ +--- + +## What You'll Learn + +By the end of this tutorial, you'll be able to: + +- Generate an exam in the rich LaTeX format with one command +- Supply `exam_metadata.df_pairs` so Scholar auto-builds an F-critical bracket table +- Override reporting policy, computational-aid density, and citation prefix via `teach-config.yml` +- Understand the interaction-aware language pass and when it fires +- Toggle the `\printanswers` solution rendering and produce a separate answer-key PDF +- Compile the generated `.qmd` to PDF with LuaLaTeX + +--- + +## Prerequisites + +**Required:** + +- Scholar v2.18.0 or later installed +- `ANTHROPIC_API_KEY` set +- Quarto CLI (`quarto --version` should print 1.4+) +- LuaLaTeX (ships with TeX Live / MacTeX / MiKTeX) + +**Helpful:** + +- Familiarity with [`/teaching:exam` basics](first-exam.md) +- A `.flow/teach-config.yml` already set up ([configuration tutorial](configuration.md)) + +--- + +## Tutorial Overview + +| Step | Task | Time | +|------|------|------| +| 1 | Generate a rich-LaTeX exam | 3 min | +| 2 | Inspect the generated `.qmd` | 5 min | +| 3 | Supply `df_pairs` for the bracket table | 4 min | +| 4 | Override defaults via `teach-config.yml` | 5 min | +| 5 | Compile to PDF with LuaLaTeX | 3 min | +| 6 | Render the answer key | 3 min | +| 7 | Understand interaction-aware reframing | 2 min | + +--- + +## Step 1: Generate a Rich-LaTeX Exam ⏱️ 3 min + +```bash +/teaching:exam stat-545-midterm \ + --format exam-rich-latex \ + --topics "two-way ANOVA, factorial designs, contrasts" +``` + +Scholar will: + +1. Load the on-disk prompt at `~/.claude/plugins/scholar/src/teaching/ai/prompts/default/exam-rich.md` (project overrides at `.flow/templates/prompts/exam-rich.md` are preferred when present). +2. Render `{{topic}}`, `{{course_level}}`, `{{question_count}}`, `{{aids_level}}`, `{{textbook_citation_prefix}}`, and `{{interaction_aware_language}}` from your `teach-config.yml` + CLI flags. +3. Generate the exam JSON via Claude. +4. 
Pipe the result through `ExamRichLatexFormatter`, which prepends the YAML preamble, the reporting-policy quote block, the F-critical bracket table, and the $s$-value calibration table. + +**Output:** `stat-545-midterm.qmd` in your current directory. + +--- + +## Step 2: Inspect the Generated `.qmd` ⏱️ 5 min + +Open the file and skim the structure: + +```yaml +--- +title: "STAT 545 Midterm" +format: + pdf: + documentclass: exam + classoption: [12pt, addpoints] + pdf-engine: lualatex + keep-tex: true + include-in-header: + text: | + \usepackage{amsmath, amssymb, booktabs, array, enumitem} + \pointsinrightmargin + \bracketedpoints + \marksnotpoints + % \printanswers % uncomment to render answer-key PDF +geometry: margin=1in +--- + +\begin{quote} +\textbf{Reporting policy.} Report $s$-values (Shannon information, in bits) +rather than $p$-values, and quantify uncertainty with compatibility intervals. +Avoid the phrases ``statistically significant,'' ``reject,'' and ``fail to +reject''; instead describe the strength of evidence and the practical +implication. +\end{quote} +``` + +Below the policy block you'll find the **F-critical bracket table** (only when you supply `df_pairs`), the **$s$-value calibration table**, and the **questions environment** (`\begin{questions} … \end{questions}`). + +--- + +## Step 3: Supply `df_pairs` for the Bracket Table ⏱️ 4 min + +The bracket table is generated only when `exam_metadata.df_pairs` is provided. Without it, you'll see a `% bracket table omitted: no df_pairs in metadata` comment in the `.qmd`. 
+ +Pass df pairs via a JSON fragment or a `--metadata-file`: + +```bash +/teaching:exam stat-545-midterm \ + --format exam-rich-latex \ + --topics "factorial ANOVA, contrasts" \ + --metadata '{"df_pairs":[{"df1":1,"df2":20},{"df1":2,"df2":30},{"df1":3,"df2":15}]}' +``` + +Scholar will compute $F^*_{0.05}$ for each pair using `jstat.centralF.inv`, dedupe, sort by `(df1, df2)`, and emit: + +```latex +\begin{table}[h] +\centering +\begin{tabular}{rrr} +\toprule +$df_1$ & $df_2$ & $F^*_{0.05}$ \\ +\midrule +1 & 20 & 4.35 \\ +2 & 30 & 3.32 \\ +3 & 15 & 3.29 \\ +\bottomrule +\end{tabular} +\caption{F critical values, $\alpha = 0.05$.} +\end{table} +``` + +> ⚠️ **df validation:** Both `df1` and `df2` must be finite integers $\geq 1$. Invalid pairs throw with a clear message — Scholar refuses to emit a table with `NaN` rows. + +--- + +## Step 4: Override Defaults via `teach-config.yml` ⏱️ 5 min + +The on-disk `exam-rich.md` prompt template reads its policy/citation/aid settings from your `teach-config.yml`: + +```yaml +scholar: + teaching_style: + exam: + policy: + report_s_values: true # Shannon-information reporting + avoid_significance_language: true # Ban "statistically significant", "reject H0", etc. + use_compatibility_intervals: true # Frame intervals as compatibility, not accept/reject + aids_level: moderate # minimal | moderate | full + textbook_citation_prefix: "Dean (2017)" + interaction_aware_language: true # "marginal mean differences" vs "main effects" +``` + +**Precedence:** caller flags > `teaching_style.exam.*` > built-in defaults. See the [Course Configuration tutorial — Rich Exam Format Overrides](configuration.md#rich-exam-format-overrides) for the full block. 
+ +--- + +## Step 5: Compile to PDF ⏱️ 3 min + +```bash +quarto render stat-545-midterm.qmd --to pdf +``` + +You should see a multi-page PDF with: + +- Title block (from the YAML `title:`) +- Reporting-policy quote +- F-critical bracket table (if you supplied `df_pairs`) +- $s$-value calibration table +- Question environment with point boxes in the right margin + +If compilation fails on the first run, check: + +| Error | Fix | +|-------|-----| +| `! Package fontspec error` | Ensure `pdf-engine: lualatex` (not `pdflatex`) in the YAML | +| `! Class exam Error` | Install `texlive-latex-extra` (Linux) or `mactex-extra` (macOS) | +| `Quarto not found` | Install Quarto 1.4+ from [quarto.org](https://quarto.org/docs/get-started/) | + +--- + +## Step 6: Render the Answer Key ⏱️ 3 min + +The preamble ships with `\printanswers` commented out: + +```latex +% \printanswers % uncomment to render answer-key PDF +``` + +To produce a separate answer-key PDF: + +1. Copy the source: `cp stat-545-midterm.qmd stat-545-midterm-key.qmd` +2. Open the copy and remove the `%` before `\printanswers` +3. Re-render: `quarto render stat-545-midterm-key.qmd --to pdf` + +> ⚠️ **Known limitation (v2.18.0):** the parent `LaTeXFormatter.formatShortAnswer()` does not wrap answers in a `\begin{solution} … \end{solution}` block, so short-answer-only exams produce a byte-identical PDF whether `\printanswers` is on or off. Multiple-choice and essay questions render the toggle correctly. Per-question solution visibility is on the roadmap. 
+ +**Before reframing (factorial design with $A \times B$ interaction):** + +> "Test the **main effects** of factors A and B at $\alpha = 0.05$." + +**After reframing:** + +> "Compare the **marginal mean differences** of factors A and B at $\alpha = 0.05$." + +The pass is purely textual when no `aiProvider` is wired in; it preserves `$…$` and `\(…\)` math regions byte-for-byte and is **idempotent** (running it twice produces the same output as running it once). To disable it for an individual course, set: + +```yaml +scholar: + teaching_style: + exam: + interaction_aware_language: false +``` + +--- + +## What's Next + +- **[`/teaching:exam` reference](../../API-REFERENCE.md#generator-api)** — full generator API +- **[Rich-LaTeX architecture diagram](../../ARCHITECTURE-DIAGRAMS.md#10-rich-exam-latex-pipeline-v2180)** — end-to-end flow +- **[Configuration tutorial — Rich Exam Format Overrides](configuration.md#rich-exam-format-overrides)** — full `teaching_style.exam` block +- **[Spec: Rich Exam Format](../../specs/SPEC-2026-05-12-rich-exam-format.md)** — original design doc diff --git a/docs/tutorials/teaching/semester-setup.md b/docs/tutorials/teaching/semester-setup.md index 0ecb2b5c..cf396f34 100644 --- a/docs/tutorials/teaching/semester-setup.md +++ b/docs/tutorials/teaching/semester-setup.md @@ -1,6 +1,7 @@ # Tutorial: Semester Setup **Time:** 15 minutes +**Level:** 🟢 Beginner **Prerequisites:** - Scholar plugin installed (`brew install scholar` or manual installation) diff --git a/docs/tutorials/teaching/syllabus.md b/docs/tutorials/teaching/syllabus.md index 244cb49e..28946fc0 100644 --- a/docs/tutorials/teaching/syllabus.md +++ b/docs/tutorials/teaching/syllabus.md @@ -1,6 +1,7 @@ # Tutorial: Generating a Course Syllabus **Time:** 5-7 minutes +**Level:** 🟢 Beginner **Prerequisites:** - Scholar plugin installed (`brew install scholar` or manual installation) diff --git a/docs/tutorials/teaching/sync-and-diff.md b/docs/tutorials/teaching/sync-and-diff.md index 
ffa6174e..248e6031 100644 --- a/docs/tutorials/teaching/sync-and-diff.md +++ b/docs/tutorials/teaching/sync-and-diff.md @@ -1,6 +1,7 @@ # Tutorial: Syncing and Comparing Configurations **Time:** 12 minutes +**Level:** 🔵 Intermediate **Prerequisites:** - Scholar plugin installed (`brew install scholar` or manual installation) diff --git a/docs/tutorials/teaching/validate-and-fix.md b/docs/tutorials/teaching/validate-and-fix.md index 40271295..0cdd4449 100644 --- a/docs/tutorials/teaching/validate-and-fix.md +++ b/docs/tutorials/teaching/validate-and-fix.md @@ -1,6 +1,7 @@ # Tutorial: Validating and Fixing Configuration **Time:** 15 minutes +**Level:** 🔵 Intermediate **Prerequisites:** - Scholar plugin installed (`brew install scholar` or manual installation) diff --git a/docs/tutorials/teaching/validate-pipeline.md b/docs/tutorials/teaching/validate-pipeline.md index cb5a1f48..99c9cf6a 100644 --- a/docs/tutorials/teaching/validate-pipeline.md +++ b/docs/tutorials/teaching/validate-pipeline.md @@ -1,6 +1,7 @@ # Tutorial: R Validation Pipeline **Time:** 15 minutes +**Level:** 🔵 Intermediate **Prerequisites:** - Scholar plugin installed ({{ scholar.version }}+) diff --git a/docs/tutorials/teaching/weekly-content.md b/docs/tutorials/teaching/weekly-content.md index 7f6f833e..f44be244 100644 --- a/docs/tutorials/teaching/weekly-content.md +++ b/docs/tutorials/teaching/weekly-content.md @@ -1,6 +1,7 @@ # Tutorial: Weekly Content Creation **Time:** 20 minutes +**Level:** 🔵 Intermediate **Prerequisites:** - Scholar plugin installed and configured diff --git a/docs/workflows/research/latex-integration.md b/docs/workflows/research/latex-integration.md index 26ea4694..c274b9a2 100644 --- a/docs/workflows/research/latex-integration.md +++ b/docs/workflows/research/latex-integration.md @@ -1031,7 +1031,7 @@ Week 4: Advanced automation (Makefiles, CI/CD) --- -**Document Version:** v2.17.0 +**Document Version:** v2.18.0 **Last Updated:** 2026-02-01 **Word Count:** ~7,500 **Example 
Scripts:** 15+ diff --git a/docs/workflows/research/literature-review.md b/docs/workflows/research/literature-review.md index 4a509e17..d180b0b3 100644 --- a/docs/workflows/research/literature-review.md +++ b/docs/workflows/research/literature-review.md @@ -1249,7 +1249,7 @@ After completing literature review: --- -**Document Version:** v2.17.0 +**Document Version:** v2.18.0 **Last Updated:** 2026-02-01 **Word Count:** ~9,500 **Time to Complete:** 2-4 weeks for systematic review diff --git a/docs/workflows/research/manuscript-writing.md b/docs/workflows/research/manuscript-writing.md index c3f461e0..84f81e44 100644 --- a/docs/workflows/research/manuscript-writing.md +++ b/docs/workflows/research/manuscript-writing.md @@ -1813,7 +1813,7 @@ pandoc manuscript.md --bibliography references.bib -o manuscript.pdf --- -**Document Version:** 2.17.0 +**Document Version:** 2.18.0 **Last Updated:** 2026-02-04 **Author:** Data-Wise Team **Status:** Complete diff --git a/mkdocs.yml b/mkdocs.yml index c1271752..75abe685 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -39,19 +39,22 @@ plugins: - search - macros +extra_css: + - stylesheets/extra.css + extra: scholar: - version: "2.17.0" - prev_version: "2.16.0" - release_date: "2026-03-04" - test_count: "3,340" - jest_count: "3,203" + version: "2.18.0" + prev_version: "2.17.0" + release_date: "2026-05-12" + test_count: "3,400" + jest_count: "3,263" node_test_count: "137" command_count: 33 teaching_commands: 18 research_commands: 14 hub_commands: 1 - suite_count: 137 + suite_count: 143 markdown_extensions: - pymdownx.highlight: @@ -86,6 +89,7 @@ nav: - 📖 User Guide: USER-GUIDE.md - 📚 Reference Card: REFCARD.md - 🔍 Scholar Hub Tutorial: tutorials/getting-started-with-hub.md + - 📚 All Tutorials (Index): tutorials/index.md - Teaching: - Overview: teaching/index.md @@ -97,6 +101,7 @@ nav: - Workflows: TEACHING-WORKFLOWS.md - Tutorials: - First Exam: tutorials/teaching/first-exam.md + - Rich Exam LaTeX Format (v2.18.0): 
tutorials/teaching/rich-exam-format.md - Semester Setup: tutorials/teaching/semester-setup.md - Weekly Content Creation: tutorials/teaching/weekly-content.md - Assignments, Solutions & Rubrics: tutorials/teaching/assignments-solutions-rubrics.md @@ -116,6 +121,7 @@ nav: - Email Integration: tutorials/teaching/email-integration.md - Validation & Auto-Fix (Phase 2): tutorials/teaching/validate-and-fix.md - Sync & Diff Workflows (Phase 2): tutorials/teaching/sync-and-diff.md + - Canvas LMS Export (QTI): tutorials/teaching/canvas-lms-export.md - Migration v1 → v2 (Phase 2): tutorials/teaching/migration.md - Learning Path: tutorials/teaching/learning-path.md - Advanced Tutorials: @@ -165,9 +171,17 @@ nav: - Gap Analysis: workflows/research/gap-analysis.md - Tutorials: - First Literature Search: tutorials/research/first-literature-search.md - - Reviewer Response: tutorials/research/reviewer-response.md + - BibTeX Management: tutorials/research/bibtex-management.md + - DOI Lookup: tutorials/research/doi-lookup.md + - Statistical Method Discovery: tutorials/research/statistical-method-discovery.md + - Finding Research Gaps: tutorials/research/finding-research-gaps.md + - Hypothesis Generation: tutorials/research/hypothesis-generation.md - Writing Methods Sections: tutorials/research/methods-section.md + - Writing Results Sections: tutorials/research/results-section.md + - Reviewing Mathematical Proofs: tutorials/research/proof-review.md + - Reviewer Response: tutorials/research/reviewer-response.md - Simulation Design: tutorials/research/simulation-design.md + - Analyzing Simulation Results: tutorials/research/simulation-analysis.md - Simulation Study Guide: tutorials/research/simulation-study.md - Analysis Planning: tutorials/research/analysis-planning.md - Manuscript Writing: tutorials/research/manuscript-writing.md @@ -224,6 +238,7 @@ nav: - Phase 0 Foundation: architecture/PHASE-0-FOUNDATION.md - Release Notes: + - What's New in v2.18.0: WHATS-NEW-v2.18.0.md - What's 
New in v2.17.0: WHATS-NEW-v2.17.0.md - What's New in v2.16.0: WHATS-NEW-v2.16.0.md - Canvas QTI Enhancements: WHATS-NEW-canvas-qti.md diff --git a/package-lock.json b/package-lock.json index 79bf7068..b5519224 100644 --- a/package-lock.json +++ b/package-lock.json @@ -1,19 +1,20 @@ { "name": "@data-wise/scholar", - "version": "2.15.0", + "version": "2.18.0", "lockfileVersion": 3, "requires": true, "packages": { "": { "name": "@data-wise/scholar", - "version": "2.15.0", + "version": "2.18.0", "license": "MIT", "dependencies": { "ajv": "^8.17.1", "ajv-formats": "^3.0.1", "ajv-keywords": "^5.1.0", "glob": "^13.0.2", - "js-yaml": "^4.1.1" + "js-yaml": "^4.1.1", + "jstat": "^1.9.6" }, "devDependencies": { "@eslint/js": "^10.0.1", @@ -4220,6 +4221,11 @@ "node": ">=6" } }, + "node_modules/jstat": { + "version": "1.9.6", + "resolved": "https://registry.npmjs.org/jstat/-/jstat-1.9.6.tgz", + "integrity": "sha512-rPBkJbK2TnA8pzs93QcDDPlKcrtZWuuCo2dVR0TFLOJSxhqfWOVCSp8aV3/oSbn+4uY4yw1URtLpHQedtmXfug==" + }, "node_modules/keyv": { "version": "4.5.4", "resolved": "https://registry.npmjs.org/keyv/-/keyv-4.5.4.tgz", diff --git a/package.json b/package.json index d281b9d6..42f682e8 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "@data-wise/scholar", - "version": "2.17.0", + "version": "2.18.0", "description": "Academic workflows for research and teaching - 33 commands for literature management, manuscript writing, simulation studies, course material generation, and config management", "main": "src/plugin-api/commands/index.js", "type": "module", @@ -84,7 +84,8 @@ "ajv-formats": "^3.0.1", "ajv-keywords": "^5.1.0", "glob": "^13.0.2", - "js-yaml": "^4.1.1" + "js-yaml": "^4.1.1", + "jstat": "^1.9.6" }, "devDependencies": { "@eslint/js": "^10.0.1", diff --git a/scripts/m4-validate-rich-format.js b/scripts/m4-validate-rich-format.js new file mode 100644 index 00000000..1b305c84 --- /dev/null +++ b/scripts/m4-validate-rich-format.js @@ -0,0 +1,226 @@ 
+#!/usr/bin/env node +/** + * Milestone 4 validation driver for the rich-LaTeX exam format. + * + * Loads the STAT 445/545 Spring 2026 fixture, runs it through + * `ExamRichLatexFormatter`, writes the output to `out/m4-validation/`, and + * validates the result against a 12-item section-presence checklist. + * + * Exits 0 if all 12 checks pass; exits 1 otherwise. + */ + +import fs from 'node:fs'; +import path from 'node:path'; +import { fileURLToPath } from 'node:url'; + +import { ExamRichLatexFormatter } from '../src/teaching/formatters/exam-rich-latex.js'; + +const __filename = fileURLToPath(import.meta.url); +const __dirname = path.dirname(__filename); +const REPO_ROOT = path.resolve(__dirname, '..'); + +const FIXTURE_PATH = path.join( + REPO_ROOT, + 'tests/teaching/formatters/fixtures/stat-545-final-spring-2026.fixture.json', +); +const OUT_DIR = path.join(REPO_ROOT, 'out/m4-validation'); +const OUT_QMD = path.join(OUT_DIR, 'stat-545-final-spring-2026.qmd'); + +function ensureDir(dir) { + fs.mkdirSync(dir, { recursive: true }); +} + +function loadFixture(p) { + const raw = fs.readFileSync(p, 'utf8'); + return JSON.parse(raw); +} + +function record(results, id, label, ok, reason) { + results.push({ id, label, ok: !!ok, reason: reason || '' }); +} + +function check(output, fixture) { + const results = []; + + // [a] starts with --- (YAML frontmatter open) + record(results, 'a', 'YAML frontmatter opens with ---', + output.startsWith('---'), + output.startsWith('---') ? 
'' : `first chars = ${JSON.stringify(output.slice(0, 6))}`); + + // [b] documentclass: exam (may be top-level or nested under format.pdf) + record(results, 'b', 'YAML contains documentclass: exam', + /^\s*documentclass:\s*exam\s*$/m.test(output)); + + // [c] pdf-engine: lualatex (may be top-level or nested under format.pdf) + record(results, 'c', 'YAML sets pdf-engine: lualatex', + /^\s*pdf-engine:\s*lualatex\s*$/m.test(output)); + + // [d] amsmath package preamble + record(results, 'd', 'Contains \\usepackage{amsmath ...', + output.includes('\\usepackage{amsmath')); + + // [e] commented \printanswers toggle + record(results, 'e', 'Contains commented \\printanswers toggle', + /%\s*\\printanswers/.test(output)); + + // [f] Reporting policy block + record(results, 'f', 'Contains Reporting policy. block', + output.includes('Reporting policy.')); + + // [g] Bracket table caption + a row for each df pair + const dfPairs = fixture.exam_metadata?.df_pairs || []; + const hasBracketCaption = output.includes('F critical values'); + let missingRow = null; + for (const { df1, df2 } of dfPairs) { + // Bracket table rows look like: `${df1} & ${df2} & \\` + const re = new RegExp(`(?:^|\\n)\\s*${df1}\\s*&\\s*${df2}\\s*&\\s*[0-9]+\\.[0-9]+\\s*\\\\\\\\`); + if (!re.test(output)) { + missingRow = `(${df1}, ${df2})`; + break; + } + } + record(results, 'g', + `Bracket table caption + row for every df pair (${dfPairs.length} pairs)`, + hasBracketCaption && missingRow === null, + !hasBracketCaption + ? 'caption "F critical values" not found' + : missingRow + ? 
`missing row for df pair ${missingRow}` + : ''); + + // [h] s-value calibration table substring + record(results, 'h', 'Contains s-value calibration table', + output.includes('$s$-value calibration') || output.includes('s-value calibration')); + + // [i] questions environment + const hasBeginQ = output.includes('\\begin{questions}'); + const hasEndQ = output.includes('\\end{questions}'); + record(results, 'i', 'Contains \\begin{questions} and \\end{questions}', + hasBeginQ && hasEndQ, + !hasBeginQ ? 'no \\begin{questions}' : !hasEndQ ? 'no \\end{questions}' : ''); + + // [j] interaction-aware language pass for the factorial question + const factorialQ = (fixture.questions || []).find( + (q) => q.designModel?.hasInteractionTerm === true, + ); + let interactionOk = false; + let interactionReason = ''; + if (!factorialQ) { + interactionReason = 'no fixture question has designModel.hasInteractionTerm === true'; + } else { + // Find the question stem in the output. + // The reframed `text` field is emitted verbatim as the stem after \question[points]. + // Pull the chunk between `\question[]` for Q2 and the next \question/`\end{questions}`. + const startTag = `\\question[${factorialQ.points}]`; + const startIdx = output.indexOf(startTag); + let qBlock = ''; + if (startIdx >= 0) { + const after = output.slice(startIdx + startTag.length); + // Stop at next \question[ or \end{questions} + const nextQ = after.indexOf('\\question['); + const endQ = after.indexOf('\\end{questions}'); + const stops = [nextQ, endQ].filter((n) => n >= 0); + const stop = stops.length ? Math.min(...stops) : after.length; + qBlock = after.slice(0, stop); + } + const containsMainEffects = /\bmain\s+effects?\b/i.test(qBlock); + const containsMarginalMeans = + /\bmarginal\s+mean\s+differences?\b/i.test(qBlock) || + /\bmarginal\s+means\b/i.test(qBlock); + interactionOk = !containsMainEffects && containsMarginalMeans; + if (!interactionOk) { + interactionReason = + (containsMainEffects ? 
'still contains "main effects"; ' : '') + + (!containsMarginalMeans ? 'missing "marginal mean differences"' : ''); + } + } + record(results, 'j', + 'Factorial question reframed (no "main effects"; has "marginal mean differences")', + interactionOk, + interactionReason); + + // [k] one \question macro per fixture question + const questionMacroCount = (output.match(/\\question\[/g) || []).length; + const expectedCount = (fixture.questions || []).length; + record(results, 'k', `Contains ${expectedCount} \\question macros`, + questionMacroCount === expectedCount, + questionMacroCount === expectedCount ? '' : `found ${questionMacroCount}`); + + // [l] valid UTF-8 (encode/decode round-trip must be lossless; lone surrogates decode to U+FFFD) ending with newline + let utf8Ok = true; + try { + utf8Ok = Buffer.from(output, 'utf8').toString('utf8') === output; + } catch { + utf8Ok = false; + } + const endsWithNewline = output.endsWith('\n'); + record(results, 'l', 'Output is valid UTF-8 and ends with a newline', + utf8Ok && endsWithNewline, + !utf8Ok ? 'invalid UTF-8' : !endsWithNewline ? 'no trailing newline' : ''); + + return results; +} + +function bracketTableRowCount(output) { + // Find the F-critical bracket-table block: + // \begin{table}[h] + // \centering + // \begin{tabular}{rrr} + // ...
+ // \caption{F critical values, $\alpha = 0.05$.} + // \end{table} + const tableStart = output.indexOf('\\begin{tabular}{rrr}'); + const capIdx = output.indexOf('F critical values'); + if (tableStart < 0 || capIdx < 0 || capIdx < tableStart) return 0; + const tableEnd = output.indexOf('\\end{tabular}', tableStart); + if (tableEnd < 0) return 0; + const block = output.slice(tableStart, tableEnd); + // Count data rows: lines after \midrule, before \bottomrule, that end with `\\` + const midIdx = block.indexOf('\\midrule'); + const botIdx = block.indexOf('\\bottomrule'); + if (midIdx < 0 || botIdx < 0) return 0; + const dataRegion = block.slice(midIdx + '\\midrule'.length, botIdx); + const rows = dataRegion.split('\n').filter((line) => /\\\\\s*$/.test(line.trim())); + return rows.length; +} + +function main() { + console.log('=== M4 Rich-LaTeX Format Validation ===\n'); + + ensureDir(OUT_DIR); + const fixture = loadFixture(FIXTURE_PATH); + + const formatter = new ExamRichLatexFormatter(); + let qmd = formatter.format(fixture); + if (!qmd.endsWith('\n')) qmd += '\n'; + + fs.writeFileSync(OUT_QMD, qmd, 'utf8'); + console.log(`Wrote: ${OUT_QMD}`); + console.log(` size: ${Buffer.byteLength(qmd, 'utf8')} bytes\n`); + + const results = check(qmd, fixture); + let passed = 0; + for (const r of results) { + const tag = r.ok ? 'PASS' : 'FAIL'; + const reason = r.reason ? ` — ${r.reason}` : ''; + console.log(`[${r.id}] ${tag}: ${r.label}${reason}`); + if (r.ok) passed++; + } + console.log(''); + console.log(`M4 SECTION-PRESENCE: ${passed}/${results.length} PASS`); + + const rowCount = bracketTableRowCount(qmd); + const expectedRows = (fixture.exam_metadata?.df_pairs || []).length; + const rowOk = rowCount === expectedRows; + console.log( + `Bracket-table rows: ${rowCount}/${expectedRows} ${rowOk ? 
'PASS' : 'FAIL'}`, + ); + + if (passed === results.length && rowOk) { + process.exit(0); + } else { + process.exit(1); + } +} + +main(); diff --git a/src/plugin-api/commands/hub.md b/src/plugin-api/commands/hub.md index 1e2ff9b1..07e4a478 100644 --- a/src/plugin-api/commands/hub.md +++ b/src/plugin-api/commands/hub.md @@ -66,12 +66,13 @@ then render the following box-drawing layout: | Literature .. arxiv, bib-add, bib-search, doi | | Simulation .. design, analysis | | | -| TEACHING (15 commands) | +| TEACHING (18 commands) | | Content ..... exam, quiz, slides, assignment, | -| solution, syllabus, lecture | +| solution, syllabus, lecture, | +| validate-r, canvas | | Assessment .. rubric, feedback | -| Config ...... validate, diff, sync, migrate, demo, | -| config | +| Config ...... validate, diff, sync, migrate, | +| demo, config, preflight | | | +-----------------------------------------------------------+ | /scholar:hub research Drill into research commands | diff --git a/src/plugin-api/commands/teaching/exam.md b/src/plugin-api/commands/teaching/exam.md index dd3d0e1c..9514e114 100644 --- a/src/plugin-api/commands/teaching/exam.md +++ b/src/plugin-api/commands/teaching/exam.md @@ -17,8 +17,11 @@ Generate a comprehensive exam with multiple question types, automatic answer key /teaching:exam midterm /teaching:exam final --questions 15 --difficulty hard /teaching:exam practice --topics "linear regression,hypothesis testing" +/teaching:exam final --format exam-rich-latex ``` +The `exam-rich-latex` format emits a Quarto `.qmd` using the LaTeX `exam` document class with a reporting-policy preamble, F-critical bracket table (from `exam_metadata.df_pairs`), and an $s$-value calibration table. Compile with `quarto render exam.qmd --to pdf` (requires `lualatex`). 
+ **Exam Types:** - `midterm` - Mid-term examination (default) - `final` - Final examination @@ -32,7 +35,7 @@ Generate a comprehensive exam with multiple question types, automatic answer key - `--topics "topic1,topic2"` - Specific topics to cover - `--no-formulas` - Exclude formula sheet - `--no-solutions` - Exclude detailed solutions -- `--format FMT` - Output format: json (default), examark, canvas, md, qmd, tex +- `--format FMT` - Output format: json (default), examark, canvas, md, qmd, tex, exam-rich-latex - `--strict` - Use strict validation - `--variations N` - Generate N variations of the exam - `--dry-run` - Preview what would be generated without API calls diff --git a/src/teaching/ai/prompt-config-bridge.js b/src/teaching/ai/prompt-config-bridge.js index 8c0c1399..6a4c5321 100644 --- a/src/teaching/ai/prompt-config-bridge.js +++ b/src/teaching/ai/prompt-config-bridge.js @@ -22,7 +22,7 @@ import { loadTeachConfig } from '../config/loader.js'; * Current Scholar version for compatibility checking * Should match package.json version */ -const SCHOLAR_VERSION = '2.17.0'; +const SCHOLAR_VERSION = '2.18.0'; /** * Error thrown when prompt configuration fails @@ -186,23 +186,18 @@ export class PromptConfigBridge { } // Phase 5: Version compatibility check - // Use min_scholar_version (Scholar runtime requirement), not prompt_version - // (which is the prompt document's own revision number, unrelated to Scholar versioning) + // Use min_scholar_version (Scholar runtime requirement) via the + // dedicated checkMinScholarVersion helper. checkVersion() has different + // semantics — it's for prompt_version (the prompt's own revision) and + // emits a "stale" warning whenever Scholar is newer than the prompt, + // which is incorrect for a minimum-requirement field. 
const scholarRequirement = prompt.metadata?.min_scholar_version; if (scholarRequirement) { - const versionCheck = PromptLoader.checkVersion( + const versionCheck = PromptLoader.checkMinScholarVersion( scholarRequirement, SCHOLAR_VERSION ); - if (versionCheck.severity === 'warning') { - warnings.push(versionCheck.message); - if (versionCheck.suggestion) { - warnings.push(versionCheck.suggestion); - } - log(`Version warning: ${versionCheck.message}`); - } - if (versionCheck.severity === 'error') { throw new PromptConfigError( versionCheck.message, diff --git a/src/teaching/ai/prompt-loader.js b/src/teaching/ai/prompt-loader.js index cb0e6225..2a2a772b 100644 --- a/src/teaching/ai/prompt-loader.js +++ b/src/teaching/ai/prompt-loader.js @@ -65,6 +65,7 @@ const VALID_PROMPT_TYPES = [ 'lecture-outline', 'section-content', 'exam', + 'exam-rich', 'quiz', 'slides', 'revealjs-slides', @@ -327,6 +328,41 @@ export class PromptLoader { }; } + /** + * Check that the running Scholar satisfies a prompt's `min_scholar_version` + * declaration. This is distinct from {@link checkVersion}, which compares + * the prompt's own revision number (`prompt_version`) against Scholar. + * + * Semantics: `min_scholar_version` declares the minimum runtime required. + * A prompt with `min_scholar_version: "2.16.0"` works on Scholar 2.16+; + * it is NOT "stale" on Scholar 2.18 — it is fully compatible. + * + * @param {string|number} minScholarVersion - e.g. "2.16.0" from prompt frontmatter; a leading "v", a pre-release suffix, or an unquoted numeric YAML value is tolerated + * @param {string} scholarVersion - current Scholar runtime, e.g.
"2.18.0" + * @returns {{ compatible: boolean, severity: ('none'|'error'), message: string }} + */ + static checkMinScholarVersion(minScholarVersion, scholarVersion) { + const [minMajor, minMinor = 0, minPatch = 0] = String(minScholarVersion).trim().replace(/^v/i, '').split('.').map((part) => Number.parseInt(part, 10) || 0); + const [curMajor, curMinor = 0, curPatch = 0] = String(scholarVersion).trim().replace(/^v/i, '').split('.').map((part) => Number.parseInt(part, 10) || 0); + + const cur = [curMajor, curMinor, curPatch]; + const min = [minMajor, minMinor, minPatch]; + + for (let i = 0; i < 3; i++) { + if (cur[i] > min[i]) { + return { compatible: true, severity: 'none', message: 'Scholar runtime satisfies prompt requirement' }; + } + if (cur[i] < min[i]) { + return { + compatible: false, + severity: 'error', + message: `Prompt requires Scholar >= v${minScholarVersion}, but you have v${scholarVersion}` + }; + } + } + return { compatible: true, severity: 'none', message: 'Scholar runtime satisfies prompt requirement' }; + } + + /** + * Check version compatibility between prompt and Scholar + * diff --git a/src/teaching/ai/prompts/default/exam-rich.md b/src/teaching/ai/prompts/default/exam-rich.md new file mode 100644 index 00000000..df7ff21c --- /dev/null +++ b/src/teaching/ai/prompts/default/exam-rich.md @@ -0,0 +1,188 @@ +--- +prompt_version: "1.0" +prompt_type: exam-rich +prompt_description: "Rich exam format with evidence-based reporting, Dean (2017) citations, and interaction-aware language" +min_scholar_version: "2.18.0" +required_variables: + - topic + - course_level + - question_count +optional_variables: + - difficulty + - duration_minutes + - aids_level + - textbook_citation_prefix + - interaction_aware_language + - df_pairs +--- + +# Rich Exam Generator Prompt + +## Identity and Goal + +You are writing a {{course_level}} statistics exam on {{topic}}. The exam +targets {{question_count}} questions over {{duration_minutes}} minutes at +{{difficulty}} difficulty.
Every question must be unambiguous, mathematically +rigorous, and pedagogically aligned with the evidence-based reporting policy +below. + +## Evidence-Based Reporting Policy (MANDATORY) + +Students taking this exam are being taught to report inference using +$s$-values (Shannon information, measured in bits) rather than $p$-values. +This policy MUST be reflected in every question stem, every rubric, and every +worked solution in the answer key. + +1. **$s$-values, not $p$-values.** When asking students to report evidence + strength, ask for the $s$-value: $s = -\log_2(p)$. A two-sided $p = 0.05$ + corresponds to roughly $s \approx 4.3$ bits, $p = 0.01$ to $s \approx 6.6$ + bits, and $p = 0.005$ to $s \approx 7.6$ bits. Rubrics should reward + correct $s$-value computation and interpretation. + +2. **Avoid significance language.** Do NOT use these phrases anywhere in the + exam (question stems, rubrics, solutions, distractors, or instructions): + - "statistically significant" / "not statistically significant" + - "reject the null hypothesis" / "reject H0" + - "fail to reject H0" + - "no evidence against" + - "the result is significant at the 0.05 level" + + Instead, use evidence-strength language such as: "the data weakly favor X + over Y," "the observed effect is consistent with values from A to B," "the + $s$-value of $s$ bits indicates [moderate/strong/very strong] information + against the null model." + +3. **Compatibility intervals, not confidence intervals as accept/reject + regions.** When asking about intervals, frame them as "95% compatibility + intervals" — the range of parameter values that the data are statistically + compatible with at the chosen compatibility level. Do NOT frame intervals + as binary accept/reject regions. + +4. 
**Borderline language.** When a result is numerically borderline (e.g., + $s \approx 3$–$5$ bits), explicitly prefer phrases like "the data weakly + favor," "the observed effect is consistent with," or "the evidence is + modest" rather than any threshold-based dichotomy. + +### Example question stem (demonstrates the policy) + +> A two-sample $t$-test on yield data gives $p = 0.018$. (a) Compute the +> $s$-value. (b) State, in one sentence, what this $s$-value tells you about +> the evidence against the null model of equal means. (c) The 95% +> compatibility interval for $\mu_A - \mu_B$ is $[0.2, 3.8]$ kg. Interpret +> this interval without using accept/reject language. + +## Citation Pattern + +{{#if textbook_citation_prefix}} +When `{{textbook_citation_prefix}}` is supplied, every question that touches +material covered in the textbook MUST cite the relevant chapter or section +using that prefix. Examples: + +- "Per {{textbook_citation_prefix}} Ch. 7, the unrestricted EMS approach + gives ..." +- "Following {{textbook_citation_prefix}} §4.3, derive the expected mean + square for ..." + +**ANOVA derivations:** Use the **Dean (2017) unrestricted EMS approach** +exclusively. Do NOT use the restricted/pooled approach. When asking about +expected mean squares, the answer key must reflect the unrestricted +derivation. +{{/if}} + +## Interaction-Aware Language + +When a question concerns a factorial design and either (a) the question's +`designModel.hasInteractionTerm` is true, or (b) +`{{interaction_aware_language}}` is true and the design is factorial, the +question prompt MUST ask students to compute and interpret **"marginal mean +differences"** rather than "main effects." + +- Use "main effects" language ONLY for additive (no-interaction) models. 
+- For factorial models with interactions present, use: + "marginal mean of factor A at level $a_1$ averaged over levels of B," + "marginal mean difference between $a_1$ and $a_2$ averaged over B," + "the marginal mean comparison is interpretable only when the + interaction is small relative to the marginal differences." + +## Computational Aids Density + +Respect the `{{aids_level}}` setting for each question's scaffolding: + +- **`minimal`**: question stem + a blank line for the final answer. No + intermediate scaffolding, no formula reminder, no skeleton table. +- **`moderate`** (default): question stem + either an ANOVA-table skeleton + with empty cells for the student to fill in, OR a brief reminder of the + relevant formula (one line of LaTeX). Pick whichever fits the question. +- **`full`**: question stem + ANOVA-table skeleton (when applicable) + + step-by-step working space labeled (a), (b), (c) ... + an explicit formula + reminder. Use this for problems that benefit from guided computation. + +## F-critical Bracket Table + +{{#if df_pairs}} +The exam header will include an F-critical bracket table generated from +`df_pairs = {{df_pairs}}`. When writing questions that require comparing an +observed $F$-statistic against critical values, reference the bracket table +explicitly: "Compare your $F$ to the bracket-table critical value at +$(\text{df}_1, \text{df}_2) = (a, b)$." +{{/if}} + +## Output Format Requirements + +Return STRICT JSON matching the exam template schema. 
Every question must +have: + +- `id` (e.g., `"Q1"`) +- `points` (positive integer) +- `type` (one of `"multiple-choice"`, `"short-answer"`, `"essay"`, + `"true-false"`, `"numerical"`) +- `text` (the question stem, LaTeX allowed) +- `difficulty` (`"easy"` | `"medium"` | `"hard"`) +- `topic` (subtopic label) +- For multiple-choice: `options` array (4 plausible distractors) +- Optional: `parts` array for multi-part questions + +The top-level `answer_key` object MUST have one entry per question id, with +worked solutions that demonstrate the evidence-based reporting policy. + +## Example Output Shape + +```json +{ + "title": "Midterm Exam — STAT 503 (Dean 2017 Ch. 6–8)", + "exam_type": "midterm", + "duration_minutes": {{duration_minutes}}, + "instructions": "Report all inference using $s$-values. Do not use significance language.", + "questions": [ + { + "id": "Q1", + "type": "short-answer", + "text": "Per Dean (2017) Ch. 7, derive the expected mean square for factor A in a two-factor mixed model using the unrestricted approach. State the $s$-value interpretation of the resulting F-ratio.", + "points": 15, + "difficulty": "medium", + "topic": "Expected Mean Squares" + } + ], + "answer_key": { + "Q1": { + "answer": "EMS(A) = sigma^2 + n*sigma^2_AB + n*b*phi_A (Dean 2017 unrestricted). The F-ratio uses MS(A)/MS(AB). An observed F yielding s = 5.2 bits indicates moderate evidence against the null model of zero A-level marginal mean differences.", + "rubric_points": { + "EMS derivation (8 pts)": "Correct unrestricted EMS with all components", + "s-value interpretation (4 pts)": "Computes s = -log2(p) and uses evidence-strength language", + "No significance language (3 pts)": "Does not use 'significant', 'reject', or accept/reject framing" + } + } + } +} +``` + +## Final Reminders + +- LaTeX must use double backslashes in JSON strings: `\\frac`, `\\alpha`, + `\\beta`, etc. +- Every question must respect the reporting policy in both stem and rubric.
+- Use the {{textbook_citation_prefix}} citation pattern whenever applicable. +- Use Dean (2017) unrestricted EMS for ANOVA derivations. +- Use marginal-mean-difference language for factorial designs with + interactions; reserve main-effects language for purely additive designs. +- Total points should sum to a reasonable exam total (typically 100–150). diff --git a/src/teaching/formatters/exam-rich-latex.js b/src/teaching/formatters/exam-rich-latex.js new file mode 100644 index 00000000..5c243de5 --- /dev/null +++ b/src/teaching/formatters/exam-rich-latex.js @@ -0,0 +1,194 @@ +/** + * Exam Rich LaTeX Formatter + * + * Quarto-style .qmd document using the LaTeX `exam` document class, + * extended with a reporting-policy block, an F-critical bracket table, + * and an $s$-value calibration table. Inherits question rendering + * from `LaTeXFormatter`. + */ + +import { LaTeXFormatter } from './latex.js'; +import { generateBracketTable } from './helpers/bracket-table-generator.js'; +import { generateSCalibrationTable } from './helpers/s-calibration-table.js'; +import { applyInteractionAwarePass } from './helpers/interaction-aware-pass.js'; + +export class ExamRichLatexFormatter extends LaTeXFormatter { + constructor(options = {}) { + super(options); + this.aiProvider = options.aiProvider; + } + + /** + * Format a single question, first reframing main-effects language for + * questions whose design model contains an interaction term. The pass + * must precede LaTeX assembly so math regions in the parent's output + * are not exposed to substitution. + * @override + * @private + */ + formatQuestion(question, answerKey, includeAnswers, output) { + const reframed = this.reframeQuestionLanguage(question); + super.formatQuestion(reframed, answerKey, includeAnswers, output); + } + + /** + * Apply the interaction-aware language pass to a question's + * student-facing prompt strings AND its rubric (essay/short-answer). + * Returns a shallow copy; never mutates the input. 
+ * + * Reframes: + * - question.text + * - question.parts[].prompt + * - question.rubric (string, used by LaTeXFormatter.formatEssay) + * + * Does NOT reframe: + * - answer_key content (passed separately to formatQuestion) + * - exam-level instructions / formula_sheet (rendered once, outside + * formatQuestion — extend at the call site if needed) + * - solution_hint or other private metadata fields + * + * @private + */ + reframeQuestionLanguage(question) { + if (!question || question.designModel?.hasInteractionTerm !== true) { + return question; + } + const passOpts = this.aiProvider ? { aiProvider: this.aiProvider } : {}; + const next = { ...question }; + if (typeof next.text === 'string') { + next.text = applyInteractionAwarePass(next.text, question, passOpts); + } + if (typeof next.rubric === 'string') { + next.rubric = applyInteractionAwarePass(next.rubric, question, passOpts); + } + if (Array.isArray(next.parts)) { + next.parts = next.parts.map((part) => { + if (part && typeof part.prompt === 'string') { + return { ...part, prompt: applyInteractionAwarePass(part.prompt, question, passOpts) }; + } + return part; + }); + } + return next; + } + + /** + * Format exam to a Quarto .qmd document with rich-LaTeX scaffolding. + * @override + * @param {Object} content - Exam content (JSON) + * @param {Object} [options] - Format options + * @returns {string} Quarto-flavoured LaTeX document + */ + format(content, options = {}) { + const output = []; + + output.push('---'); + if (content.title) { + // JSON.stringify produces a valid YAML double-quoted scalar (handles + // quotes, backslashes, control chars, leading reserved indicators). 
+ output.push(`title: ${JSON.stringify(String(content.title))}`); + } + output.push('format:'); + output.push(' pdf:'); + output.push(' documentclass: exam'); + output.push(' classoption: [12pt, addpoints]'); + output.push(' pdf-engine: lualatex'); + output.push(' keep-tex: true'); + output.push(' include-in-header:'); + output.push(' text: |'); + output.push(' \\usepackage{amsmath, amssymb, booktabs, array, enumitem}'); + output.push(' \\pointsinrightmargin'); + output.push(' \\bracketedpoints'); + output.push(' \\marksnotpoints'); + output.push(' % \\printanswers % uncomment to render answer-key PDF'); + output.push('geometry: margin=1in'); + output.push('---'); + output.push(''); + + this.addExamConfig(output); + + this.addReportingPolicy(output); + + this.addBracketTable(content, output); + + this.addSCalibrationTable(output); + + if (content.instructions) { + this.addInstructions(content.instructions, output); + } + + this.addExamHeader(content, output); + + output.push('\\begin{questions}'); + output.push(''); + + (content.questions || []).forEach((q) => { + const answers = options.answers !== false; + this.formatQuestion(q, content.answer_key || {}, answers, output); + }); + + output.push('\\end{questions}'); + output.push(''); + + if (content.formula_sheet) { + this.addFormulaSheet(content.formula_sheet, output); + } + + return output.join('\n'); + } + + /** + * Add the reporting-policy quote block. + * @private + */ + addReportingPolicy(output) { + output.push('\\begin{quote}'); + output.push('\\textbf{Reporting policy.} Report $s$-values (Shannon information, in bits) rather than $p$-values, and quantify uncertainty with compatibility intervals. Avoid the phrases ``statistically significant,\'\' ``reject,\'\' and ``fail to reject\'\'; instead describe the strength of evidence and the practical implication.'); + output.push('\\end{quote}'); + output.push(''); + } + + /** + * Add the F-critical bracket table from `exam_metadata.df_pairs`. 
+ * @private + */ + addBracketTable(content, output) { + const dfPairs = content.exam_metadata?.df_pairs; + if (!Array.isArray(dfPairs) || dfPairs.length === 0) { + output.push('% bracket table omitted: no df_pairs in metadata'); + output.push(''); + return; + } + + const tabular = generateBracketTable(dfPairs); + output.push('\\begin{table}[h]'); + output.push('\\centering'); + output.push(tabular); + output.push('\\caption{F critical values, $\\alpha = 0.05$.}'); + output.push('\\end{table}'); + output.push(''); + } + + /** + * Add the $s$-value calibration table. + * @private + */ + addSCalibrationTable(output) { + const tabular = generateSCalibrationTable(); + output.push('\\begin{table}[h]'); + output.push('\\centering'); + output.push(tabular); + output.push('\\caption{$s$-value calibration (bits of information).}'); + output.push('\\end{table}'); + output.push(''); + } + + /** + * Get file extension for the rich-LaTeX format. + * @override + * @returns {string} '.qmd' + */ + getFileExtension() { + return '.qmd'; + } +} diff --git a/src/teaching/formatters/helpers/bracket-table-generator.js b/src/teaching/formatters/helpers/bracket-table-generator.js new file mode 100644 index 00000000..046b8a03 --- /dev/null +++ b/src/teaching/formatters/helpers/bracket-table-generator.js @@ -0,0 +1,49 @@ +import jstat from 'jstat'; + +/** + * Generate a LaTeX tabular for F-distribution critical values. + * @param {Array<{df1: number, df2: number}>} dfPairs + * @param {Object} [options] + * @param {number} [options.alpha=0.05] - significance level (upper-tail) + * @param {number} [options.precision=2] - decimal places in output + * @returns {string} LaTeX tabular (no \begin{table} wrapper, just \begin{tabular}…\end{tabular}) + */ +export function generateBracketTable(dfPairs, options = {}) { + const alpha = options.alpha ?? 0.05; + const precision = options.precision ?? 
/**
 * Generate a LaTeX tabular of upper-tail F-distribution critical values.
 *
 * Pairs are validated, de-duplicated, and sorted by df1 then df2 before one
 * table row is emitted per unique pair.
 *
 * @param {Iterable<{df1: number, df2: number}>} dfPairs - Degrees-of-freedom pairs
 * @param {Object} [options]
 * @param {number} [options.alpha=0.05] - significance level (upper-tail), must lie in (0, 1)
 * @param {number} [options.precision=2] - decimal places in output
 * @returns {string} LaTeX tabular (no \begin{table} wrapper, just \begin{tabular}…\end{tabular})
 * @throws {TypeError} If dfPairs is not iterable
 * @throws {RangeError} If alpha is not strictly between 0 and 1
 * @throws {Error} If any pair has a df that is not an integer >= 1
 */
export function generateBracketTable(dfPairs, options = {}) {
  const alpha = options.alpha ?? 0.05;
  const precision = options.precision ?? 2;

  if (dfPairs == null || typeof dfPairs[Symbol.iterator] !== 'function') {
    throw new TypeError('dfPairs must be an iterable of { df1, df2 } objects');
  }
  // Written as a negated conjunction so NaN is rejected as well.
  if (!(alpha > 0 && alpha < 1)) {
    throw new RangeError(`alpha must be strictly between 0 and 1, got ${alpha}`);
  }

  // Map keyed by "df1,df2" keeps the first occurrence of each pair.
  const uniqueByKey = new Map();
  for (const pair of dfPairs) {
    // Number.isInteger also rejects NaN, ±Infinity and non-numbers, so the
    // check now enforces what the error message (and the exam template
    // schema) promise: integer degrees of freedom.
    if (!Number.isInteger(pair?.df1) || !Number.isInteger(pair?.df2)
      || pair.df1 < 1 || pair.df2 < 1) {
      throw new Error(
        `Invalid df pair: ${JSON.stringify(pair)} — both df1 and df2 must be finite integers >= 1`
      );
    }
    const key = `${pair.df1},${pair.df2}`;
    if (!uniqueByKey.has(key)) {
      uniqueByKey.set(key, { df1: pair.df1, df2: pair.df2 });
    }
  }

  const unique = [...uniqueByKey.values()]
    .sort((a, b) => (a.df1 - b.df1) || (a.df2 - b.df2));

  const alphaLabel = String(alpha);
  const lines = [
    '\\begin{tabular}{rrr}',
    '\\toprule',
    `$df_1$ & $df_2$ & $F^*_{${alphaLabel}}$ \\\\`,
    '\\midrule',
  ];

  for (const { df1, df2 } of unique) {
    // Upper-tail critical value = quantile at 1 - alpha.
    const crit = jstat.centralF.inv(1 - alpha, df1, df2);
    lines.push(`${df1} & ${df2} & ${crit.toFixed(precision)} \\\\`);
  }

  lines.push('\\bottomrule');
  lines.push('\\end{tabular}');

  return lines.join('\n');
}
/**
 * Reframe "main effects" language to "marginal mean differences" when the
 * question's design model contains an interaction term.
 *
 * Math regions — `$...$`, `$$...$$` and `\(...\)` — are passed through
 * verbatim; only the prose between them is rewritten.
 *
 * @param {string} text - The question text (prompt + parts + interpretation).
 * @param {Object} questionMeta - Question-level metadata. The function
 *   reframes only when questionMeta.designModel?.hasInteractionTerm === true.
 * @param {Object} [options]
 * @param {Object} [options.aiProvider] - Reserved for an AI re-prompt of the
 *   paragraphs around each substitution. This path is STUBBED: a truthy
 *   aiProvider logs a single notice and the regex output is returned as-is.
 * @returns {string} The (possibly) reframed text. Non-string or empty input
 *   is returned unchanged.
 */
export function applyInteractionAwarePass(text, questionMeta, options = {}) {
  if (typeof text !== 'string' || text.length === 0) return text;
  if (questionMeta?.designModel?.hasInteractionTerm !== true) return text;

  // Display math is listed first so `$$...$$` is consumed as one region
  // instead of two empty `$$` matches with unprotected content in between.
  // `\(...\)` is matched lazily so math containing ")" is still recognised.
  const mathRegex = /(\$\$[\s\S]*?\$\$|\$[^$]*\$|\\\([\s\S]*?\\\))/;

  // split() with a capturing group interleaves the captures at odd indices,
  // so position alone tells prose from math. Checking the leading character
  // instead would wrongly skip prose that starts with an unpaired "$".
  const out = text
    .split(mathRegex)
    .map((segment, index) => (index % 2 === 1 ? segment : reframeSegment(segment)))
    .join('');

  if (options?.aiProvider) {
    console.log('[interaction-aware-pass] AI re-prompt stub invoked; passing through.');
  }

  return out;
}

/**
 * Apply the main-effects → marginal-means substitutions to one prose segment.
 * Order matters: the most specific phrasings are rewritten first so the
 * generic "main effect(s)" rules only see what is left.
 *
 * @param {string} segment - Prose with no math regions
 * @returns {string} Reframed prose
 */
function reframeSegment(segment) {
  const verbBySuffix = { s: 'compares', ed: 'compared', ing: 'comparing' };
  let s = segment;

  s = s.replace(/\btest(s|ed|ing)?\s+the\s+main\s+effects?\b/gi, (match, suffix) => {
    // The pattern is case-insensitive, so normalise the captured suffix
    // before the lookup ("TESTED" must still map to "compared").
    const verb = verbBySuffix[suffix?.toLowerCase()] ?? 'compare';
    const head = preserveCase(verb, match.charAt(0));
    return `${head} the marginal means`;
  });

  s = s.replace(/\bmain\s+effect\s+of\s+([A-Za-z][A-Za-z0-9_]*)/gi, (match, factor) => {
    return `${preserveCase('marginal effect of', match.charAt(0))} ${factor}`;
  });

  s = s.replace(/\bmain\s+effects\b/gi, (match) => {
    return preserveCase('marginal mean differences', match.charAt(0));
  });

  s = s.replace(/\bmain\s+effect\b/gi, (match) => {
    return preserveCase('marginal mean difference', match.charAt(0));
  });

  return s;
}

/**
 * Capitalise `replacement` when the text it replaces began with an
 * upper-case letter; otherwise return it unchanged.
 *
 * @param {string} replacement - Lower-case replacement phrase
 * @param {string} originalFirstChar - First character of the matched text
 * @returns {string}
 */
function preserveCase(replacement, originalFirstChar) {
  // A character is an upper-case letter when upper-casing leaves it alone
  // but lower-casing changes it (this excludes digits and punctuation).
  const startsUpper = Boolean(originalFirstChar)
    && originalFirstChar === originalFirstChar.toUpperCase()
    && originalFirstChar !== originalFirstChar.toLowerCase();
  if (!startsUpper) return replacement;
  return replacement.charAt(0).toUpperCase() + replacement.slice(1);
}
/**
 * Build the static LaTeX tabular for the s-value calibration
 * (Greenland/Rafi), one row per entry of S_CALIBRATION_ROWS.
 *
 * @param {Object} [options] - reserved for future use
 * @returns {string} LaTeX tabular (no \begin{table} wrapper)
 */
export function generateSCalibrationTable(options = {}) {
  const header = [
    '\\begin{tabular}{rrl}',
    '\\toprule',
    '$s$-value & bits of evidence & interpretation \\\\',
    '\\midrule',
  ];
  const rows = S_CALIBRATION_ROWS.map(
    ({ s, bits, interpretation }) => `${s} & ${bits} & ${interpretation} \\\\`
  );
  const footer = ['\\bottomrule', '\\end{tabular}'];

  return [...header, ...rows, ...footer].join('\n');
}
Supported formats: md, canvas, qmd, tex, exam-rich-latex, examark` ); } } @@ -53,7 +58,7 @@ export function getFormatter(format) { * @returns {Array} List of supported format names */ export function getSupportedFormats() { - return ['md', 'markdown', 'canvas', 'qti', 'qmd', 'quarto', 'tex', 'latex', 'examark', 'examark-md']; + return ['md', 'markdown', 'canvas', 'qti', 'qmd', 'quarto', 'tex', 'latex', 'exam-rich-latex', 'rich-latex', 'examark', 'examark-md']; } /** diff --git a/src/teaching/generators/exam.js b/src/teaching/generators/exam.js index 83192c1a..3e785821 100644 --- a/src/teaching/generators/exam.js +++ b/src/teaching/generators/exam.js @@ -15,6 +15,7 @@ import { loadTemplate, mergeTemplates, injectAutoFields, applyDefaults } from '. import { loadTeachConfig } from '../config/loader.js'; import { ValidatorEngine } from '../validators/engine.js'; import { AIProvider } from '../ai/provider.js'; +import { PromptLoader } from '../ai/prompt-loader.js'; import { MarkdownFormatter, CanvasFormatter, @@ -82,11 +83,23 @@ export async function generateExam(options = {}) { } // 4. Build AI prompt (or use pre-merged prompt from InstructionMerger) - const prompt = options.mergedPrompt || buildExamPrompt(examOptions, config); + // Rich exam format uses the on-disk exam-rich.md prompt with evidence-based + // reporting policy + Dean (2017) citations + interaction-aware language. + // Legacy formats keep using the inline buildExamPrompt() path unchanged. 
/**
 * Build the rich-exam prompt by loading the on-disk exam-rich.md policy
 * document via PromptLoader and filling in its `{{variable}}` placeholders
 * from examOptions, config and the raw caller options.
 *
 * Substitution is a single pass over the loaded body: values are inserted
 * literally (no `$`-pattern expansion) and are never re-scanned for further
 * placeholders. Placeholders with an unknown name, or whose value was not
 * supplied, are left as the literal `{{name}}` text.
 *
 * @param {ExamOptions} examOptions - Resolved exam options
 * @param {Object} config - Loaded teach-config
 * @param {Object} options - Raw caller options (may include exam_metadata,
 *   aids_level, textbook_citation_prefix, interaction_aware_language)
 * @returns {Promise<string>} Composed prompt
 */
async function buildRichExamPrompt(examOptions, config, options) {
  const courseInfo = config.scholar?.course_info || {};
  const teachingStyle = config.scholar?.teaching_style?.exam || {};

  // Stringify a value, but keep "not supplied" as undefined so the
  // placeholder survives instead of rendering the text "undefined".
  const asText = (value) => (value == null ? undefined : String(value));

  // Resolve template variables (caller options > teaching_style > defaults)
  const vars = {
    topic: (examOptions.topics && examOptions.topics.length > 0)
      ? examOptions.topics.join(', ')
      : (courseInfo.title || 'the course curriculum'),
    course_level: courseInfo.level || 'undergraduate',
    question_count: asText(examOptions.questionCount),
    difficulty: asText(examOptions.difficulty),
    duration_minutes: asText(examOptions.durationMinutes),
    aids_level: options.aids_level || teachingStyle.aids_level || 'moderate',
    textbook_citation_prefix:
      options.textbook_citation_prefix || teachingStyle.textbook_citation_prefix || '',
    interaction_aware_language:
      options.interaction_aware_language !== undefined
        ? String(options.interaction_aware_language)
        : String(teachingStyle.interaction_aware_language !== false),
    df_pairs: options.exam_metadata?.df_pairs
      ? JSON.stringify(options.exam_metadata.df_pairs)
      : ''
  };

  // Load the on-disk prompt (project override > plugin default)
  const loaded = await PromptLoader.load('exam-rich', process.cwd(), {
    debug: examOptions.debug
  });

  // A function replacer is used on purpose: with a string replacement,
  // String.prototype.replace would interpret "$$", "$&", "$1"… inside
  // user-supplied values such as topics or citation prefixes.
  return loaded.body.replace(
    /\{\{\s*([A-Za-z_][A-Za-z0-9_]*)\s*\}\}/g,
    (placeholder, name) => {
      if (!Object.prototype.hasOwnProperty.call(vars, name)) return placeholder;
      const value = vars[name];
      return value == null ? placeholder : String(value);
    }
  );
}
When present, generation prompts embed these rules.", + "properties": { + "report_s_values": { + "type": "boolean", + "default": true, + "description": "Require Shannon-information (s-value) reporting in addition to or instead of p-values" + }, + "avoid_significance_language": { + "type": "boolean", + "default": true, + "description": "Ban phrases like 'statistically significant', 'reject H0', 'fail to reject', 'no evidence against'" + }, + "use_compatibility_intervals": { + "type": "boolean", + "default": true, + "description": "Frame intervals as 'compatibility intervals', not as accept/reject regions" + } + } + }, + "include_bracket_table": { + "type": "boolean", + "description": "Emit F-critical bracket table from exam_metadata.df_pairs. Defaults to true when format is exam-rich-latex, else false." + }, + "aids_level": { + "type": "string", + "enum": ["minimal", "moderate", "full"], + "default": "moderate", + "description": "Per-question computational-aid scaffolding density" + }, + "textbook_citation_prefix": { + "type": "string", + "default": "", + "description": "Textbook citation prefix injected into question stems (e.g., 'Dean (2017)')" + }, + "interaction_aware_language": { + "type": "boolean", + "default": true, + "description": "When true, generator uses 'marginal mean differences' instead of 'main effects' for questions with designModel.hasInteractionTerm === true" + }, + "exam_metadata": { + "type": "object", + "description": "Optional metadata used by rich exam formats", + "properties": { + "df_pairs": { + "type": "array", + "description": "Degrees-of-freedom pairs for F-critical bracket table", + "items": { + "type": "object", + "required": ["df1", "df2"], + "properties": { + "df1": { + "type": "integer", + "minimum": 1, + "description": "Numerator degrees of freedom" + }, + "df2": { + "type": "integer", + "minimum": 1, + "description": "Denominator degrees of freedom" + } + } + } + } + } } } } diff --git a/tests/README.md b/tests/README.md index 
f44ec277..afd0f80e 100644 --- a/tests/README.md +++ b/tests/README.md @@ -16,7 +16,7 @@ tests/ └── ai-provider.test.js # AI provider (28 tests) ``` -**Total:** 3,340 tests, all passing ✅ +**Total:** 3,400 tests, all passing ✅ ## Running Tests diff --git a/tests/teaching/ai/prompt-loader.test.js b/tests/teaching/ai/prompt-loader.test.js index f0bedab0..ebfa9ffc 100644 --- a/tests/teaching/ai/prompt-loader.test.js +++ b/tests/teaching/ai/prompt-loader.test.js @@ -428,6 +428,48 @@ prompt_description: "Test" }); }); + // ========================================================================= + // checkMinScholarVersion - Runtime Requirement Tests + // ========================================================================= + describe('checkMinScholarVersion() - Runtime Requirement', () => { + it('returns compatible when current Scholar exceeds the minimum (no warning)', () => { + const result = PromptLoader.checkMinScholarVersion('2.16.0', '2.18.0'); + expect(result.compatible).toBe(true); + expect(result.severity).toBe('none'); + }); + + it('returns compatible when current Scholar matches the minimum exactly', () => { + const result = PromptLoader.checkMinScholarVersion('2.18.0', '2.18.0'); + expect(result.compatible).toBe(true); + expect(result.severity).toBe('none'); + }); + + it('returns error when current Scholar is below the minimum (minor diff)', () => { + const result = PromptLoader.checkMinScholarVersion('2.18.0', '2.16.0'); + expect(result.compatible).toBe(false); + expect(result.severity).toBe('error'); + expect(result.message).toContain('>= v2.18.0'); + expect(result.message).toContain('v2.16.0'); + }); + + it('returns error when current Scholar is below the minimum (major diff)', () => { + const result = PromptLoader.checkMinScholarVersion('3.0.0', '2.18.0'); + expect(result.compatible).toBe(false); + expect(result.severity).toBe('error'); + }); + + it('treats patch versions correctly', () => { + expect(PromptLoader.checkMinScholarVersion('2.16.1', 
'2.16.0').severity).toBe('error'); + expect(PromptLoader.checkMinScholarVersion('2.16.0', '2.16.1').severity).toBe('none'); + }); + + it('handles missing minor/patch components (defaults to 0)', () => { + expect(PromptLoader.checkMinScholarVersion('2', '2.0.0').compatible).toBe(true); + expect(PromptLoader.checkMinScholarVersion('2.5', '2.5.0').compatible).toBe(true); + expect(PromptLoader.checkMinScholarVersion('2.5', '2.4.99').compatible).toBe(false); + }); + }); + // ========================================================================= // Edge Cases // ========================================================================= diff --git a/tests/teaching/e2e/exam-rich-dogfood.test.js b/tests/teaching/e2e/exam-rich-dogfood.test.js new file mode 100644 index 00000000..87abcba5 --- /dev/null +++ b/tests/teaching/e2e/exam-rich-dogfood.test.js @@ -0,0 +1,162 @@ +/** + * Dogfood Tests: exam-rich.md prompt + template self-consistency + * + * Asserts the *real* on-disk prompt file shipped to users + * (src/teaching/ai/prompts/default/exam-rich.md) still satisfies the + * policy contracts the v2.18.0 spec promises, and that the template's + * new option surface stays in sync with the prompt's `optional_variables` + * declaration. + * + * If a future edit silently removes the "marginal mean differences" + * directive, or drops the Dean (2017) citation, these tests fail. That + * coverage cannot come from a fixture-based test — the prompt itself + * is what ships, and the prompt itself is what these tests verify. 
/**
 * Split a prompt file into its YAML frontmatter and Markdown body.
 *
 * Both LF and CRLF line endings are accepted, as is a leading UTF-8 BOM, so
 * the helper keeps working on checkouts where the prompt file has been
 * converted to Windows line endings.
 *
 * @param {string} content - Raw file contents
 * @returns {{metadata: *, body: string}} Parsed frontmatter (as returned by
 *   yaml.load) and the trimmed text that follows the closing `---`
 * @throws {Error} If the content does not start with a `---` frontmatter block
 */
function parseFrontmatter(content) {
  const match = /^\uFEFF?---\r?\n([\s\S]*?)\r?\n---/.exec(content);
  if (!match) throw new Error('No frontmatter');

  const [frontmatterBlock, yamlSource] = match;
  return {
    metadata: yaml.load(yamlSource),
    // Everything after the closing fence, without surrounding whitespace.
    body: content.slice(frontmatterBlock.length).trim(),
  };
}
[]; + expect(optional).toEqual( + expect.arrayContaining([ + 'aids_level', + 'textbook_citation_prefix', + 'interaction_aware_language', + ]) + ); + }); + + test('body bans "statistically significant" via explicit avoid/never language', () => { + const lower = body.toLowerCase(); + expect(lower).toMatch(/statistically significant/); + expect(lower).toMatch(/avoid|never|do not|don['']t/); + }); + + test('body cites Dean (2017)', () => { + expect(body).toMatch(/Dean \(2017\)/); + }); + + test('body mentions "marginal mean differences" for interaction designs', () => { + expect(body).toMatch(/marginal mean differences/); + }); + + test('body invokes evidence-based $s$-value reporting', () => { + expect(body).toMatch(/\$s\$-value|\bs-value/); + }); + + test('body references compatibility intervals (not confidence intervals as accept/reject)', () => { + expect(body.toLowerCase()).toMatch(/compatibility interval/); + }); +}); + +describe('Dogfood: template ↔ prompt option-surface consistency', () => { + let template; + let promptMetadata; + + beforeAll(() => { + template = JSON.parse(fs.readFileSync(TEMPLATE_PATH, 'utf8')); + const raw = fs.readFileSync(PROMPT_PATH, 'utf8'); + promptMetadata = parseFrontmatter(raw).metadata; + }); + + test('template declares policy property as an object (object or [object, null])', () => { + const policy = template.properties.policy; + expect(policy).toBeDefined(); + const t = policy.type; + const isObject = t === 'object' || (Array.isArray(t) && t.includes('object')); + expect(isObject).toBe(true); + }); + + test('template declares aids_level enum {minimal, moderate, full}', () => { + const aidsLevel = template.properties.aids_level; + expect(aidsLevel).toBeDefined(); + expect(aidsLevel.enum).toEqual( + expect.arrayContaining(['minimal', 'moderate', 'full']) + ); + }); + + test('template declares textbook_citation_prefix as a string', () => { + const prefix = template.properties.textbook_citation_prefix; + expect(prefix).toBeDefined(); 
+ expect(prefix.type).toBe('string'); + }); + + test('template declares interaction_aware_language as boolean', () => { + const iaLang = template.properties.interaction_aware_language; + expect(iaLang).toBeDefined(); + expect(iaLang.type).toBe('boolean'); + }); + + test('template declares include_bracket_table as boolean', () => { + const ibt = template.properties.include_bracket_table; + expect(ibt).toBeDefined(); + expect(ibt.type).toBe('boolean'); + }); + + test('template exam_metadata accepts a df_pairs array', () => { + const examMeta = template.properties.exam_metadata; + expect(examMeta).toBeDefined(); + expect(examMeta.properties?.df_pairs).toBeDefined(); + expect(examMeta.properties.df_pairs.type).toBe('array'); + }); + + test('every template option the prompt declares is named correctly', () => { + const optional = promptMetadata.optional_variables ?? []; + const templateProps = Object.keys(template.properties); + for (const v of ['aids_level', 'textbook_citation_prefix', 'interaction_aware_language']) { + expect(optional).toContain(v); + expect(templateProps).toContain(v); + } + }); +}); diff --git a/tests/teaching/e2e/exam-rich-latex-e2e.test.js b/tests/teaching/e2e/exam-rich-latex-e2e.test.js new file mode 100644 index 00000000..4d50c7a3 --- /dev/null +++ b/tests/teaching/e2e/exam-rich-latex-e2e.test.js @@ -0,0 +1,183 @@ +/** + * E2E Tests: exam-rich-latex Pipeline + * + * Walks the full fixture → formatter → output path and asserts structural + * invariants. Optionally compiles the output to PDF via Quarto+LuaLaTeX + * when those tools are on PATH (skipped cleanly otherwise so CI without + * a TeX install passes). + * + * The fixture is the same hand-crafted STAT 545 final used by the M4 + * validation driver (`scripts/m4-validate-rich-format.js`), so this + * suite is a regression net for the entire rich-latex pipeline. 
/**
 * Read and parse the STAT 545 fixture from FIXTURE_PATH.
 * @returns {Object} Parsed fixture JSON
 */
function loadFixture() {
  const raw = fs.readFileSync(FIXTURE_PATH, 'utf8');
  return JSON.parse(raw);
}

/**
 * Parse the YAML frontmatter at the top of formatter output.
 * @param {string} output - Formatter output
 * @returns {*} Value returned by yaml.load for the frontmatter text
 * @throws {Error} If the output does not open with a `---` block
 */
function extractFrontmatter(output) {
  const found = /^---\n([\s\S]*?)\n---/.exec(output);
  if (found === null) {
    throw new Error('No YAML frontmatter found');
  }
  const [, yamlText] = found;
  return yaml.load(yamlText);
}

/**
 * Return formatter output with the leading frontmatter block removed.
 * Output without frontmatter is returned unchanged.
 * @param {string} output - Formatter output
 * @returns {string}
 */
function extractBody(output) {
  const frontmatterBlock = /^---\n[\s\S]*?\n---\n?/;
  return output.replace(frontmatterBlock, '');
}

/**
 * Report whether `which` can locate an executable with the given name.
 * Any failure of the lookup counts as "not available".
 * @param {string} name - Executable name
 * @returns {boolean}
 */
function hasTool(name) {
  let found = true;
  try {
    execFileSync('which', [name], { stdio: 'pipe' });
  } catch {
    found = false;
  }
  return found;
}
expect(fm.format.pdf['pdf-engine']).toBe('lualatex'); + }); + + test('include-in-header.text contains required exam-class packages', () => { + const fm = extractFrontmatter(output); + const header = fm.format.pdf['include-in-header'].text; + expect(header).toMatch(/\\usepackage\{amsmath/); + expect(header).toMatch(/\\pointsinrightmargin/); + expect(header).toMatch(/\\bracketedpoints/); + expect(header).toMatch(/\\marksnotpoints/); + }); + + test('body contains reporting policy block', () => { + const body = extractBody(output); + expect(body).toMatch(/Reporting policy\./); + }); + + test('body contains bracket table caption + s-value calibration table', () => { + const body = extractBody(output); + expect(body).toMatch(/F critical values/); + expect(body).toMatch(/calibration/); + }); + + test('body wraps questions in the exam-class questions environment', () => { + const body = extractBody(output); + expect(body).toMatch(/\\begin\{questions\}/); + expect(body).toMatch(/\\end\{questions\}/); + }); + + test('output contains one \\question macro per fixture question', () => { + const questionMatches = output.match(/\\question(?!s)\b/g) ?? []; + expect(questionMatches.length).toBe(fixture.questions.length); + }); + + test('interaction-bearing question is reframed (no main-effects, has marginal language)', () => { + const factorialQ = fixture.questions.find( + (q) => q.designModel?.hasInteractionTerm === true + ); + expect(factorialQ).toBeDefined(); + expect(output).toMatch(/marginal (mean|effect)/); + expect(output).not.toMatch(/\bmain effects?\b/i); + }); + + test('bracket-table row count equals unique df_pair count from fixture', () => { + const uniqueDfPairs = new Set( + fixture.exam_metadata.df_pairs.map((p) => `${p.df1},${p.df2}`) + ); + const rowMatches = output.match(/^\d+\s*&\s*\d+\s*&\s*[\d.]+\s*\\\\\s*$/gm) ?? 
[]; + expect(rowMatches.length).toBe(uniqueDfPairs.size); + }); + + test('getFormatter("exam-rich-latex") returns ExamRichLatexFormatter', () => { + const f = getFormatter('exam-rich-latex'); + expect(f).toBeInstanceOf(ExamRichLatexFormatter); + expect(f.getFileExtension()).toBe('.qmd'); + }); + + test('rich-latex alias resolves to the same class', () => { + const f = getFormatter('rich-latex'); + expect(f).toBeInstanceOf(ExamRichLatexFormatter); + }); +}); + +describe('E2E: PDF compilation (gated on lualatex+quarto)', () => { + const tmpDir = path.join(process.cwd(), 'tests/.tmp-e2e-rich-latex'); + let toolingAvailable; + + beforeAll(() => { + toolingAvailable = hasTool('lualatex') && hasTool('quarto'); + if (toolingAvailable) { + fs.mkdirSync(tmpDir, { recursive: true }); + } + }); + + afterAll(() => { + if (fs.existsSync(tmpDir)) { + fs.rmSync(tmpDir, { recursive: true, force: true }); + } + }); + + test( + 'compiles fixture to a non-empty PDF', + () => { + if (!toolingAvailable) { + console.log( + '[skip] lualatex or quarto not on PATH; PDF compile test skipped' + ); + return; + } + const fixture = loadFixture(); + const output = new ExamRichLatexFormatter().format(fixture); + const qmdPath = path.join(tmpDir, 'e2e-exam.qmd'); + fs.writeFileSync(qmdPath, output, 'utf8'); + execFileSync('quarto', ['render', 'e2e-exam.qmd', '--to', 'pdf'], { + cwd: tmpDir, + stdio: 'pipe', + }); + const pdfPath = path.join(tmpDir, 'e2e-exam.pdf'); + expect(fs.existsSync(pdfPath)).toBe(true); + expect(fs.statSync(pdfPath).size).toBeGreaterThan(1000); + }, + 90000 + ); +}); diff --git a/tests/teaching/e2e/instruction-merger-e2e.test.js b/tests/teaching/e2e/instruction-merger-e2e.test.js index cf58a6ea..6fa39dd9 100644 --- a/tests/teaching/e2e/instruction-merger-e2e.test.js +++ b/tests/teaching/e2e/instruction-merger-e2e.test.js @@ -539,12 +539,16 @@ describe('E2E: InstructionMerger with Real Prompts', () => { // is accepted by checking the code structure. 
expect(typeof generateExam).toBe('function'); - // Verify the source contains the mergedPrompt bypass + // Verify the source contains the mergedPrompt bypass. The structure is + // an if/else chain after v2.18.0 (mergedPrompt → rich format → legacy), + // so we check the two essential pieces independently rather than pin a + // single literal expression. const examSource = fs.readFileSync( path.resolve('src/teaching/generators/exam.js'), 'utf8' ); - expect(examSource).toContain('options.mergedPrompt || buildExamPrompt'); + expect(examSource).toMatch(/options\.mergedPrompt/); + expect(examSource).toMatch(/buildExamPrompt\s*\(/); }); it('quiz generator accepts mergedPrompt option', () => { diff --git a/tests/teaching/formatters/exam-rich-latex.test.js b/tests/teaching/formatters/exam-rich-latex.test.js new file mode 100644 index 00000000..85d30c11 --- /dev/null +++ b/tests/teaching/formatters/exam-rich-latex.test.js @@ -0,0 +1,87 @@ +/** + * Tests for ExamRichLatexFormatter wiring of the interaction-aware pass. 
+ */ + +import { ExamRichLatexFormatter } from '../../../src/teaching/formatters/exam-rich-latex.js'; + +describe('ExamRichLatexFormatter — interaction-aware wiring', () => { + test('reframes main-effects language when designModel.hasInteractionTerm is true', () => { + const formatter = new ExamRichLatexFormatter(); + const content = { + title: 'Test Exam', + questions: [ + { + id: 'Q1', + type: 'short-answer', + points: 10, + text: 'Compute the main effects of factors A and B.', + designModel: { hasInteractionTerm: true }, + }, + ], + answer_key: {}, + }; + + const out = formatter.format(content); + + expect(out).toContain('marginal mean differences'); + expect(out).not.toContain('main effects'); + }); + + test('reframes rubric language when designModel.hasInteractionTerm is true', () => { + const formatter = new ExamRichLatexFormatter(); + const content = { + title: 'Test Exam', + questions: [ + { + id: 'Q1', + type: 'essay', + points: 20, + text: 'Discuss the analysis.', + rubric: 'Award full credit for correctly identifying the main effects of A and B.', + designModel: { hasInteractionTerm: true }, + }, + ], + answer_key: {}, + }; + + const out = formatter.format(content); + + expect(out).toContain('marginal mean differences'); + expect(out).not.toContain('main effects of A and B'); + }); + + test('escapes quotes and backslashes in title via JSON-safe YAML quoting', () => { + const formatter = new ExamRichLatexFormatter(); + const content = { + title: 'Midterm: "Tricky" \\Section', + questions: [], + answer_key: {}, + }; + + const out = formatter.format(content); + // JSON.stringify produces a valid YAML double-quoted scalar. 
+ expect(out).toContain('title: "Midterm: \\"Tricky\\" \\\\Section"'); + }); + + test('leaves main-effects language untouched when no interaction term', () => { + const formatter = new ExamRichLatexFormatter(); + const content = { + title: 'Test Exam', + questions: [ + { + id: 'Q1', + type: 'short-answer', + points: 10, + text: 'Compute the main effects of factors A and B.', + designModel: { hasInteractionTerm: false }, + }, + ], + answer_key: {}, + }; + + const out = formatter.format(content); + + expect(out).toContain('main effects'); + expect(out).not.toContain('marginal mean differences'); + }); +}); diff --git a/tests/teaching/formatters/fixtures/stat-545-final-spring-2026.fixture.json b/tests/teaching/formatters/fixtures/stat-545-final-spring-2026.fixture.json new file mode 100644 index 00000000..be2f1686 --- /dev/null +++ b/tests/teaching/formatters/fixtures/stat-545-final-spring-2026.fixture.json @@ -0,0 +1,119 @@ +{ + "title": "STAT 445/545 Final Exam — Spring 2026 (Fixture Skeleton)", + "exam_type": "final", + "duration_minutes": 180, + "total_points": 100, + "instructions": "Closed book. Three letter-size cheat sheets and a hand calculator are permitted. Show all work for full credit.", + "policy": { + "report_s_values": true, + "avoid_significance_language": true, + "use_compatibility_intervals": true + }, + "include_bracket_table": true, + "aids_level": "moderate", + "textbook_citation_prefix": "Dean (2017)", + "interaction_aware_language": true, + "exam_metadata": { + "df_pairs": [ + { "df1": 1, "df2": 20 }, + { "df1": 2, "df2": 30 }, + { "df1": 3, "df2": 24 }, + { "df1": 1, "df2": 40 } + ] + }, + "questions": [ + { + "id": "Q1", + "type": "short-answer", + "topic": "One-way ANOVA", + "points": 14, + "designModel": { "hasInteractionTerm": false }, + "text": "A one-way CRD with $a = 4$ treatment levels and $n = 6$ replicates per cell yields treatment means and pooled error information. 
Construct the one-way ANOVA table and report evidence using s-values.", + "parts": [ + { "prompt": "Compute SS_treatment, SS_error, and the corresponding mean squares." }, + { "prompt": "Report $F(df_1, df_2)$, the s-value, and a partial $\\hat\\eta^2_p$." }, + { "prompt": "Interpret the result with evidence-based language; avoid 'significant'." } + ] + }, + { + "id": "Q2", + "type": "short-answer", + "topic": "Two-way factorial ANOVA with interaction", + "points": 20, + "designModel": { "hasInteractionTerm": true }, + "text": "A $2 \\times 2$ factorial CRD with cell replication $n = 3$ produces the cell totals below. Given the interaction term in the model, test the main effects of factor A and factor B, then interpret.", + "parts": [ + { "prompt": "Compute marginal mean differences $\\bar{Y}_{2\\cdot} - \\bar{Y}_{1\\cdot}$ and $\\bar{Y}_{\\cdot 2} - \\bar{Y}_{\\cdot 1}$." }, + { "prompt": "Estimate the interaction effects $(\\alpha\\beta)_{ij}$ for all four cells; verify they sum to zero by row and column." }, + { "prompt": "Construct the complete ANOVA table; for each F-test report the s-value and partial $\\hat\\eta^2_p$." }, + { "prompt": "Given the interaction, should the main effects be interpreted as overall effects, or should simple effects within each level of the other factor be examined? Justify." } + ] + }, + { + "id": "Q3", + "type": "short-answer", + "topic": "Multiple linear regression", + "points": 12, + "designModel": { "hasInteractionTerm": false }, + "text": "Given a fitted multiple regression of a continuous response on two continuous predictors with $n = 25$, compute coefficient estimates, residual sums, and a compatibility interval for the slope of the first predictor.", + "parts": [ + { "prompt": "Compute $\\hat\\beta_1$ and its standard error from the supplied $(X^\\top X)^{-1}$." }, + { "prompt": "Construct the 95% compatibility interval for $\\beta_1$." 
} + ] + }, + { + "id": "Q4", + "type": "short-answer", + "topic": "Contrasts and pairwise comparisons", + "points": 10, + "designModel": { "hasInteractionTerm": false }, + "text": "Using the treatment-mean output from Q1, construct an orthogonal contrast comparing the average of treatments 1 and 2 against the average of treatments 3 and 4. Report the contrast value, its standard error, and an evidence-based interpretation." + }, + { + "id": "Q5", + "type": "short-answer", + "topic": "Randomized complete block design", + "points": 14, + "designModel": { "hasInteractionTerm": false }, + "text": "An RCBD with $b = 5$ blocks and $t = 4$ treatments produces the totals below. Construct the ANOVA table and report the evidence for treatment differences using s-values.", + "parts": [ + { "prompt": "Compute SS_blocks, SS_treatments, SS_error, and corresponding mean squares." }, + { "prompt": "Report $F(df_1, df_2)$ for treatments and the corresponding s-value." } + ] + }, + { + "id": "Q6", + "type": "short-answer", + "topic": "Random effects / variance components", + "points": 10, + "designModel": { "hasInteractionTerm": false }, + "text": "A one-way random-effects model with $a = 6$ randomly sampled levels yields the EMS table shown. Estimate the variance components $\\hat\\sigma^2_\\tau$ and $\\hat\\sigma^2_\\varepsilon$, and report the intraclass correlation." + }, + { + "id": "Q7", + "type": "short-answer", + "topic": "Model selection", + "points": 10, + "designModel": { "hasInteractionTerm": false }, + "text": "Two nested regression models are fit on the same data set ($n = 30$). Compare them using a partial F-test and an information criterion; report your conclusion with evidence-based language." 
+ }, + { + "id": "Q8", + "type": "short-answer", + "topic": "Residual diagnostics", + "points": 10, + "designModel": { "hasInteractionTerm": false }, + "text": "Given residual and fitted-value plots for the multiple regression in Q3, evaluate the constant-variance, normality, and linearity assumptions. Recommend an appropriate remedy for any violation observed." + } + ], + "answer_key": { + "Q1": "ANOVA table with df_treatment = 3, df_error = 20; report F(3, 20), s-value, partial eta-squared; interpret evidence strength rather than significance.", + "Q2": "Marginal differences are 8 and 4; interaction effects (alpha-beta)_ij = +1, -1, -1, +1 with row/column sums zero; given non-trivial interaction, focus on simple effects.", + "Q3": "beta_1-hat with SE from (X'X)^{-1}; 95% CI = beta_1-hat +/- t_{0.975, n-p-1} * SE.", + "Q4": "Orthogonal contrast c = (1/2)(mu_1 + mu_2) - (1/2)(mu_3 + mu_4); report value, SE, and s-value interpretation.", + "Q5": "RCBD ANOVA with df_blocks = 4, df_treatments = 3, df_error = 12; report F and s-value for treatments.", + "Q6": "sigma^2_tau-hat from (MS_treatment - MS_error)/n; ICC = sigma^2_tau / (sigma^2_tau + sigma^2_e).", + "Q7": "Partial F-test with df_1 = q, df_2 = n - p; report AIC delta and evidence-based recommendation.", + "Q8": "Constant variance: examine residuals vs fitted; normality: Q-Q plot; linearity: residual pattern; recommend transformation or weighted regression if violations are present." + } +} diff --git a/tests/teaching/formatters/helpers/bracket-table-generator.test.js b/tests/teaching/formatters/helpers/bracket-table-generator.test.js new file mode 100644 index 00000000..37937cb4 --- /dev/null +++ b/tests/teaching/formatters/helpers/bracket-table-generator.test.js @@ -0,0 +1,67 @@ +/** + * Tests for generateBracketTable F-distribution critical-value helper. 
+ */ + +import { generateBracketTable } from '../../../../src/teaching/formatters/helpers/bracket-table-generator.js'; + +describe('generateBracketTable', () => { + test('returns a LaTeX tabular wrapper', () => { + const out = generateBracketTable([{ df1: 1, df2: 20 }]); + expect(out).toContain('\\begin{tabular}'); + expect(out).toContain('\\end{tabular}'); + expect(out).toContain('\\toprule'); + expect(out).toContain('\\midrule'); + expect(out).toContain('\\bottomrule'); + }); + + test('critical value for (df1=1, df2=20, alpha=0.05) is 4.35', () => { + const out = generateBracketTable([{ df1: 1, df2: 20 }]); + expect(out).toContain('1 & 20 & 4.35'); + }); + + test('critical value for (df1=2, df2=30, alpha=0.05) is 3.32', () => { + const out = generateBracketTable([{ df1: 2, df2: 30 }]); + expect(out).toContain('2 & 30 & 3.32'); + }); + + test('honors custom alpha (alpha=0.01)', () => { + const out = generateBracketTable([{ df1: 1, df2: 20 }], { alpha: 0.01 }); + expect(out).toContain('1 & 20 & 8.10'); + expect(out).toContain('$F^*_{0.01}$'); + }); + + test('dedupes duplicate df pairs', () => { + const out = generateBracketTable([ + { df1: 1, df2: 20 }, + { df1: 1, df2: 20 }, + ]); + const dataRows = out.split('\n').filter((line) => /^\d+ & \d+ & /.test(line)); + expect(dataRows).toHaveLength(1); + expect(dataRows[0]).toContain('1 & 20 & 4.35'); + }); + + test('sorts ascending by (df1, df2)', () => { + const out = generateBracketTable([ + { df1: 2, df2: 30 }, + { df1: 1, df2: 20 }, + ]); + const idxFirst = out.indexOf('1 & 20'); + const idxSecond = out.indexOf('2 & 30'); + expect(idxFirst).toBeGreaterThan(-1); + expect(idxSecond).toBeGreaterThan(-1); + expect(idxFirst).toBeLessThan(idxSecond); + }); + + test('honors custom precision', () => { + const out = generateBracketTable([{ df1: 1, df2: 20 }], { precision: 4 }); + expect(out).toContain('1 & 20 & 4.3512'); + expect(out).not.toMatch(/1 & 20 & 4\.35 /); + }); + + test('throws on invalid df pair (zero, negative, 
or non-finite)', () => { + expect(() => generateBracketTable([{ df1: 0, df2: 5 }])).toThrow(/Invalid df pair/); + expect(() => generateBracketTable([{ df1: 1, df2: -3 }])).toThrow(/Invalid df pair/); + expect(() => generateBracketTable([{ df1: NaN, df2: 5 }])).toThrow(/Invalid df pair/); + expect(() => generateBracketTable([{ df1: 1 }])).toThrow(/Invalid df pair/); + }); +}); diff --git a/tests/teaching/formatters/helpers/interaction-aware-pass.test.js b/tests/teaching/formatters/helpers/interaction-aware-pass.test.js new file mode 100644 index 00000000..f44de6ac --- /dev/null +++ b/tests/teaching/formatters/helpers/interaction-aware-pass.test.js @@ -0,0 +1,65 @@ +/** + * Tests for applyInteractionAwarePass — reframes "main effects" language + * to "marginal mean differences" when the design model has an interaction term. + */ + +import { jest } from '@jest/globals'; +import { applyInteractionAwarePass } from '../../../../src/teaching/formatters/helpers/interaction-aware-pass.js'; + +describe('applyInteractionAwarePass', () => { + test('returns unchanged when hasInteractionTerm is false', () => { + const input = 'Test the main effect of treatment.'; + const out = applyInteractionAwarePass(input, { designModel: { hasInteractionTerm: false } }); + expect(out).toBe(input); + }); + + test('returns unchanged when questionMeta is null', () => { + const input = 'main effect'; + const out = applyInteractionAwarePass(input, null); + expect(out).toBe(input); + }); + + test('returns unchanged when designModel is missing', () => { + const input = 'main effect'; + const out = applyInteractionAwarePass(input, {}); + expect(out).toBe(input); + }); + + test("reframes 'main effects' (factorial design)", () => { + const input = 'Compute the main effects of factors A and B.'; + const out = applyInteractionAwarePass(input, { designModel: { hasInteractionTerm: true } }); + expect(out).toContain('marginal mean differences'); + expect(out).not.toContain('main effects'); + }); + + 
test("reframes singular 'main effect of treatment'", () => { + const input = 'The main effect of treatment is...'; + const out = applyInteractionAwarePass(input, { designModel: { hasInteractionTerm: true } }); + expect(out).toContain('marginal effect of treatment'); + }); + + test('preserves LaTeX math regions byte-for-byte', () => { + const input = 'The main effect $\\beta_1$ tests $H_0: \\beta_1 = 0$.'; + const out = applyInteractionAwarePass(input, { designModel: { hasInteractionTerm: true } }); + expect(out).toContain('$\\beta_1$'); + expect(out).toContain('$H_0: \\beta_1 = 0$'); + }); + + test('is idempotent: applying twice equals applying once', () => { + const input = 'Test the main effects of A and B; the main effect of treatment is positive.'; + const meta = { designModel: { hasInteractionTerm: true } }; + const once = applyInteractionAwarePass(input, meta); + const twice = applyInteractionAwarePass(once, meta); + expect(twice).toBe(once); + }); + + test('honors AI stub flag without error and still substitutes', () => { + const input = 'Compute the main effects.'; + const meta = { designModel: { hasInteractionTerm: true } }; + const spy = jest.spyOn(console, 'log').mockImplementation(() => {}); + const out = applyInteractionAwarePass(input, meta, { aiProvider: {} }); + expect(out).toContain('marginal mean differences'); + expect(spy).toHaveBeenCalledWith('[interaction-aware-pass] AI re-prompt stub invoked; passing through.'); + spy.mockRestore(); + }); +}); diff --git a/tests/teaching/generators/exam-rich-prompt.test.js b/tests/teaching/generators/exam-rich-prompt.test.js new file mode 100644 index 00000000..648c7f7d --- /dev/null +++ b/tests/teaching/generators/exam-rich-prompt.test.js @@ -0,0 +1,152 @@ +/** + * Unit Tests: Exam Generator — exam-rich prompt wiring (Wave 2) + * + * Verifies that the gated `options.format === 'exam-rich-latex'` branch + * routes through the on-disk exam-rich.md prompt with the evidence-based + * reporting policy, while the 
legacy path (default/markdown format) does + * not include any of that policy text. + * + * The AIProvider#generate method is spied on so we can capture the prompt + * string that would be sent to the AI without making a real API call. + */ + +import { describe, it, expect, beforeEach, afterEach, jest } from '@jest/globals'; +import { generateExam } from '../../../src/teaching/generators/exam.js'; +import { AIProvider } from '../../../src/teaching/ai/provider.js'; + +describe('generateExam — exam-rich prompt wiring', () => { + let originalApiKey; + let generateSpy; + + beforeEach(() => { + originalApiKey = process.env.ANTHROPIC_API_KEY; + process.env.ANTHROPIC_API_KEY = 'test-api-key-not-used'; + + // Stub AIProvider#generate so we capture the prompt but don't call the + // real API. We return a benign-shaped result that lets generateExam + // proceed far enough to be tested. Downstream validation may still + // fail with the dummy content; we catch and ignore that — the test + // asserts on the captured prompt. + generateSpy = jest + .spyOn(AIProvider.prototype, 'generate') + .mockResolvedValue({ + success: true, + content: { + title: 'Stub Exam', + exam_type: 'midterm', + duration_minutes: 60, + questions: [ + { + id: 'Q1', + type: 'short-answer', + text: 'Stub question', + points: 10, + difficulty: 'medium', + topic: 'Stub' + } + ], + answer_key: { Q1: 'Stub answer' } + }, + error: null, + metadata: { + model: 'stub', + tokens: 0, + duration: 1, + attempts: 1 + } + }); + }); + + afterEach(() => { + if (originalApiKey === undefined) { delete process.env.ANTHROPIC_API_KEY; } else { process.env.ANTHROPIC_API_KEY = originalApiKey; } // assigning undefined would store the string "undefined" + generateSpy.mockRestore(); + }); + + it('uses the exam-rich on-disk prompt when format === "exam-rich-latex"', async () => { + try { + await generateExam({ + type: 'midterm', + questionCount: 5, + difficulty: 'medium', + topics: ['One-way ANOVA'], + format: 'exam-rich-latex', + debug: false + }); + } catch { + // Downstream validation may fail on the stub content — that's fine.
+ // We only care about the prompt that was sent to the AI. + } + + expect(generateSpy).toHaveBeenCalled(); + const promptArg = generateSpy.mock.calls[0][0]; + expect(typeof promptArg).toBe('string'); + + // Policy markers from exam-rich.md + const containsSValue = + promptArg.includes('$s$-value') || promptArg.includes('s-value'); + expect(containsSValue).toBe(true); + + // The prompt itself IS the policy doc — it must instruct the model to + // AVOID the phrase "statistically significant". So the prompt should + // simultaneously (a) mention the phrase as a banned token and (b) + // contain "avoid" or "do not use". Both checks scan the whole lowercased + // prompt; proximity to the banned phrase is not verified. + const lower = promptArg.toLowerCase(); + expect(lower).toContain('statistically significant'); + expect(lower.includes('avoid') || lower.includes('do not use')).toBe( + true + ); + + // Citation pattern + expect(promptArg).toContain('Dean (2017)'); + + // Interaction-aware language marker + expect(promptArg.toLowerCase()).toContain('marginal mean'); + }); + + it('uses the legacy inline prompt when format is "markdown" (no exam-rich policy text)', async () => { + try { + await generateExam({ + type: 'midterm', + questionCount: 5, + difficulty: 'medium', + topics: ['One-way ANOVA'], + format: 'markdown', + debug: false + }); + } catch { + // Stub content may fail downstream validation — ignore. + } + + expect(generateSpy).toHaveBeenCalled(); + const promptArg = generateSpy.mock.calls[0][0]; + expect(typeof promptArg).toBe('string'); + + // Legacy prompt does NOT carry the evidence-based reporting policy + // markers from exam-rich.md. + expect(promptArg).not.toContain('$s$-value'); + expect(promptArg).not.toContain('Dean (2017)'); + expect(promptArg.toLowerCase()).not.toContain('marginal mean'); + // The legacy prompt does mention LaTeX and the basic exam structure
+ expect(promptArg).toContain('Question Types Specifications'); + }); + + it('uses the legacy inline prompt when no format option is passed', async () => { + try { + await generateExam({ + type: 'midterm', + questionCount: 5, + topics: ['One-way ANOVA'], + debug: false + }); + } catch { + // Ignore validation failures from stub content. + } + + expect(generateSpy).toHaveBeenCalled(); + const promptArg = generateSpy.mock.calls[0][0]; + expect(promptArg).not.toContain('$s$-value'); + expect(promptArg).not.toContain('Dean (2017)'); + }); +});