From 376aa22121bfcfae167d3c0ebfaf58a18cad69c0 Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Fri, 5 Dec 2025 10:20:12 +0000
Subject: [PATCH 1/3] Initial plan
From 7115cc6b492030ad315367c3f48968c1b2ac8815 Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Fri, 5 Dec 2025 10:41:08 +0000
Subject: [PATCH 2/3] Implement Designer Feedback Loop and Tooling (Issue #70)
Co-authored-by: SorraTheOrc <250240+SorraTheOrc@users.noreply.github.com>
---
.../config/overlays/aggressive_economy.yml | 33 +
content/config/overlays/fast_pacing.yml | 27 +
docs/gengine/designer_feedback_tooling.md | 513 ++++++++++++
gamedev-agent-thoughts.txt | 235 +++---
pyproject.toml | 1 +
scripts/echoes_balance_studio.py | 745 ++++++++++++++++++
src/gengine/balance_studio/__init__.py | 75 ++
src/gengine/balance_studio/cli.py | 51 ++
src/gengine/balance_studio/overlays.py | 293 +++++++
src/gengine/balance_studio/report_viewer.py | 706 +++++++++++++++++
src/gengine/balance_studio/workflows.py | 610 ++++++++++++++
tests/scripts/test_echoes_balance_studio.py | 481 +++++++++++
12 files changed, 3628 insertions(+), 142 deletions(-)
create mode 100644 content/config/overlays/aggressive_economy.yml
create mode 100644 content/config/overlays/fast_pacing.yml
create mode 100644 docs/gengine/designer_feedback_tooling.md
create mode 100644 scripts/echoes_balance_studio.py
create mode 100644 src/gengine/balance_studio/__init__.py
create mode 100644 src/gengine/balance_studio/cli.py
create mode 100644 src/gengine/balance_studio/overlays.py
create mode 100644 src/gengine/balance_studio/report_viewer.py
create mode 100644 src/gengine/balance_studio/workflows.py
create mode 100644 tests/scripts/test_echoes_balance_studio.py
diff --git a/content/config/overlays/aggressive_economy.yml b/content/config/overlays/aggressive_economy.yml
new file mode 100644
index 00000000..ae5b3a85
--- /dev/null
+++ b/content/config/overlays/aggressive_economy.yml
@@ -0,0 +1,33 @@
+# Example Overlay: Aggressive Economy
+# Demonstrates how to use YAML overlays to test balance changes.
+#
+# This overlay increases resource regeneration and reduces scarcity pressure
+# to test how the game behaves with a more forgiving economy.
+#
+# Usage:
+# echoes-balance-studio sweep --overlay content/config/overlays/aggressive_economy.yml
+#
+# Or use test-tuning for quick experiments:
+# echoes-balance-studio test-tuning --name aggressive_econ \
+# --change economy.regen_scale=1.2 \
+# --change environment.scarcity_pressure_cap=4000
+
+name: aggressive_economy
+description: |
+ Test higher resource regeneration and reduced scarcity.
+ Use this overlay to see how the game behaves when resources
+ are more abundant and scarcity pressure is lower.
+
+overrides:
+ economy:
+ regen_scale: 1.2
+ demand_population_scale: 80000
+ environment:
+ scarcity_pressure_cap: 4000
+ scarcity_unrest_weight: 0.00003
+ scarcity_pollution_weight: 0.00002
+
+metadata:
+ author: balance_team
+ purpose: testing
+ related_issue: example
diff --git a/content/config/overlays/fast_pacing.yml b/content/config/overlays/fast_pacing.yml
new file mode 100644
index 00000000..3f92944d
--- /dev/null
+++ b/content/config/overlays/fast_pacing.yml
@@ -0,0 +1,27 @@
+# Example Overlay: Fast Narrative Pacing
+# Demonstrates how to adjust narrative timing for testing.
+#
+# This overlay increases the rate of story seed activations
+# and reduces quiet periods between events.
+#
+# Usage:
+# echoes-balance-studio sweep --overlay content/config/overlays/fast_pacing.yml
+
+name: fast_narrative_pacing
+description: |
+ Test faster narrative pacing with more frequent story events.
+ Use this to see how players handle increased drama density.
+
+overrides:
+ director:
+ max_active_seeds: 3
+ global_quiet_ticks: 2
+ seed_active_ticks: 4
+ seed_resolve_ticks: 2
+ seed_quiet_ticks: 3
+ story_seed_limit: 4
+
+metadata:
+ author: narrative_team
+ purpose: testing
+ related_issue: example
diff --git a/docs/gengine/designer_feedback_tooling.md b/docs/gengine/designer_feedback_tooling.md
new file mode 100644
index 00000000..69125dae
--- /dev/null
+++ b/docs/gengine/designer_feedback_tooling.md
@@ -0,0 +1,513 @@
+# Designer Feedback Loop and Tooling
+
+This guide explains how to use the Balance Studio tools for iterating on game
+balance without requiring code changes. These tools are designed for designers
+who want to experiment with tuning, diagnose balance issues, and validate
+changes through data-driven analysis.
+
+## Overview
+
+The Balance Studio provides:
+
+- **Interactive CLI** (`echoes-balance-studio`) with guided workflows
+- **YAML overlay system** for testing config changes without modifying base files
+- **HTML dashboard** for exploring sweep results visually
+- **Workflow presets** for common balance iteration tasks
+
+## Quick Start
+
+```bash
+# Run the interactive Balance Studio
+echoes-balance-studio
+
+# Or use specific commands
+echoes-balance-studio sweep --strategies balanced aggressive
+echoes-balance-studio compare --config-a content/config --config-b content/config/sweeps/difficulty-hard
+echoes-balance-studio test-tuning --name boost_regen --change economy.regen_scale=1.2
+echoes-balance-studio view-reports
+echoes-balance-studio generate-report --input build/sweeps/summary.json --output build/report.html
+```
+
+## Workflows
+
+### 1. Run Exploratory Sweep
+
+Executes batch simulations across multiple strategies and configurations to
+explore the balance space.
+
+```bash
+# Interactive mode
+echoes-balance-studio
+
+# Direct command
+echoes-balance-studio sweep \
+ --strategies balanced aggressive diplomatic \
+ --difficulties normal hard \
+ --seeds 42 123 456 \
+ --ticks 100
+```
+
+**When to use:**
+- Initial exploration of a new feature or mechanic
+- Validating balance after significant changes
+- Gathering baseline data for comparison
+
+**Output:**
+- Individual sweep JSON files in `build/studio_sweeps/`
+- Summary report with strategy and difficulty breakdowns
+- Stability distributions and action counts
+
+### 2. Compare Two Configs
+
+Runs side-by-side sweeps with different configurations and produces a
+comparison report.
+
+```bash
+echoes-balance-studio compare \
+ --config-a content/config \
+ --config-b content/config/sweeps/difficulty-hard \
+ --name-a "Normal" \
+ --name-b "Hard" \
+ --strategies balanced
+```
+
+**When to use:**
+- Validating difficulty presets
+- A/B testing configuration changes
+- Comparing before/after tuning adjustments
+
+**Output:**
+- Stability deltas between configurations
+- Per-strategy performance comparison
+- Percentage change metrics
+
+### 3. Test Tuning Change
+
+Creates a temporary configuration overlay with specific changes and compares
+results against the baseline.
+
+```bash
+echoes-balance-studio test-tuning \
+ --name "boost_regen" \
+ --change economy.regen_scale=1.2 \
+ --change environment.scarcity_pressure_cap=6000 \
+ --strategies balanced aggressive
+```
+
+**When to use:**
+- Quick iteration on specific parameters
+- Testing hypotheses about balance issues
+- Validating fixes for identified problems
+
+**Output:**
+- Baseline vs. tuned comparison
+- Saved overlay file for future reference
+- Detailed stability metrics
+
+### 4. View Historical Reports
+
+Browse and inspect previously generated sweep reports.
+
+```bash
+# List available reports
+echoes-balance-studio view-reports
+
+# Output as JSON for processing
+echoes-balance-studio view-reports --json
+```
+
+**When to use:**
+- Reviewing past experiments
+- Tracking balance changes over time
+- Finding regression baselines
+
+---
+
+## YAML Overlay System
+
+Overlays let you test configuration changes without modifying base files.
+They work like the existing difficulty presets but are designed for quick
+experimentation.
+
+### Creating an Overlay
+
+Create a YAML file with your changes:
+
+```yaml
+# my_overlay.yml
+name: aggressive_economy
+description: Test higher resource regeneration and lower scarcity pressure
+
+overrides:
+ economy:
+ regen_scale: 1.2
+ demand_population_scale: 80000
+ environment:
+ scarcity_pressure_cap: 4000
+ scarcity_unrest_weight: 0.00003
+
+metadata:
+ author: designer_name
+ ticket: GAME-1234
+```
+
+### Using an Overlay
+
+```bash
+# Apply overlay during sweep
+echoes-balance-studio sweep --overlay my_overlay.yml
+
+# Or use the test-tuning workflow for quick changes
+echoes-balance-studio test-tuning \
+ --name quick_test \
+ --change economy.regen_scale=1.2
+```
+
+### Overlay Directory
+
+Place overlays in `content/config/overlays/` so that the Balance Studio can
+discover them:
+
+```
+content/
+ config/
+ overlays/
+ aggressive_economy.yml
+ conservative_pacing.yml
+ stress_test.yml
+```
+
+---
+
+## Interactive HTML Reports
+
+Generate rich HTML dashboards from sweep results:
+
+```bash
+echoes-balance-studio generate-report \
+ --input build/batch_sweeps/batch_sweep_summary.json \
+ --output build/balance_report.html \
+ --title "Weekly Balance Review"
+```
+
+### Features
+
+- **Strategy Performance Table**: Sortable comparison of all strategies
+- **Difficulty Analysis**: See how each difficulty level affects stability
+- **Stability Distribution**: Histogram of outcomes across all sweeps
+- **Individual Sweep Browser**: Filter and drill into specific runs
+- **Embedded Charts**: Visual representations of key metrics
+
+### Themes
+
+```bash
+# Light theme (default)
+echoes-balance-studio generate-report --theme light ...
+
+# Dark theme
+echoes-balance-studio generate-report --theme dark ...
+```
+
+---
+
+## How to Diagnose Dominant Strategies
+
+When one strategy consistently outperforms others, it may indicate a balance
+issue. Here's how to diagnose and address dominant strategies:
+
+### Step 1: Run a Broad Sweep
+
+```bash
+echoes-balance-studio sweep \
+ --strategies balanced aggressive diplomatic hybrid \
+ --seeds 42 123 456 789 1000 \
+ --ticks 200
+```
+
+### Step 2: Generate and Review the Report
+
+```bash
+echoes-balance-studio generate-report \
+ --input build/studio_sweeps/sweep_*/batch_sweep_summary.json \
+ --output build/dominant_strategy_analysis.html
+```
+
+Look for:
+- **Win rate gaps > 10%** between strategies
+- **Consistently high/low stability** for specific strategies
+- **Action distribution skews** (some actions never used)
+
+### Step 3: Identify Root Causes
+
+Common causes of dominant strategies:
+
+1. **Overpowered actions**: Check `action_counts` in the sweep data
+2. **Resource imbalance**: Review `economy.regen_scale` and the demand weights
+3. **Threshold issues**: Stability thresholds may favor certain playstyles
+4. **Faction mechanics**: Some factions may synergize too well with a strategy
+
+### Step 4: Test Fixes
+
+```bash
+# Example: If aggressive strategy is dominant due to high resource gains
+echoes-balance-studio test-tuning \
+ --name nerf_aggressive \
+ --change economy.regen_scale=0.7 \
+ --strategies balanced aggressive
+```
+
+### Step 5: Validate
+
+Re-run the broad sweep with your changes and confirm the gap has narrowed.
+
+---
+
+## Iterating on Action Costs
+
+Action costs affect how often AI strategies choose specific actions. Here's
+how to tune them:
+
+### Step 1: Identify Underused Actions
+
+Run a sweep and check action frequency distribution:
+
+```bash
+echoes-balance-studio sweep --strategies balanced aggressive diplomatic
+echoes-balance-studio generate-report \
+ --input build/studio_sweeps/*/batch_sweep_summary.json \
+ --output build/action_analysis.html
+```
+
+Actions with < 5% usage may be too expensive or ineffective.
+
+### Step 2: Test Cost Adjustments
+
+```bash
+# Reduce cost of underused action
+echoes-balance-studio test-tuning \
+ --name buff_negotiate \
+ --change actions.negotiate.base_cost=0.5 \
+ --change actions.negotiate.cooldown=2
+```
+
+### Step 3: Monitor Side Effects
+
+Check that buffing one action doesn't make others obsolete. Compare action
+distributions before and after.
+
+### Best Practices
+
+- Make small, incremental changes (10-20% adjustments)
+- Test across multiple strategies
+- Use multiple seeds for statistical validity
+- Document changes with descriptive overlay names
+
+---
+
+## Testing Narrative Pacing Changes
+
+Narrative pacing affects story seed activation, director events, and the
+overall flow of the game.
+
+### Key Pacing Parameters
+
+| Parameter | Location | Effect |
+|-----------|----------|--------|
+| `max_active_seeds` | director | How many story seeds can be active at once |
+| `global_quiet_ticks` | director | Minimum ticks between major events |
+| `seed_active_ticks` | director | How long a seed stays active |
+| `seed_resolve_ticks` | director | Time to resolve after active phase |
+| `seed_quiet_ticks` | director | Cooldown before seed can reactivate |
+
+### Step 1: Baseline Measurement
+
+Run a sweep with current settings:
+
+```bash
+echoes-balance-studio sweep \
+ --ticks 300 \
+ --seeds 42 123
+```
+
+Check story seed activation rates in the output.
+
+### Step 2: Test Pacing Adjustment
+
+```bash
+# Example: Increase drama density
+echoes-balance-studio test-tuning \
+ --name fast_pacing \
+ --change director.max_active_seeds=3 \
+ --change director.global_quiet_ticks=2 \
+ --ticks 300
+```
+
+### Step 3: Review Story Seed Behavior
+
+Look for:
+- **Activation rate**: Are seeds firing at the expected frequency?
+- **Overlap issues**: Are too many seeds active simultaneously?
+- **Dead zones**: Are there long stretches without narrative events?
+
+### Step 4: Iterate
+
+Adjust parameters based on observations:
+- Increase `global_quiet_ticks` if events feel overwhelming
+- Decrease `seed_quiet_ticks` if the game feels slow
+- Adjust `story_seed_limit` to control how many seeds surface per tick
+
+---
+
+## Case Study: Balancing the Industrial Tier Faction
+
+This case study demonstrates a complete balance iteration workflow.
+
+### Problem Statement
+
+Playtest feedback indicates the Industrial Tier faction feels underpowered
+compared to other factions. Players report:
+- Lower legitimacy gains
+- Fewer opportunities for impactful actions
+- Pollution penalties seem too harsh
+
+### Step 1: Gather Data
+
+```bash
+# Run comprehensive sweep focusing on faction behavior
+echoes-balance-studio sweep \
+ --strategies balanced diplomatic \
+ --seeds 42 123 456 789 1000 1001 1002 1003 \
+ --ticks 200 \
+ --output-dir build/industrial_tier_analysis
+```
+
+### Step 2: Analyze Baseline
+
+```bash
+echoes-balance-studio generate-report \
+ --input build/industrial_tier_analysis/batch_sweep_summary.json \
+ --output build/industrial_tier_baseline.html
+```
+
+Review faction legitimacy trends in the sweep data.
+
+### Step 3: Hypothesis Testing
+
+**Hypothesis 1: Pollution penalties are too harsh**
+
+```bash
+echoes-balance-studio test-tuning \
+ --name reduce_pollution_penalty \
+ --change environment.faction_sabotage_pollution_spike=0.015 \
+ --change environment.scarcity_pollution_weight=0.00002 \
+ --strategies balanced \
+ --seeds 42 123 456
+```
+
+**Hypothesis 2: Investment returns are too low**
+
+```bash
+echoes-balance-studio test-tuning \
+ --name boost_investment \
+ --change economy.faction_investment_return=1.5 \
+ --change environment.faction_invest_pollution_relief=0.03 \
+ --strategies balanced \
+ --seeds 42 123 456
+```
+
+### Step 4: Compare Results
+
+```bash
+# Generate comparison between baseline and each hypothesis
+echoes-balance-studio compare \
+ --config-a content/config \
+ --config-b build/industrial_tier_analysis/tuning_reduce_pollution_penalty_*/modified_config \
+ --name-a "Baseline" \
+ --name-b "Reduced Pollution"
+```
+
+### Step 5: Implement and Validate
+
+Based on the data, create a formal overlay for the winning hypothesis:
+
+```yaml
+# content/config/overlays/industrial_tier_balance.yml
+name: industrial_tier_balance_v1
+description: Rebalance Industrial Tier faction after Dec 2024 analysis
+
+overrides:
+ environment:
+ faction_sabotage_pollution_spike: 0.018
+ scarcity_pollution_weight: 0.000025
+ faction_invest_pollution_relief: 0.025
+
+metadata:
+ ticket: GAME-4567
+ analysis_date: 2024-12-01
+ baseline_report: build/industrial_tier_baseline.html
+```
+
+Run a final validation sweep with the new overlay applied.
+
+---
+
+## Tips and Best Practices
+
+### Statistical Validity
+
+- Use at least 5 different seeds for meaningful comparisons
+- Run 100+ ticks to capture mid-to-late game dynamics
+- Repeat experiments if results are marginal
+
+### Documenting Changes
+
+- Always include descriptive names for overlays
+- Reference ticket numbers in metadata
+- Save baseline reports before making changes
+
+### Iterative Approach
+
+1. Make one change at a time when possible
+2. Measure impact before adding more changes
+3. Keep changes small (10-20% parameter adjustments)
+4. Validate that fixes don't create new problems
+
+### Sharing Results
+
+- Use the `--json` flag when the output needs further processing
+- Generate HTML reports for stakeholder reviews
+- Archive summary JSONs for regression testing
+
+---
+
+## Command Reference
+
+| Command | Description |
+|---------|-------------|
+| `echoes-balance-studio` | Interactive workflow selection |
+| `echoes-balance-studio sweep` | Run exploratory sweeps |
+| `echoes-balance-studio compare` | Compare two configurations |
+| `echoes-balance-studio test-tuning` | Test a tuning change |
+| `echoes-balance-studio view-reports` | Browse historical reports |
+| `echoes-balance-studio generate-report` | Generate HTML dashboard |
+
+### Common Flags
+
+| Flag | Description |
+|------|-------------|
+| `--strategies` | AI strategies to test |
+| `--difficulties` | Difficulty presets |
+| `--seeds` | Random seeds for reproducibility |
+| `--ticks` | Tick budget per sweep |
+| `--output-dir` | Output directory |
+| `--json` | Output as JSON |
+| `--verbose` | Verbose progress output |
+
+---
+
+## See Also
+
+- [AI Tournament & Balance Analysis](./ai_tournament_and_balance_analysis.md)
+- [How to Play Echoes](./how_to_play_echoes.md)
+- [Content Designer Workflow](./content_designer_workflow.md)
+- [Implementation Plan](../simul/emergent_story_game_implementation_plan.md)
diff --git a/gamedev-agent-thoughts.txt b/gamedev-agent-thoughts.txt
index 057d7aa3..b5aba20b 100644
--- a/gamedev-agent-thoughts.txt
+++ b/gamedev-agent-thoughts.txt
@@ -1,164 +1,115 @@
-# GameDev Agent Thoughts - Issue #63: Analysis and Balance Reporting (M11.3)
+# GameDev Agent Thoughts - Issue #70: Designer Feedback Loop and Tooling (M11.6)
## Task Analysis
-Working on Issue #63 - Phase 11, Milestone 11.3, Task 11.3.1.
-
-### Previous Completions
-- Task 11.1.1 (Batch Simulation Sweep Infrastructure) - COMPLETED
-- Task 11.2.1 (Result Aggregation and Storage) - COMPLETED
-
-### Requirements for Task 11.3.1
-
-1. Create `scripts/analyze_balance.py` that processes aggregated sweep results from SQLite database
-2. Generate HTML or Markdown balance reports with sections for:
- - Dominant strategies (win rate deltas >10%)
- - Underperforming mechanics (actions/policies rarely chosen)
- - Unused story seeds
- - Parameter sensitivity analysis (impact of difficulty/config changes)
-3. Statistical analysis including:
- - Confidence intervals
- - Significance testing (t-tests for win rate differences)
- - Trend detection across historical runs
-4. Visual outputs (charts/graphs) showing:
- - Win rate distributions
- - Metric trends over time
- - Parameter correlations
-5. Regression detection: Highlights significant deviations from baseline
-6. At least 12 tests covering report generation, statistical calculations, and edge cases
+Working on Issue #70 - Phase 11, Milestone 11.6, Task 11.6.1.
-## Implementation Summary
+### Requirements from Issue
-### Files Created
+1. Create CLI tool `echoes-balance-studio` with guided workflows for designers:
+ - "Run exploratory sweep"
+ - "Compare two configs"
+ - "Test tuning change"
+ - "View historical reports"
-1. **scripts/analyze_balance.py** - Main balance analysis script with:
- - Dataclasses: `ConfidenceInterval`, `TTestResult`, `TrendAnalysis`, `RegressionAlert`, `BalanceReport`
- - Database query functions for extracting sweep results
- - Statistical analysis functions:
- - `compute_confidence_interval()` - 95% CI using t-distribution
- - `perform_t_test()` - Two-sample t-test for strategy comparison
- - `detect_trend()` - Linear regression for trend detection
- - `detect_regression()` - Compare runs for significant deviations
- - Balance analysis functions:
- - `analyze_dominant_strategies()` - Win rate deltas >10%
- - `analyze_underperforming_mechanics()` - Actions with <5% usage
- - `identify_unused_story_seeds()` - Seeds never activated
- - `analyze_parameter_sensitivity()` - Metrics by difficulty
- - Visualization functions (using matplotlib):
- - `generate_win_rate_chart()` - Bar chart of win rates
- - `generate_trend_chart()` - Line chart of metrics over time
- - `generate_action_distribution_chart()` - Pie chart of actions
- - Report generation:
- - `format_report_markdown()` - Full markdown report
- - `format_report_html()` - HTML with embedded charts
- - CLI with subcommands: `report`, `regression`, `trends`, `stats`
-
-2. **tests/scripts/test_analyze_balance.py** - 39 tests in 12 test classes:
- - `TestConfidenceInterval` (4 tests): CI computation, edge cases, serialization
- - `TestTTest` (4 tests): Significant/non-significant detection, insufficient data
- - `TestTrendDetection` (4 tests): Increasing, decreasing, stable, insufficient data
- - `TestRegressionDetection` (3 tests): Regression alerts, thresholds, serialization
- - `TestDominantStrategies` (3 tests): Detection, balanced scenarios, single strategy
- - `TestUnderperformingMechanics` (3 tests): Detection, all used, empty data
- - `TestUnusedStorySeeds` (3 tests): Identification, full coverage, no reference
- - `TestParameterSensitivity` (2 tests): Difficulty analysis, high variation
- - `TestReportGeneration` (4 tests): Report with data, markdown, HTML, serialization
- - `TestCLI` (6 tests): Report, JSON output, stats, trends, regression commands
- - `TestEdgeCases` (3 tests): Empty database, single result, all failed sweeps
+2. Configuration changes testable via YAML overlays without modifying base configs
+ (similar to difficulty presets)
-## Acceptance Criteria Verification
-
-1. ✅ Script processes aggregated sweep results from SQLite database
-2. ✅ Generates HTML or Markdown balance reports with sections for:
- - ✅ Dominant strategies (win rate deltas >10%)
- - ✅ Underperforming mechanics (actions with <5% usage)
- - ✅ Unused story seeds
- - ✅ Parameter sensitivity analysis
-3. ✅ Statistical analysis includes:
- - ✅ Confidence intervals (95% CI using t-distribution)
- - ✅ Significance testing (two-sample t-tests)
- - ✅ Trend detection (linear regression)
-4. ✅ Visual outputs (charts) showing:
- - ✅ Win rate distributions (bar chart)
- - ✅ Metric trends over time (line chart)
- - ✅ Action distribution (pie chart)
-5. ✅ Regression detection highlights significant deviations from baseline
-6. ✅ 39 tests covering report generation, statistical calculations, and edge cases (requirement was 12+)
-
-## Verification
-
-- All 39 tests pass
-- Ruff linting passes with no errors
-- CLI works correctly with all subcommands
-
-## Progress
-
-- [x] Create scripts/analyze_balance.py
-- [x] Create tests/scripts/test_analyze_balance.py
-- [x] Run linting - PASSED
-- [x] Run tests - 39 PASSED
-- [x] Task completed
+3. Interactive report viewer (HTML dashboard) allowing filtering, sorting,
+ and drilling into results
----
+4. Designer-focused documentation:
+ - "How to diagnose dominant strategies"
+ - "Iterating on action costs"
+ - "Testing narrative pacing changes"
-# Previous Task Notes - Issue #61: Result Aggregation and Storage (M11.2)
+5. Example workflows with case studies (e.g., "Balancing the Industrial Tier faction")
-## Task Analysis
+6. At least 8 tests covering CLI commands, config overlay loading, and report generation
-Working on Issue #61 - Phase 11, Milestone 11.2, Task 11.2.1.
+7. Register new CLI tool in pyproject.toml
-### Requirements
+### Existing Infrastructure Leveraged
-1. Script `scripts/aggregate_sweep_results.py` ingests batch sweep JSON outputs and produces aggregated summary data
-2. Storage format (SQLite database) supports querying by parameter combinations, timestamp, and result metrics
-3. Historical tracking preserves sweep metadata (git commit hash, timestamp, parameter ranges) for reproducibility
-4. Aggregation computes key statistics: win rates by strategy, average stability/unrest/pollution, story seed activation rates, action usage frequencies
-5. Query interface or helper functions support common lookups
-6. At least 8 tests covering aggregation logic, storage/retrieval, and historical queries
+- scripts/run_batch_sweeps.py - Batch simulation sweep infrastructure
+- scripts/aggregate_sweep_results.py - Result aggregation with SQLite storage
+- scripts/analyze_balance.py - Analysis and balance reporting
+- content/config/batch_sweeps.yml - Sweep parameter configuration
+- Difficulty presets in content/config/sweeps/
## Implementation Summary
### Files Created
-1. **scripts/aggregate_sweep_results.py** - Main aggregation script with:
- - Dataclasses: `SweepRecord`, `SweepRunMetadata`, `AggregatedStats`
- - SQLite database setup with versioned schema and indexes
- - `init_database()` - Creates tables and indexes
- - `ingest_sweep_summary()` - Ingests a single batch sweep summary
- - `ingest_sweep_directory()` - Ingests all summaries from a directory
- - `query_sweep_results()` - Query with filters (strategy, difficulty, world, run_id, days, git_commit, limit)
- - `query_sweep_runs()` - Query run metadata
- - `compute_aggregated_stats()` - Computes win rates, averages, action frequencies
- - `compute_stats_by_strategy()` / `compute_stats_by_difficulty()` - Convenience functions
- - CLI with subcommands: `ingest`, `query`, `stats`, `runs`
-
-2. **tests/scripts/test_aggregate_sweep_results.py** - 26 tests in 8 test classes:
- - `TestDatabaseSchema` (3 tests): schema creation, indexes, idempotency
- - `TestIngestion` (3 tests): ingest summary, prevent duplicates, ingest directory
- - `TestQuerying` (6 tests): by strategy, difficulty, run_id, limit, days, git commit
- - `TestAggregation` (4 tests): by strategy, with errors, action frequencies, empty records
- - `TestDataclasses` (3 tests): SweepRecord, SweepRunMetadata, AggregatedStats serialization
- - `TestCLI` (4 tests): ingest, stats JSON, query with filters, runs command
- - `TestHistoricalTracking` (2 tests): multiple runs, date range filtering
+1. **src/gengine/balance_studio/__init__.py** - Package init with exports
+2. **src/gengine/balance_studio/overlays.py** - YAML overlay loading system
+ - ConfigOverlay dataclass
+ - deep_merge() for config merging
+ - load_overlay_directory() for batch loading
+ - create_tuning_overlay() helper
+ - merge_overlays() for combining overlays
+3. **src/gengine/balance_studio/workflows.py** - Workflow implementations
+ - WorkflowResult dataclass
+ - ExploratorySweepConfig, CompareConfigsConfig, TuningTestConfig
+ - run_exploratory_sweep()
+ - run_config_comparison()
+ - run_tuning_test()
+ - list_historical_reports()
+ - view_historical_report()
+ - get_workflow_menu()
+4. **src/gengine/balance_studio/report_viewer.py** - HTML report generation
+ - ReportViewerConfig, FilterState dataclasses
+ - generate_strategy_chart(), generate_difficulty_chart()
+ - generate_stability_distribution_chart()
+ - generate_interactive_html() - full HTML dashboard
+ - write_html_report()
+5. **src/gengine/balance_studio/cli.py** - Entry point for pyproject.toml
+6. **scripts/echoes_balance_studio.py** - Main CLI tool with:
+ - Interactive workflow selection
+ - sweep, compare, test-tuning, view-reports, generate-report commands
+7. **tests/scripts/test_echoes_balance_studio.py** - 30 tests covering:
+ - ConfigOverlay (5 tests)
+ - DeepMerge (3 tests)
+ - LoadOverlayDirectory (3 tests)
+ - CreateTuningOverlay (1 test)
+ - MergeOverlays (2 tests)
+ - ReportViewer (3 tests)
+ - Workflows (6 tests)
+ - CLI (5 tests)
+ - ExploratorySweepConfig (2 tests)
+8. **docs/gengine/designer_feedback_tooling.md** - Designer documentation
+
+### Files Modified
+
+1. **pyproject.toml** - Added echoes-balance-studio script entry
## Acceptance Criteria Verification
-1. ✅ Script ingests batch sweep JSON outputs and produces aggregated summary data
-2. ✅ SQLite storage supports querying by parameter combinations, timestamp, and result metrics
-3. ✅ Historical tracking preserves sweep metadata (git commit hash, timestamp, parameter ranges)
-4. ✅ Aggregation computes: win rates, avg stability, story seed activation rates, action frequencies
-5. ✅ Query interface supports common lookups (by strategy, difficulty, date range, git commit)
-6. ✅ 26 tests covering aggregation logic, storage/retrieval, and historical queries (requirement was 8+)
-
-## Verification
-
-- All 26 tests pass
-- Ruff linting passes with no errors
-- CLI works correctly via subprocess testing
-
-## Progress
-
-- [x] Create scripts/aggregate_sweep_results.py
-- [x] Create tests/scripts/test_aggregate_sweep_results.py
+1. ✅ CLI tool `echoes-balance-studio` provides guided workflows for designers
+2. ✅ Workflows include: "Run exploratory sweep", "Compare two configs",
+ "Test tuning change", "View historical reports"
+3. ✅ Configuration changes testable via YAML overlays
+4. ✅ Interactive report viewer (HTML dashboard) allows filtering, sorting,
+ and drilling into results
+5. ✅ Designer-focused documentation covers:
+ - "How to diagnose dominant strategies"
+ - "Iterating on action costs"
+ - "Testing narrative pacing changes"
+6. ✅ Example workflows with case studies (Balancing Industrial Tier faction)
+7. ✅ 30 tests covering CLI commands, config overlay loading, and report generation
+ (requirement was at least 8)
+8. ✅ CLI registered in pyproject.toml under [project.scripts]
+9. ✅ All code passes ruff linting
+10. ✅ Tests pass
+
+## Status
+- [x] Read and understand existing infrastructure
+- [x] Create balance studio module with overlays
+- [x] Create workflows module
+- [x] Create report viewer module
+- [x] Create CLI tool (echoes_balance_studio.py)
+- [x] Add tests (30 tests - exceeds minimum of 8)
+- [x] Create designer documentation
+- [x] Register CLI in pyproject.toml
- [x] Run linting - PASSED
-- [x] Run tests - 26 PASSED
+- [x] Run tests - 30 PASSED
diff --git a/pyproject.toml b/pyproject.toml
index 8f75b2bc..99cad97e 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -41,6 +41,7 @@ echoes-shell = "gengine.echoes.cli.shell:main"
echoes-gateway-service = "gengine.echoes.gateway.main:main"
echoes-gateway-shell = "gengine.echoes.gateway.client:main"
echoes-llm-service = "gengine.echoes.llm.main:main"
+echoes-balance-studio = "gengine.balance_studio.cli:main"
[build-system]
requires = ["setuptools>=68.0.0"]
diff --git a/scripts/echoes_balance_studio.py b/scripts/echoes_balance_studio.py
new file mode 100644
index 00000000..1287df14
--- /dev/null
+++ b/scripts/echoes_balance_studio.py
@@ -0,0 +1,745 @@
+#!/usr/bin/env python3
+"""Balance Studio CLI - Designer feedback loop and guided workflows.
+
+Provides an interactive interface for designers to iterate on game balance
+without requiring code changes.
+
+Examples
+--------
+Run interactively::
+
+ echoes-balance-studio
+
+Run a specific workflow::
+
+ echoes-balance-studio sweep --strategies balanced aggressive
+ echoes-balance-studio compare --config-a path/to/a --config-b path/to/b
+ echoes-balance-studio test-tuning --name "boost_regen" \\
+ --change economy.regen_scale=1.2
+ echoes-balance-studio view-reports
+ echoes-balance-studio generate-report --input build/sweeps/summary.json
+"""
+
+from __future__ import annotations
+
+import argparse
+import json
+import sys
+from pathlib import Path
+from typing import Any, Sequence
+
+from gengine.balance_studio import (
+ CompareConfigsConfig,
+ ConfigOverlay,
+ ExploratorySweepConfig,
+ ReportViewerConfig,
+ TuningTestConfig,
+ get_workflow_menu,
+ list_historical_reports,
+ run_config_comparison,
+ run_exploratory_sweep,
+ run_tuning_test,
+ view_historical_report,
+ write_html_report,
+)
+
+
+def print_header() -> None:
+    """Write the Balance Studio banner to stdout.
+
+    The banner is a blank line, a 60-character rule, the two title lines,
+    a second rule, and a closing blank line.
+    """
+    rule = "=" * 60
+    banner_lines = (
+        "",
+        rule,
+        " ECHOES BALANCE STUDIO",
+        " Designer Feedback Loop and Tooling",
+        rule,
+        "",
+    )
+    for text in banner_lines:
+        print(text)
+
+
+def print_workflow_menu() -> None:
+ """Print the numbered list of workflows returned by ``get_workflow_menu``.
+
+ Each menu entry is indexed with the ``name`` and ``description`` keys,
+ and numbering starts at 1.
+ """
+ workflows = get_workflow_menu()
+ print("Available Workflows:")
+ print("-" * 40)
+ for i, w in enumerate(workflows, 1):
+ print(f" {i}. {w['name']}")
+ print(f" {w['description']}")
+ print()
+
+
+def interactive_mode() -> int:
+    """Show the workflow menu, read one selection, and run that workflow.
+
+    ``q``, ``quit`` and ``exit`` (case-insensitive), as well as EOF or
+    Ctrl-C at the prompt, leave without running anything.
+
+    Returns
+    -------
+    int
+        0 when the user quits, 1 for an unrecognised selection, otherwise
+        the exit code of the selected workflow.
+    """
+    print_header()
+    print_workflow_menu()
+
+    print("Enter workflow number (1-4) or 'q' to quit:")
+    try:
+        raw_choice = input("> ")
+    except (EOFError, KeyboardInterrupt):
+        print("\nExiting.")
+        return 0
+    choice = raw_choice.strip().lower()
+
+    if choice in {"q", "quit", "exit"}:
+        return 0
+
+    try:
+        choice_num = int(choice)
+    except ValueError:
+        print(f"Invalid choice: {choice}")
+        return 1
+
+    # Menu number -> workflow entry point.
+    runners = {
+        1: interactive_sweep,
+        2: interactive_compare,
+        3: interactive_tuning,
+        4: interactive_view_reports,
+    }
+    runner = runners.get(choice_num)
+    if runner is None:
+        print(f"Invalid choice: {choice_num}")
+        return 1
+    return runner()
+
+
+def interactive_sweep() -> int:
+    """Interactive exploratory sweep workflow.
+
+    Prompts for strategies, difficulty presets, random seeds and a tick
+    budget, runs the sweep, and prints the outcome. A blank answer selects
+    the default for that prompt; seeds or a tick budget that cannot be
+    parsed as integers fall back to the defaults with a notice.
+
+    Returns
+    -------
+    int
+        0 when the sweep reports success, 1 otherwise.
+    """
+    print("\n--- Run Exploratory Sweep ---\n")
+
+    print("Enter strategies (comma-separated, or press Enter for defaults):")
+    print(" Available: balanced, aggressive, diplomatic, hybrid")
+    strategies_input = input("> ").strip()
+    strategies = (
+        [s.strip() for s in strategies_input.split(",") if s.strip()]
+        if strategies_input
+        else ["balanced", "aggressive", "diplomatic"]
+    )
+
+    print("\nEnter difficulty presets (comma-separated, or Enter for 'normal'):")
+    print(" Available: tutorial, easy, normal, hard, brutal")
+    difficulties_input = input("> ").strip()
+    difficulties = (
+        [d.strip() for d in difficulties_input.split(",") if d.strip()]
+        if difficulties_input
+        else ["normal"]
+    )
+
+    print("\nEnter random seeds (comma-separated, or Enter for defaults):")
+    seeds_input = input("> ").strip()
+    try:
+        seeds = (
+            [int(s.strip()) for s in seeds_input.split(",") if s.strip()]
+            if seeds_input
+            else [42, 123, 456]
+        )
+    except ValueError:
+        print("Invalid seeds, using defaults")
+        seeds = [42, 123, 456]
+
+    print("\nEnter tick budget (or Enter for 100):")
+    tick_input = input("> ").strip()
+    try:
+        tick_budget = int(tick_input) if tick_input else 100
+    except ValueError:
+        # Mirror the seed prompt: a typo should not abort the workflow.
+        print("Invalid tick budget, using default")
+        tick_budget = 100
+
+    config = ExploratorySweepConfig(
+        strategies=strategies,
+        difficulties=difficulties,
+        seeds=seeds,
+        tick_budget=tick_budget,
+    )
+
+    print(f"\nRunning sweep with {len(strategies)} strategies, "
+          f"{len(difficulties)} difficulties, {len(seeds)} seeds...")
+    print("This may take a while...\n")
+
+    result = run_exploratory_sweep(config, verbose=True)
+
+    print("\n" + "=" * 40)
+    if result.success:
+        print(f"SUCCESS: {result.message}")
+        print(f"Output: {result.output_path}")
+    else:
+        print(f"FAILED: {result.message}")
+        for err in result.errors:
+            print(f" Error: {err}")
+
+    return 0 if result.success else 1
+
+
+def interactive_compare() -> int:
+    """Prompt for two config directories, compare them, and print the result.
+
+    Both paths are mandatory; the display names default to ``Config A``
+    and ``Config B`` when left blank.
+
+    Returns
+    -------
+    int
+        0 when the comparison reports success, 1 when it fails or a path
+        is left empty.
+    """
+    print("\n--- Compare Two Configs ---\n")
+
+    print("Enter path to first config directory:")
+    path_a = input("> ").strip()
+    if path_a == "":
+        print("Config path required")
+        return 1
+
+    print("\nEnter name for first config (or Enter for 'Config A'):")
+    label_a = input("> ").strip()
+    if not label_a:
+        label_a = "Config A"
+
+    print("\nEnter path to second config directory:")
+    path_b = input("> ").strip()
+    if path_b == "":
+        print("Config path required")
+        return 1
+
+    print("\nEnter name for second config (or Enter for 'Config B'):")
+    label_b = input("> ").strip()
+    if not label_b:
+        label_b = "Config B"
+
+    comparison_config = CompareConfigsConfig(
+        config_a_path=Path(path_a),
+        config_b_path=Path(path_b),
+        name_a=label_a,
+        name_b=label_b,
+    )
+
+    print("\nRunning comparison sweeps...")
+
+    outcome = run_config_comparison(comparison_config, verbose=True)
+
+    print("\n" + "=" * 40)
+    if not outcome.success:
+        print(f"FAILED: {outcome.message}")
+        for problem in outcome.errors:
+            print(f" Error: {problem}")
+        return 1
+
+    print(f"SUCCESS: {outcome.message}")
+    print(f"Output: {outcome.output_path}")
+
+    if "comparison" in outcome.data:
+        print("\nComparison Results:")
+        for strategy, entry in outcome.data["comparison"].items():
+            delta = entry.get("delta", 0)
+            # Arrow shows the sign of the delta; magnitude is printed unsigned.
+            if delta > 0:
+                arrow = "↑"
+            elif delta < 0:
+                arrow = "↓"
+            else:
+                arrow = "="
+            percent = entry.get("delta_percent", 0)
+            print(f" {strategy}: {arrow} {abs(delta):.3f} ({percent:.1f}%)")
+
+    return 0
+
+
+def interactive_tuning() -> int:
+    """Interactive tuning test workflow.
+
+    Prompts for an experiment name and a series of ``key=value`` changes,
+    one per line. Dotted keys build nested dictionaries, so
+    ``economy.regen_scale=1.2`` becomes ``{"economy": {"regen_scale": 1.2}}``.
+    Values containing ``.`` are tried as float, others as int; ``true`` /
+    ``false`` become booleans and anything else stays a string.
+
+    A dotted key that would descend through a value already set to a
+    non-dict (for example ``economy=1`` followed by
+    ``economy.regen_scale=1.2``) is rejected with a message rather than
+    crashing the prompt loop.
+
+    Returns
+    -------
+    int
+        0 when the tuning test reports success, 1 when it fails or no
+        changes were entered.
+    """
+    print("\n--- Test Tuning Change ---\n")
+
+    print("Enter a name for this tuning experiment:")
+    name = input("> ").strip()
+    if not name:
+        name = "tuning_test"
+
+    print("\nEnter config changes as key=value pairs (one per line, blank to finish):")
+    print(" Example: economy.regen_scale=1.2")
+    print(" Example: environment.scarcity_pressure_cap=6000")
+
+    changes: dict[str, Any] = {}
+    while True:
+        line = input("> ").strip()
+        if not line:
+            break
+
+        if "=" not in line:
+            print(" Invalid format, use key=value")
+            continue
+
+        key, value = line.split("=", 1)
+        key = key.strip()
+        value = value.strip()
+
+        # Parse value type
+        try:
+            if "." in value:
+                parsed_value: Any = float(value)
+            else:
+                parsed_value = int(value)
+        except ValueError:
+            if value.lower() in ("true", "false"):
+                parsed_value = value.lower() == "true"
+            else:
+                parsed_value = value
+
+        # Build nested dict from dotted key
+        keys = key.split(".")
+        current: Any = changes
+        for k in keys[:-1]:
+            current = current.setdefault(k, {})
+            if not isinstance(current, dict):
+                # An earlier change stored a plain value at this segment.
+                break
+        if not isinstance(current, dict):
+            print(f" Cannot set {key}: '{k}' already holds a plain value")
+            continue
+        current[keys[-1]] = parsed_value
+        print(f" Added: {key} = {parsed_value}")
+
+    if not changes:
+        print("No changes specified")
+        return 1
+
+    print(f"\nTesting {len(changes)} changes...")
+
+    config = TuningTestConfig(
+        name=name,
+        changes=changes,
+        description=f"Interactive tuning test: {name}",
+    )
+
+    result = run_tuning_test(config, verbose=True)
+
+    print("\n" + "=" * 40)
+    if result.success:
+        print(f"SUCCESS: {result.message}")
+        print(f"Output: {result.output_path}")
+
+        if "comparison" in result.data:
+            print("\nTuning Impact:")
+            for strategy, comp in result.data["comparison"].items():
+                delta = comp.get("delta", 0)
+                direction = "↑" if delta > 0 else "↓" if delta < 0 else "="
+                print(f" {strategy}: Baseline {comp.get('stability_a', 0):.3f} "
+                      f"→ Tuned {comp.get('stability_b', 0):.3f} "
+                      f"({direction}{abs(delta):.3f})")
+    else:
+        print(f"FAILED: {result.message}")
+        for err in result.errors:
+            print(f" Error: {err}")
+
+    return 0 if result.success else 1
+
+
+def interactive_view_reports() -> int:
+    """Interactive report viewing workflow.
+
+    Lists up to ten historical reports, asks which one to open, and prints
+    its sweep counts and per-strategy average stability. Only a number
+    from the displayed list is accepted, so a report that was never shown
+    cannot be selected.
+
+    Returns
+    -------
+    int
+        Always 0; invalid selections and load failures are reported on
+        stdout only.
+    """
+    print("\n--- View Historical Reports ---\n")
+
+    reports = list_historical_reports()
+
+    if not reports:
+        print("No reports found in build/")
+        return 0
+
+    # Selection is validated against exactly what is displayed.
+    shown = reports[:10]
+
+    print("Available Reports:")
+    print("-" * 60)
+    for i, r in enumerate(shown, 1):
+        print(f" {i}. {r['timestamp']}")
+        print(f" Sweeps: {r['completed_sweeps']}/{r['total_sweeps']}")
+        print(f" Strategies: {', '.join(r['strategies'])}")
+        print()
+
+    print("Enter report number to view (or 'q' to quit):")
+    choice = input("> ").strip()
+
+    if choice.lower() in ("q", "quit"):
+        return 0
+
+    try:
+        idx = int(choice) - 1
+        if 0 <= idx < len(shown):
+            report = shown[idx]
+            result = view_historical_report(Path(report["path"]))
+
+            if result.success:
+                print(f"\n{result.message}")
+                print(f"Path: {report['path']}")
+
+                # Print summary
+                data = result.data
+                print("\nSummary:")
+                print(f" Total Sweeps: {data.get('total_sweeps', 0)}")
+                print(f" Completed: {data.get('completed_sweeps', 0)}")
+                print(f" Failed: {data.get('failed_sweeps', 0)}")
+
+                if "strategy_stats" in data:
+                    print("\nStrategy Stats:")
+                    for strategy, stats in data["strategy_stats"].items():
+                        avg = stats.get('avg_stability', 0)
+                        print(f" {strategy}: avg_stability={avg:.3f}")
+            else:
+                print(f"Failed to load report: {result.message}")
+        else:
+            print("Invalid report number")
+    except ValueError:
+        print("Invalid input")
+
+    return 0
+
+
+def cmd_sweep(args: argparse.Namespace) -> int:
+    """Handle the sweep command.
+
+    Parameters
+    ----------
+    args
+        Parsed arguments of the ``sweep`` subcommand.
+
+    Returns
+    -------
+    int
+        0 when the sweep reports success; 1 when it fails or the overlay
+        file cannot be loaded.
+    """
+    config = ExploratorySweepConfig(
+        strategies=args.strategies,
+        difficulties=args.difficulties,
+        seeds=args.seeds,
+        tick_budget=args.ticks,
+        output_dir=Path(args.output_dir),
+    )
+
+    if args.overlay:
+        try:
+            config.overlay = ConfigOverlay.from_yaml(Path(args.overlay))
+        except (OSError, ValueError) as exc:
+            # A missing or structurally invalid overlay is a user error:
+            # report it like the other commands do instead of a traceback.
+            sys.stderr.write(f"Failed to load overlay: {exc}\n")
+            return 1
+
+    result = run_exploratory_sweep(config, verbose=args.verbose)
+
+    if args.json:
+        print(json.dumps(result.to_dict(), indent=2))
+    else:
+        print(f"{'SUCCESS' if result.success else 'FAILED'}: {result.message}")
+        if result.output_path:
+            print(f"Output: {result.output_path}")
+
+    return 0 if result.success else 1
+
+
+def cmd_compare(args: argparse.Namespace) -> int:
+    """Run the ``compare`` subcommand and report its outcome.
+
+    Parameters
+    ----------
+    args
+        Parsed arguments of the ``compare`` subcommand.
+
+    Returns
+    -------
+    int
+        0 when the comparison reports success, 1 otherwise.
+    """
+    comparison = CompareConfigsConfig(
+        config_a_path=Path(args.config_a),
+        config_b_path=Path(args.config_b),
+        name_a=args.name_a,
+        name_b=args.name_b,
+        strategies=args.strategies,
+        seeds=args.seeds,
+        tick_budget=args.ticks,
+        output_dir=Path(args.output_dir),
+    )
+    outcome = run_config_comparison(comparison, verbose=args.verbose)
+
+    if args.json:
+        # Machine-readable mode: emit the whole result as JSON.
+        print(json.dumps(outcome.to_dict(), indent=2))
+    else:
+        status = "SUCCESS" if outcome.success else "FAILED"
+        print(f"{status}: {outcome.message}")
+        if outcome.output_path:
+            print(f"Output: {outcome.output_path}")
+
+    if outcome.success:
+        return 0
+    return 1
+
+
+def cmd_test_tuning(args: argparse.Namespace) -> int:
+    """Handle the test-tuning command.
+
+    Each ``--change key=value`` is parsed into a nested dictionary keyed by
+    the dot-separated segments of ``key``. Values containing ``.`` are
+    tried as float, others as int; ``true`` / ``false`` become booleans and
+    anything else stays a string. Malformed or conflicting changes are
+    reported on stderr and skipped.
+
+    Parameters
+    ----------
+    args
+        Parsed arguments of the ``test-tuning`` subcommand.
+
+    Returns
+    -------
+    int
+        0 when the tuning test reports success; 1 when it fails or no
+        valid change was supplied.
+    """
+    # Parse changes from command line
+    changes: dict[str, Any] = {}
+    for change in args.change or []:
+        if "=" not in change:
+            sys.stderr.write(f"Invalid change format: {change}\n")
+            continue
+
+        key, value = change.split("=", 1)
+        key = key.strip()
+        value = value.strip()
+
+        # Parse value
+        try:
+            if "." in value:
+                parsed: Any = float(value)
+            else:
+                parsed = int(value)
+        except ValueError:
+            if value.lower() in ("true", "false"):
+                parsed = value.lower() == "true"
+            else:
+                parsed = value
+
+        # Build nested dict
+        keys = key.split(".")
+        current: Any = changes
+        for k in keys[:-1]:
+            current = current.setdefault(k, {})
+            if not isinstance(current, dict):
+                # An earlier --change stored a plain value at this segment.
+                break
+        if not isinstance(current, dict):
+            sys.stderr.write(
+                f"Conflicting change: {change} "
+                f"('{k}' already holds a plain value)\n"
+            )
+            continue
+        current[keys[-1]] = parsed
+
+    if not changes:
+        sys.stderr.write("No valid changes specified\n")
+        return 1
+
+    config = TuningTestConfig(
+        name=args.name,
+        changes=changes,
+        description=args.description or f"Tuning test: {args.name}",
+        baseline_config=Path(args.baseline) if args.baseline else None,
+        strategies=args.strategies,
+        seeds=args.seeds,
+        tick_budget=args.ticks,
+        output_dir=Path(args.output_dir),
+    )
+
+    result = run_tuning_test(config, verbose=args.verbose)
+
+    if args.json:
+        print(json.dumps(result.to_dict(), indent=2))
+    else:
+        print(f"{'SUCCESS' if result.success else 'FAILED'}: {result.message}")
+        if result.output_path:
+            print(f"Output: {result.output_path}")
+
+    return 0 if result.success else 1
+
+
+def cmd_view_reports(args: argparse.Namespace) -> int:
+    """List historical reports, either as JSON or as a text table.
+
+    Parameters
+    ----------
+    args
+        Parsed arguments of the ``view-reports`` subcommand.
+
+    Returns
+    -------
+    int
+        Always 0.
+    """
+    found = list_historical_reports(
+        reports_dir=Path(args.reports_dir),
+        limit=args.limit,
+    )
+
+    if args.json:
+        print(json.dumps(found, indent=2))
+        return 0
+
+    if not found:
+        print("No reports found")
+        return 0
+
+    print("\nAvailable Reports:")
+    print("-" * 70)
+    for entry in found:
+        stamp = entry["timestamp"]
+        progress = f"{entry['completed_sweeps']}/{entry['total_sweeps']}"
+        strategy_list = ", ".join(entry["strategies"])
+        print(f" {stamp} | {progress} sweeps | {strategy_list}")
+        print(f" Path: {entry['path']}")
+
+    return 0
+
+
+def cmd_generate_report(args: argparse.Namespace) -> int:
+    """Handle the generate-report command.
+
+    Reads the sweep summary JSON named by ``args.input`` and writes an HTML
+    report to ``args.output``.
+
+    Parameters
+    ----------
+    args
+        Parsed arguments of the ``generate-report`` subcommand.
+
+    Returns
+    -------
+    int
+        0 on success; 1 when the input is missing, unreadable, or not
+        valid JSON.
+    """
+    input_path = Path(args.input)
+    if not input_path.exists():
+        sys.stderr.write(f"Input file not found: {input_path}\n")
+        return 1
+
+    try:
+        # JSON is UTF-8 by specification; do not depend on the locale default.
+        with open(input_path, encoding="utf-8") as f:
+            data = json.load(f)
+    except (json.JSONDecodeError, UnicodeDecodeError) as e:
+        sys.stderr.write(f"Failed to parse input: {e}\n")
+        return 1
+    except OSError as e:
+        # e.g. the path is a directory or is not readable.
+        sys.stderr.write(f"Failed to read input: {e}\n")
+        return 1
+
+    config = ReportViewerConfig(
+        title=args.title,
+        include_charts=not args.no_charts,
+        include_raw_data=args.include_raw,
+        theme=args.theme,
+    )
+
+    output_path = Path(args.output)
+    write_html_report(data, output_path, config)
+
+    print(f"Report generated: {output_path}")
+    return 0
+
+
+def main(argv: Sequence[str] | None = None) -> int:
+    """CLI entry point for Balance Studio.
+
+    Builds the argument parser with the ``sweep``, ``compare``,
+    ``test-tuning``, ``view-reports`` and ``generate-report`` subcommands
+    and dispatches to the matching ``cmd_*`` handler. When no subcommand
+    is given, the interactive menu is started instead.
+
+    Parameters
+    ----------
+    argv
+        Argument list handed to ``parser.parse_args``.
+
+    Returns
+    -------
+    int
+        Exit code returned by the selected handler or by interactive mode.
+    """
+    parser = argparse.ArgumentParser(
+        description="Balance Studio - Designer feedback loop and guided workflows",
+        # Raw formatter keeps the hand-formatted examples in the epilog intact.
+        formatter_class=argparse.RawDescriptionHelpFormatter,
+        epilog="""
+Examples:
+ # Run interactively
+ echoes-balance-studio
+
+ # Run exploratory sweep
+ echoes-balance-studio sweep --strategies balanced aggressive
+
+ # Compare two configurations
+ echoes-balance-studio compare \\
+ --config-a content/config \\
+ --config-b content/config/sweeps/difficulty-hard
+
+ # Test a tuning change
+ echoes-balance-studio test-tuning \\
+ --name boost_regen \\
+ --change economy.regen_scale=1.2 \\
+ --change environment.scarcity_pressure_cap=6000
+
+ # View historical reports
+ echoes-balance-studio view-reports
+
+ # Generate HTML report from sweep results
+ echoes-balance-studio generate-report \\
+ --input build/batch_sweeps/batch_sweep_summary.json \\
+ --output build/balance_report.html
+""",
+    )
+
+    # No subcommand is required: args.command stays None in that case.
+    subparsers = parser.add_subparsers(dest="command")
+
+    # Sweep command
+    sweep_parser = subparsers.add_parser(
+        "sweep", help="Run an exploratory sweep"
+    )
+    sweep_parser.add_argument(
+        "--strategies", "-s", nargs="+",
+        default=["balanced", "aggressive", "diplomatic"],
+        help="Strategies to test",
+    )
+    sweep_parser.add_argument(
+        "--difficulties", "-d", nargs="+",
+        default=["normal"],
+        help="Difficulty presets to test",
+    )
+    sweep_parser.add_argument(
+        "--seeds", nargs="+", type=int,
+        default=[42, 123, 456],
+        help="Random seeds",
+    )
+    sweep_parser.add_argument(
+        "--ticks", "-t", type=int, default=100,
+        help="Tick budget per sweep",
+    )
+    sweep_parser.add_argument(
+        "--output-dir", "-o", default="build/studio_sweeps",
+        help="Output directory",
+    )
+    sweep_parser.add_argument(
+        "--overlay", help="Path to config overlay YAML",
+    )
+    sweep_parser.add_argument(
+        "--json", action="store_true", help="Output as JSON",
+    )
+    sweep_parser.add_argument(
+        "--verbose", "-v", action="store_true", help="Verbose output",
+    )
+
+    # Compare command
+    compare_parser = subparsers.add_parser(
+        "compare", help="Compare two configurations"
+    )
+    compare_parser.add_argument(
+        "--config-a", "-a", required=True,
+        help="Path to first config directory",
+    )
+    compare_parser.add_argument(
+        "--config-b", "-b", required=True,
+        help="Path to second config directory",
+    )
+    compare_parser.add_argument(
+        "--name-a", default="Config A",
+        help="Display name for first config",
+    )
+    compare_parser.add_argument(
+        "--name-b", default="Config B",
+        help="Display name for second config",
+    )
+    compare_parser.add_argument(
+        "--strategies", "-s", nargs="+", default=["balanced"],
+        help="Strategies to test",
+    )
+    compare_parser.add_argument(
+        "--seeds", nargs="+", type=int, default=[42],
+        help="Random seeds",
+    )
+    compare_parser.add_argument(
+        "--ticks", "-t", type=int, default=100,
+        help="Tick budget per sweep",
+    )
+    compare_parser.add_argument(
+        "--output-dir", "-o", default="build/studio_compare",
+        help="Output directory",
+    )
+    compare_parser.add_argument(
+        "--json", action="store_true", help="Output as JSON",
+    )
+    compare_parser.add_argument(
+        "--verbose", "-v", action="store_true", help="Verbose output",
+    )
+
+    # Test-tuning command
+    tuning_parser = subparsers.add_parser(
+        "test-tuning", help="Test a tuning change"
+    )
+    tuning_parser.add_argument(
+        "--name", "-n", required=True,
+        help="Name for this tuning experiment",
+    )
+    # action="append" collects repeated --change flags into a list; the
+    # attribute is None when the flag is never given.
+    tuning_parser.add_argument(
+        "--change", "-c", action="append",
+        help="Config change as key=value (can be repeated)",
+    )
+    tuning_parser.add_argument(
+        "--description", help="Description of the changes",
+    )
+    tuning_parser.add_argument(
+        "--baseline", help="Path to baseline config directory",
+    )
+    tuning_parser.add_argument(
+        "--strategies", "-s", nargs="+", default=["balanced"],
+        help="Strategies to test",
+    )
+    tuning_parser.add_argument(
+        "--seeds", nargs="+", type=int, default=[42, 123],
+        help="Random seeds",
+    )
+    tuning_parser.add_argument(
+        "--ticks", "-t", type=int, default=100,
+        help="Tick budget per sweep",
+    )
+    tuning_parser.add_argument(
+        "--output-dir", "-o", default="build/studio_tuning",
+        help="Output directory",
+    )
+    tuning_parser.add_argument(
+        "--json", action="store_true", help="Output as JSON",
+    )
+    tuning_parser.add_argument(
+        "--verbose", "-v", action="store_true", help="Verbose output",
+    )
+
+    # View-reports command
+    reports_parser = subparsers.add_parser(
+        "view-reports", help="View historical reports"
+    )
+    reports_parser.add_argument(
+        "--reports-dir", default="build",
+        help="Directory to search for reports",
+    )
+    reports_parser.add_argument(
+        "--limit", "-l", type=int, default=20,
+        help="Maximum reports to list",
+    )
+    reports_parser.add_argument(
+        "--json", action="store_true", help="Output as JSON",
+    )
+
+    # Generate-report command
+    generate_parser = subparsers.add_parser(
+        "generate-report", help="Generate HTML report from sweep results"
+    )
+    generate_parser.add_argument(
+        "--input", "-i", required=True,
+        help="Path to sweep summary JSON",
+    )
+    generate_parser.add_argument(
+        "--output", "-o", required=True,
+        help="Output HTML file path",
+    )
+    generate_parser.add_argument(
+        "--title", default="Balance Studio Report",
+        help="Report title",
+    )
+    generate_parser.add_argument(
+        "--theme", choices=["light", "dark"], default="light",
+        help="Color theme",
+    )
+    generate_parser.add_argument(
+        "--no-charts", action="store_true",
+        help="Disable chart generation",
+    )
+    generate_parser.add_argument(
+        "--include-raw", action="store_true",
+        help="Include raw JSON data section",
+    )
+
+    args = parser.parse_args(argv)
+
+    # If no command, run interactive mode
+    if args.command is None:
+        return interactive_mode()
+
+    # Dispatch to command handler
+    # Keys match the subparser names registered above.
+    handlers = {
+        "sweep": cmd_sweep,
+        "compare": cmd_compare,
+        "test-tuning": cmd_test_tuning,
+        "view-reports": cmd_view_reports,
+        "generate-report": cmd_generate_report,
+    }
+
+    return handlers[args.command](args)
+
+
+if __name__ == "__main__": # pragma: no cover
+ raise SystemExit(main())
diff --git a/src/gengine/balance_studio/__init__.py b/src/gengine/balance_studio/__init__.py
new file mode 100644
index 00000000..edda0785
--- /dev/null
+++ b/src/gengine/balance_studio/__init__.py
@@ -0,0 +1,75 @@
+"""Balance Studio - Designer feedback loop and tooling.
+
+This module provides guided workflows for designers to iterate on game
+balance without requiring code changes.
+
+Components
+----------
+- overlays: YAML overlay system for configuration testing
+- workflows: Guided workflow implementations
+- report_viewer: Interactive HTML report generation
+
+CLI Tool
+--------
+The `echoes-balance-studio` CLI provides an interactive interface::
+
+ echoes-balance-studio
+
+Or use specific commands::
+
+ echoes-balance-studio sweep --strategies balanced aggressive
+ echoes-balance-studio compare --config-a path/to/a --config-b path/to/b
+ echoes-balance-studio test-tuning --name boost_regen \\
+ --change economy.regen_scale=1.2
+ echoes-balance-studio view-reports
+"""
+
+from .overlays import (
+ ConfigOverlay,
+ create_tuning_overlay,
+ deep_merge,
+ load_overlay_directory,
+ merge_overlays,
+)
+from .report_viewer import (
+ FilterState,
+ ReportViewerConfig,
+ generate_interactive_html,
+ write_html_report,
+)
+from .workflows import (
+ CompareConfigsConfig,
+ ExploratorySweepConfig,
+ TuningTestConfig,
+ WorkflowResult,
+ get_workflow_menu,
+ list_historical_reports,
+ run_config_comparison,
+ run_exploratory_sweep,
+ run_tuning_test,
+ view_historical_report,
+)
+
+__all__ = [
+ # Overlays
+ "ConfigOverlay",
+ "create_tuning_overlay",
+ "deep_merge",
+ "load_overlay_directory",
+ "merge_overlays",
+ # Report Viewer
+ "FilterState",
+ "ReportViewerConfig",
+ "generate_interactive_html",
+ "write_html_report",
+ # Workflows
+ "CompareConfigsConfig",
+ "ExploratorySweepConfig",
+ "TuningTestConfig",
+ "WorkflowResult",
+ "get_workflow_menu",
+ "list_historical_reports",
+ "run_config_comparison",
+ "run_exploratory_sweep",
+ "run_tuning_test",
+ "view_historical_report",
+]
diff --git a/src/gengine/balance_studio/cli.py b/src/gengine/balance_studio/cli.py
new file mode 100644
index 00000000..0bde6471
--- /dev/null
+++ b/src/gengine/balance_studio/cli.py
@@ -0,0 +1,51 @@
+"""CLI entry point for Balance Studio.
+
+This module provides the main() function that is registered as the
+`echoes-balance-studio` command in pyproject.toml.
+"""
+
+from __future__ import annotations
+
+import sys
+from pathlib import Path
+from typing import Sequence
+
+# Import and re-export main from the script module
+# We need to load it dynamically to avoid circular imports
+
+
+def main(argv: Sequence[str] | None = None) -> int:
+    """Entry point for the echoes-balance-studio command.
+
+    Loads ``scripts/echoes_balance_studio.py`` from the directory three
+    levels above this package by file path and delegates to its ``main``.
+    The script is not part of the package, so it may be absent when the
+    project is not run from a source checkout; that case is reported on
+    stderr instead of raising.
+
+    Parameters
+    ----------
+    argv
+        Command-line arguments, passed through to the script's ``main``.
+
+    Returns
+    -------
+    int
+        Exit code of the script's ``main``, or 1 when the script cannot be
+        located or loaded.
+    """
+    # Import the main function from the script
+    from importlib import util
+
+    script_dir = Path(__file__).resolve().parents[3] / "scripts"
+    script_path = script_dir / "echoes_balance_studio.py"
+
+    # spec_from_file_location does not verify that the file exists, so
+    # check explicitly; otherwise exec_module raises FileNotFoundError.
+    if not script_path.is_file():
+        sys.stderr.write(f"Failed to load Balance Studio script: {script_path}\n")
+        return 1
+
+    spec = util.spec_from_file_location("echoes_balance_studio", script_path)
+    if spec is None or spec.loader is None:
+        sys.stderr.write(f"Failed to load Balance Studio script: {script_path}\n")
+        return 1
+
+    module = util.module_from_spec(spec)
+    sys.modules.setdefault("echoes_balance_studio", module)
+    spec.loader.exec_module(module)
+
+    return module.main(argv)
+
+
+if __name__ == "__main__": # pragma: no cover
+ raise SystemExit(main())
diff --git a/src/gengine/balance_studio/overlays.py b/src/gengine/balance_studio/overlays.py
new file mode 100644
index 00000000..28c478be
--- /dev/null
+++ b/src/gengine/balance_studio/overlays.py
@@ -0,0 +1,293 @@
+"""YAML overlay system for configuration testing.
+
+Allows designers to create configuration overlays that are merged with base
+configs, enabling testing of tuning changes without modifying base files.
+
+Examples
+--------
+Create and apply an overlay::
+
+ overlay = ConfigOverlay.from_yaml(Path("my_overlay.yml"))
+ merged_config = overlay.apply(base_config)
+
+Load overlay directory::
+
+ overlays = load_overlay_directory(Path("content/config/overlays"))
+"""
+
+from __future__ import annotations
+
+import copy
+from dataclasses import dataclass, field
+from pathlib import Path
+from typing import Any
+
+import yaml
+
+
+@dataclass
+class ConfigOverlay:
+    """A configuration overlay that can be merged with base configs.
+
+    Attributes
+    ----------
+    name
+        Name of the overlay for display purposes.
+    description
+        Human-readable description of what this overlay changes.
+    source_path
+        Path to the source YAML file, or ``None`` for in-memory overlays.
+    overrides
+        Dictionary of config keys to override values.
+    metadata
+        Additional metadata about the overlay.
+    """
+
+    name: str
+    description: str = ""
+    source_path: Path | None = None
+    overrides: dict[str, Any] = field(default_factory=dict)
+    metadata: dict[str, Any] = field(default_factory=dict)
+
+    @classmethod
+    def from_yaml(cls, path: Path) -> ConfigOverlay:
+        """Load an overlay from a YAML file.
+
+        The file is expected to hold a mapping with optional ``name``,
+        ``description``, ``overrides`` and ``metadata`` keys. ``name``
+        defaults to the file stem. An empty file, or an ``overrides`` /
+        ``metadata`` key with no value, is treated as empty.
+
+        Parameters
+        ----------
+        path
+            Path to the YAML overlay file.
+
+        Returns
+        -------
+        ConfigOverlay
+            Loaded overlay.
+
+        Raises
+        ------
+        FileNotFoundError
+            If the file does not exist.
+        ValueError
+            If the top-level YAML value or ``overrides`` is not a mapping.
+        yaml.YAMLError
+            If the file is not valid YAML.
+        """
+        if not path.exists():
+            raise FileNotFoundError(f"Overlay file not found: {path}")
+
+        with open(path, encoding="utf-8") as f:
+            data = yaml.safe_load(f) or {}
+
+        if not isinstance(data, dict):
+            raise ValueError(f"Invalid overlay format in {path}: expected dict")
+
+        # A key written with no value (``overrides:``) parses as None;
+        # normalise it so apply() always receives a dict.
+        overrides = data.get("overrides")
+        if overrides is None:
+            overrides = {}
+        if not isinstance(overrides, dict):
+            raise ValueError(
+                f"Invalid overlay format in {path}: 'overrides' must be a dict"
+            )
+
+        metadata = data.get("metadata")
+        if metadata is None:
+            metadata = {}
+
+        return cls(
+            name=data.get("name", path.stem),
+            description=data.get("description", ""),
+            source_path=path,
+            overrides=overrides,
+            metadata=metadata,
+        )
+
+    @classmethod
+    def from_dict(cls, data: dict[str, Any], name: str = "inline") -> ConfigOverlay:
+        """Create an overlay from a dictionary.
+
+        When ``data`` has no ``overrides`` key, the whole dictionary is
+        used as the overrides.
+
+        Parameters
+        ----------
+        data
+            Dictionary with overlay structure.
+        name
+            Name for the overlay, used when ``data`` has no ``name`` key.
+
+        Returns
+        -------
+        ConfigOverlay
+            Created overlay.
+        """
+        return cls(
+            name=data.get("name", name),
+            description=data.get("description", ""),
+            overrides=data.get("overrides", data),
+            metadata=data.get("metadata", {}),
+        )
+
+    def apply(self, base_config: dict[str, Any]) -> dict[str, Any]:
+        """Apply this overlay to a base configuration.
+
+        Performs a deep merge where overlay values override base values.
+
+        Parameters
+        ----------
+        base_config
+            Base configuration dictionary.
+
+        Returns
+        -------
+        dict[str, Any]
+            Merged configuration with overlays applied.
+        """
+        return deep_merge(base_config, self.overrides)
+
+    def to_dict(self) -> dict[str, Any]:
+        """Serialize overlay to dictionary.
+
+        Returns
+        -------
+        dict[str, Any]
+            Serialized overlay; ``source_path`` is a string or ``None``.
+        """
+        return {
+            "name": self.name,
+            "description": self.description,
+            "source_path": str(self.source_path) if self.source_path else None,
+            "overrides": self.overrides,
+            "metadata": self.metadata,
+        }
+
+    def to_yaml(self, path: Path) -> None:
+        """Write overlay to a YAML file.
+
+        Parent directories are created as needed. ``source_path`` is not
+        written.
+
+        Parameters
+        ----------
+        path
+            Path to write the overlay file.
+        """
+        data = {
+            "name": self.name,
+            "description": self.description,
+            "overrides": self.overrides,
+            "metadata": self.metadata,
+        }
+        path.parent.mkdir(parents=True, exist_ok=True)
+        with open(path, "w", encoding="utf-8") as f:
+            yaml.safe_dump(data, f, default_flow_style=False, sort_keys=False)
+
+
+def deep_merge(base: dict[str, Any], override: dict[str, Any]) -> dict[str, Any]:
+    """Return a new dict combining ``base`` with ``override``.
+
+    Where both sides hold a dict under the same key the two are merged
+    recursively; in every other case the override value replaces the base
+    value. Neither input is modified, and the result shares no mutable
+    objects with them.
+
+    Parameters
+    ----------
+    base
+        Base dictionary.
+    override
+        Dictionary whose values take precedence.
+
+    Returns
+    -------
+    dict[str, Any]
+        Merged dictionary.
+    """
+    merged = copy.deepcopy(base)
+
+    for name, incoming in override.items():
+        existing = merged.get(name)
+        if isinstance(existing, dict) and isinstance(incoming, dict):
+            merged[name] = deep_merge(existing, incoming)
+            continue
+        merged[name] = copy.deepcopy(incoming)
+
+    return merged
+
+
+def load_overlay_directory(directory: Path) -> list[ConfigOverlay]:
+    """Load every overlay file found directly inside ``directory``.
+
+    ``*.yml`` files are loaded first, then ``*.yaml`` files, each group in
+    sorted path order. Files that raise ``ValueError`` or
+    ``yaml.YAMLError`` while loading are skipped.
+
+    Parameters
+    ----------
+    directory
+        Directory containing overlay YAML files.
+
+    Returns
+    -------
+    list[ConfigOverlay]
+        Loaded overlays; empty when the directory does not exist.
+    """
+    if not directory.exists():
+        return []
+
+    loaded: list[ConfigOverlay] = []
+
+    for pattern in ("*.yml", "*.yaml"):
+        for candidate in sorted(directory.glob(pattern)):
+            try:
+                overlay = ConfigOverlay.from_yaml(candidate)
+            except (ValueError, yaml.YAMLError):
+                continue
+            loaded.append(overlay)
+
+    return loaded
+
+
+def create_tuning_overlay(
+    name: str,
+    changes: dict[str, Any],
+    description: str = "",
+) -> ConfigOverlay:
+    """Wrap a dictionary of changes in an overlay tagged as an experiment.
+
+    The returned overlay carries ``{"type": "tuning_experiment"}`` as its
+    metadata and uses ``changes`` as its overrides.
+
+    Parameters
+    ----------
+    name
+        Name for the overlay.
+    changes
+        Dictionary of configuration changes to apply.
+    description
+        Optional description of the changes.
+
+    Returns
+    -------
+    ConfigOverlay
+        Created overlay.
+
+    Examples
+    --------
+    >>> overlay = create_tuning_overlay(
+    ...     "aggressive_economy",
+    ...     {"economy": {"regen_scale": 1.2}},
+    ...     "Test higher resource regeneration"
+    ... )
+    """
+    overlay_fields: dict[str, Any] = {
+        "name": name,
+        "description": description,
+        "overrides": changes,
+        "metadata": {"type": "tuning_experiment"},
+    }
+    return ConfigOverlay(**overlay_fields)
+
+
+def merge_overlays(overlays: list[ConfigOverlay]) -> ConfigOverlay:
+    """Fold several overlays into one.
+
+    Overrides are deep-merged in list order, so later overlays win on
+    conflicting keys. The result is named after its parts joined with
+    ``" + "`` and lists them under ``metadata["merged_from"]``.
+
+    Parameters
+    ----------
+    overlays
+        List of overlays to merge.
+
+    Returns
+    -------
+    ConfigOverlay
+        Merged overlay, or an overlay named ``"empty"`` when the list is
+        empty.
+    """
+    if not overlays:
+        return ConfigOverlay(name="empty")
+
+    part_names: list[str] = [overlay.name for overlay in overlays]
+
+    combined: dict[str, Any] = {}
+    for overlay in overlays:
+        combined = deep_merge(combined, overlay.overrides)
+
+    joined = ", ".join(part_names)
+    return ConfigOverlay(
+        name=" + ".join(part_names),
+        description=f"Merged from: {joined}",
+        overrides=combined,
+        metadata={"merged_from": part_names},
+    )
diff --git a/src/gengine/balance_studio/report_viewer.py b/src/gengine/balance_studio/report_viewer.py
new file mode 100644
index 00000000..00be2fb6
--- /dev/null
+++ b/src/gengine/balance_studio/report_viewer.py
@@ -0,0 +1,706 @@
+"""Interactive HTML report viewer for balance analysis.
+
+Generates an HTML dashboard that allows filtering, sorting, and drilling
+into sweep results without requiring code changes.
+"""
+
+from __future__ import annotations
+
+import base64
+import io
+import json
+from dataclasses import dataclass, field
+from pathlib import Path
+from typing import Any
+
+# Optional matplotlib import
+try:
+ import matplotlib
+
+ matplotlib.use("Agg")
+ import matplotlib.pyplot as plt
+
+ HAS_MATPLOTLIB = True
+except ImportError:
+ HAS_MATPLOTLIB = False
+
+
+@dataclass
+class ReportViewerConfig:
+    """Options controlling how the HTML report is rendered.
+
+    Attributes
+    ----------
+    title
+        Title text for the generated report.
+    include_charts
+        When true, the chart generators are invoked and their images embedded.
+    include_raw_data
+        When true, the raw data section is appended to the report.
+    theme
+        Name of the colour theme, ``"light"`` or ``"dark"``.
+    """
+
+    # Defaults: charts on, raw data off, light theme.
+    title: str = "Balance Studio Report"
+    include_charts: bool = True
+    include_raw_data: bool = False
+    theme: str = "light"
+
+
+@dataclass
+class FilterState:
+    """Filter selections applied when viewing a report.
+
+    Attributes
+    ----------
+    strategies
+        Strategies selected for display.
+    difficulties
+        Difficulties selected for display.
+    min_stability
+        Lower stability bound.
+    max_stability
+        Upper stability bound.
+    show_errors
+        Whether failed sweeps are shown.
+    """
+
+    # default_factory gives every instance its own list rather than a shared one.
+    strategies: list[str] = field(default_factory=list)
+    difficulties: list[str] = field(default_factory=list)
+    min_stability: float = 0.0
+    max_stability: float = 1.0
+    show_errors: bool = True
+
+
+def generate_strategy_chart(stats: dict[str, Any]) -> str | None:
+    """Render average stability per strategy as a base64-encoded PNG bar chart.
+
+    Parameters
+    ----------
+    stats
+        Mapping of strategy name to its statistics mapping. Only the
+        ``"avg_stability"`` entry is read; a missing entry is plotted as 0.
+
+    Returns
+    -------
+    str | None
+        Base64-encoded PNG image, or None if matplotlib is unavailable or
+        ``stats`` is empty.
+    """
+    if not HAS_MATPLOTLIB or not stats:
+        return None
+
+    strategies = list(stats.keys())
+    avg_stabilities = [stats[s].get("avg_stability", 0) for s in strategies]
+
+    fig, ax = plt.subplots(figsize=(8, 5))
+    try:
+        bars = ax.bar(
+            strategies, avg_stabilities, color=["#3498db", "#e74c3c", "#2ecc71"]
+        )
+
+        ax.set_xlabel("Strategy")
+        ax.set_ylabel("Average Stability")
+        ax.set_title("Strategy Performance Comparison")
+        ax.set_ylim(0, 1)
+
+        # Print each bar's value just above its top edge.
+        for bar, val in zip(bars, avg_stabilities, strict=True):
+            ax.text(
+                bar.get_x() + bar.get_width() / 2,
+                val + 0.02,
+                f"{val:.2f}",
+                ha="center",
+                fontsize=10,
+            )
+
+        buf = io.BytesIO()
+        fig.savefig(buf, format="png", bbox_inches="tight", dpi=100)
+    finally:
+        # pyplot keeps a figure registered until it is closed, so release it
+        # even when plotting or saving raises.
+        plt.close(fig)
+
+    return base64.b64encode(buf.getvalue()).decode("utf-8")
+
+
+def generate_difficulty_chart(stats: dict[str, Any]) -> str | None:
+    """Render average stability per difficulty as a base64-encoded PNG bar chart.
+
+    Parameters
+    ----------
+    stats
+        Mapping of difficulty name to its statistics mapping. Only the
+        ``"avg_stability"`` entry is read; a missing entry is plotted as 0.
+        Bars are drawn in the mapping's iteration order.
+
+    Returns
+    -------
+    str | None
+        Base64-encoded PNG image, or None if matplotlib is unavailable or
+        ``stats`` is empty.
+    """
+    if not HAS_MATPLOTLIB or not stats:
+        return None
+
+    difficulties = list(stats.keys())
+    avg_stabilities = [stats[d].get("avg_stability", 0) for d in difficulties]
+
+    # Sample RdYlGn from 1.0 (green) for the first entry down to 0.0 (red) for
+    # the last; max(..., 1) avoids dividing by zero with a single difficulty.
+    colors = plt.cm.RdYlGn(
+        [1.0 - i / max(len(difficulties) - 1, 1) for i in range(len(difficulties))]
+    )
+
+    fig, ax = plt.subplots(figsize=(8, 5))
+    try:
+        bars = ax.bar(difficulties, avg_stabilities, color=colors)
+
+        ax.set_xlabel("Difficulty")
+        ax.set_ylabel("Average Stability")
+        ax.set_title("Difficulty Level Impact")
+        ax.set_ylim(0, 1)
+
+        # Print each bar's value just above its top edge.
+        for bar, val in zip(bars, avg_stabilities, strict=True):
+            ax.text(
+                bar.get_x() + bar.get_width() / 2,
+                val + 0.02,
+                f"{val:.2f}",
+                ha="center",
+                fontsize=10,
+            )
+
+        buf = io.BytesIO()
+        fig.savefig(buf, format="png", bbox_inches="tight", dpi=100)
+    finally:
+        # pyplot keeps a figure registered until it is closed, so release it
+        # even when plotting or saving raises.
+        plt.close(fig)
+
+    return base64.b64encode(buf.getvalue()).decode("utf-8")
+
+
+def generate_stability_distribution_chart(sweeps: list[dict[str, Any]]) -> str | None:
+    """Render a histogram of final stability values as a base64-encoded PNG.
+
+    Parameters
+    ----------
+    sweeps
+        List of sweep results. Entries whose ``"error"`` is not None are
+        skipped; a missing ``results.final_stability`` counts as 0.
+
+    Returns
+    -------
+    str | None
+        Base64-encoded PNG image, or None if matplotlib is unavailable,
+        ``sweeps`` is empty, or no sweep is free of errors.
+    """
+    if not HAS_MATPLOTLIB or not sweeps:
+        return None
+
+    stabilities = [
+        s.get("results", {}).get("final_stability", 0)
+        for s in sweeps
+        if s.get("error") is None
+    ]
+
+    if not stabilities:
+        return None
+
+    fig, ax = plt.subplots(figsize=(8, 5))
+    try:
+        ax.hist(stabilities, bins=20, edgecolor="white", color="#3498db", alpha=0.7)
+
+        ax.set_xlabel("Final Stability")
+        ax.set_ylabel("Count")
+        ax.set_title("Stability Distribution Across All Sweeps")
+        # Dashed marker at the 0.5 stability level, labelled as the win threshold.
+        ax.axvline(0.5, color="red", linestyle="--", label="Win Threshold")
+        ax.legend()
+
+        buf = io.BytesIO()
+        fig.savefig(buf, format="png", bbox_inches="tight", dpi=100)
+    finally:
+        # pyplot keeps a figure registered until it is closed, so release it
+        # even when plotting or saving raises.
+        plt.close(fig)
+
+    return base64.b64encode(buf.getvalue()).decode("utf-8")
+
+
+def generate_interactive_html(
+ data: dict[str, Any],
+ config: ReportViewerConfig | None = None,
+) -> str:
+ """Generate an interactive HTML report from sweep data.
+
+ The report text is assembled in a single f-string from summary counters,
+ per-strategy and per-difficulty tables, optional chart sections, the
+ individual sweep table and, when ``config.include_raw_data`` is set, a raw
+ data section.
+
+ Parameters
+ ----------
+ data
+ Sweep results data (from batch_sweep_summary.json). Keys read here:
+ ``strategy_stats``, ``difficulty_stats``, ``sweeps``, ``metadata``,
+ ``total_sweeps``, ``completed_sweeps``, ``failed_sweeps`` and
+ ``total_duration_seconds``; each falls back to an empty or zero default.
+ config
+ Report viewer configuration. A default ``ReportViewerConfig`` is used
+ when omitted.
+
+ Returns
+ -------
+ str
+ Complete HTML document.
+ """
+ if config is None:
+ config = ReportViewerConfig()
+
+ # Generate charts
+ # Each generator is annotated to return a base64 PNG string or None; the
+ # dict stays empty when charts are disabled, so charts.get(...) gives None.
+ charts: dict[str, str | None] = {}
+ if config.include_charts:
+ charts["strategy"] = generate_strategy_chart(data.get("strategy_stats", {}))
+ charts["difficulty"] = generate_difficulty_chart(
+ data.get("difficulty_stats", {})
+ )
+ charts["distribution"] = generate_stability_distribution_chart(
+ data.get("sweeps", [])
+ )
+
+ # Extract unique values for filters
+ # Sweeps without a strategy/difficulty parameter contribute an empty string.
+ sweeps = data.get("sweeps", [])
+ strategies = sorted(
+ set(s.get("parameters", {}).get("strategy", "") for s in sweeps)
+ )
+ difficulties = sorted(
+ set(s.get("parameters", {}).get("difficulty", "") for s in sweeps)
+ )
+
+ # Build HTML
+ theme_colors = _get_theme_colors(config.theme)
+ metadata = data.get("metadata", {})
+ strategy_stats = data.get("strategy_stats", {})
+ difficulty_stats = data.get("difficulty_stats", {})
+
+ # CSS styles (broken into lines for readability)
+ font_family = (
+ "-apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif"
+ )
+ section_h2_style = (
+ "margin-top: 0; border-bottom: 1px solid var(--border-color); "
+ "padding-bottom: 10px"
+ )
+ details_style = (
+ "display: none; padding: 10px; background: rgba(0,0,0,0.05); "
+ "margin-top: 10px; border-radius: 4px"
+ )
+
+ # NOTE(review): strategies, difficulties, theme_colors, metadata and the
+ # three style strings above are not referenced in the template text visible
+ # below; presumably the complete template uses them - confirm.
+ # NOTE(review): values such as config.title are interpolated as-is in the
+ # visible template; confirm whether escaping is needed for untrusted input.
+ html = f"""
+
+
+
+
+ {config.title}
+
+
+
+
+
+
+
+
+
{data.get('total_sweeps', 0)}
+
Total Sweeps
+
+
+
{data.get('completed_sweeps', 0)}
+
Completed
+
+
+
{data.get('failed_sweeps', 0)}
+
Failed
+
+
+
{data.get('total_duration_seconds', 0):.1f}s
+
Total Duration
+
+
+
+
+
Strategy Performance
+
+
+
+ | Strategy |
+ Count |
+ Completed |
+ Avg Stability |
+ Min |
+ Max |
+
+
+
+ {_generate_strategy_rows(strategy_stats)}
+
+
+ {_generate_chart_section(charts.get('strategy'), 'Strategy Comparison')}
+
+
+
+
Difficulty Analysis
+
+
+
+ | Difficulty |
+ Count |
+ Completed |
+ Avg Stability |
+ Min |
+ Max |
+
+
+
+ {_generate_difficulty_rows(difficulty_stats)}
+
+
+ {_generate_chart_section(charts.get('difficulty'), 'Difficulty Impact')}
+
+
+ {_generate_distribution_section(charts.get('distribution'))}
+
+
+
Individual Sweeps
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ | ID |
+ Strategy |
+ Difficulty |
+ Seed |
+ Stability |
+ Actions |
+ Status |
+ Details |
+
+
+
+ {_generate_sweep_rows(sweeps)}
+
+
+
+
+ {_generate_raw_data_section(data) if config.include_raw_data else ''}
+
+
+
+
+"""
+
+ return html
+
+
+def _get_theme_colors(theme: str) -> dict[str, str]:
+    """Return the colour palette (bg, text, card, border) for ``theme``.
+
+    ``"dark"`` selects the dark palette; any other value falls back to the
+    light palette. A new dict is built on every call.
+    """
+    dark_palette = {
+        "bg": "#1a1a2e",
+        "text": "#eee",
+        "card": "#16213e",
+        "border": "#0f3460",
+    }
+    light_palette = {
+        "bg": "#f5f6fa",
+        "text": "#2c3e50",
+        "card": "#ffffff",
+        "border": "#dcdde1",
+    }
+    return dark_palette if theme == "dark" else light_palette
+
+
+def _generate_strategy_rows(stats: dict[str, Any]) -> str:
+ """Generate table rows for strategy stats.
+
+ Parameters
+ ----------
+ stats
+ Mapping of strategy name to its statistics mapping. The keys read are
+ ``avg_stability``, ``count``, ``completed``, ``min_stability`` and
+ ``max_stability``, each defaulting to 0 when absent.
+
+ Returns
+ -------
+ str
+ One rendered row per strategy, joined with newlines; an empty string
+ when ``stats`` is empty.
+ """
+ rows = []
+ for strategy, s in stats.items():
+ avg = s.get("avg_stability", 0)
+ # An average of at least 0.5 is flagged "status-ok", otherwise "status-warn".
+ # NOTE(review): status_class is not referenced in the template text visible
+ # below; presumably the complete row markup uses it - confirm.
+ status_class = "status-ok" if avg >= 0.5 else "status-warn"
+ rows.append(
+ f"""
+ | {strategy} |
+ {s.get('count', 0)} |
+ {s.get('completed', 0)} |
+ {avg:.3f} |
+ {s.get('min_stability', 0):.3f} |
+ {s.get('max_stability', 0):.3f} |
+
"""
+ )
+ return "\n".join(rows)
+
+
+def _generate_difficulty_rows(stats: dict[str, Any]) -> str:
+ """Generate table rows for difficulty stats.
+
+ Parameters
+ ----------
+ stats
+ Mapping of difficulty name to its statistics mapping. The keys read are
+ ``avg_stability``, ``count``, ``completed``, ``min_stability`` and
+ ``max_stability``, each defaulting to 0 when absent.
+
+ Returns
+ -------
+ str
+ One rendered row per difficulty, joined with newlines; an empty string
+ when ``stats`` is empty.
+ """
+ rows = []
+ for difficulty, s in stats.items():
+ avg = s.get("avg_stability", 0)
+ # An average of at least 0.5 is flagged "status-ok", otherwise "status-warn".
+ # NOTE(review): status_class is not referenced in the template text visible
+ # below; presumably the complete row markup uses it - confirm.
+ status_class = "status-ok" if avg >= 0.5 else "status-warn"
+ rows.append(
+ f"""
+ | {difficulty} |
+ {s.get('count', 0)} |
+ {s.get('completed', 0)} |
+ {avg:.3f} |
+ {s.get('min_stability', 0):.3f} |
+ {s.get('max_stability', 0):.3f} |
+
"""
+ )
+ return "\n".join(rows)
+
+
+def _generate_sweep_rows(sweeps: list[dict[str, Any]]) -> str:
+ """Generate table rows for individual sweeps."""
+ rows = []
+ for sweep in sweeps:
+ sweep_id = sweep.get("sweep_id", "?")
+ params = sweep.get("parameters", {})
+ results = sweep.get("results", {})
+ error = sweep.get("error")
+
+ stability = results.get("final_stability", 0)
+ if error:
+ status = 'Error'
+ elif stability >= 0.5:
+ status = 'Pass'
+ else:
+ status = 'Low'
+
+ strategy = params.get('strategy', '')
+ difficulty = params.get('difficulty', '')
+ btn = f'