diff --git a/CHECKS.md b/CHECKS.md index 746eb0c..a3075f7 100644 --- a/CHECKS.md +++ b/CHECKS.md @@ -64,6 +64,14 @@ Confirm that the execution phase creates a sandbox for each check and runs the a Inspect the execution phase. Confirm that checks are dispatched concurrently — for example via a `JoinSet`, `FuturesUnordered`, or per-check `tokio::spawn` — and awaited together, rather than run inside a blocking loop that starts and awaits one check before beginning the next. The check fails if check execution is strictly sequential. +# Requirement Concurrency Defaults To The Host's Core Count + +The concurrency cap is user-configurable (a `--concurrency` flag, layered the same way as `--provider`/`--model`/`--effort`/`--executor`), but absent any override its default must equal the number of CPU cores available on the machine running `multi check` — not a hardcoded constant. A fixed default either strands cores on big machines or overcommits small ones. + +## Check Default Concurrency Equals Available Parallelism + +Inspect how the default check concurrency is computed. Confirm that, with no `--concurrency` flag, no `MULTI_CHECKS_CONCURRENCY` environment variable, and no `checks.concurrency` config-file value set, the resolved concurrency is derived from the host's available parallelism (for example via `std::thread::available_parallelism`) rather than a fixed literal such as `2`. The check fails if the default concurrency is a hardcoded number instead of a value computed from the running machine's core count. + # Requirement Authoring Errors Are Actionable When a `CHECKS.md` file is malformed, the tool must tell the author exactly what is wrong and where, instead of failing opaquely. Clear diagnostics are what make the format usable. 
diff --git a/guides/checks.md b/guides/checks.md index 114a34d..e785b24 100644 --- a/guides/checks.md +++ b/guides/checks.md @@ -159,22 +159,25 @@ Glob, plus the judge tool) — a verification agent observes, it does not mutate ## ⚙️ Configuration -The default **provider**, **model**, **effort**, and **executor** are resolved -from three sources, in order of precedence (highest wins): +The default **provider**, **model**, **effort**, **executor**, and +**concurrency** are resolved from three sources, in order of precedence +(highest wins): -1. **Flags** — `--provider`, `--model`, `--effort`, `--executor` on `multi check`. +1. **Flags** — `--provider`, `--model`, `--effort`, `--executor`, + `--concurrency` on `multi check`. 2. **Environment** — `MULTI_`-prefixed vars mapped into the `checks` namespace, e.g. `MULTI_CHECKS_MODEL`, `MULTI_CHECKS_PROVIDER`, `MULTI_CHECKS_EFFORT`, - `MULTI_CHECKS_EXECUTOR`. + `MULTI_CHECKS_EXECUTOR`, `MULTI_CHECKS_CONCURRENCY`. 3. **Config file** — the `[checks]` table of `MultiTool.toml` (or `.json` / `.jsonc`), discovered up the directory tree like any MultiTool manifest. ```toml [checks] -provider = "anthropic" # anthropic | openai | gemini -model = "claude-sonnet-4-6" # must be a known model ID for the provider -effort = "low" # low | medium | high → thinking-token budget -executor = "cersei" # cersei (in-process, default) | claude (fallback) +provider = "anthropic" # anthropic | openai | gemini +model = "claude-sonnet-4-6" # must be a known model ID for the provider +effort = "low" # low | medium | high → thinking-token budget +executor = "cersei" # cersei (in-process, default) | claude (fallback) +concurrency = 8 # checks run at once; must be > 0 (default: CPU core count) # optional, non-secret base-URL overrides per provider [checks.providers.anthropic] @@ -194,6 +197,11 @@ CLI). 
`claude` is the legacy `claude -p` shell-out fallback, kept selectable for migration while the in-process path is validated; it requires the `claude` CLI on your `PATH` and will be removed once cersei is proven out. +The **`concurrency`** setting caps how many checks run at once; it must be a +positive integer (`0` is rejected with a clear error). Its default matches the +number of CPU cores available on the machine running `multi check`, so a suite +fans out to use the whole machine rather than leaving cores idle. + **Credentials are environment-only.** API keys are read directly from each provider's native variable and never live in the config file or under the `MULTI_` prefix: diff --git a/src/checks/config/mod.rs b/src/checks/config/mod.rs index 083c7a7..cea1930 100644 --- a/src/checks/config/mod.rs +++ b/src/checks/config/mod.rs @@ -17,6 +17,7 @@ mod models; mod providers; mod schema; +use std::num::NonZeroUsize; use std::time::Duration; use figment::{ @@ -32,15 +33,22 @@ use crate::checks::executor::claude::ClaudeExecutor; pub use providers::{ProviderFactory, ProviderRegistry}; pub use schema::{CliOverrides, Effort, ExecutorKind, ProviderKind}; -/// Maximum number of checks executed concurrently. A small fan-out gives each -/// (CPU-heavy) reasoning agent enough cores to finish promptly. -const DEFAULT_CONCURRENCY: usize = 2; /// Per-agent wall-clock timeout. Generous: the heaviest reasoning checks can /// take a few minutes under contention before they report. const DEFAULT_AGENT_TIMEOUT: Duration = Duration::from_secs(240); /// How many times to (re)run a check whose agent fails to report. const DEFAULT_MAX_ATTEMPTS: usize = 3; +/// Default number of checks executed concurrently: one per available CPU core, +/// so a check suite fans out to use the whole machine rather than leaving cores +/// idle. Falls back to `1` on the rare platform where the count can't be +/// determined.
+fn default_concurrency() -> usize { + std::thread::available_parallelism() + .map(NonZeroUsize::get) + .unwrap_or(1) +} + /// The resolved configuration for a `multi check` run. #[derive(Debug, Clone)] pub struct Config { @@ -56,7 +64,8 @@ pub struct Config { pub effort: Effort, /// Which execution engine runs each check (default: in-process cersei). pub executor: ExecutorKind, - /// Maximum number of checks executed concurrently. + /// Maximum number of checks executed concurrently (default: the number of + /// available CPU cores; see [`default_concurrency`]). pub concurrency: usize, /// Per-agent wall-clock timeout (reaps an agent that hangs before reporting). pub agent_timeout: Duration, @@ -148,6 +157,7 @@ pub fn load(overrides: CliOverrides) -> Result { .unwrap_or_else(|| models::default_model(provider).to_string()); let effort = checks.effort.unwrap_or(Effort::Low); let executor = checks.executor.unwrap_or(ExecutorKind::Cersei); + let concurrency = checks.concurrency.unwrap_or_else(default_concurrency); if !models::is_valid_model(provider, &model) { return Err(miette!( @@ -157,6 +167,10 @@ pub fn load(overrides: CliOverrides) -> Result { )); } + if concurrency == 0 { + return Err(miette!("checks.concurrency must be greater than 0")); + } + // Build one handle per provider whose credential is present, then require // that the *selected* provider actually resolved to an available handle. 
let registry = providers::build_registry(&checks.providers)?; @@ -181,7 +195,7 @@ pub fn load(overrides: CliOverrides) -> Result { model, effort, executor, - concurrency: DEFAULT_CONCURRENCY, + concurrency, agent_timeout: DEFAULT_AGENT_TIMEOUT, max_attempts: DEFAULT_MAX_ATTEMPTS, }; @@ -207,7 +221,7 @@ pub fn configuration() -> Config { model: models::default_model(provider).to_string(), effort: Effort::Low, executor: ExecutorKind::Cersei, - concurrency: DEFAULT_CONCURRENCY, + concurrency: default_concurrency(), agent_timeout: DEFAULT_AGENT_TIMEOUT, max_attempts: DEFAULT_MAX_ATTEMPTS, } @@ -230,6 +244,7 @@ mod tests { model: Some(model.to_string()), effort: Some(Effort::Low), executor: None, + concurrency: None, providers: ProvidersSection::default(), }, } @@ -242,6 +257,8 @@ mod tests { assert_eq!(cfg.model, "claude-sonnet-4-6"); assert_eq!(cfg.executor, ExecutorKind::Cersei); assert!(cfg.concurrency >= 1); + // The default must track the machine's core count, not a hardcoded value. + assert_eq!(cfg.concurrency, default_concurrency()); // The fallback executor is constructible from config alone (DI seam works). let _exec = cfg.build_claude_executor(); } @@ -255,6 +272,7 @@ mod tests { Some("gpt-4o".into()), None, None, + None, ); let checks = resolve_layers(file, overrides).unwrap(); assert_eq!(checks.provider, Some(ProviderKind::OpenAi)); @@ -288,7 +306,8 @@ mod tests { assert_eq!(checks.model.as_deref(), Some("claude-haiku-4-5")); // ...and a flag outranks env. 
- let overrides = CliOverrides::new(None, Some("claude-opus-4-8".into()), None, None); + let overrides = + CliOverrides::new(None, Some("claude-opus-4-8".into()), None, None, None); let checks = resolve_layers(file, overrides).unwrap(); assert_eq!(checks.model.as_deref(), Some("claude-opus-4-8")); Ok(()) diff --git a/src/checks/config/schema.rs b/src/checks/config/schema.rs index 16e81b0..73acafc 100644 --- a/src/checks/config/schema.rs +++ b/src/checks/config/schema.rs @@ -77,6 +77,10 @@ pub struct ChecksSection { /// Which execution engine runs each check (`cersei` by default). #[serde(default, skip_serializing_if = "Option::is_none")] pub executor: Option, + /// Maximum number of checks executed concurrently (default: the number of + /// available CPU cores). + #[serde(default, skip_serializing_if = "Option::is_none")] + pub concurrency: Option, /// Optional, non-secret per-provider base-URL overrides. #[serde(default)] pub providers: ProvidersSection, @@ -134,6 +138,8 @@ pub struct CliChecksOverrides { pub effort: Option, #[serde(skip_serializing_if = "Option::is_none")] pub executor: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub concurrency: Option, } impl CliOverrides { @@ -143,6 +149,7 @@ impl CliOverrides { model: Option, effort: Option, executor: Option, + concurrency: Option, ) -> Self { Self { checks: CliChecksOverrides { @@ -150,6 +157,7 @@ impl CliOverrides { model, effort, executor, + concurrency, }, } } diff --git a/src/checks/e2e.rs b/src/checks/e2e.rs index d1cd204..7d66aff 100644 --- a/src/checks/e2e.rs +++ b/src/checks/e2e.rs @@ -16,7 +16,7 @@ use miette::Result; use tempfile::TempDir; use tokio::sync::Barrier; -use crate::checks::config::configuration; +use crate::checks::config::{Config, configuration}; use crate::checks::discovery::discover; use crate::checks::executor::{ AgentOutcome, AgentRunRequest, CheckExecutor, CheckReport, FakeExecutor, @@ -207,8 +207,9 @@ impl CheckExecutor for InterleavingExecutor { 
#[tokio::test(flavor = "multi_thread", worker_threads = 2)] async fn checks_execute_concurrently_not_in_a_barrier() { - // Two independent requirements (two checks total). The default concurrency - // is 2, so both should run at once. + // Two independent requirements (two checks total). Pin concurrency to 2 + // explicitly so both run at once regardless of the host's core count (the + // default now tracks available parallelism, not a fixed value). let dir = TempDir::new().unwrap(); fs::write( dir.path().join("CHECKS.md"), @@ -221,8 +222,10 @@ async fn checks_execute_concurrently_not_in_a_barrier() { let executor = Arc::new(InterleavingExecutor { barrier: Arc::new(Barrier::new(2)), }); - let cfg = configuration(); - assert_eq!(cfg.concurrency, 2, "test assumes a concurrency of 2"); + let cfg = Config { + concurrency: 2, + ..configuration() + }; let outcomes = run_to_outcomes( &cfg, diff --git a/src/checks/mod.rs b/src/checks/mod.rs index 80dcc91..b0f8a61 100644 --- a/src/checks/mod.rs +++ b/src/checks/mod.rs @@ -70,6 +70,7 @@ pub async fn run(terminal: &Terminal, working_dir: &Path, overrides: CliOverride provider = resolved.config.provider.as_str(), model = %resolved.config.model, executor = ?resolved.config.executor, + concurrency = resolved.config.concurrency, available_providers = ?resolved.providers.keys().collect::>(), "resolved checks configuration and provider registry", ); diff --git a/src/config/check/mod.rs b/src/config/check/mod.rs index 94c34d6..f6f1ff3 100644 --- a/src/config/check/mod.rs +++ b/src/config/check/mod.rs @@ -1,3 +1,4 @@ +use std::num::NonZeroUsize; use std::path::{Path, PathBuf}; use clap::Args; @@ -32,6 +33,12 @@ pub struct CheckSubcommand { /// legacy `claude -p` fallback). Overrides `checks.executor` from env/file. #[arg(long, value_enum)] executor: Option, + + /// Maximum number of checks to run concurrently. Must be greater than 0. + /// Overrides `checks.concurrency` from env/file. 
Defaults to the number of + /// available CPU cores. + #[arg(long)] + concurrency: Option, } impl CheckSubcommand { @@ -48,6 +55,7 @@ impl CheckSubcommand { self.model.clone(), self.effort, self.executor, + self.concurrency.map(NonZeroUsize::get), ) } }