From 10ac032975a2dcebec4145008b93f9a36f984523 Mon Sep 17 00:00:00 2001 From: Brian McMahon Date: Wed, 3 Jun 2026 12:24:30 -0700 Subject: [PATCH] refactor(quant): unify Ledoit-Wolf onto one impl (LV1-AE.a) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The lib hosted two Ledoit-Wolf shrinkage estimators: the numpy hand-rolled quant.factor_risk.ledoit_wolf_cov (Option B) and sklearn LedoitWolf inside quant.factor_risk_xs.estimate_factor_covariance (Option A). Validated they are numerically identical (max abs diff ~1e-21 across n∈[35,1000]; both center + estimate the same shrinkage intensity toward scaled identity). So the xs ledoit_wolf branch now calls the shared numpy ledoit_wolf_cov — one LW impl in the lib. sklearn stays a lazy import for the OAS branch only. Zero numerics change (the persisted predictor F is identical to ~1e-21; no live consumer of F anyway — C.3 solve is gated). Closes audit finding #2. - 0.50.0 -> 0.51.0; +1 consolidation-contract test; suite 1107. Co-Authored-By: Claude Opus 4.8 (1M context) --- pyproject.toml | 2 +- src/alpha_engine_lib/__init__.py | 2 +- src/alpha_engine_lib/quant/factor_risk_xs.py | 11 +++++++++-- tests/test_quant_factor_risk_xs.py | 12 ++++++++++++ 4 files changed, 23 insertions(+), 4 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 57009e8..d609303 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta" [project] name = "alpha-engine-lib" -version = "0.50.0" +version = "0.51.0" description = "Shared utilities for the Alpha Engine modules: preflight, logging, ArcticDB, dates, decision capture, cost telemetry, Anthropic payload chokepoint, artifact freshness, RAG, agent schemas, SSM secrets, Telegram + SNS alerts, EC2 spot resilience, SSM log-capture, SSM dispatcher, Step-Functions execution-state projection, S3-conditional-PUT writer locks, and bounded-backoff HTTP retry. Full surface documented in README." readme = "README.md" # EC2 still runs Python 3.9 on the always-on micro instance (boto3 drops diff --git a/src/alpha_engine_lib/__init__.py b/src/alpha_engine_lib/__init__.py index 6400699..1438566 100644 --- a/src/alpha_engine_lib/__init__.py +++ b/src/alpha_engine_lib/__init__.py @@ -1,3 +1,3 @@ """alpha-engine-lib — shared utilities for Alpha Engine modules.""" -__version__ = "0.50.0" +__version__ = "0.51.0" diff --git a/src/alpha_engine_lib/quant/factor_risk_xs.py b/src/alpha_engine_lib/quant/factor_risk_xs.py index c9717d8..9913134 100644 --- a/src/alpha_engine_lib/quant/factor_risk_xs.py +++ b/src/alpha_engine_lib/quant/factor_risk_xs.py @@ -235,8 +235,15 @@ def estimate_factor_covariance( return pd.DataFrame(np.full((K, K), np.nan), index=cols, columns=cols) if shrinkage == "ledoit_wolf": - from sklearn.covariance import LedoitWolf - F = LedoitWolf().fit(clean.to_numpy()).covariance_ + # Single shared Ledoit-Wolf estimator (LV1-AE.a, 2026-06-03). The numpy + # ``quant.factor_risk.ledoit_wolf_cov`` is numerically identical to + # sklearn's ``LedoitWolf`` (max abs diff ~1e-21, validated across + # n∈[35,1000]) — both center the data and estimate the same shrinkage + # intensity toward a scaled-identity target. Consolidating onto the numpy + # impl kills the duplicate reimplementation; sklearn stays a lazy import + # for OAS only. + from .factor_risk import ledoit_wolf_cov + F = ledoit_wolf_cov(clean.to_numpy(), shrinkage="ledoit_wolf") elif shrinkage == "oas": from sklearn.covariance import OAS F = OAS().fit(clean.to_numpy()).covariance_ diff --git a/tests/test_quant_factor_risk_xs.py b/tests/test_quant_factor_risk_xs.py index 68fb4ac..a44d7f1 100644 --- a/tests/test_quant_factor_risk_xs.py +++ b/tests/test_quant_factor_risk_xs.py @@ -29,6 +29,18 @@ ) +def test_ledoit_wolf_branch_uses_shared_numpy_impl(): + """LV1-AE.a consolidation contract: estimate_factor_covariance's ledoit_wolf + path is the shared numpy quant.factor_risk.ledoit_wolf_cov (one LW impl).""" + from alpha_engine_lib.quant.factor_risk import ledoit_wolf_cov + + rng = np.random.RandomState(0) + panel = pd.DataFrame(rng.normal(0, 0.01, (200, 4)), columns=["market", "MOM", "VAL", "QUAL"]) + F = estimate_factor_covariance(panel, shrinkage="ledoit_wolf", min_obs=30) + expected = ledoit_wolf_cov(panel.to_numpy(), shrinkage="ledoit_wolf") + assert np.allclose(F.to_numpy(), expected, atol=1e-15) + + # ─── Helpers ────────────────────────────────────────────────────────────────