From ec5abfb8e662d5932268cc337f6733cce81cbc3a Mon Sep 17 00:00:00 2001
From: Brad Smith <bradsmithmba@gmail.com>
Date: Wed, 10 Jun 2026 15:53:19 -0500
Subject: [PATCH] fix(models): Kelly sizing returns None when historical data
 is absent
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

_calculate_kelly_sizing received win_rate/avg_win/avg_loss with call-site
defaults of 0.5/1.0/1.0 when a strategy had no historical data. Those
defaults yield Kelly = (0.5*1 - 0.5)/1 = 0, so every strategy without
backtest data was sized at 0 — indistinguishable from a genuine "no edge"
(Kelly = 0) result and presented to the user as "0.0% of capital".

Take the metrics dict directly and return None when win_rate, avg_win, and
avg_loss are not all present. ScoredStrategy.kelly_size becomes
Optional[float]; recommendation_engine renders None as "insufficient
historical data to size" instead of formatting it as 0.0%.

A present-but-no-edge case still returns 0.0, so callers can distinguish
"no data" (None) from "no edge" (0.0). Two new tests cover both cases.

Closes #17

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
---
 src/models/recommendation_engine.py | 13 ++++++++----
 src/models/scoring_engine.py        | 33 +++++++++++++++++------------
 tests/models/test_scoring_engine.py | 24 +++++++++++++++++++++
 3 files changed, 52 insertions(+), 18 deletions(-)

diff --git a/src/models/recommendation_engine.py b/src/models/recommendation_engine.py
index 970054d..61378b4 100644
--- a/src/models/recommendation_engine.py
+++ b/src/models/recommendation_engine.py
@@ -358,10 +358,15 @@ def _generate_recommendation_explanation(self,
             f"Max risk: {strategy.max_drawdown:.1%}"
         )
 
-        # Position sizing
-        explanations.append(
-            f"Recommended position size: {strategy.kelly_size:.1%} of capital"
-        )
+        # Position sizing (kelly_size is None when historical data is absent)
+        if strategy.kelly_size is None:
+            explanations.append(
+                "Recommended position size: insufficient historical data to size"
+            )
+        else:
+            explanations.append(
+                f"Recommended position size: {strategy.kelly_size:.1%} of capital"
+            )
 
         # Add strategy-specific insights
         if strategy.strategy_type in [StrategyType.IRON_CONDOR, StrategyType.BUTTERFLY]:
diff --git a/src/models/scoring_engine.py b/src/models/scoring_engine.py
index 24aed17..4f8948d 100644
--- a/src/models/scoring_engine.py
+++ b/src/models/scoring_engine.py
@@ -22,7 +22,7 @@ class ScoredStrategy:
     raw_probability: float  # Neural network output
     risk_adjusted_score: float  # 0-100 normalized score
     expected_value: float  # Expected P/L
-    kelly_size: float  # Position size recommendation (0-1)
+    kelly_size: Optional[float]  # Position size recommendation (0-1); None if no historical data
     max_drawdown: float  # Historical/estimated max drawdown
     var_95: float  # 95% Value at Risk
     confidence: float  # Overall confidence (0-1)
@@ -104,12 +104,8 @@ def score_strategies(self,
                 metrics.get('win_rate', 0.5)
             )
 
-            # Calculate Kelly position size
-            kelly_size = self._calculate_kelly_sizing(
-                metrics.get('win_rate', 0.5),
-                metrics.get('avg_win', 1.0),
-                metrics.get('avg_loss', 1.0)
-            )
+            # Calculate Kelly position size (None when historical data is absent)
+            kelly_size = self._calculate_kelly_sizing(metrics)
 
             # Normalize score to 0-100 range
             normalized_score = self._normalize_score(risk_adjusted)
@@ -192,10 +188,7 @@ def _calculate_expected_value(self,
         # Can be enhanced with more sophisticated models
         return expected_return * probability * win_rate
 
-    def _calculate_kelly_sizing(self,
-                               win_rate: float,
-                               avg_win: float,
-                               avg_loss: float) -> float:
+    def _calculate_kelly_sizing(self, metrics: Dict[str, float]) -> Optional[float]:
         """
         Calculate position size using Kelly criterion.
 
@@ -205,16 +198,28 @@ def _calculate_kelly_sizing(self,
         - p = probability of winning
         - q = probability of losing (1-p)
         - b = ratio of win to loss
+
+        Returns None when the required historical inputs (win_rate, avg_win,
+        avg_loss) are not all present. Sizing has no statistical basis without
+        them, and returning 0 would be indistinguishable from a real "no edge"
+        result (Kelly = 0). Callers must treat None as "insufficient data".
         """
+        if not all(key in metrics for key in ("win_rate", "avg_win", "avg_loss")):
+            return None
+
+        win_rate = metrics["win_rate"]
+        avg_win = metrics["avg_win"]
+        avg_loss = metrics["avg_loss"]
+
         if avg_loss <= 0 or win_rate <= 0 or win_rate >= 1:
-            return 0
+            return 0.0
 
         p = win_rate
         q = 1 - p
         b = avg_win / avg_loss
 
         if b <= 0:
-            return 0
+            return 0.0
 
         # Full Kelly
         kelly_full = (p * b - q) / b
@@ -223,7 +228,7 @@ def _calculate_kelly_sizing(self,
         kelly_fraction = kelly_full * self.kelly_fraction
 
         # Cap at maximum position size
-        return max(0, min(kelly_fraction, self.max_position_size))
+        return max(0.0, min(kelly_fraction, self.max_position_size))
 
     def _normalize_score(self, raw_score: float) -> float:
         """Normalize score to 0-100 range for interpretability."""
diff --git a/tests/models/test_scoring_engine.py b/tests/models/test_scoring_engine.py
index d9a18e9..7a954bd 100644
--- a/tests/models/test_scoring_engine.py
+++ b/tests/models/test_scoring_engine.py
@@ -145,6 +145,30 @@ def test_kelly_sizing(self, scoring_engine, sample_probabilities,
         # Iron Condor has higher win rate, should have higher Kelly size
         assert iron_condor.kelly_size > straddle.kelly_size
 
+    def test_kelly_sizing_none_without_historical_data(self, scoring_engine):
+        """Kelly size is None (not 0) when win_rate/avg_win/avg_loss are absent.
+
+        Regression for issue #17: returning 0 made "no data" indistinguishable
+        from a genuine "no edge" result.
+        """
+        probs = {StrategyType.IRON_CONDOR: 0.5}
+        risk_metrics = {StrategyType.IRON_CONDOR: {'max_drawdown': 0.1, 'var_95': 0.03}}
+        expected_returns = {StrategyType.IRON_CONDOR: 0.1}
+
+        scored = scoring_engine.score_strategies(probs, risk_metrics, expected_returns)
+        assert scored[0].kelly_size is None
+
+    def test_kelly_sizing_zero_for_no_edge(self, scoring_engine):
+        """Kelly size is 0.0 (not None) when data is present but there is no edge."""
+        probs = {StrategyType.IRON_CONDOR: 0.5}
+        risk_metrics = {StrategyType.IRON_CONDOR: {
+            'win_rate': 0.5, 'avg_win': 1.0, 'avg_loss': 1.0,
+            'max_drawdown': 0.1, 'var_95': 0.03}}
+        expected_returns = {StrategyType.IRON_CONDOR: 0.1}
+
+        scored = scoring_engine.score_strategies(probs, risk_metrics, expected_returns)
+        assert scored[0].kelly_size == 0.0
+
     def test_risk_adjustment(self, scoring_engine):
         """Test risk adjustment in scoring."""
         # High probability but high risk