From f2160d3f15502dd8fab8e1cd450c56835eb5a599 Mon Sep 17 00:00:00 2001 From: Ryan Robson Date: Fri, 15 May 2026 21:39:19 -0500 Subject: [PATCH] fix(ml): replace deprecated PassiveAggressiveRegressor; guard partial_fit MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit sklearn 1.8 deprecates PassiveAggressiveRegressor (removal in 1.10) and its partial_fit() does not accept sample_weight, breaking the incremental-learning backup-models loop at incremental_engine.py:552 on PA instances (TypeError: ... unexpected keyword argument 'sample_weight'). Two changes: 1. Drop PassiveAggressiveRegressor from `backup_models` and replace it with sklearn's recommended substitute — SGDRegressor configured as PA-1 (loss='epsilon_insensitive', penalty=None, learning_rate='pa1', eta0=1.0). SGDRegressor.partial_fit accepts sample_weight, so every model in the loop now safely receives the weight signal. 2. In `realtime_feedback.update_model_incremental`, `current_model` is loaded dynamically from disk via joblib and could be any sklearn estimator. Wrap the `partial_fit(..., sample_weight=...)` call in a try/except TypeError fallback that retries without weights, so an older serialized PA model doesn't blow up the whole feedback update. Verified: full ML test suite (analyzer.ml) — 341 tests pass. 
--- analyzer/ml/learning/incremental_engine.py | 16 +++++++++++++--- analyzer/ml/monitoring/realtime_feedback.py | 10 ++++++++-- 2 files changed, 21 insertions(+), 5 deletions(-) diff --git a/analyzer/ml/learning/incremental_engine.py b/analyzer/ml/learning/incremental_engine.py index 0c08628..944eedc 100644 --- a/analyzer/ml/learning/incremental_engine.py +++ b/analyzer/ml/learning/incremental_engine.py @@ -27,7 +27,7 @@ import joblib from sklearn.base import BaseEstimator, RegressorMixin from sklearn.ensemble import RandomForestRegressor - from sklearn.linear_model import PassiveAggressiveRegressor, SGDRegressor + from sklearn.linear_model import SGDRegressor from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score from sklearn.preprocessing import StandardScaler @@ -451,10 +451,20 @@ def initialize_models(self) -> bool: n_estimators=20, max_depth=8, memory_limit=5000 ) - # Backup models for comparison + # Backup models for comparison. + # PassiveAggressiveRegressor was deprecated in sklearn 1.8 (removal + # in 1.10); the sklearn-recommended replacement is SGDRegressor with + # the PA-1 configuration, which also accepts sample_weight in + # partial_fit (PA did not). 
self.backup_models = [ SGDRegressor(learning_rate="adaptive", eta0=0.01, random_state=42), - PassiveAggressiveRegressor(C=1.0, random_state=42), + SGDRegressor( + loss="epsilon_insensitive", + penalty=None, + learning_rate="pa1", + eta0=1.0, + random_state=42, + ), ] logger.info("Incremental learning models initialized") diff --git a/analyzer/ml/monitoring/realtime_feedback.py b/analyzer/ml/monitoring/realtime_feedback.py index 00135cb..fc5c5bd 100644 --- a/analyzer/ml/monitoring/realtime_feedback.py +++ b/analyzer/ml/monitoring/realtime_feedback.py @@ -177,8 +177,14 @@ def update_model_incremental( # Check if model supports partial_fit if hasattr(self.current_model, "partial_fit"): - # Direct incremental update - self.current_model.partial_fit(X, y, sample_weight=weights) + # Direct incremental update. Older estimators + # (e.g. PassiveAggressiveRegressor) do not accept sample_weight in + # partial_fit, so retry without weights on TypeError rather than + # let the whole update fail. + try: + self.current_model.partial_fit(X, y, sample_weight=weights) + except TypeError: + self.current_model.partial_fit(X, y) update_method = "partial_fit" else: # Simulate incremental learning with weighted update