diff --git a/database/init.sql b/database/init.sql
index 3810773..54bf038 100644
--- a/database/init.sql
+++ b/database/init.sql
@@ -33,6 +33,14 @@ CREATE TABLE cml_stats (
max_rsl REAL,
mean_rsl REAL,
stddev_rsl REAL,
+ completeness_percent_6h REAL,
+ total_records_6h BIGINT,
+ valid_records_6h BIGINT,
+ mean_rsl_6h REAL,
+ stddev_rsl_6h REAL,
+ completeness_percent_1h REAL,
+ mean_rsl_1h REAL,
+ stddev_rsl_1h REAL,
last_rsl REAL,
last_update TIMESTAMPTZ DEFAULT NOW(),
PRIMARY KEY (cml_id, user_id)
@@ -100,6 +108,69 @@ BEGIN
END;
$$ LANGUAGE plpgsql;
+-- update_cml_stats_windowed(target_cml_id, target_user_id)
+--
+-- Computes 6-hour and 1-hour windowed statistics in a single table scan using
+-- FILTER clauses. TimescaleDB chunk exclusion prunes all chunks older than 6 hours
+-- at the storage level, so the query only touches the current uncompressed chunk
+-- regardless of total dataset size.
+CREATE OR REPLACE FUNCTION update_cml_stats_windowed(
+ target_cml_id TEXT,
+ target_user_id TEXT DEFAULT 'demo_openmrg'
+) RETURNS VOID AS $$
+DECLARE
+ now_ts TIMESTAMPTZ := NOW();
+BEGIN
+ INSERT INTO cml_stats (
+ cml_id, user_id,
+ completeness_percent_6h, total_records_6h, valid_records_6h,
+ mean_rsl_6h, stddev_rsl_6h,
+ completeness_percent_1h, mean_rsl_1h, stddev_rsl_1h,
+ last_rsl, last_update
+ )
+ SELECT
+ target_cml_id, target_user_id,
+ -- 6h window
+ ROUND(
+ 100.0 * COUNT(rsl) FILTER (WHERE time >= now_ts - INTERVAL '6 hours')
+ / NULLIF(COUNT(*) FILTER (WHERE time >= now_ts - INTERVAL '6 hours'), 0),
+ 2),
+ COUNT(*) FILTER (WHERE time >= now_ts - INTERVAL '6 hours'),
+ COUNT(rsl) FILTER (WHERE time >= now_ts - INTERVAL '6 hours'),
+ ROUND(AVG(rsl) FILTER (WHERE time >= now_ts - INTERVAL '6 hours')::numeric, 2),
+ ROUND(STDDEV(rsl) FILTER (WHERE time >= now_ts - INTERVAL '6 hours')::numeric, 2),
+ -- 1h window
+ ROUND(
+ 100.0 * COUNT(rsl) FILTER (WHERE time >= now_ts - INTERVAL '1 hour')
+ / NULLIF(COUNT(*) FILTER (WHERE time >= now_ts - INTERVAL '1 hour'), 0),
+ 2),
+ ROUND(AVG(rsl) FILTER (WHERE time >= now_ts - INTERVAL '1 hour')::numeric, 2),
+ ROUND(STDDEV(rsl) FILTER (WHERE time >= now_ts - INTERVAL '1 hour')::numeric, 2),
+ -- last_rsl: unconstrained so we get the true last RSL even if the CML
+ -- has been quiet for more than 6 hours
+ (SELECT rsl FROM cml_data
+ WHERE cml_id = target_cml_id
+ AND user_id = target_user_id
+ ORDER BY time DESC LIMIT 1),
+ now_ts
+ FROM cml_data
+ WHERE cml_id = target_cml_id
+ AND user_id = target_user_id
+ AND time >= now_ts - INTERVAL '6 hours'
+ ON CONFLICT (cml_id, user_id) DO UPDATE SET
+ completeness_percent_6h = EXCLUDED.completeness_percent_6h,
+ total_records_6h = EXCLUDED.total_records_6h,
+ valid_records_6h = EXCLUDED.valid_records_6h,
+ mean_rsl_6h = EXCLUDED.mean_rsl_6h,
+ stddev_rsl_6h = EXCLUDED.stddev_rsl_6h,
+ completeness_percent_1h = EXCLUDED.completeness_percent_1h,
+ mean_rsl_1h = EXCLUDED.mean_rsl_1h,
+ stddev_rsl_1h = EXCLUDED.stddev_rsl_1h,
+ last_rsl = EXCLUDED.last_rsl,
+ last_update = EXCLUDED.last_update;
+END;
+$$ LANGUAGE plpgsql;
+
SELECT create_hypertable('cml_data', 'time');
-- Per-user lookup indexes.
@@ -211,8 +282,10 @@ GRANT SELECT ON cml_data TO webserver_role;
GRANT SELECT ON cml_metadata TO webserver_role;
GRANT SELECT ON cml_stats TO webserver_role;
--- Parser calls update_cml_stats() to upsert per-CML statistics.
+-- Parser calls update_cml_stats() to upsert per-CML lifetime statistics.
GRANT EXECUTE ON FUNCTION update_cml_stats(TEXT, TEXT) TO demo_openmrg, demo_orange_cameroun;
+-- Parser calls update_cml_stats_windowed() from the background stats timer.
+GRANT EXECUTE ON FUNCTION update_cml_stats_windowed(TEXT, TEXT) TO demo_openmrg, demo_orange_cameroun;
-- Row-Level Security on cml_metadata and cml_stats.
-- cml_data is excluded: TimescaleDB does not allow RLS on compressed
diff --git a/database/migrations/009_add_windowed_stats.sql b/database/migrations/009_add_windowed_stats.sql
new file mode 100644
index 0000000..837b897
--- /dev/null
+++ b/database/migrations/009_add_windowed_stats.sql
@@ -0,0 +1,95 @@
+-- Migration 009: add windowed cml_stats columns and update_cml_stats_windowed function
+--
+-- Replaces the expensive full-history refresh (update_cml_stats, called for every CML
+-- every 60 seconds) with a cheap windowed refresh that only touches the current
+-- uncompressed TimescaleDB chunk (~6 hours of data), reducing stats refresh time from
+-- 20-30 s to < 1 s and Postgres CPU from ~100% to < 5%.
+--
+-- New columns on cml_stats: 6-hour and 1-hour windowed completeness, record counts,
+-- mean RSL, and stddev RSL.
+-- New function: update_cml_stats_windowed(target_cml_id, target_user_id)
+--
+-- The parser must be updated alongside this migration:
+-- - parser/db_writer.py: replace refresh_stats() call with refresh_windowed_stats()
+-- - parser/db_writer.py: wire _update_stats_for_cmls() into write_rawdata() so
+-- lifetime columns (total_records, min_rsl, max_rsl, ...) stay current.
+--
+-- Apply with:
+-- docker compose exec -T database psql -U myuser -d mydatabase \
+-- < database/migrations/009_add_windowed_stats.sql
+
+ALTER TABLE cml_stats
+ ADD COLUMN IF NOT EXISTS completeness_percent_6h REAL,
+ ADD COLUMN IF NOT EXISTS total_records_6h BIGINT,
+ ADD COLUMN IF NOT EXISTS valid_records_6h BIGINT,
+ ADD COLUMN IF NOT EXISTS mean_rsl_6h REAL,
+ ADD COLUMN IF NOT EXISTS stddev_rsl_6h REAL,
+ ADD COLUMN IF NOT EXISTS completeness_percent_1h REAL,
+ ADD COLUMN IF NOT EXISTS mean_rsl_1h REAL,
+ ADD COLUMN IF NOT EXISTS stddev_rsl_1h REAL;
+
+-- update_cml_stats_windowed(target_cml_id, target_user_id)
+--
+-- Computes 6-hour and 1-hour windowed statistics in a single table scan using
+-- FILTER clauses. TimescaleDB chunk exclusion prunes all chunks older than 6 hours
+-- at the storage level, so the query only touches the current uncompressed chunk
+-- regardless of total dataset size.
+CREATE OR REPLACE FUNCTION update_cml_stats_windowed(
+ target_cml_id TEXT,
+ target_user_id TEXT DEFAULT 'demo_openmrg'
+) RETURNS VOID AS $$
+DECLARE
+ now_ts TIMESTAMPTZ := NOW();
+BEGIN
+ INSERT INTO cml_stats (
+ cml_id, user_id,
+ completeness_percent_6h, total_records_6h, valid_records_6h,
+ mean_rsl_6h, stddev_rsl_6h,
+ completeness_percent_1h, mean_rsl_1h, stddev_rsl_1h,
+ last_rsl, last_update
+ )
+ SELECT
+ target_cml_id, target_user_id,
+ -- 6h window
+ ROUND(
+ 100.0 * COUNT(rsl) FILTER (WHERE time >= now_ts - INTERVAL '6 hours')
+ / NULLIF(COUNT(*) FILTER (WHERE time >= now_ts - INTERVAL '6 hours'), 0),
+ 2),
+ COUNT(*) FILTER (WHERE time >= now_ts - INTERVAL '6 hours'),
+ COUNT(rsl) FILTER (WHERE time >= now_ts - INTERVAL '6 hours'),
+ ROUND(AVG(rsl) FILTER (WHERE time >= now_ts - INTERVAL '6 hours')::numeric, 2),
+ ROUND(STDDEV(rsl) FILTER (WHERE time >= now_ts - INTERVAL '6 hours')::numeric, 2),
+ -- 1h window
+ ROUND(
+ 100.0 * COUNT(rsl) FILTER (WHERE time >= now_ts - INTERVAL '1 hour')
+ / NULLIF(COUNT(*) FILTER (WHERE time >= now_ts - INTERVAL '1 hour'), 0),
+ 2),
+ ROUND(AVG(rsl) FILTER (WHERE time >= now_ts - INTERVAL '1 hour')::numeric, 2),
+ ROUND(STDDEV(rsl) FILTER (WHERE time >= now_ts - INTERVAL '1 hour')::numeric, 2),
+ -- last_rsl: unconstrained so we get the true last RSL even if the CML
+ -- has been quiet for more than 6 hours
+ (SELECT rsl FROM cml_data
+ WHERE cml_id = target_cml_id
+ AND user_id = target_user_id
+ ORDER BY time DESC LIMIT 1),
+ now_ts
+ FROM cml_data
+ WHERE cml_id = target_cml_id
+ AND user_id = target_user_id
+ AND time >= now_ts - INTERVAL '6 hours'
+ ON CONFLICT (cml_id, user_id) DO UPDATE SET
+ completeness_percent_6h = EXCLUDED.completeness_percent_6h,
+ total_records_6h = EXCLUDED.total_records_6h,
+ valid_records_6h = EXCLUDED.valid_records_6h,
+ mean_rsl_6h = EXCLUDED.mean_rsl_6h,
+ stddev_rsl_6h = EXCLUDED.stddev_rsl_6h,
+ completeness_percent_1h = EXCLUDED.completeness_percent_1h,
+ mean_rsl_1h = EXCLUDED.mean_rsl_1h,
+ stddev_rsl_1h = EXCLUDED.stddev_rsl_1h,
+ last_rsl = EXCLUDED.last_rsl,
+ last_update = EXCLUDED.last_update;
+END;
+$$ LANGUAGE plpgsql;
+
+GRANT EXECUTE ON FUNCTION update_cml_stats_windowed(TEXT, TEXT)
+ TO demo_openmrg, demo_orange_cameroun;
diff --git a/parser/db_writer.py b/parser/db_writer.py
index de69fb8..49b4e41 100644
--- a/parser/db_writer.py
+++ b/parser/db_writer.py
@@ -304,12 +304,16 @@ def write_rawdata(self, df) -> int:
)
)
- # Commit immediately after insert; stats are updated separately
+ # Update lifetime stats for CMLs in this batch (same transaction as the insert)
+ cml_ids = df_subset["cml_id"].unique().tolist()
+ self._update_stats_for_cmls(cml_ids)
+
+ # Single commit covers both the data insert and the stats update
try:
if self.conn:
self.conn.commit()
except Exception:
- logger.exception("Failed to commit raw data")
+ logger.exception("Failed to commit raw data and stats")
raise
return rows_written
@@ -383,3 +387,25 @@ def refresh_stats(self) -> None:
finally:
if cur and not cur.closed:
cur.close()
+
+ def refresh_windowed_stats(self) -> None:
+ """Recalculate windowed (6h/1h) cml_stats for all known CMLs.
+ Cheap: only touches the most-recent uncompressed TimescaleDB chunk."""
+ cur = self.conn.cursor()
+ try:
+ cur.execute(
+ "SELECT update_cml_stats_windowed(cml_id::text, %s) "
+ "FROM (SELECT DISTINCT cml_id FROM cml_metadata WHERE user_id = %s) t",
+ (self.user_id, self.user_id),
+ )
+ self.conn.commit()
+ logger.info("Refreshed windowed cml_stats for all CMLs")
+ except Exception:
+ try:
+ self.conn.rollback()
+ except Exception:
+ pass
+ logger.exception("Failed to refresh windowed cml_stats")
+ finally:
+ if cur and not cur.closed:
+ cur.close()
diff --git a/parser/main.py b/parser/main.py
index 2bff93e..81a4498 100644
--- a/parser/main.py
+++ b/parser/main.py
@@ -125,14 +125,14 @@ def stats_loop():
# Run immediately on startup so Grafana has fresh stats without
# waiting a full interval after the backlog is processed.
try:
- stats_db.refresh_stats()
+ stats_db.refresh_windowed_stats()
except Exception:
- logger.exception("Stats thread: initial refresh_stats failed")
+ logger.exception("Stats thread: initial refresh_windowed_stats failed")
while not stop_event.wait(Config.STATS_REFRESH_INTERVAL):
try:
- stats_db.refresh_stats()
+ stats_db.refresh_windowed_stats()
except Exception:
- logger.exception("Stats thread: refresh_stats failed")
+ logger.exception("Stats thread: refresh_windowed_stats failed")
stats_db.close()
stats_thread = threading.Thread(target=stats_loop, daemon=True, name="stats-refresh")
diff --git a/parser/tests/test_db_writer.py b/parser/tests/test_db_writer.py
index 67e52b0..bf04abc 100644
--- a/parser/tests/test_db_writer.py
+++ b/parser/tests/test_db_writer.py
@@ -319,6 +319,33 @@ def test__update_stats_for_cmls_rollback_on_error(mock_connection):
mock_connection.rollback.assert_called()
+def test_refresh_windowed_stats_commits_on_success(mock_connection):
+ """refresh_windowed_stats calls update_cml_stats_windowed and commits."""
+ writer = DBWriter("postgresql://test", user_id="demo_openmrg")
+ writer.conn = mock_connection
+
+ writer.refresh_windowed_stats()
+
+ cur = mock_connection.cursor.return_value
+ cur.execute.assert_called_once()
+ sql = cur.execute.call_args.args[0]
+ assert "update_cml_stats_windowed" in sql
+ mock_connection.commit.assert_called_once()
+
+
+def test_refresh_windowed_stats_rollback_on_error(mock_connection):
+ """refresh_windowed_stats rolls back and swallows the exception on DB error."""
+ writer = DBWriter("postgresql://test", user_id="demo_openmrg")
+ writer.conn = mock_connection
+
+ mock_connection.cursor.return_value.execute.side_effect = Exception("DB error")
+
+ writer.refresh_windowed_stats() # must not raise
+
+ mock_connection.rollback.assert_called_once()
+ mock_connection.commit.assert_not_called()
+
+
# ---------------------------------------------------------------------------
# log_file_event
# ---------------------------------------------------------------------------
diff --git a/parser/tests/test_main.py b/parser/tests/test_main.py
index 3d3b992..f5a2842 100644
--- a/parser/tests/test_main.py
+++ b/parser/tests/test_main.py
@@ -121,6 +121,91 @@ def test_metadata_exception_is_swallowed(
mock_batch.assert_called_once()
+def _run_stats_loop(tmp_path, mock_event, *, configure_db=None):
+ """Run main(), then invoke the captured stats_loop closure under the same patches.
+
+ Calling stats_loop() inside the with-block is critical: if called after
+ the block exits, DBWriter and Config are unpatched, connect() fails against
+ a real DB, and the retry loop hangs forever.
+
+ Returns the MagicMock instance used as every DBWriter in the test.
+ """
+ captured = {}
+
+ class CapturingThread(threading.Thread):
+ def __init__(self, *args, target=None, name=None, **kwargs):
+ super().__init__(*args, target=target, name=name, **kwargs)
+ if name == "stats-refresh":
+ captured["stats_loop"] = target
+
+ def start(self):
+ pass # don't spawn real threads in unit tests
+
+ mock_db = MagicMock()
+
+ with patch("parser.main.threading.Thread", CapturingThread), \
+ patch("parser.main.threading.Event", return_value=mock_event), \
+ patch("parser.main.FileManager"), \
+ patch("parser.main.FileWatcher"), \
+ patch("parser.main.DBWriter", return_value=mock_db), \
+ patch("parser.main.Config.PARSER_ENABLED", True), \
+ patch("parser.main.Config.PROCESS_EXISTING_ON_STARTUP", False), \
+ patch("parser.main.Config.DATABASE_URL", "postgresql://test"), \
+ patch("parser.main.Config.USER_ID", "test_user"), \
+ patch("parser.main.Config.STATS_REFRESH_INTERVAL", 60), \
+ patch("parser.main.Config.INCOMING_DIR", tmp_path), \
+ patch("parser.main.Config.ARCHIVED_DIR", tmp_path), \
+ patch("parser.main.Config.QUARANTINE_DIR", tmp_path), \
+ patch("parser.main.time.sleep", side_effect=KeyboardInterrupt):
+ try:
+ main()
+ except (KeyboardInterrupt, SystemExit):
+ pass
+ if configure_db is not None:
+ configure_db(mock_db)
+ if "stats_loop" in captured:
+ captured["stats_loop"]()
+
+ return mock_db
+
+
+def test_stats_loop_calls_refresh_windowed_stats_on_startup(tmp_path):
+ """stats_loop calls refresh_windowed_stats once before entering the timer loop."""
+ mock_event = MagicMock()
+ mock_event.is_set.return_value = False # connect loop: enter → connect succeeds → break
+ mock_event.wait.return_value = True # timer wait → loop exits immediately
+
+ mock_db = _run_stats_loop(tmp_path, mock_event)
+
+ mock_db.refresh_windowed_stats.assert_called_once()
+ mock_db.close.assert_called() # called by stats_loop (possibly also by main cleanup)
+
+
+def test_stats_loop_initial_refresh_error_is_swallowed(tmp_path):
+ """stats_loop swallows exceptions raised by the startup refresh_windowed_stats call."""
+ mock_event = MagicMock()
+ mock_event.is_set.return_value = False
+ mock_event.wait.return_value = True
+
+ def configure(db):
+ db.refresh_windowed_stats.side_effect = Exception("stats error")
+
+ _run_stats_loop(tmp_path, mock_event, configure_db=configure) # must not raise
+
+
+def test_stats_loop_calls_refresh_windowed_stats_in_timer_loop(tmp_path):
+ """stats_loop calls refresh_windowed_stats again on each timer tick."""
+ mock_event = MagicMock()
+ mock_event.is_set.return_value = False
+ # First timer wait → False (enter loop body), second → True (exit)
+ mock_event.wait.side_effect = [False, True]
+
+ mock_db = _run_stats_loop(tmp_path, mock_event)
+
+ # startup call + 1 timer-loop call = 2 total
+ assert mock_db.refresh_windowed_stats.call_count == 2
+
+
def test_stats_loop_creates_dbwriter_with_config_user_id(tmp_path):
"""stats_loop must pass user_id=Config.USER_ID to DBWriter.
diff --git a/scripts/generate_config.py b/scripts/generate_config.py
index ca7a6de..a0acf41 100644
--- a/scripts/generate_config.py
+++ b/scripts/generate_config.py
@@ -280,6 +280,7 @@ def generate_users_json(users: list[dict], existing_json: dict) -> dict:
GRANT SELECT ON cml_data_secure TO {user_id};
GRANT SELECT ON cml_data_1h_secure TO {user_id};
GRANT EXECUTE ON FUNCTION update_cml_stats(TEXT, TEXT) TO {user_id};
+GRANT EXECUTE ON FUNCTION update_cml_stats_windowed(TEXT, TEXT) TO {user_id};
-- file_processing_log: parser INSERTs a row for every processed file;
-- webserver_role only needs SELECT.
diff --git a/webserver/main.py b/webserver/main.py
index 750a3c4..5792b4b 100644
--- a/webserver/main.py
+++ b/webserver/main.py
@@ -528,45 +528,33 @@ def api_cml_stats():
cur.execute(
"""
SELECT
- cs.cml_id::text,
- cs.total_records,
- cs.valid_records,
- cs.null_records,
- cs.completeness_percent,
- cs.min_rsl,
- cs.max_rsl,
- cs.mean_rsl,
- cs.stddev_rsl,
- cs.last_rsl,
- ROUND(STDDEV(cd.rsl)::numeric, 2) as stddev_last_60min
- FROM cml_stats cs
- LEFT JOIN (
- SELECT cml_id, rsl
- FROM cml_data_secure
- WHERE time >= (SELECT MAX(bucket) FROM cml_data_1h_secure) - INTERVAL '60 minutes'
- ) cd ON cs.cml_id = cd.cml_id
- GROUP BY cs.cml_id, cs.total_records, cs.valid_records, cs.null_records,
- cs.completeness_percent, cs.min_rsl, cs.max_rsl, cs.mean_rsl,
- cs.stddev_rsl, cs.last_rsl
- ORDER BY cs.cml_id
- """
+ cml_id::text,
+ completeness_percent_6h,
+ total_records_6h,
+ valid_records_6h,
+ mean_rsl_6h,
+ stddev_rsl_6h,
+ completeness_percent_1h,
+ stddev_rsl_1h,
+ last_rsl
+ FROM cml_stats
+ ORDER BY cml_id
+ """
)
data = cur.fetchall()
cur.close()
stats = [
{
- "cml_id": str(row[0]),
- "total_records": int(row[1]),
- "valid_records": int(row[2]),
- "null_records": int(row[3]),
- "completeness_percent": safe_float(row[4]),
- "min_rsl": safe_float(row[5]),
- "max_rsl": safe_float(row[6]),
- "mean_rsl": safe_float(row[7]),
- "stddev_rsl": safe_float(row[8]),
- "last_rsl": safe_float(row[9]),
- "stddev_last_60min": safe_float(row[10]),
+ "cml_id": str(row[0]),
+ "completeness_percent": safe_float(row[1]), # 6h window
+ "total_records": int(row[2] or 0),
+ "valid_records": int(row[3] or 0),
+ "mean_rsl": safe_float(row[4]),
+ "stddev_rsl": safe_float(row[5]),
+ "completeness_percent_1h": safe_float(row[6]),
+ "stddev_last_60min": safe_float(row[7]), # pre-computed 1h stddev
+ "last_rsl": safe_float(row[8]),
}
for row in data
]
@@ -613,8 +601,8 @@ def get_archive_statistics(user_id: str):
with user_db_scope(user_id) as conn:
cur = conn.cursor()
- # Row count via secure view
- cur.execute("SELECT COUNT(*) FROM cml_data_secure")
+ # Row count from precomputed cml_stats (RLS enforced via user_db_scope)
+ cur.execute("SELECT COALESCE(SUM(total_records), 0) FROM cml_stats")
stats["total_records"] = cur.fetchone()[0]
# CML count (RLS enforced)
diff --git a/webserver/templates/realtime.html b/webserver/templates/realtime.html
index 86f2ad1..4cbcf33 100644
--- a/webserver/templates/realtime.html
+++ b/webserver/templates/realtime.html
@@ -168,13 +168,13 @@
function buildPopupText(cmlId, stats) {
var popupText = 'CML ' + cmlId + '
';
if (stats) {
- popupText += 'Completeness: ' + stats.completeness_percent + '%
';
- popupText += 'Valid records: ' + stats.valid_records + '/' + stats.total_records + '
';
+ popupText += 'Completeness (6h): ' + stats.completeness_percent + '%
';
+ popupText += 'Valid records (6h): ' + stats.valid_records + '/' + stats.total_records + '
';
if (stats.last_rsl !== null) {
popupText += 'Last RSL: ' + stats.last_rsl + ' dBm
';
}
if (stats.stddev_last_60min !== null) {
- popupText += 'Std Dev (60min): ' + stats.stddev_last_60min + '
';
+ popupText += 'Std Dev (1h): ' + stats.stddev_last_60min + '
';
}
}
return popupText;
@@ -226,9 +226,9 @@
var select = L.DomUtil.create('select', '', container);
select.id = 'coloringOption';
select.innerHTML = `
-
+
-
+
`;
// Prevent map interactions when using the select
diff --git a/webserver/tests/test_api_cml_stats.py b/webserver/tests/test_api_cml_stats.py
index f90834e..6c903d4 100644
--- a/webserver/tests/test_api_cml_stats.py
+++ b/webserver/tests/test_api_cml_stats.py
@@ -24,21 +24,19 @@ def test_api_cml_stats_returns_cached_stats(monkeypatch):
mock_cursor = Mock()
mock_conn.cursor.return_value = mock_cursor
- # Row fields: cml_id, total_records, valid_records, null_records, completeness_percent,
- # min_rsl, max_rsl, mean_rsl, stddev_rsl, last_rsl, stddev_last_60min
+ # Row fields: cml_id, completeness_percent_6h, total_records_6h, valid_records_6h,
+ # mean_rsl_6h, stddev_rsl_6h, completeness_percent_1h, stddev_rsl_1h, last_rsl
mock_cursor.fetchall.return_value = [
(
"10001",
- 10,
- 9,
- 1,
- 90.0,
- -60.0,
- -40.0,
- -50.0,
- 3.0,
- -45.0,
- 1.5,
+ 94.2, # completeness_percent_6h
+ 2160, # total_records_6h
+ 2031, # valid_records_6h
+ -50.0, # mean_rsl_6h
+ 3.0, # stddev_rsl_6h
+ 90.0, # completeness_percent_1h
+ 1.3, # stddev_rsl_1h
+ -45.0, # last_rsl
)
]
@@ -67,5 +65,7 @@ def mock_user_db_scope(user_id):
assert len(data) == 1
row = data[0]
assert row["cml_id"] == "10001"
- assert row["completeness_percent"] == 90.0
+ assert row["completeness_percent"] == 94.2
+ assert row["completeness_percent_1h"] == 90.0
+ assert row["stddev_last_60min"] == 1.3
assert row["last_rsl"] == -45.0
diff --git a/webserver/tests/test_api_routes.py b/webserver/tests/test_api_routes.py
index 8799bc4..40485b0 100644
--- a/webserver/tests/test_api_routes.py
+++ b/webserver/tests/test_api_routes.py
@@ -87,6 +87,32 @@ def test_api_data_time_range_no_data(auth_client):
assert data["latest"] is None
+def test_get_archive_statistics_reads_total_records_from_cml_stats(auth_client):
+ """get_archive_statistics must use SUM(total_records) from cml_stats, not COUNT(*).
+
+ Regression guard for the query that replaced:
+ SELECT COUNT(*) FROM cml_data_secure
+ with:
+ SELECT COALESCE(SUM(total_records), 0) FROM cml_stats
+ """
+ client, cursor = auth_client
+ # fetchone called 3 times: total_records, cml_count, date_range
+ cursor.fetchone.side_effect = [(99000,), (12,), (None,)]
+
+ result = wm.get_archive_statistics("demo_openmrg")
+
+ assert result["total_records"] == 99000
+ assert result["cml_count"] == 12
+
+ executed_sql = [c.args[0] for c in cursor.execute.call_args_list]
+ assert any(
+ "SUM(total_records)" in sql and "cml_stats" in sql for sql in executed_sql
+ ), "expected COALESCE(SUM(total_records), 0) FROM cml_stats"
+ assert not any("COUNT(*)" in sql and "cml_data" in sql for sql in executed_sql), (
+ "COUNT(*) FROM cml_data must not be used (full hypertable scan regression)"
+ )
+
+
def test_overview_reads_total_records_from_cml_stats(monkeypatch, auth_client):
"""overview() must query cml_stats for total_records, not scan cml_data_secure.