evanlow · limsiokuan · Jun 24, 2026 · Jun 24, 2026 · Jun 24, 2026 · Jun 24, 2026
diff --git a/deployment_scripts/refresh_hsi_constituents.py b/deployment_scripts/refresh_hsi_constituents.py
@@ -1,132 +1,26 @@
 #!/usr/bin/env python3
-"""Refresh the HSI constituents CSV from Wikipedia.
+"""Compatibility wrapper for refreshing HSI constituents.
 
-Run this script periodically (e.g. monthly) to keep
-``data/hsi_constituents.csv`` up to date with current index membership.
-
-Usage::
-
-    python deployment_scripts/refresh_hsi_constituents.py
-
-Requires ``pandas`` and ``requests`` (both listed in requirements.txt).
-The script extracts the "Constituents of Hang Seng Index" table and writes
-the standard screener columns used by the app:
-
-* ``symbol``: yfinance-compatible HK ticker (e.g. ``0700.HK``)
-* ``display_symbol``: zero-padded HK code for UI display (e.g. ``0700``)
-* ``security``: company name
-* ``sector``: sub-index bucket from source table (best available grouping)
-* ``sub_industry``: left blank when not available in source
+The maintained implementation now lives in ``web.maintenance`` so refreshes are
+validated, backed up, cache-invalidated, and reported consistently.
 """
 
-import csv
-import logging
-import re
-import sys
-from io import StringIO
 from pathlib import Path
+import json
+import sys
 
-logger = logging.getLogger(__name__)
-logging.basicConfig(level=logging.INFO, format="%(levelname)s: %(message)s")
-
-
-def _normalise_hk_ticker(raw: str) -> tuple[str, str] | None:
-    """Convert a source ticker cell into ``(symbol, display_symbol)``.
-
-    Examples:
-    * "700" -> ("0700.HK", "0700")
-    * "0700" -> ("0700.HK", "0700")
-    * "0700.HK" -> ("0700.HK", "0700")
-    """
-    text = str(raw).strip().upper()
-    if not text:
-        return None
-
-    # Keep only the leading numeric code if extra text appears in cell.
-    m = re.search(r"(\d{1,5})", text)
-    if not m:
-        return None
-
-    code = m.group(1).zfill(4)
-    return f"{code}.HK", code
-
-
-def _load_source_table() -> "object":
-    """Load the HSI constituents table from Wikipedia into a DataFrame."""
-    try:
-        import pandas as pd
-        import requests
-    except ImportError as exc:
-        raise RuntimeError("pandas and requests are required: pip install pandas requests") from exc
-
-    url = "https://en.wikipedia.org/wiki/Hang_Seng_Index"
-    logger.info("Fetching HSI constituent table from Wikipedia: %s", url)
-
-    resp = requests.get(url, headers={"User-Agent": "Mozilla/5.0"}, timeout=30)
-    resp.raise_for_status()
-
-    tables = pd.read_html(StringIO(resp.text))
-    for df in tables:
-        cols = {str(c).strip().lower(): c for c in df.columns}
-        if "ticker" in cols and "name" in cols:
-            ticker_col = cols["ticker"]
-            name_col = cols["name"]
-            sub_index_col = cols.get("sub-index")
-
-            out = pd.DataFrame()
-            out["raw_ticker"] = df[ticker_col]
-            out["security"] = df[name_col]
-            out["sector"] = df[sub_index_col] if sub_index_col is not None else ""
-            return out
+# Allow direct execution via ``python deployment_scripts/refresh_hsi_constituents.py``
+# without requiring the package to be installed first.
+sys.path.insert(0, str(Path(__file__).resolve().parent.parent))
 
-    raise RuntimeError("Could not find expected HSI constituents table (Ticker/Name columns)")
+from web.maintenance.runner import MaintenanceRunner  # noqa: E402
+from web.maintenance.tasks import STATUS_FAILED  # noqa: E402
 
 
 def main() -> int:
-    """Fetch HSI constituents and write data/hsi_constituents.csv."""
-    try:
-        import pandas as pd
-    except ImportError:
-        logger.error("pandas is required: pip install pandas")
-        return 1
-
-    repo_root = Path(__file__).resolve().parent.parent
-    output_path = repo_root / "data" / "hsi_constituents.csv"
-
-    try:
-        source_df = _load_source_table()
-    except Exception as exc:
-        logger.error("Failed to fetch HSI constituents: %s", exc)
-        return 1
-
-    rows = []
-    for _, rec in source_df.iterrows():
-        normalised = _normalise_hk_ticker(rec.get("raw_ticker"))
-        if normalised is None:
-            continue
-        symbol, display_symbol = normalised
-        security = str(rec.get("security") or "").strip()
-        sector = str(rec.get("sector") or "").strip()
-        rows.append(
-            {
-                "symbol": symbol,
-                "display_symbol": display_symbol,
-                "security": security,
-                "sector": sector,
-                "sub_industry": "",
-            }
-        )
-
-    if not rows:
-        logger.error("No constituents parsed from source table")
-        return 1
-
-    df = pd.DataFrame(rows)
-    df = df.drop_duplicates(subset=["symbol"]).sort_values(by=["display_symbol"]).reset_index(drop=True)
-    df.to_csv(output_path, index=False, quoting=csv.QUOTE_ALL)
-
-    logger.info("Written %d HSI constituents to %s", len(df), output_path)
-    return 0
+    report = MaintenanceRunner().run(tasks=["hsi_constituents"], dry_run=False)
+    print(json.dumps(report.to_dict(), indent=2, sort_keys=True))
+    return 1 if report.status == STATUS_FAILED else 0
 
 
 if __name__ == "__main__":

diff --git a/docs/MAINTENANCE.md b/docs/MAINTENANCE.md
@@ -0,0 +1,94 @@
+# System Maintenance: Market Metadata Refresh & Data Hygiene
+
+TWS Robot includes a metadata-only System Maintenance workflow for keeping index universes and market-event context fresh without touching trading execution paths.
+
+## What it maintains
+
+- S&P 500, STI, and HSI constituent CSV files
+- Market events through the existing market-events service
+- Validation reports and backups
+
+## Safety boundaries
+
+The maintenance workflow must not place orders, change strategy behavior, start/stop strategies, modify autonomous trading configuration, or bypass emergency-stop controls.
+
+Allowed writes are limited to:
+
+- `data/*_constituents.csv`
+- `data/backups/constituents/...`
+- `reports/maintenance/...`
+- Existing market-event rows through `data.market_events`
+
+## Web console
+
+Open:
+
+```text
+/maintenance
+```
+
+Available actions:
+
+- **Dry Run All** — fetches proposed metadata and writes reports without replacing files
+- **Dry Run Constituents** — previews S&P 500/STI/HSI constituent changes
+- **Apply Constituents Refresh** — backs up and replaces constituent files only after validation passes
+- **Refresh Market Events** — calls the existing event service for portfolio/strategy symbols
+- **Validate Metadata Only** — validates current local metadata files
+
+## CLI
+
+The CLI defaults to a safe dry run; nothing is applied unless `--apply` is passed.
+
+```bash
+python -m web.maintenance run --dry-run
+python -m web.maintenance run --task sp500_constituents --dry-run
+python -m web.maintenance run --task hsi_constituents --apply
+python -m web.maintenance run --task market_events --apply --symbol AAPL --symbol MSFT
+python -m web.maintenance validate
+```
+
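+Legacy wrappers remain available. Unlike the CLI default, each one runs its single task in apply mode (not dry-run) and prints the JSON report: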
+
+```bash
+python scripts/refresh_sp500_constituents.py
+python deployment_scripts/refresh_hsi_constituents.py
+```
+
+## Validation rules
+
+Constituent refreshes are rejected before file replacement when:
+
+- Required columns are missing
+- Row count is below the configured market threshold
+- Symbols are blank or duplicated
+- Symbol format does not match market-specific rules
+- Count change is greater than 25%, unless explicitly allowed
+
+A warning is recorded when count change is greater than 10%.
+
+Default minimum counts:
+
+| Universe | Minimum rows |
+| --- | ---: |
+| S&P 500 | 450 |
+| STI | 25 |
+| HSI | 70 |
+
+## Reports and backups
+
+Each run writes both JSON and Markdown reports:
+
+```text
+reports/maintenance/maintenance_*.json
+reports/maintenance/maintenance_*.md
+```
+
+Apply mode creates timestamped backups before replacing any existing constituent file:
+
+```text
+data/backups/constituents/YYYYMMDD_HHMMSS/<filename>.csv
+```
+
+## Recommended cadence
+
+Run manually every 2–3 days, or daily if desired, preferably outside active market hours. Because the workflow is metadata-only, it is designed not to interfere with paper/live trading paths, but running it off-peak is still preferable.
diff --git a/scripts/refresh_sp500_constituents.py b/scripts/refresh_sp500_constituents.py
@@ -1,58 +1,26 @@
 #!/usr/bin/env python3
-"""Refresh the S&P 500 constituents CSV from Wikipedia.
+"""Compatibility wrapper for refreshing S&P 500 constituents.
 
-Run this script periodically (e.g. monthly) to keep
-``data/sp500_constituents.csv`` up to date with current index membership.
-
-Usage::
-
-    python scripts/refresh_sp500_constituents.py
-
-Requires ``pandas`` and ``lxml`` (both listed in requirements.txt).
-Tickers are normalised for yfinance compatibility: dots replaced with hyphens
-(e.g. ``BRK.B`` → ``BRK-B``).
+The maintained implementation now lives in ``web.maintenance`` so refreshes are
+validated, backed up, cache-invalidated, and reported consistently.
 """
 
-import csv
-import logging
-import sys
 from pathlib import Path
+import json
+import sys
+
+# Allow direct execution via ``python scripts/refresh_sp500_constituents.py``
+# without requiring the package to be installed first.
+sys.path.insert(0, str(Path(__file__).resolve().parent.parent))
 
-logger = logging.getLogger(__name__)
-logging.basicConfig(level=logging.INFO, format="%(levelname)s: %(message)s")
+from web.maintenance.runner import MaintenanceRunner  # noqa: E402
+from web.maintenance.tasks import STATUS_FAILED  # noqa: E402
 
 
 def main() -> int:
-    """Fetch the S&P 500 constituent list and write it to data/sp500_constituents.csv."""
-    try:
-        import pandas as pd
-    except ImportError:
-        logger.error("pandas is required: pip install pandas")
-        return 1
-
-    repo_root = Path(__file__).resolve().parent.parent
-    output_path = repo_root / "data" / "sp500_constituents.csv"
-
-    logger.info("Fetching S&P 500 constituent table from Wikipedia…")
-    url = "https://en.wikipedia.org/wiki/List_of_S%26P_500_companies"
-    try:
-        tables = pd.read_html(url)
-    except Exception as exc:
-        logger.error("Failed to fetch Wikipedia table: %s", exc)
-        return 1
-
-    df = tables[0][["Symbol", "Security", "GICS Sector", "GICS Sub-Industry"]]
-    df.columns = ["symbol", "security", "sector", "sub_industry"]
-
-    # Normalise tickers for yfinance compatibility
-    df["symbol"] = df["symbol"].str.replace(".", "-", regex=False)
-
-    # Remove duplicates (same symbol appearing twice due to share classes)
-    df = df.drop_duplicates(subset=["symbol"])
-
-    df.to_csv(output_path, index=False, quoting=csv.QUOTE_ALL)
-    logger.info("Written %d constituents to %s", len(df), output_path)
-    return 0
+    report = MaintenanceRunner().run(tasks=["sp500_constituents"], dry_run=False)
+    print(json.dumps(report.to_dict(), indent=2, sort_keys=True))
+    return 1 if report.status == STATUS_FAILED else 0
 
 
 if __name__ == "__main__":