Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
25 commits
Select commit Hold shift + click to select a range
6bb839f
Add maintenance task result models
evanlow Jun 24, 2026
c319f87
Add maintenance metadata validators
evanlow Jun 24, 2026
0d9eedc
Add maintenance source package
evanlow Jun 24, 2026
3e42725
Add S&P 500 maintenance source
evanlow Jun 24, 2026
d900e56
Add HSI maintenance source
evanlow Jun 24, 2026
e13b632
Add STI maintenance source
evanlow Jun 24, 2026
5696f09
Add maintenance report writer
evanlow Jun 24, 2026
ce2de3f
Add maintenance runner
evanlow Jun 24, 2026
6083fa8
Expose maintenance runner package
evanlow Jun 24, 2026
44ab5f7
Add maintenance CLI entrypoint
evanlow Jun 24, 2026
eabb65d
Add maintenance page route
evanlow Jun 24, 2026
af6525b
Add maintenance API routes
evanlow Jun 24, 2026
2e45bb4
Register maintenance routes
evanlow Jun 24, 2026
38a8532
Add maintenance console template
evanlow Jun 24, 2026
3fb9677
Add maintenance runner tests
evanlow Jun 24, 2026
be683b6
Route S&P refresh script through maintenance runner
evanlow Jun 24, 2026
5c823f0
Route HSI refresh script through maintenance runner
evanlow Jun 24, 2026
1dd14b8
Document system maintenance workflow
evanlow Jun 24, 2026
7e06aac
Make S&P maintenance wrapper import-safe
evanlow Jun 24, 2026
dca13b5
Make HSI maintenance wrapper import-safe
evanlow Jun 24, 2026
f4ad29a
Avoid maintenance package import cycle
evanlow Jun 24, 2026
f5ee971
fix: address PR #197 review feedback
Copilot Jun 24, 2026
4761b1e
fix: add logger to maintenance runner and log task exceptions server-…
Copilot Jun 24, 2026
5092bbb
fix: update docstring for include_portfolio_symbols and rename backup…
Copilot Jun 24, 2026
941d0c0
fix: rename backup_timestamp_dir to timestamped_backup_dir; use colon…
Copilot Jun 24, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
132 changes: 13 additions & 119 deletions deployment_scripts/refresh_hsi_constituents.py
Original file line number Diff line number Diff line change
@@ -1,132 +1,26 @@
#!/usr/bin/env python3
"""Refresh the HSI constituents CSV from Wikipedia.
"""Compatibility wrapper for refreshing HSI constituents.

Run this script periodically (e.g. monthly) to keep
``data/hsi_constituents.csv`` up to date with current index membership.

Usage::

python deployment_scripts/refresh_hsi_constituents.py

Requires ``pandas`` and ``requests`` (both listed in requirements.txt).
The script extracts the "Constituents of Hang Seng Index" table and writes
the standard screener columns used by the app:

* ``symbol``: yfinance-compatible HK ticker (e.g. ``0700.HK``)
* ``display_symbol``: zero-padded HK code for UI display (e.g. ``0700``)
* ``security``: company name
* ``sector``: sub-index bucket from source table (best available grouping)
* ``sub_industry``: left blank when not available in source
The maintained implementation now lives in ``web.maintenance`` so refreshes are
validated, backed up, cache-invalidated, and reported consistently.
"""

import csv
import logging
import re
import sys
from io import StringIO
from pathlib import Path
import json
import sys

logger = logging.getLogger(__name__)
logging.basicConfig(level=logging.INFO, format="%(levelname)s: %(message)s")


def _normalise_hk_ticker(raw: str) -> tuple[str, str] | None:
"""Convert a source ticker cell into ``(symbol, display_symbol)``.

Examples:
* "700" -> ("0700.HK", "0700")
* "0700" -> ("0700.HK", "0700")
* "0700.HK" -> ("0700.HK", "0700")
"""
text = str(raw).strip().upper()
if not text:
return None

# Keep only the leading numeric code if extra text appears in cell.
m = re.search(r"(\d{1,5})", text)
if not m:
return None

code = m.group(1).zfill(4)
return f"{code}.HK", code


def _load_source_table() -> "object":
"""Load the HSI constituents table from Wikipedia into a DataFrame."""
try:
import pandas as pd
import requests
except ImportError as exc:
raise RuntimeError("pandas and requests are required: pip install pandas requests") from exc

url = "https://en.wikipedia.org/wiki/Hang_Seng_Index"
logger.info("Fetching HSI constituent table from Wikipedia: %s", url)

resp = requests.get(url, headers={"User-Agent": "Mozilla/5.0"}, timeout=30)
resp.raise_for_status()

tables = pd.read_html(StringIO(resp.text))
for df in tables:
cols = {str(c).strip().lower(): c for c in df.columns}
if "ticker" in cols and "name" in cols:
ticker_col = cols["ticker"]
name_col = cols["name"]
sub_index_col = cols.get("sub-index")

out = pd.DataFrame()
out["raw_ticker"] = df[ticker_col]
out["security"] = df[name_col]
out["sector"] = df[sub_index_col] if sub_index_col is not None else ""
return out
# Allow direct execution via ``python deployment_scripts/refresh_hsi_constituents.py``
# without requiring the package to be installed first.
sys.path.insert(0, str(Path(__file__).resolve().parent.parent))

raise RuntimeError("Could not find expected HSI constituents table (Ticker/Name columns)")
from web.maintenance.runner import MaintenanceRunner # noqa: E402
from web.maintenance.tasks import STATUS_FAILED # noqa: E402


def main() -> int:
"""Fetch HSI constituents and write data/hsi_constituents.csv."""
try:
import pandas as pd
except ImportError:
logger.error("pandas is required: pip install pandas")
return 1

repo_root = Path(__file__).resolve().parent.parent
output_path = repo_root / "data" / "hsi_constituents.csv"

try:
source_df = _load_source_table()
except Exception as exc:
logger.error("Failed to fetch HSI constituents: %s", exc)
return 1

rows = []
for _, rec in source_df.iterrows():
normalised = _normalise_hk_ticker(rec.get("raw_ticker"))
if normalised is None:
continue
symbol, display_symbol = normalised
security = str(rec.get("security") or "").strip()
sector = str(rec.get("sector") or "").strip()
rows.append(
{
"symbol": symbol,
"display_symbol": display_symbol,
"security": security,
"sector": sector,
"sub_industry": "",
}
)

if not rows:
logger.error("No constituents parsed from source table")
return 1

df = pd.DataFrame(rows)
df = df.drop_duplicates(subset=["symbol"]).sort_values(by=["display_symbol"]).reset_index(drop=True)
df.to_csv(output_path, index=False, quoting=csv.QUOTE_ALL)

logger.info("Written %d HSI constituents to %s", len(df), output_path)
return 0
report = MaintenanceRunner().run(tasks=["hsi_constituents"], dry_run=False)
print(json.dumps(report.to_dict(), indent=2, sort_keys=True))
return 1 if report.status == STATUS_FAILED else 0


if __name__ == "__main__":
Expand Down
94 changes: 94 additions & 0 deletions docs/MAINTENANCE.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@
# System Maintenance: Market Metadata Refresh & Data Hygiene

TWS Robot includes a metadata-only System Maintenance workflow for keeping index universes and market-event context fresh without touching trading execution paths.

## What it maintains

- S&P 500, STI, and HSI constituent CSV files
- Market events through the existing market-events service
- Validation reports and backups

## Safety boundaries

The maintenance workflow must not place orders, change strategy behavior, start/stop strategies, modify autonomous trading configuration, or bypass emergency-stop controls.

Allowed writes are limited to:

- `data/*_constituents.csv`
- `data/backups/constituents/...`
- `reports/maintenance/...`
- Existing market-event rows through `data.market_events`

## Web console

Open:

```text
/maintenance
```

Available actions:

- **Dry Run All** — fetches proposed metadata and writes reports without replacing files
- **Dry Run Constituents** — previews S&P 500/STI/HSI constituent changes
- **Apply Constituents Refresh** — backs up and replaces constituent files only after validation passes
- **Refresh Market Events** — calls the existing event service for portfolio/strategy symbols
- **Validate Metadata Only** — validates current local metadata files

## CLI

Runs are dry-run by default, which is the safe mode; changes are applied only when `--apply` is passed.

```bash
python -m web.maintenance run --dry-run
python -m web.maintenance run --task sp500_constituents --dry-run
python -m web.maintenance run --task hsi_constituents --apply
python -m web.maintenance run --task market_events --apply --symbol AAPL --symbol MSFT
python -m web.maintenance validate
```

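Legacy wrappers remain available. Unlike the CLI, they always run their task in apply mode (`dry_run=False`), print the JSON report, and exit non-zero if the run failed: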

```bash
python scripts/refresh_sp500_constituents.py
python deployment_scripts/refresh_hsi_constituents.py
```

## Validation rules

Constituent refreshes are rejected before file replacement when:

- Required columns are missing
- Row count is below the configured market threshold
- Symbols are blank or duplicated
- Symbol format does not match market-specific rules
- Count change is greater than 25%, unless explicitly allowed

A warning is recorded (without rejecting the refresh) when the count change is greater than 10%.

Default minimum counts:

| Universe | Minimum rows |
| --- | ---: |
| S&P 500 | 450 |
| STI | 25 |
| HSI | 70 |

## Reports and backups

Each run writes both JSON and Markdown reports:

```text
reports/maintenance/maintenance_*.json
reports/maintenance/maintenance_*.md
```

Apply mode creates timestamped backups before replacing any existing constituent file:

```text
data/backups/constituents/YYYYMMDD_HHMMSS/<filename>.csv
```

## Recommended cadence

Run manually every 2–3 days, or daily if desired, preferably outside active market hours. The workflow is metadata-only and is designed not to interfere with paper/live trading paths, but running it off-peak is still recommended.
60 changes: 14 additions & 46 deletions scripts/refresh_sp500_constituents.py
Original file line number Diff line number Diff line change
@@ -1,58 +1,26 @@
#!/usr/bin/env python3
"""Refresh the S&P 500 constituents CSV from Wikipedia.
"""Compatibility wrapper for refreshing S&P 500 constituents.

Run this script periodically (e.g. monthly) to keep
``data/sp500_constituents.csv`` up to date with current index membership.

Usage::

python scripts/refresh_sp500_constituents.py

Requires ``pandas`` and ``lxml`` (both listed in requirements.txt).
Tickers are normalised for yfinance compatibility: dots replaced with hyphens
(e.g. ``BRK.B`` → ``BRK-B``).
The maintained implementation now lives in ``web.maintenance`` so refreshes are
validated, backed up, cache-invalidated, and reported consistently.
"""

import csv
import logging
import sys
from pathlib import Path
import json
import sys

# Allow direct execution via ``python scripts/refresh_sp500_constituents.py``
# without requiring the package to be installed first.
sys.path.insert(0, str(Path(__file__).resolve().parent.parent))

logger = logging.getLogger(__name__)
logging.basicConfig(level=logging.INFO, format="%(levelname)s: %(message)s")
from web.maintenance.runner import MaintenanceRunner # noqa: E402
from web.maintenance.tasks import STATUS_FAILED # noqa: E402


def main() -> int:
"""Fetch the S&P 500 constituent list and write it to data/sp500_constituents.csv."""
try:
import pandas as pd
except ImportError:
logger.error("pandas is required: pip install pandas")
return 1

repo_root = Path(__file__).resolve().parent.parent
output_path = repo_root / "data" / "sp500_constituents.csv"

logger.info("Fetching S&P 500 constituent table from Wikipedia…")
url = "https://en.wikipedia.org/wiki/List_of_S%26P_500_companies"
try:
tables = pd.read_html(url)
except Exception as exc:
logger.error("Failed to fetch Wikipedia table: %s", exc)
return 1

df = tables[0][["Symbol", "Security", "GICS Sector", "GICS Sub-Industry"]]
df.columns = ["symbol", "security", "sector", "sub_industry"]

# Normalise tickers for yfinance compatibility
df["symbol"] = df["symbol"].str.replace(".", "-", regex=False)

# Remove duplicates (same symbol appearing twice due to share classes)
df = df.drop_duplicates(subset=["symbol"])

df.to_csv(output_path, index=False, quoting=csv.QUOTE_ALL)
logger.info("Written %d constituents to %s", len(df), output_path)
return 0
report = MaintenanceRunner().run(tasks=["sp500_constituents"], dry_run=False)
print(json.dumps(report.to_dict(), indent=2, sort_keys=True))
return 1 if report.status == STATUS_FAILED else 0


if __name__ == "__main__":
Expand Down
Loading