diff --git a/backend/scripts/ingest/backfill_ecos_cycle.py b/backend/scripts/ingest/backfill_ecos_cycle.py
new file mode 100644
index 00000000..1db15b80
--- /dev/null
+++ b/backend/scripts/ingest/backfill_ecos_cycle.py
@@ -0,0 +1,127 @@
+"""Backfill ecos_timeseries.cycle, which was 100% NULL.
+
+Found by audit-null-orphan-2026-05-04: the ECOS API ETL never loaded the ``cycle``
+column. For each of the 3 stat_codes (121Y006/722Y001/901Y009) the ECOS
+StatisticItemList API is called, then ``cycle`` is UPDATEd through the
+``(stat_code, item_code1)`` mapping.
+
+ECOS API rate limit / pagination:
+- 1000 items per page (901Y009 needs an extra call for page 2)
+- As of 2026-05-05: 901Y009 = 1,743 items (pages 1+2 combined), 121Y006 = 57, 722Y001 = 48
+
+Result: 0% -> 100% (2,783/2,783)
+
+Usage:
+    cd backend && python scripts/ingest/backfill_ecos_cycle.py
+"""
+
+from __future__ import annotations
+
+import sys
+from pathlib import Path
+
+import httpx
+import sqlalchemy as sa
+
+sys.path.insert(0, str(Path(__file__).resolve().parents[2]))
+from src.config.settings import settings  # noqa: E402
+
+PAGE = 1000
+# RESULT.CODE that ECOS uses for "no matching data"; presumably any other code is an error - TODO confirm.
+_NO_DATA_CODE = "INFO-200"
+
+
+def _fetch_meta(stat_code: str, key: str) -> dict[str, str]:
+    """Return the ``ITEM_CODE -> CYCLE`` map of one stat_code, following pagination.
+
+    Args:
+        stat_code: ECOS statistic table code, e.g. ``"901Y009"``.
+        key: ECOS API key.
+
+    Returns:
+        Item code mapped to its cycle; rows missing either field are skipped.
+
+    Raises:
+        RuntimeError: HTTP error status, or a ``RESULT`` envelope other than "no data".
+    """
+    out: dict[str, str] = {}
+    start = 1
+    # One client for every page so the connection is reused.
+    with httpx.Client(timeout=30) as client:
+        while True:
+            # HTTPS because the API key travels in the URL path.
+            url = (
+                f"https://ecos.bok.or.kr/api/StatisticItemList/{key}/json/kr/"
+                f"{start}/{start + PAGE - 1}/{stat_code}"
+            )
+            resp = client.get(url)
+            if resp.status_code >= 400:
+                # Built by hand (not raise_for_status) so the key-bearing URL stays out of the message.
+                raise RuntimeError(f"ECOS StatisticItemList {stat_code}: HTTP {resp.status_code}")
+            payload = resp.json()
+            block = payload.get("StatisticItemList")
+            if not block or not block.get("row"):
+                # An invalid key or a quota error must not look like "0 items".
+                result = payload.get("RESULT") or {}
+                code = result.get("CODE")
+                if code and code != _NO_DATA_CODE:
+                    raise RuntimeError(f"ECOS StatisticItemList {stat_code}: {code} {result.get('MESSAGE')}")
+                break
+            rows = block["row"]
+            for row in rows:
+                ic = row.get("ITEM_CODE")
+                cy = row.get("CYCLE")
+                if ic and cy:
+                    # NOTE(review): if one ITEM_CODE is listed under several cycles, the last row wins.
+                    out[ic] = cy
+            if len(rows) < PAGE:
+                break
+            start += PAGE
+    return out
+
+
+def main() -> None:
+    """Fill ``ecos_timeseries.cycle`` for every stat_code in the table, then print coverage.
+
+    Raises:
+        RuntimeError: ``settings.ecos_api_key`` is empty.
+    """
+    key = settings.ecos_api_key
+    if not key:
+        raise RuntimeError("ECOS_API_KEY missing in settings")
+
+    engine = sa.create_engine(settings.postgres_url)
+
+    # Distinct stat_codes present in the DB
+    with engine.connect() as conn:
+        stat_codes = [r[0] for r in conn.execute(sa.text("SELECT DISTINCT stat_code FROM ecos_timeseries")).fetchall()]
+    print(f"ecos_timeseries stat_codes: {stat_codes}")
+
+    # Built once; only NULL cycles are touched, so a re-run never overwrites a value.
+    update_stmt = sa.text(
+        "UPDATE ecos_timeseries SET cycle=:cy WHERE stat_code=:sc AND item_code1=:ic AND cycle IS NULL"
+    )
+
+    total_updates = 0
+    for sc in stat_codes:
+        meta = _fetch_meta(sc, key)
+        print(f" {sc}: meta {len(meta)} items")
+        # One transaction per stat_code.
+        with engine.begin() as conn:
+            updated = 0
+            for ic, cy in meta.items():
+                result = conn.execute(update_stmt, {"cy": cy, "sc": sc, "ic": ic})
+                updated += result.rowcount
+        total_updates += updated
+        print(f" updated: {updated} rows")
+
+    with engine.connect() as conn:
+        n = conn.execute(sa.text("SELECT COUNT(*) FROM ecos_timeseries")).scalar()
+        n_cy = conn.execute(sa.text("SELECT COUNT(*) FROM ecos_timeseries WHERE cycle IS NOT NULL")).scalar()
+    pct = (n_cy / n * 100) if n else 0
+    print()
+    print(f"=== AFTER ===\n cycle non-NULL: {n_cy}/{n} ({pct:.1f}%) — total update {total_updates}")
+
+
+if __name__ == "__main__":
+    main()
diff --git a/backend/scripts/ingest/fill_ttareungi_dong_code.py b/backend/scripts/ingest/fill_ttareungi_dong_code.py
new file mode 100644
index 00000000..0a4a0f6e
--- /dev/null
+++ b/backend/scripts/ingest/fill_ttareungi_dong_code.py
@@ -0,0 +1,110 @@
+"""Fill master_ttareungi_station.dong_code (Mapo only).
+
+PR #184 filled lat/lon + sigungu_code of the ttareungi stations (3,230 rows mapped
+through the API). On top of that result, this script maps the dong_code of Mapo
+(sigungu_code='11440') stations by comparing haversine distances against the 16
+dongs in ``dong_centroid``.
+
+- Mapo stations: the nearest dong_centroid's dong_code is applied
+- Non-Mapo stations: no Seoul-wide dong_centroid (E4 limitation) - skipped
+- ``opened_at`` column: not present in the Ttareungi API response - skipped
+
+Usage:
+    cd backend && python scripts/ingest/fill_ttareungi_dong_code.py
+
+Idempotent - rows whose dong_code is already filled are skipped.
+"""
+
+from __future__ import annotations
+
+import math
+import sys
+from pathlib import Path
+
+import sqlalchemy as sa
+
+sys.path.insert(0, str(Path(__file__).resolve().parents[2]))
+from src.config.settings import settings  # noqa: E402
+
+
+def _haversine_m(lat1: float, lon1: float, lat2: float, lon2: float) -> float:
+    """Return the great-circle distance in metres between two points given in degrees."""
+    radius = 6371000  # Earth radius m
+    phi1, phi2 = math.radians(lat1), math.radians(lat2)
+    dphi = math.radians(lat2 - lat1)
+    dlon = math.radians(lon2 - lon1)
+    a = math.sin(dphi / 2) ** 2 + math.cos(phi1) * math.cos(phi2) * math.sin(dlon / 2) ** 2
+    # Rounding can push a just above 1 for near-antipodal points; sqrt(1 - a) would then raise ValueError.
+    a = min(1.0, a)
+    return radius * 2 * math.atan2(math.sqrt(a), math.sqrt(1 - a))
+
+
+def main() -> None:
+    """Give each Mapo station lacking a dong_code the code of its nearest dong centroid, then print a summary."""
+    engine = sa.create_engine(settings.postgres_url)
+
+    with engine.connect() as conn:
+        # Both coordinates are required: a NULL lon would reach math.radians() as None.
+        # NOTE(review): not filtered to Mapo; if dong_centroid grows beyond Mapo, a border station
+        # could be assigned a neighbouring district's dong - confirm before that expansion.
+        centroids = conn.execute(
+            sa.text("SELECT dong_code, lat, lon FROM dong_centroid WHERE lat IS NOT NULL AND lon IS NOT NULL")
+        ).fetchall()
+        cents = [(r._mapping["dong_code"], r._mapping["lat"], r._mapping["lon"]) for r in centroids]
+        print(f"dong_centroid: {len(cents)} 동")
+
+        rows = conn.execute(
+            sa.text(
+                "SELECT station_id, lat, lon FROM master_ttareungi_station "
+                "WHERE sigungu_code='11440' AND dong_code IS NULL AND lat IS NOT NULL AND lon IS NOT NULL"
+            )
+        ).fetchall()
+        print(f"마포 ttareungi (dong_code NULL): {len(rows)}")
+
+    matched: list[tuple[str, str]] = []
+    # min() raises ValueError on an empty sequence, so matching is skipped when no centroid is loaded.
+    if cents:
+        for r in rows:
+            sid = r._mapping["station_id"]
+            lat = r._mapping["lat"]
+            lon = r._mapping["lon"]
+            best = min(cents, key=lambda c: _haversine_m(lat, lon, c[1], c[2]))
+            matched.append((sid, best[0]))
+
+    print(f"matched: {len(matched)}")
+
+    with engine.begin() as conn:
+        for sid, dc in matched:
+            # The NULL guard keeps a value written since the SELECT above from being overwritten.
+            conn.execute(
+                sa.text(
+                    "UPDATE master_ttareungi_station SET dong_code=:dc "
+                    "WHERE station_id=:sid AND dong_code IS NULL"
+                ),
+                {"dc": dc, "sid": sid},
+            )
+
+    with engine.connect() as conn:
+        total = conn.execute(sa.text("SELECT COUNT(*) FROM master_ttareungi_station")).scalar()
+        null_dc = conn.execute(
+            sa.text("SELECT COUNT(*) FROM master_ttareungi_station WHERE dong_code IS NULL")
+        ).scalar()
+        mapo_filled = conn.execute(
+            sa.text(
+                "SELECT COUNT(*) FROM master_ttareungi_station WHERE sigungu_code='11440' AND dong_code IS NOT NULL"
+            )
+        ).scalar()
+
+    # Guard the percentage so an empty table does not raise ZeroDivisionError.
+    null_pct = (null_dc / total * 100) if total else 0
+    print()
+    print("=== AFTER ===")
+    print(f" total {total}, dong_code NULL {null_dc} ({null_pct:.1f}%)")
+    print(f" 마포 dong_code 채워짐: {mapo_filled}")
+    print()
+    print("미적재 사유 (마포 외 5,298 row):")
+    print(" 서울 전체 dong_centroid 부재 (E4 한계). DongCentroid 서울 확장 별 PR 필요.")
+
+
+if __name__ == "__main__":
+    main()
diff --git a/backend/src/config/settings.py b/backend/src/config/settings.py
index 95e2184a..48590044 100644
--- a/backend/src/config/settings.py
+++ b/backend/src/config/settings.py
@@ -41,6 +41,8 @@ class Settings(BaseSettings):
     sgis_secret_key: str = os.getenv("SGIS_SECRET_KEY", "")
     molit_api_key: str = os.getenv("MOLIT_API_KEY", "")
     ftc_api_key: str = os.getenv("FTC_API_KEY", "")
+    kakao_api_key: str = os.getenv("KAKAO_API_KEY", "")
+    ecos_api_key: str = os.getenv("ECOS_API_KEY", "")
     law_oc: str = os.getenv("LAW_OC", "")
 
     # Naver DataLab API
diff --git a/docs/retrospective/2026-05-05.md b/docs/retrospective/2026-05-05.md
index 9999cf88..5e04c208 100644
--- a/docs/retrospective/2026-05-05.md
+++ b/docs/retrospective/2026-05-05.md
@@ -549,3 +549,63 @@
 ```
 
 ---
+
+## 12:31:32 세션 완료
+
+### 변경 파일
+- backend/src/agents/legal/categories.py
+- docs/retrospective/2026-05-05.md
+
+### diff 요약
+```
+ backend/src/agents/legal/categories.py | 18 +++++++++---------
+ docs/retrospective/2026-05-05.md | 9 +++++++++
+ 2 files changed, 18 insertions(+), 9 deletions(-)
+```
+
+---
+
+## 12:36:12 세션 완료
+
+### 변경 파일
+- backend/src/agents/legal/categories.py
+- docs/retrospective/2026-05-05.md
+
+### diff 요약
+```
+ backend/src/agents/legal/categories.py | 18 +++++++++---------
+ docs/retrospective/2026-05-05.md | 24 ++++++++++++++++++++++++
+ 2 files changed, 33 insertions(+), 9 deletions(-)
+```
+
+---
+
+## 12:40:27 세션 완료
+
+### 변경 파일
+- backend/src/agents/legal/categories.py
+- docs/retrospective/2026-05-05.md
+
+### diff 요약
+```
+ backend/src/agents/legal/categories.py | 18 ++++++++--------
+ docs/retrospective/2026-05-05.md | 39 ++++++++++++++++++++++++++++++++++
+ 2 files changed, 48 insertions(+), 9 deletions(-)
+```
+
+---
+
+## 12:41:12 세션 완료
+
+### 변경 파일
+- backend/src/agents/legal/categories.py
+- docs/retrospective/2026-05-05.md
+
+### diff 요약
+```
+ backend/src/agents/legal/categories.py | 18 ++++++------
+ docs/retrospective/2026-05-05.md | 54 ++++++++++++++++++++++++++++++++++
+ 2 files changed, 63 insertions(+), 9 deletions(-)
+```
+
+---