From b907ef1476263836de7e4ef2111c0ddcadcc61fa Mon Sep 17 00:00:00 2001 From: youneedpython Date: Wed, 25 Feb 2026 20:52:57 +0900 Subject: [PATCH] feat(backend): add backend performance metrics and optimize RAG retrieval --- backend/app/core/config.py | 5 ++- backend/app/core/metrics.py | 43 ++++++++++++++++++++++ backend/app/main.py | 55 ++++++++++++++++++++++++---- backend/app/routers/chat.py | 28 +++++++++++--- backend/app/service/chain_builder.py | 15 ++++++-- backend/app/service/llm_service.py | 16 ++++++-- 6 files changed, 142 insertions(+), 20 deletions(-) create mode 100644 backend/app/core/metrics.py diff --git a/backend/app/core/config.py b/backend/app/core/config.py index ff69b37..8b97fc3 100644 --- a/backend/app/core/config.py +++ b/backend/app/core/config.py @@ -67,4 +67,7 @@ def validate_runtime_env(): # Database settings # ====================================== # DATABASE_URL = os.getenv("DATABASE_URL", "sqlite:///./test.db") -DATABASE_URL = os.getenv("DATABASE_URL", "sqlite:///./dev.db") \ No newline at end of file +DATABASE_URL = os.getenv("DATABASE_URL", "sqlite:///./dev.db") + + +METRICS_ENABLED = os.getenv("METRICS_ENABLED", "true").lower() == "true" \ No newline at end of file diff --git a/backend/app/core/metrics.py b/backend/app/core/metrics.py new file mode 100644 index 0000000..3a7a0f2 --- /dev/null +++ b/backend/app/core/metrics.py @@ -0,0 +1,43 @@ +# app/core/metrics.py +import os +import time +from typing import Any + +from app.core.logger import get_logger +from app.core.config import METRICS_ENABLED + +logger = get_logger("metrics") + + +def now() -> float: + """Monotonic start time for duration measurement.""" + return time.perf_counter() + + +def _fmt(fields: dict[str, Any]) -> str: + parts: list[str] = [] + for k, v in fields.items(): + if v is None: + continue + parts.append(f"{k}={v}") + return "|".join(parts) + + +def span(name: str, start: float, **fields: Any) -> int: + """Emit a span metric log and return elapsed ms.""" + if not METRICS_ENABLED: + return -1 + ms = int((time.perf_counter() - start) * 1000) + base = f"METRIC|event=span|name={name}|ms={ms}" + extra = _fmt(fields) + logger.info(base + (f"|{extra}" if extra else "")) + return ms + + +def mark(name: str, **fields: Any) -> None: + """Emit a mark log (no duration).""" + if not METRICS_ENABLED: + return + base = f"METRIC|event=mark|name={name}" + extra = _fmt(fields) + logger.info(base + (f"|{extra}" if extra else "")) \ No newline at end of file diff --git a/backend/app/main.py b/backend/app/main.py index e8366a5..0242ca2 100644 --- a/backend/app/main.py +++ b/backend/app/main.py @@ -27,7 +27,8 @@ set_request_id, ) from app.core.logger import get_logger -from app.core.config import validate_runtime_env +from app.core.config import METRICS_ENABLED, validate_runtime_env + logger = get_logger("Chatbot-law-prod.middleware.request_id") validate_runtime_env() ## 앱 실행 시점에 환경변수 검증 @@ -60,20 +61,60 @@ async def request_id_middleware(request: Request, call_next): ## 저장 set_request_id(request_id) + + ## 계측 코드 ################################## + ## (추가) 요청 단위 metrics 저장소 준비 + request.state.metrics = {} + ############################################## + + response = None + try: response: Response = await call_next(request) + return response finally: - duration_ms = (time.perf_counter() - start) * 1000 + duration_ms = int((time.perf_counter() - start) * 1000) + ## logger 포맷에 request_id가 자동 포함됨 logger.info( - f'{request.method} {request.url.path} completed in {duration_ms: .2f}ms' + f'{request.method} {request.url.path} completed in {duration_ms}ms' ) + + + ## 계측 코드 ################################## + ## (추가) /health는 제외(로그 오염 방지) + ## (추가) 운영용 요청 요약 METRIC 로그 + if METRICS_ENABLED: + path = request.url.path + # health는 제외 권장 (헬스체크로 로그 오염 방지) + if path != "/health": + m = getattr(request.state, "metrics", {}) or {} + parts = [ + "METRIC|event=request", + f"path={path}", + f"method={request.method}", + f"ms_total={duration_ms}", + ] + # 라우터에서 기록한 값이 있으면 함께 출력 + for key in ( + "ms_db_user", + "ms_history_load", + "ms_ask_llm", + "ms_db_assistant", + ): + if key in m: + parts.append(f"{key}={m[key]}") + logger.info("|".join(parts)) + ############################################## + + + ## 응답헤더에 X-Request-ID 포함하여 클라이언트에 전송 + if response is not None: + response.headers[REQUEST_ID_HEADER] = request_id + ## 다음 요청에 섞이지 않게 초기화 set_request_id(None) - - ## 응답헤더에 X-Request-ID 포함하여 클라이언트에 전송 - response.headers[REQUEST_ID_HEADER] = request_id - return response + ## 라우터 등록 (라우트 테이블에 등록) diff --git a/backend/app/routers/chat.py b/backend/app/routers/chat.py index 2f65121..fb73e1b 100644 --- a/backend/app/routers/chat.py +++ b/backend/app/routers/chat.py @@ -16,9 +16,10 @@ """ -from fastapi import APIRouter, Depends, HTTPException +from fastapi import APIRouter, Depends, HTTPException, Request from sqlalchemy.orm import Session +from app.core.metrics import now, span from app.db import get_db from app.repository.chat import append_message from app.schemas.chat_request import ChatRequest @@ -35,6 +36,7 @@ def chat( session_id: str, payload: ChatRequest, + request: Request, # ✅ 추가: request.state.metrics에 적재하기 위함 db: Session = Depends(get_db), ): if not payload.message.strip(): @@ -43,27 +45,40 @@ def chat( # ------------------------------------------------------------------ # 1. user 메시지 저장 # ------------------------------------------------------------------ + t = now() + message = payload.message.strip() append_message( db=db, conversation_id=session_id, role="user", - content=payload.message, + content=message, ) + ms_db_user = span("db_append_user", t) + request.state.metrics["ms_db_user"] = ms_db_user # ✅ 라우터 단위 메트릭 저장소에 기록 # ------------------------------------------------------------------ # 2. LLM 호출 (RAG) - # ask_llm은 (answer, session_id, sources)를 반환 + # ask_llm은 (answer, session_id, sources, extra)를 반환 # ------------------------------------------------------------------ - answer, _, sources = ask_llm( + t = now() + message = payload.message.strip() + answer, _, sources, extra = ask_llm( db=db, - message=payload.message, + message=message, session_id=session_id, ) + ms_ask_llm = span("ask_llm_total", t) + request.state.metrics["ms_ask_llm"] = ms_ask_llm # ✅ 라우터 단위 메트릭 저장소에 기록 + + # history load도 요청 요약에 포함(2단계 요구) + if extra and "ms_history_load" in extra: + request.state.metrics["ms_history_load"] = extra["ms_history_load"] # ------------------------------------------------------------------ # 3. assistant 메시지 저장 # ※ DB에는 answer만 저장 (sources는 응답 메타 정보) # ------------------------------------------------------------------ + t = now() append_message( db=db, conversation_id=session_id, @@ -71,6 +86,9 @@ def chat( content=answer, ) + ms_db_assistant = span("db_append_assistant", t) + request.state.metrics["ms_db_assistant"] = ms_db_assistant # ✅ 라우터 단위 메트릭 저장소에 기록 + # ------------------------------------------------------------------ # 4. 응답 반환 # ------------------------------------------------------------------ diff --git a/backend/app/service/chain_builder.py b/backend/app/service/chain_builder.py index e654179..d84a4c7 100644 --- a/backend/app/service/chain_builder.py +++ b/backend/app/service/chain_builder.py @@ -9,7 +9,8 @@ from langchain_core.output_parsers import StrOutputParser from langchain_core.prompts import ChatPromptTemplate -from app.core.config import OPENAI_MODEL +from app.core.metrics import now, span, mark +from app.core.config import OPENAI_MODEL, RAG_TOP_K from app.core.logger import get_logger from app.service.retriever_service import get_retriever @@ -208,15 +209,23 @@ def _invoke(inputs: Dict[str, Any]) -> Dict[str, Any]: query = inputs["input"] # 1) Retrieve + t = now() docs = retriever.invoke(query) + span("rag_retrieve", t, k=int(RAG_TOP_K), docs=len(docs)) logger.info("Retrieved %d documents from Pinecone", len(docs)) # 2) Build context + sources + t = now() context, sources = _format_docs_with_citation_numbers(docs) + msg = prompt.invoke({"input": query, "context": context}) + span("prompt_build", t, docs=len(docs), chars_context=len(context)) # 3) LLM answer with forced citation format - msg = prompt.invoke({"input": query, "context": context}) - answer = parser.invoke(llm.invoke(msg)).strip() + mark("llm_start", model=OPENAI_MODEL) + t = now() + raw = llm.invoke(msg) + span("llm_total", t, model=OPENAI_MODEL) + answer = parser.invoke(raw).strip() return {"answer": answer, "sources": sources} diff --git a/backend/app/service/llm_service.py b/backend/app/service/llm_service.py index 67a094f..2afd6b1 100644 --- a/backend/app/service/llm_service.py +++ b/backend/app/service/llm_service.py @@ -1,10 +1,12 @@ import uuid from functools import lru_cache -from typing import Optional +from typing import Optional, Any -from requests import Session +# from requests import Session +from sqlalchemy.orm import Session # ✅ 기존 코드의 requests.Session은 오타/부정확 가능성 높음 from app.core.logger import get_logger +from app.core.metrics import now, span from app.repository.chat import list_messages from app.service.chain_builder import build_rag_chain @@ -20,14 +22,19 @@ def get_chain(): def ask_llm(db: Session, message: str, session_id: Optional[str] = None): """ Returns: - (answer: str, session_id: str, sources: list[dict]) + (answer: str, session_id: str, sources: list[dict], extra: dict) """ if not session_id: session_id = str(uuid.uuid4()) logger.info("Generated new session_id=%s", session_id) HISTORY_LIMIT = 20 + + # 1. 대화 기록 로드 (최대 HISTORY_LIMIT개) + t = now() history = list_messages(db, session_id, limit=HISTORY_LIMIT) + ms_history_load = span("db_history_load", t, limit=HISTORY_LIMIT) + history_text = "\n".join([f"{m.role}: {m.content}" for m in history]).strip() if history and history[-1].role == "user" and history[-1].content.strip() == message.strip(): @@ -52,4 +59,5 @@ def ask_llm(db: Session, message: str, session_id: Optional[str] = None): len(sources), ) - return answer, session_id, sources + extra: dict[str, Any] = {"ms_history_load": ms_history_load} + return answer, session_id, sources, extra