Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion backend/app/core/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,4 +67,7 @@ def validate_runtime_env():
# Database settings
# ======================================
# DATABASE_URL = os.getenv("DATABASE_URL", "sqlite:///./test.db")
DATABASE_URL = os.getenv("DATABASE_URL", "sqlite:///./dev.db")
DATABASE_URL = os.getenv("DATABASE_URL", "sqlite:///./dev.db")


# Feature flag for METRIC log emission. Read from the METRICS_ENABLED
# environment variable (default "true"); only the case-insensitive string
# "true" enables metrics, any other value disables them.
METRICS_ENABLED = os.getenv("METRICS_ENABLED", "true").lower() == "true"
43 changes: 43 additions & 0 deletions backend/app/core/metrics.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
# app/core/metrics.py
import os
import time
from typing import Any

from app.core.logger import get_logger
from app.core.config import METRICS_ENABLED

logger = get_logger("metrics")


def now() -> float:
    """Return the current ``time.perf_counter()`` reading.

    The value is only meaningful as a start point: subtract it from a later
    reading to obtain an elapsed duration in seconds.
    """
    started_at = time.perf_counter()
    return started_at


def _fmt(fields: dict[str, Any]) -> str:
parts: list[str] = []
for k, v in fields.items():
if v is None:
continue
parts.append(f"{k}={v}")
return "|".join(parts)


def span(name: str, start: float, **fields: Any) -> int:
    """Log a span METRIC line and return the elapsed time in milliseconds.

    Args:
        name: Span name written into the ``name=`` field of the log line.
        start: A ``time.perf_counter()`` reading taken when the span began.
        **fields: Extra ``key=value`` pairs appended to the line; entries
            whose value is ``None`` are dropped by ``_fmt``.

    Returns:
        Elapsed whole milliseconds since ``start``, or ``-1`` when
        ``METRICS_ENABLED`` is false (nothing is measured or logged then).
    """
    if METRICS_ENABLED:
        elapsed_ms = int((time.perf_counter() - start) * 1000)
        segments = [f"METRIC|event=span|name={name}|ms={elapsed_ms}"]
        suffix = _fmt(fields)
        if suffix:
            # Only add the separator when there is something to append.
            segments.append(suffix)
        logger.info("|".join(segments))
        return elapsed_ms
    return -1


def mark(name: str, **fields: Any) -> None:
    """Log a point-in-time METRIC line that carries no duration.

    Args:
        name: Mark name written into the ``name=`` field of the log line.
        **fields: Extra ``key=value`` pairs appended to the line; entries
            whose value is ``None`` are dropped by ``_fmt``.

    Does nothing when ``METRICS_ENABLED`` is false.
    """
    if not METRICS_ENABLED:
        return
    line = f"METRIC|event=mark|name={name}"
    details = _fmt(fields)
    if details:
        # Only add the separator when there is something to append.
        line = f"{line}|{details}"
    logger.info(line)
55 changes: 48 additions & 7 deletions backend/app/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,8 @@
set_request_id,
)
from app.core.logger import get_logger
from app.core.config import validate_runtime_env
from app.core.config import METRICS_ENABLED, validate_runtime_env


logger = get_logger("Chatbot-law-prod.middleware.request_id")
validate_runtime_env() ## Validate environment variables at app startup
Expand Down Expand Up @@ -60,20 +61,60 @@ async def request_id_middleware(request: Request, call_next):
## Store
set_request_id(request_id)


## Instrumentation code ##################################
## (Added) Prepare the per-request metrics store
request.state.metrics = {}
##############################################

response = None

try:
response: Response = await call_next(request)
return response
finally:
duration_ms = (time.perf_counter() - start) * 1000
duration_ms = int((time.perf_counter() - start) * 1000)

## The logger format automatically includes request_id
logger.info(
f'{request.method} {request.url.path} completed in {duration_ms: .2f}ms'
f'{request.method} {request.url.path} completed in {duration_ms}ms'
)


## Instrumentation code ##################################
## (Added) Exclude /health (avoid log pollution)
## (Added) Per-request summary METRIC log for operations
if METRICS_ENABLED:
path = request.url.path
# Excluding health is recommended (avoid log pollution from health checks)
if path != "/health":
m = getattr(request.state, "metrics", {}) or {}
parts = [
"METRIC|event=request",
f"path={path}",
f"method={request.method}",
f"ms_total={duration_ms}",
]
# If the router recorded values, output them as well
for key in (
"ms_db_user",
"ms_history_load",
"ms_ask_llm",
"ms_db_assistant",
):
if key in m:
parts.append(f"{key}={m[key]}")
logger.info("|".join(parts))
##############################################


## Include X-Request-ID in the response headers sent to the client
if response is not None:
response.headers[REQUEST_ID_HEADER] = request_id

## Reset so the value does not leak into the next request
set_request_id(None)

## Include X-Request-ID in the response headers sent to the client
response.headers[REQUEST_ID_HEADER] = request_id
return response



## Register routers (add them to the route table)
Expand Down
28 changes: 23 additions & 5 deletions backend/app/routers/chat.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,9 +16,10 @@
"""


from fastapi import APIRouter, Depends, HTTPException
from fastapi import APIRouter, Depends, HTTPException, Request
from sqlalchemy.orm import Session

from app.core.metrics import now, span
from app.db import get_db
from app.repository.chat import append_message
from app.schemas.chat_request import ChatRequest
Expand All @@ -35,6 +36,7 @@
def chat(
session_id: str,
payload: ChatRequest,
request: Request, # โœ… ์ถ”๊ฐ€: request.state.metrics์— ์ ์žฌํ•˜๊ธฐ ์œ„ํ•จ
db: Session = Depends(get_db),
):
if not payload.message.strip():
Expand All @@ -43,34 +45,50 @@ def chat(
# ------------------------------------------------------------------
# 1. Save the user message
# ------------------------------------------------------------------
t = now()
message = payload.message.strip()
append_message(
db=db,
conversation_id=session_id,
role="user",
content=payload.message,
content=message,
)
ms_db_user = span("db_append_user", t)
request.state.metrics["ms_db_user"] = ms_db_user # โœ… ๋ผ์šฐํ„ฐ ๋‹จ์œ„ ๋ฉ”ํŠธ๋ฆญ ์ €์žฅ์†Œ์— ๊ธฐ๋ก

# ------------------------------------------------------------------
# 2. Call the LLM (RAG)
# ask_llm returns (answer, session_id, sources)
# ask_llm returns (answer, session_id, sources, extra)
# ------------------------------------------------------------------
answer, _, sources = ask_llm(
t = now()
message = payload.message.strip()
answer, _, sources, extra = ask_llm(
db=db,
message=payload.message,
message=message,
session_id=session_id,
)
ms_ask_llm = span("ask_llm_total", t)
request.state.metrics["ms_ask_llm"] = ms_ask_llm # โœ… ๋ผ์šฐํ„ฐ ๋‹จ์œ„ ๋ฉ”ํŠธ๋ฆญ ์ €์žฅ์†Œ์— ๊ธฐ๋ก

# Also include history load in the request summary (stage-2 requirement)
if extra and "ms_history_load" in extra:
request.state.metrics["ms_history_load"] = extra["ms_history_load"]

# ------------------------------------------------------------------
# 3. Save the assistant message
# Note: only answer is stored in the DB (sources are response metadata)
# ------------------------------------------------------------------
t = now()
append_message(
db=db,
conversation_id=session_id,
role="assistant",
content=answer,
)

ms_db_assistant = span("db_append_assistant", t)
request.state.metrics["ms_db_assistant"] = ms_db_assistant # โœ… ๋ผ์šฐํ„ฐ ๋‹จ์œ„ ๋ฉ”ํŠธ๋ฆญ ์ €์žฅ์†Œ์— ๊ธฐ๋ก

# ------------------------------------------------------------------
# 4. Return the response
# ------------------------------------------------------------------
Expand Down
15 changes: 12 additions & 3 deletions backend/app/service/chain_builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,8 @@
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate

from app.core.config import OPENAI_MODEL
from app.core.metrics import now, span, mark
from app.core.config import OPENAI_MODEL, RAG_TOP_K
from app.core.logger import get_logger
from app.service.retriever_service import get_retriever

Expand Down Expand Up @@ -208,15 +209,23 @@ def _invoke(inputs: Dict[str, Any]) -> Dict[str, Any]:
query = inputs["input"]

# 1) Retrieve
t = now()
docs = retriever.invoke(query)
span("rag_retrieve", t, k=int(RAG_TOP_K), docs=len(docs))
logger.info("Retrieved %d documents from Pinecone", len(docs))

# 2) Build context + sources
t = now()
context, sources = _format_docs_with_citation_numbers(docs)
msg = prompt.invoke({"input": query, "context": context})
span("prompt_build", t, docs=len(docs), chars_context=len(context))

# 3) LLM answer with forced citation format
msg = prompt.invoke({"input": query, "context": context})
answer = parser.invoke(llm.invoke(msg)).strip()
mark("llm_start", model=OPENAI_MODEL)
t = now()
raw = llm.invoke(msg)
span("llm_total", t, model=OPENAI_MODEL)
answer = parser.invoke(raw).strip()

return {"answer": answer, "sources": sources}

Expand Down
16 changes: 12 additions & 4 deletions backend/app/service/llm_service.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,12 @@
import uuid
from functools import lru_cache
from typing import Optional
from typing import Optional, Any

from requests import Session
# from requests import Session
from sqlalchemy.orm import Session # โœ… ๊ธฐ์กด ์ฝ”๋“œ์˜ requests.Session์€ ์˜คํƒ€/๋ถ€์ •ํ™• ๊ฐ€๋Šฅ์„ฑ ๋†’์Œ

from app.core.logger import get_logger
from app.core.metrics import now, span
from app.repository.chat import list_messages
from app.service.chain_builder import build_rag_chain

Expand All @@ -20,14 +22,19 @@ def get_chain():
def ask_llm(db: Session, message: str, session_id: Optional[str] = None):
"""
Returns:
(answer: str, session_id: str, sources: list[dict])
(answer: str, session_id: str, sources: list[dict], extra: dict)
"""
if not session_id:
session_id = str(uuid.uuid4())
logger.info("Generated new session_id=%s", session_id)

HISTORY_LIMIT = 20

# 1. Load the conversation history (up to HISTORY_LIMIT messages)
t = now()
history = list_messages(db, session_id, limit=HISTORY_LIMIT)
ms_history_load = span("db_history_load", t, limit=HISTORY_LIMIT)

history_text = "\n".join([f"{m.role}: {m.content}" for m in history]).strip()

if history and history[-1].role == "user" and history[-1].content.strip() == message.strip():
Expand All @@ -52,4 +59,5 @@ def ask_llm(db: Session, message: str, session_id: Optional[str] = None):
len(sources),
)

return answer, session_id, sources
extra: dict[str, Any] = {"ms_history_load": ms_history_load}
return answer, session_id, sources, extra