levilevente · levilevente · Jun 15, 2026 · Jun 7, 2026 · Jun 7, 2026 · Jun 7, 2026
diff --git a/.gitignore b/.gitignore
@@ -28,4 +28,11 @@ npm-debug.log*
 yarn-debug.log*
 yarn-error.log*
 .pytest_cache/
-__pycache__/
+__pycache__/
+
+# Secrets
+secrets/
+
+.idea/
+
+k8s/secrets.yaml
diff --git a/apps/agent-backend/.dockerignore b/apps/agent-backend/.dockerignore
@@ -0,0 +1,17 @@
+__pycache__
+*.py[cod]
+*.egg-info
+.venv
+venv
+.env
+.env.*
+.git
+.gitignore
+.dockerignore
+Dockerfile
+docker-compose.yml
+requirements.txt
+.ruff_cache
+.mypy_cache
+tests
+chroma_db
diff --git a/apps/agent-backend/.env.example b/apps/agent-backend/.env.example
@@ -0,0 +1,21 @@
+# Copy to `.env` and fill in your values:
+
+# Local dev:  poetry run uvicorn server.serve:app --host 0.0.0.0 --port 8002
+# Docker:     docker compose up agent-service
+
+# Required — Groq LLM
+GROQ_API_KEY=gsk_your_groq_key_here
+MODEL_NAME=llama3.1
+LLM_LOCAL=False
+
+# Auth — session cookie validation
+CORE_BACKEND_URL=http://localhost:8000
+
+# Optional — Tavily web search
+TAVILY_API_KEY=
+
+ENVIRONMENT=dev
+PORT=8002
+LOGGING_LEVEL=INFO
+
+ALLOWED_ORIGINS=http://localhost:5173,http://127.0.0.1:5173
diff --git a/apps/agent-backend/Dockerfile b/apps/agent-backend/Dockerfile
@@ -0,0 +1,30 @@
+FROM python:3.12-slim
+
+WORKDIR /app
+
+# With POETRY_VIRTUALENVS_CREATE=false Poetry installs into the image's own
+# interpreter instead of creating a virtualenv.
+ENV POETRY_VERSION=2.2.1 \
+    POETRY_VIRTUALENVS_CREATE=false \
+    POETRY_NO_INTERACTION=1 \
+    PYTHONDONTWRITEBYTECODE=1 \
+    PYTHONUNBUFFERED=1 \
+    PYTHONPATH=/app
+
+RUN pip install --no-cache-dir poetry==${POETRY_VERSION}
+
+# Install dependencies before copying sources so this layer stays cached
+# until pyproject.toml / poetry.lock change.
+COPY pyproject.toml poetry.lock README.md ./
+RUN poetry install --no-root
+
+# Sources are present now: install the project package itself.
+COPY galacticview_bot ./galacticview_bot
+COPY server ./server
+RUN poetry install
+
+EXPOSE 8002
+
+# NOTE(review): --forwarded-allow-ips "*" makes uvicorn trust X-Forwarded-*
+# headers from any peer; confirm the container is only reachable via a proxy.
+CMD ["uvicorn", "server.serve:app", "--host", "0.0.0.0", "--port", "8002", "--proxy-headers", "--forwarded-allow-ips", "*"]
diff --git a/apps/agent-backend/galacticview_bot/nodes/reasoner.py b/apps/agent-backend/galacticview_bot/nodes/reasoner.py
@@ -1,17 +1,66 @@
+from groq import BadRequestError
+from langchain_core.messages import BaseMessage
 from loguru import logger
+
 from galacticview_bot.core.state import AgentState
 from galacticview_bot.nodes.model import llm
 from galacticview_bot.tools.search import tavily_search_tool
 
 tools = [tool for tool in [tavily_search_tool] if tool is not None]
 llm_with_tools = llm.bind_tools(tools, tool_choice="auto") if tools else llm
 
+MAX_TOOL_RETRIES = 3
+TOOL_TEMPERATURE = 0.3
+
+
+def _is_tool_use_failed(error: BadRequestError) -> bool:
+    """Return True when the error's text contains Groq's ``tool_use_failed`` marker."""
+    return "tool_use_failed" in str(error)
+
+
+def _invoke_with_tool_retry(messages: list[BaseMessage]) -> BaseMessage:
+    """Call the tool-bound LLM, cooling the temperature by 0.1 after each tool_use_failed."""
+    current_temp = TOOL_TEMPERATURE
+    final_attempt = MAX_TOOL_RETRIES - 1
+
+    for attempt_index in range(MAX_TOOL_RETRIES):
+        try:
+            return llm_with_tools.bind(temperature=current_temp).invoke(messages)
+        except BadRequestError as exc:
+            # Unrelated 400s and the final failed attempt propagate to the caller.
+            if attempt_index == final_attempt or not _is_tool_use_failed(exc):
+                raise
+            current_temp = max(current_temp - 0.1, 0.0)
+            logger.warning(
+                "Groq tool call failed (attempt {}/{}), retrying with temperature {}",
+                attempt_index + 1,
+                MAX_TOOL_RETRIES,
+                current_temp,
+            )
+
+    raise RuntimeError("Tool invocation failed after retries")
+
+
 def reasoner(state: AgentState) -> dict:
     """
     The brain. Decides whether to search or answer.
     """
     logger.info("Entering Reasoner Node")
     messages = state["messages"]
 
-    response = llm_with_tools.invoke(messages)
+    if not tools:
+        # No tools bound, so llm_with_tools is the plain llm.
+        return {"messages": [llm_with_tools.invoke(messages)]}
+
+    try:
+        response = _invoke_with_tool_retry(messages)
+    except BadRequestError as error:
+        if not _is_tool_use_failed(error):
+            raise
+        logger.warning(
+            "Tool calling failed after retries, answering without search: {}",
+            error,
+        )
+        response = llm.invoke(messages)
+
     return {"messages": [response]}
diff --git a/apps/agent-backend/galacticview_bot/nodes/tool_node.py b/apps/agent-backend/galacticview_bot/nodes/tool_node.py
@@ -4,7 +4,7 @@
 from galacticview_bot.core.state import AgentState
 from galacticview_bot.tools.search import tavily_search_tool
 
-tools = [tavily_search_tool]
+tools = [tool for tool in [tavily_search_tool] if tool is not None]
 
 def custom_tool_node(state: AgentState) -> dict:
     """

diff --git a/apps/agent-backend/galacticview_bot/tools/search.py b/apps/agent-backend/galacticview_bot/tools/search.py
@@ -10,6 +10,8 @@
 class TavilyInput(BaseModel):
     query: str = Field(description="The search query to find information on the internet.")
 
+tavily_search_tool = None
+
 if os.getenv("TAVILY_API_KEY"):
     logger.info("TAVILY_API_KEY found. Initializing Tavily search tool.")
     tavily_search_tool = TavilySearch(

diff --git a/apps/agent-backend/poetry.lock b/apps/agent-backend/poetry.lock
diff --git a/apps/agent-backend/pyproject.toml b/apps/agent-backend/pyproject.toml
@@ -20,6 +20,7 @@ dependencies = [
     "loguru (>=0.7.3,<0.8.0)",
     "fastapi[standard] (>=0.123.0,<0.124.0)",
     "slowapi (>=0.1.9,<0.2.0)",
+    "httpx (>=0.28.1,<0.29.0)",
     "langchain-community (>=0.4.1,<0.5.0)",
     "langchain-openai (>=1.2.1,<2.0.0)",
 ]

diff --git a/apps/agent-backend/server/cors.py b/apps/agent-backend/server/cors.py
@@ -0,0 +1,33 @@
+import os
+
+from fastapi import FastAPI
+from fastapi.middleware.cors import CORSMiddleware
+
+_DEV_ORIGIN_REGEX = r"^https?://(localhost|127\.0\.0\.1)(:\d+)?$"
+_DEFAULT_ORIGINS = "http://localhost:5173,http://127.0.0.1:5173"
+
+
+def add_cors_middleware(app: FastAPI) -> None:
+    """Register CORSMiddleware: localhost-regex origins in dev, ALLOWED_ORIGINS otherwise."""
+    is_dev = os.getenv("ENVIRONMENT", "prod") == "dev"
+    if not is_dev:
+        # Comma-separated env value; surrounding spaces and empty entries are dropped.
+        raw_value = os.getenv("ALLOWED_ORIGINS", _DEFAULT_ORIGINS)
+        trimmed = (entry.strip() for entry in raw_value.split(","))
+        app.add_middleware(
+            CORSMiddleware,
+            allow_origins=[entry for entry in trimmed if entry],
+            allow_credentials=True,
+            allow_methods=["*"],
+            allow_headers=["*"],
+        )
+        return
+
+    # Dev: accept http(s)://localhost or 127.0.0.1 on any port.
+    app.add_middleware(
+        CORSMiddleware,
+        allow_origin_regex=_DEV_ORIGIN_REGEX,
+        allow_credentials=True,
+        allow_methods=["*"],
+        allow_headers=["*"],
+    )
diff --git a/apps/agent-backend/server/dependencies.py b/apps/agent-backend/server/dependencies.py
@@ -0,0 +1,63 @@
+import os
+
+import httpx
+from dotenv import load_dotenv
+from fastapi import HTTPException, Request
+
+load_dotenv()
+
+CORE_BACKEND_URL = os.getenv("CORE_BACKEND_URL", "http://localhost:8000")
+AUTH_REQUEST_TIMEOUT = 5.0
+
+
+async def _verify_session(session_cookie: str) -> dict:
+    """
+    Forwards the session cookie to the core-backend and returns the user dict.
+
+    Returns the ``user`` object from the ``/auth/me`` JSON response, or an empty
+    dict when the response carries no user object.
+
+    Raises HTTPException:
+      - 401 when the core-backend rejects the session;
+      - 502 when the core-backend is unreachable, answers with any other
+        non-200 status, or returns a body that is not a JSON object.
+    """
+    async with httpx.AsyncClient(timeout=AUTH_REQUEST_TIMEOUT) as client:
+        try:
+            response = await client.get(
+                f"{CORE_BACKEND_URL}/auth/me",
+                cookies={"session": session_cookie},
+            )
+        except httpx.RequestError as exc:
+            raise HTTPException(status_code=502, detail="Auth service unavailable") from exc
+
+    if response.status_code == 401:
+        raise HTTPException(status_code=401, detail="Invalid or expired session")
+
+    if response.status_code != 200:
+        raise HTTPException(status_code=502, detail="Auth service returned an error")
+
+    # A 200 with a malformed body is an upstream fault, not a server crash:
+    # surface it as 502 instead of letting ValueError/AttributeError become a 500.
+    try:
+        payload = response.json()
+    except ValueError as exc:
+        raise HTTPException(status_code=502, detail="Auth service returned an error") from exc
+
+    if not isinstance(payload, dict):
+        raise HTTPException(status_code=502, detail="Auth service returned an error")
+
+    user = payload.get("user")
+    # Keep the declared ``dict`` return type even if "user" is missing or null.
+    return user if isinstance(user, dict) else {}
+
+
+async def require_auth(request: Request) -> None:
+    """
+    Auth guard for routes: rejects requests without a valid "session" cookie.
+    """
+    cookie_value = request.cookies.get("session")
+    if cookie_value:
+        await _verify_session(cookie_value)
+        return
+    raise HTTPException(status_code=401, detail="Not authenticated")
diff --git a/apps/agent-backend/server/dto/chat_type_out.py b/apps/agent-backend/server/dto/chat_type_out.py
@@ -1,7 +1,11 @@
-from pydantic import BaseModel
+from pydantic import BaseModel, ConfigDict, Field
+
 
 class ChatTypeOut(BaseModel):
     """Data transfer object for chat type output."""
+
+    model_config = ConfigDict(populate_by_name=True)  # NOTE(review): only a serialization alias is declared below — confirm this setting is needed
+
     title: str
     content: str
-    key_metrics: list[str]
+    key_metrics: list[str] = Field(serialization_alias="keyMetrics")  # written as "keyMetrics" when serialized by alias
diff --git a/apps/agent-backend/server/serve.py b/apps/agent-backend/server/serve.py
@@ -1,16 +1,15 @@
-from fastapi import FastAPI, Request
-from fastapi.middleware.cors import CORSMiddleware
-from slowapi import Limiter, _rate_limit_exceeded_handler
-from slowapi.errors import RateLimitExceeded
-import uvicorn
-
 import os
 
-from .service import chat_ask_question
+import uvicorn
+from fastapi import APIRouter, Depends, FastAPI, Request
+from loguru import logger
+from slowapi import Limiter, _rate_limit_exceeded_handler
+from slowapi.errors import RateLimitExceeded
 
+from .dependencies import require_auth  # loads .env via load_dotenv()
+from .cors import add_cors_middleware
 from .dto import ChatTypeIn, ChatTypeOut
-
-from loguru import logger
+from .service import chat_ask_question
 
 def get_real_ip(request: Request) -> str:
     """
@@ -31,40 +30,43 @@ def get_real_ip(request: Request) -> str:
 app.state.limiter = limiter
 app.add_exception_handler(RateLimitExceeded, _rate_limit_exceeded_handler) # type: ignore
 
-app.add_middleware(
-    CORSMiddleware,
-    allow_origins=["*"],
-    allow_credentials=True,
-    allow_methods=["*"],
-    allow_headers=["*"],
-)
+add_cors_middleware(app)
 
 
-@app.post("/chat")
+agent_router = APIRouter(prefix="/agent", tags=["agent"])
+
+
+@agent_router.post("/chat")  # served at /agent/chat through the router prefix
 @limiter.limit("7/minute")
-def chat_endpoint(request: Request, body: ChatTypeIn) -> ChatTypeOut:
+def chat_endpoint(
+    request: Request,
+    body: ChatTypeIn,
+    _: None = Depends(require_auth),  # auth guard only: raises HTTPException on failure, yields no value
+) -> ChatTypeOut:
     """
     Process chat questions using the agent and return structured responses.
     Rate limited to 7 requests per minute per IP.
     """
-    logger.info("Received request to /chat endpoint")
+    logger.info("Received request to /agent/chat endpoint")
     response_data: ChatTypeOut = chat_ask_question(body)
     logger.info("Sending response back to client")
     return response_data
 
+
+app.include_router(agent_router)
+
 def main() -> None:
     """
     Main function to run the FastAPI app using Uvicorn.
     """
     env = os.getenv("ENVIRONMENT", "prod")
 
     reload = env == "dev"
-    host = "127.0.0.1"
-    if env == "prod":
-        host = "0.0.0.0"
+    host = "0.0.0.0"  # NOTE(review): binds all interfaces in every environment, dev included — confirm intended
 
-    logger.info(f"Starting server on {host}:8000 with reload={reload}")
-    uvicorn.run("server.serve:app", host=host, port=8000, reload=reload)
+    port = int(os.getenv("PORT", "8002"))  # int() raises ValueError when PORT is not numeric
+    logger.info(f"Starting agent server on {host}:{port} (reload={reload}, ENVIRONMENT={env})")
+    uvicorn.run("server.serve:app", host=host, port=port, reload=reload)
 
 if __name__ == "__main__":
     main()
diff --git a/apps/agent-backend/server/service.py b/apps/agent-backend/server/service.py
@@ -6,6 +6,8 @@
 from langchain_core.messages import HumanMessage
 
 import json
+import uuid
+
 
 def chat_ask_question(chat_input: ChatTypeIn) -> ChatTypeOut:
     """
@@ -20,8 +22,7 @@ def chat_ask_question(chat_input: ChatTypeIn) -> ChatTypeOut:
     }
 
     try:
-        # Hardcoded to a single thread ID to maintain context across all requests for initial testing.
-        thread_id = "initial-single-context-thread"
+        thread_id = str(uuid.uuid4())
 
         config = {
             "configurable": {"thread_id": thread_id},