From 19de85058d6fc8fd6fedb9f0649de6cd134c910d Mon Sep 17 00:00:00 2001 From: Joseph Lee <175lee.joseph@gmail.com> Date: Mon, 18 May 2026 12:46:42 -0400 Subject: [PATCH 1/9] replaced curl command wtih "ollama list" --- docker-compose.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/docker-compose.yml b/docker-compose.yml index 7a0fe90..7a0c7f0 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -23,7 +23,8 @@ services: ports: - "11434:11434" healthcheck: - test: ["CMD", "curl", "-f", "http://127.0.0.1:11434/"] + # ollama/ollama has no curl; use the bundled CLI against the local server + test: ["CMD", "ollama", "list"] interval: 10s timeout: 5s retries: 10 From e915ae770fa88cf927997f3d915b63e2c0771b18 Mon Sep 17 00:00:00 2001 From: Joseph Lee <175lee.joseph@gmail.com> Date: Mon, 18 May 2026 14:35:36 -0400 Subject: [PATCH 2/9] Added unit tests and github action to run tests on merge to main --- .github/workflows/tests.yml | 27 +++++++++ tests/__init__.py | 1 + tests/support.py | 28 +++++++++ tests/test_api.py | 80 ++++++++++++++++++++++++++ tests/test_db.py | 45 +++++++++++++++ tests/test_ollama_client.py | 60 ++++++++++++++++++++ tests/test_rag_engine.py | 110 ++++++++++++++++++++++++++++++++++++ 7 files changed, 351 insertions(+) create mode 100644 .github/workflows/tests.yml create mode 100644 tests/__init__.py create mode 100644 tests/support.py create mode 100644 tests/test_api.py create mode 100644 tests/test_db.py create mode 100644 tests/test_ollama_client.py create mode 100644 tests/test_rag_engine.py diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml new file mode 100644 index 0000000..fd1a415 --- /dev/null +++ b/.github/workflows/tests.yml @@ -0,0 +1,27 @@ +name: Unit tests + +on: + push: + branches: [main] + pull_request: + branches: [main] + +jobs: + test: + runs-on: ubuntu-latest + steps: + - name: Checkout + uses: actions/checkout@v4 + + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: "3.12" + cache: pip + cache-dependency-path: requirements.txt + + - name: Install dependencies + run: pip install -r requirements.txt + + - name: Run unit tests + run: python -m unittest discover -s tests -v diff --git a/tests/__init__.py b/tests/__init__.py new file mode 100644 index 0000000..1efeaa2 --- /dev/null +++ b/tests/__init__.py @@ -0,0 +1 @@ +"""StatShift unit tests.""" diff --git a/tests/support.py b/tests/support.py new file mode 100644 index 0000000..830663c --- /dev/null +++ b/tests/support.py @@ -0,0 +1,28 @@ +"""Shared helpers for StatShift unit tests.""" + +from __future__ import annotations + +import sys +import tempfile +import unittest +from pathlib import Path + +ROOT = Path(__file__).resolve().parents[1] +if str(ROOT) not in sys.path: + sys.path.insert(0, str(ROOT)) + +from api.db import ReadOnlyDatabase # noqa: E402 +from scripts.init_db import init_database # noqa: E402 + + +class DatabaseTestCase(unittest.TestCase): + """Provides a seeded temporary SQLite database.""" + + def setUp(self) -> None: + self._tmp = tempfile.TemporaryDirectory() + self.db_path = Path(self._tmp.name) / "statshift_test.db" + init_database(self.db_path) + self.db = ReadOnlyDatabase(self.db_path) + + def tearDown(self) -> None: + self._tmp.cleanup() diff --git a/tests/test_api.py b/tests/test_api.py new file mode 100644 index 0000000..fe392b0 --- /dev/null +++ b/tests/test_api.py @@ -0,0 +1,80 @@ +"""Unit tests for the FastAPI read-only service.""" + +from __future__ import annotations + +import unittest +from unittest.mock import patch + +from fastapi.testclient import TestClient + +import api.main as main_module +from api.main import app +from tests.support import DatabaseTestCase + + +class APITests(DatabaseTestCase): + def setUp(self) -> None: + super().setUp() + self.settings_patcher = patch.object(main_module.settings, "db_path", self.db_path) + # main.py imports `db` by value; patch the name bound in api.main + self.db_patcher = patch.object(main_module, "db", self.db) + self.settings_patcher.start() + self.db_patcher.start() + self.client = TestClient(app) + + def tearDown(self) -> None: + self.db_patcher.stop() + self.settings_patcher.stop() + super().tearDown() + + def test_health_ok(self) -> None: + response = self.client.get("/health") + self.assertEqual(response.status_code, 200) + payload = response.json() + self.assertEqual(payload["status"], "ok") + self.assertTrue(payload["read_only"]) + self.assertEqual(payload["db_path"], str(self.db_path)) + + def test_list_documents(self) -> None: + response = self.client.get("/documents", params={"limit": 3}) + self.assertEqual(response.status_code, 200) + docs = response.json() + self.assertEqual(len(docs), 3) + + def test_get_document(self) -> None: + response = self.client.get("/documents/1") + self.assertEqual(response.status_code, 200) + self.assertEqual(response.json()["id"], 1) + + def test_get_document_not_found(self) -> None: + response = self.client.get("/documents/99999") + self.assertEqual(response.status_code, 404) + + def test_search(self) -> None: + response = self.client.get("/search", params={"q": "Curry", "limit": 5}) + self.assertEqual(response.status_code, 200) + results = response.json() + self.assertTrue(results) + self.assertIn("Curry", results[0]["title"]) + + def test_categories(self) -> None: + response = self.client.get("/categories") + self.assertEqual(response.status_code, 200) + categories = response.json()["categories"] + self.assertIn("league", categories) + + def test_write_methods_blocked(self) -> None: + for method in ("post", "put", "patch", "delete"): + response = getattr(self.client, method)("/documents") + self.assertEqual(response.status_code, 405) + self.assertIn("Write operations are disabled", response.json()["detail"]) + + def test_health_missing_database(self) -> None: + missing = self.db_path.parent / "missing.db" + with patch.object(main_module.settings, "db_path", missing): + response = self.client.get("/health") + self.assertEqual(response.status_code, 503) + + +if __name__ == "__main__": + unittest.main() diff --git a/tests/test_db.py b/tests/test_db.py new file mode 100644 index 0000000..9095b62 --- /dev/null +++ b/tests/test_db.py @@ -0,0 +1,45 @@ +"""Unit tests for read-only SQLite access.""" + +from __future__ import annotations + +from tests.support import DatabaseTestCase + + +class ReadOnlyDatabaseTests(DatabaseTestCase): + def test_list_documents_returns_seeded_rows(self) -> None: + docs = self.db.list_documents(limit=50) + self.assertGreaterEqual(len(docs), 8) + self.assertIn("title", docs[0]) + self.assertIn("category", docs[0]) + self.assertIn("content", docs[0]) + + def test_list_documents_filters_by_category(self) -> None: + players = self.db.list_documents(category="player", limit=50) + self.assertTrue(players) + self.assertTrue(all(doc["category"] == "player" for doc in players)) + + def test_get_document_returns_row(self) -> None: + doc = self.db.get_document(1) + self.assertIsNotNone(doc) + assert doc is not None + self.assertEqual(doc["id"], 1) + self.assertIn("Lakers", doc["title"]) + + def test_get_document_missing_returns_none(self) -> None: + self.assertIsNone(self.db.get_document(99999)) + + def test_search_finds_relevant_document(self) -> None: + results = self.db.search_documents("Wembanyama", limit=5) + self.assertTrue(results) + self.assertTrue( + any("Wembanyama" in doc["title"] or "Wembanyama" in doc["content"] for doc in results) + ) + + def test_search_empty_query_uses_like_fallback(self) -> None: + results = self.db.search_documents("Lakers", limit=3) + self.assertLessEqual(len(results), 3) + + def test_list_categories(self) -> None: + categories = self.db.list_categories() + self.assertIn("player", categories) + self.assertIn("team", categories) diff --git a/tests/test_ollama_client.py b/tests/test_ollama_client.py new file mode 100644 index 0000000..b9ea2f6 --- /dev/null +++ b/tests/test_ollama_client.py @@ -0,0 +1,60 @@ +"""Unit tests for the Ollama HTTP client.""" + +from __future__ import annotations + +import unittest +from unittest.mock import MagicMock, patch + +import httpx + +from rag.ollama_client import OllamaClient, OllamaError + + +class OllamaClientTests(unittest.TestCase): + def setUp(self) -> None: + self.client = OllamaClient(base_url="http://ollama.test", model="gemma2:2b") + + @patch("rag.ollama_client.httpx.get") + def test_is_available_true(self, mock_get: MagicMock) -> None: + mock_get.return_value = MagicMock(status_code=200, raise_for_status=MagicMock()) + self.assertTrue(self.client.is_available()) + mock_get.assert_called_once_with("http://ollama.test/api/tags", timeout=5.0) + + @patch("rag.ollama_client.httpx.get") + def test_is_available_false_on_error(self, mock_get: MagicMock) -> None: + mock_get.side_effect = httpx.ConnectError("down") + self.assertFalse(self.client.is_available()) + + @patch("rag.ollama_client.httpx.get") + def test_list_models(self, mock_get: MagicMock) -> None: + mock_get.return_value = MagicMock( + raise_for_status=MagicMock(), + json=MagicMock(return_value={"models": [{"name": "gemma2:2b"}, {"name": "llama3"}]}), + ) + self.assertEqual(self.client.list_models(), ["gemma2:2b", "llama3"]) + + @patch("rag.ollama_client.httpx.post") + def test_generate_returns_response_text(self, mock_post: MagicMock) -> None: + mock_post.return_value = MagicMock( + status_code=200, + raise_for_status=MagicMock(), + json=MagicMock(return_value={"response": " Answer text "}), + ) + result = self.client.generate("prompt") + self.assertEqual(result, "Answer text") + mock_post.assert_called_once() + payload = mock_post.call_args.kwargs["json"] + self.assertEqual(payload["model"], "gemma2:2b") + self.assertEqual(payload["prompt"], "prompt") + self.assertFalse(payload["stream"]) + + @patch("rag.ollama_client.httpx.post") + def test_generate_missing_model_raises(self, mock_post: MagicMock) -> None: + mock_post.return_value = MagicMock(status_code=404) + with self.assertRaises(OllamaError) as ctx: + self.client.generate("prompt") + self.assertIn("gemma2:2b", str(ctx.exception)) + + +if __name__ == "__main__": + unittest.main() diff --git a/tests/test_rag_engine.py b/tests/test_rag_engine.py new file mode 100644 index 0000000..1768fdd --- /dev/null +++ b/tests/test_rag_engine.py @@ -0,0 +1,110 @@ +"""Unit tests for RAG orchestration.""" + +from __future__ import annotations + +import unittest +from unittest.mock import MagicMock, patch + +import httpx + +from rag.engine import APIError, RAGEngine +from rag.ollama_client import OllamaClient, OllamaError + + +class RAGEngineTests(unittest.TestCase): + def setUp(self) -> None: + self.mock_ollama = MagicMock(spec=OllamaClient) + self.mock_ollama.model = "gemma2:2b" + self.engine = RAGEngine( + api_base_url="http://api.test", + ollama=self.mock_ollama, + top_k=2, + ) + + def test_format_context_empty(self) -> None: + self.assertEqual( + RAGEngine._format_context([]), + "No relevant documents were retrieved.", + ) + + def test_format_context_numbered_blocks(self) -> None: + sources = [ + {"title": "Doc A", "category": "team", "content": "Line one."}, + {"title": "Doc B", "category": "player", "content": "Line two."}, + ] + context = RAGEngine._format_context(sources) + self.assertIn("[1] Doc A (team)", context) + self.assertIn("[2] Doc B (player)", context) + self.assertIn("Line one.", context) + + def test_build_prompt_includes_query_and_context(self) -> None: + prompt = RAGEngine._build_prompt("Who led scoring?", "ctx block") + self.assertIn("StatShift", prompt) + self.assertIn("ctx block", prompt) + self.assertIn("Who led scoring?", prompt) + + @patch("rag.engine.httpx.get") + def test_ask_returns_rag_result(self, mock_get: MagicMock) -> None: + mock_get.return_value = MagicMock( + raise_for_status=MagicMock(), + json=MagicMock( + return_value=[ + { + "id": 1, + "title": "Curry splits", + "category": "player", + "content": "26.4 PPG", + } + ] + ), + ) + self.mock_ollama.generate.return_value = "Curry averaged 26.4 PPG [1]." + + result = self.engine.ask("How did Curry score?") + + self.assertEqual(result.answer, "Curry averaged 26.4 PPG [1].") + self.assertEqual(len(result.sources), 1) + self.assertIn("Curry splits", result.prompt) + mock_get.assert_called_once_with( + "http://api.test/search", + params={"q": "How did Curry score?", "limit": 2}, + timeout=15.0, + ) + self.mock_ollama.generate.assert_called_once() + + @patch("rag.engine.httpx.get") + def test_search_api_error_raises(self, mock_get: MagicMock) -> None: + mock_get.side_effect = httpx.ConnectError("refused") + with self.assertRaises(APIError): + self.engine.ask("test") + + @patch("rag.engine.httpx.get") + def test_ask_ollama_error_wrapped(self, mock_get: MagicMock) -> None: + mock_get.return_value = MagicMock( + raise_for_status=MagicMock(), + json=MagicMock(return_value=[]), + ) + self.mock_ollama.generate.side_effect = httpx.ConnectError("down") + with self.assertRaises(OllamaError): + self.engine.ask("test") + + @patch("rag.engine.httpx.get") + def test_health_reports_status(self, mock_get: MagicMock) -> None: + mock_get.return_value = MagicMock( + status_code=200, + json=MagicMock(return_value={"status": "ok"}), + ) + self.mock_ollama.is_available.return_value = True + self.mock_ollama.list_models.return_value = ["gemma2:2b"] + + health = self.engine.health() + + self.assertTrue(health["api_ok"]) + self.assertEqual(health["api_detail"], "ok") + self.assertTrue(health["ollama_ok"]) + self.assertEqual(health["ollama_models"], ["gemma2:2b"]) + self.assertEqual(health["configured_model"], "gemma2:2b") + + +if __name__ == "__main__": + unittest.main() From 43199e00bc0c081b0b8b1003f54abac9db7b4a29 Mon Sep 17 00:00:00 2001 From: jlee733 <75899901+jlee733@users.noreply.github.com> Date: Mon, 18 May 2026 14:34:55 -0400 Subject: [PATCH 3/9] Add MIT License to the project --- LICENSE | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) create mode 100644 LICENSE diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..2a156d6 --- /dev/null +++ b/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2026 jlee733 + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. From e749f743576cf1b1a8d281fdd483741fd875a0f1 Mon Sep 17 00:00:00 2001 From: Joseph Lee <175lee.joseph@gmail.com> Date: Mon, 18 May 2026 15:34:36 -0400 Subject: [PATCH 4/9] Added intent detection --- README.md | 32 +++++++++-- api/main.py | 2 +- app/streamlit_app.py | 21 +++++-- rag/__init__.py | 9 ++- rag/engine.py | 70 ++++++++++++++++------ rag/intent.py | 121 +++++++++++++++++++++++++++++++++++++++ rag/ollama_client.py | 2 +- scripts/init_db.py | 77 +++++++++++++------------ tests/test_api.py | 6 +- tests/test_db.py | 18 ++++-- tests/test_intent.py | 45 +++++++++++++++ tests/test_rag_engine.py | 70 +++++++++++++++------- 12 files changed, 373 insertions(+), 100 deletions(-) create mode 100644 rag/intent.py create mode 100644 tests/test_intent.py diff --git a/README.md b/README.md index 87c1e40..333335c 100644 --- a/README.md +++ b/README.md @@ -1,10 +1,12 @@ # StatShift -Local-only RAG stack matching the architecture diagram: +Local fantasy football statistics assistant. Query seeded NFL/fantasy data through a read-only API, or chat with a local LLM for open-ended analysis. **Streamlit** → **RAG** → **FastAPI (read-only)** → **SQLite** **RAG** → **Ollama** → **Gemma** +Factual questions (stats, matchups, injuries) are answered from the database. Conversational prompts go to Gemma. + ## Prerequisites - **Docker (recommended):** Docker Desktop or Docker Engine with Compose v2 @@ -12,8 +14,6 @@ Local-only RAG stack matching the architecture diagram: ## Run with Docker -Yes — you interact with Streamlit in your **browser on your machine**. The UI container listens on `0.0.0.0:8501` and Compose publishes it to `localhost:8501`. Buttons, text input, and reruns all work normally; only the server runs inside Docker. - ```bash docker compose up --build ``` @@ -28,7 +28,7 @@ Optional: API docs at http://localhost:8000/docs # Stop docker compose down -# Reset DB + models +# Reset DB + models (re-seeds fantasy football sample data) docker compose down -v ``` @@ -40,7 +40,7 @@ Services: | `api` | FastAPI read-only API | 8000 | | `ollama` | Gemma inference | 11434 | -Inside the Compose network, the UI talks to `http://api:8000` and `http://ollama:11434`. You do not need to call those from the browser. +Inside the Compose network, the UI talks to `http://api:8000` and `http://ollama:11434`. ## Local setup (without Docker) @@ -73,7 +73,17 @@ chmod +x scripts/run_api.sh scripts/run_ui.sh ./scripts/run_ui.sh ``` -Open http://127.0.0.1:8501 and ask questions about the seeded basketball stats. +Open http://127.0.0.1:8501 and ask about players, weekly matchups, injuries, or PPR scoring from the seeded data. + +If you previously ran an older build with different seed data, delete `data/statshift.db` or run `docker compose down -v` before re-initializing. + +## Tests + +```bash +python -m unittest discover -s tests -v +``` + +CI runs the same suite on pull requests and pushes to `main`. ## Configuration @@ -96,3 +106,13 @@ API_BASE_URL=http://127.0.0.1:8000 | `GET /categories` | Distinct categories | `POST`, `PUT`, `PATCH`, and `DELETE` return **405** — writes are blocked at the API layer; SQLite is opened in read-only mode for queries. + +## Sample data categories + +| Category | Examples | +|----------|----------| +| `player` | McCaffrey workload, Chase targets, Kelce red-zone usage | +| `team` | Bills pace and scoring | +| `matchup` | Week 12 RB defensive matchups | +| `injury` | Week 15 practice reports | +| `league` | PPR scoring, passing leaders | diff --git a/api/main.py b/api/main.py index 29a8693..03d9c78 100644 --- a/api/main.py +++ b/api/main.py @@ -13,7 +13,7 @@ app = FastAPI( title="StatShift API", - description="Local read-only API over SQLite. Write operations are blocked.", + description="Read-only fantasy football stats API over SQLite. Write operations are blocked.", version="0.1.0", ) diff --git a/app/streamlit_app.py b/app/streamlit_app.py index c2892f0..f19a7a9 100644 --- a/app/streamlit_app.py +++ b/app/streamlit_app.py @@ -22,7 +22,9 @@ ) st.title("StatShift") -st.caption("Local prototype — Streamlit → RAG → FastAPI (read-only) → SQLite → Ollama/Gemma") +st.caption( + "Fantasy football stats — Streamlit → RAG → FastAPI (read-only) → SQLite → Ollama/Gemma" +) engine = RAGEngine() @@ -55,8 +57,8 @@ st.caption(f"Ollama: {settings.ollama_base_url}") query = st.text_input( - "Ask about players, teams, or league stats", - placeholder="e.g. How did Victor Wembanyama perform in 2024-25?", + "Ask about players, matchups, injuries, or fantasy scoring", + placeholder="e.g. How many targets did Ja'Marr Chase have in 2024?", ) col1, col2 = st.columns([1, 4]) @@ -66,7 +68,7 @@ show_prompt = st.checkbox("Show prompt sent to Gemma") if run and query.strip(): - with st.spinner("Retrieving context and generating answer..."): + with st.spinner("Routing your question..."): try: result = engine.ask(query.strip()) st.session_state["last_result"] = result @@ -77,17 +79,26 @@ result = st.session_state.get("last_result") if result: + route_label = "API (database)" if result.route == "api" else "Gemma (chat)" + st.caption( + f"Route: **{route_label}** · intent: `{result.intent.value}` — {result.intent_reason}" + ) + st.subheader("Answer") st.markdown(result.answer) st.subheader("Retrieved sources") + if not result.sources: + st.caption("No API retrieval for this route.") for doc in result.sources: with st.expander(f"[{doc['id']}] {doc['title']} · {doc['category']}"): st.write(doc["content"]) - if show_prompt: + if show_prompt and result.prompt: st.subheader("Prompt") st.code(result.prompt) + elif show_prompt and result.route == "api": + st.caption("No Gemma prompt — this answer came directly from API search results.") elif run and not query.strip(): st.warning("Enter a question first.") diff --git a/rag/__init__.py b/rag/__init__.py index c317619..0212f36 100644 --- a/rag/__init__.py +++ b/rag/__init__.py @@ -1,3 +1,10 @@ from rag.engine import RAGEngine, RAGResult +from rag.intent import IntentResult, PromptIntent, detect_prompt_intent -__all__ = ["RAGEngine", "RAGResult"] +__all__ = [ + "RAGEngine", + "RAGResult", + "IntentResult", + "PromptIntent", + "detect_prompt_intent", +] diff --git a/rag/engine.py b/rag/engine.py index 9aacd6f..2051d9c 100644 --- a/rag/engine.py +++ b/rag/engine.py @@ -1,13 +1,14 @@ -"""RAG orchestration: retrieve from API, generate with Ollama/Gemma.""" +"""RAG orchestration: route by intent to API retrieval or Gemma chat.""" from __future__ import annotations from dataclasses import dataclass -from typing import Any +from typing import Any, Literal import httpx from config import settings +from rag.intent import IntentResult, PromptIntent, detect_prompt_intent from rag.ollama_client import OllamaClient, OllamaError @@ -15,11 +16,17 @@ class APIError(RuntimeError): pass +Route = Literal["api", "gemma"] + + @dataclass class RAGResult: answer: str sources: list[dict[str, Any]] prompt: str + intent: PromptIntent + route: Route + intent_reason: str class RAGEngine: @@ -57,29 +64,58 @@ def _format_context(sources: list[dict[str, Any]]) -> str: return "\n\n".join(blocks) @staticmethod - def _build_prompt(query: str, context: str) -> str: - return f"""You are StatShift, a local basketball analytics assistant. -Answer using ONLY the context below. If the context is insufficient, say what is missing. -Keep answers concise and cite source numbers like [1] when you use a fact. - -Context: -{context} - -Question: {query} + def _answer_from_sources(sources: list[dict[str, Any]]) -> str: + if not sources: + return ( + "No matching records were found in the StatShift database for that question. " + "Try rephrasing with a player, team, week, or season." + ) + lines = ["Based on stored records:\n"] + for idx, doc in enumerate(sources, start=1): + lines.append( + f"[{idx}] **{doc['title']}** ({doc['category']})\n{doc['content']}" + ) + return "\n\n".join(lines) -Answer:""" + @staticmethod + def _build_conversational_prompt(query: str) -> str: + return f"""You are StatShift, a friendly fantasy football analytics assistant. +Answer the user's message helpfully. You may use general NFL and fantasy football knowledge. +If they ask for specific stats from the StatShift database, suggest they ask a direct factual question. + +User: {query} + +Assistant:""" + + def ask(self, query: str, *, intent: IntentResult | None = None) -> RAGResult: + resolved = intent or detect_prompt_intent(query) + + if resolved.intent == PromptIntent.DEFINITIVE: + sources = self._search(query) + return RAGResult( + answer=self._answer_from_sources(sources), + sources=sources, + prompt="", + intent=resolved.intent, + route="api", + intent_reason=resolved.reason, + ) - def ask(self, query: str) -> RAGResult: - sources = self._search(query) - context = self._format_context(sources) - prompt = self._build_prompt(query, context) + prompt = self._build_conversational_prompt(query) try: answer = self.ollama.generate(prompt) except httpx.HTTPError as exc: raise OllamaError( "Could not reach Ollama. Start it locally and ensure Gemma is pulled." ) from exc - return RAGResult(answer=answer, sources=sources, prompt=prompt) + return RAGResult( + answer=answer, + sources=[], + prompt=prompt, + intent=resolved.intent, + route="gemma", + intent_reason=resolved.reason, + ) def health(self) -> dict[str, Any]: api_ok = False diff --git a/rag/intent.py b/rag/intent.py new file mode 100644 index 0000000..97287b8 --- /dev/null +++ b/rag/intent.py @@ -0,0 +1,121 @@ +"""Heuristic intent detection for user prompts.""" + +from __future__ import annotations + +import re +from dataclasses import dataclass +from enum import Enum + + +class PromptIntent(str, Enum): + """Whether a prompt should be answered from the API or via Gemma.""" + + DEFINITIVE = "definitive" + CONVERSATIONAL = "conversational" + + +@dataclass(frozen=True) +class IntentResult: + intent: PromptIntent + reason: str + + +_QUESTION_STARTERS = re.compile( + r"^\s*(" + r"what|who|whom|which|when|where|why|how|" + r"is|are|was|were|do|does|did|can|could|will|would|should|has|have|had" + r")\b", + re.IGNORECASE, +) + +_DEFINITIVE_PATTERNS: tuple[tuple[re.Pattern[str], str], ...] = ( + (re.compile(r"\b(how many|how much|how often)\b", re.I), "quantity question"), + (re.compile(r"\b(what was|what is|what were|what are)\b", re.I), "fact lookup"), + (re.compile(r"\b(who (led|won|scored|averaged|had|is|was|threw))\b", re.I), "who question"), + (re.compile(r"\b(how did|how does|how do)\b", re.I), "performance question"), + (re.compile(r"\b(average[ds]?|stats?|statline|fantasy points?|fppg|ppg)\b", re.I), "stats terms"), + (re.compile(r"\b(yards?|touchdowns?|tds?|receptions?|targets?|carries|attempts?)\b", re.I), "football counting stats"), + (re.compile(r"\b(ppr|half[- ]?ppr|standard scoring|snap share|target share|red[- ]?zone)\b", re.I), "fantasy terms"), + (re.compile(r"\b(week \d{1,2}|matchup|projection|start[- ]?sit)\b", re.I), "weekly fantasy context"), + (re.compile(r"\b(injury|injured|questionable|doubtful|out|ir|trade[d]?|waiver)\b", re.I), "injury/roster terms"), + (re.compile(r"\b\d{4}\s*(season|stats?)?\b"), "season reference"), + (re.compile(r"\b\d+(\.\d+)?\s*%"), "percentage"), + (re.compile(r"\b\d+(\.\d+)?\s*(yds|yards|tds|rec|tgt|car)\b", re.I), "rate stat"), +) + +_CONVERSATIONAL_PATTERNS: tuple[tuple[re.Pattern[str], str], ...] = ( + (re.compile(r"^\s*(hi|hello|hey|thanks|thank you)\b", re.I), "greeting"), + (re.compile(r"\b(what do you think|in your opinion|do you believe)\b", re.I), "opinion"), + (re.compile(r"\b(should i draft|would you draft|dynasty value)\b", re.I), "draft advice"), + (re.compile(r"\b(explain like|eli5|tell me a story|write a|poem|joke)\b", re.I), "creative"), + (re.compile(r"\b(brainstorm|ideas? for|help me (think|decide))\b", re.I), "open-ended help"), + (re.compile(r"\b(compare .+ (vs\.?|versus|or) .+ (opinion|better overall))\b", re.I), "subjective compare"), + (re.compile(r"\b(best (ever|of all time)|goat|all[- ]time greatest)\b", re.I), "subjective ranking"), + (re.compile(r"\b(why do you think|why is fantasy football)\b", re.I), "general discussion"), +) + + +def detect_prompt_intent(prompt: str) -> IntentResult: + """Classify whether a prompt needs a definitive DB-backed answer or open chat. + + Definitive prompts are routed to the read-only API (search/retrieval). + Conversational prompts are sent directly to Gemma without API retrieval. + """ + text = prompt.strip() + if not text: + return IntentResult( + PromptIntent.CONVERSATIONAL, + reason="empty prompt", + ) + + definitive_score = 0 + conversational_score = 0 + definitive_reasons: list[str] = [] + conversational_reasons: list[str] = [] + + for pattern, label in _CONVERSATIONAL_PATTERNS: + if pattern.search(text): + conversational_score += 4 + conversational_reasons.append(label) + + for pattern, label in _DEFINITIVE_PATTERNS: + if pattern.search(text): + definitive_score += 2 + definitive_reasons.append(label) + + if conversational_score == 0: + if text.endswith("?"): + definitive_score += 2 + definitive_reasons.append("question mark") + + if _QUESTION_STARTERS.search(text): + definitive_score += 2 + definitive_reasons.append("question starter") + + word_count = len(text.split()) + if word_count <= 12 and re.search( + r"\b(stats?|statline|overview|splits|profile|report|summary|projection)\b", + text, + re.I, + ): + definitive_score += 2 + definitive_reasons.append("stats lookup phrase") + + if definitive_score > conversational_score: + reason = ", ".join(definitive_reasons) or "factual question signals" + return IntentResult(PromptIntent.DEFINITIVE, reason=reason) + + if conversational_score > 0: + reason = ", ".join(conversational_reasons) + return IntentResult(PromptIntent.CONVERSATIONAL, reason=reason) + + if text.endswith("?") or _QUESTION_STARTERS.search(text): + return IntentResult( + PromptIntent.DEFINITIVE, + reason="unclassified question defaulting to API", + ) + + return IntentResult( + PromptIntent.CONVERSATIONAL, + reason="no definitive signals; defaulting to Gemma", + ) diff --git a/rag/ollama_client.py b/rag/ollama_client.py index 594894b..136814d 100644 --- a/rag/ollama_client.py +++ b/rag/ollama_client.py @@ -1,4 +1,4 @@ -"""Thin client for local Ollama (Gemma) inference.""" +"""Thin client for local Ollama inference (Gemma by default).""" from __future__ import annotations diff --git a/scripts/init_db.py b/scripts/init_db.py index 317e847..a92ff1f 100644 --- a/scripts/init_db.py +++ b/scripts/init_db.py @@ -14,75 +14,76 @@ DOCUMENTS = [ { - "title": "2024-25 Lakers team overview", - "category": "team", + "title": "Christian McCaffrey 2024 rushing workload", + "category": "player", "content": ( - "The Los Angeles Lakers finished the 2024-25 regular season 50-32. " - "They ranked 8th in offensive rating (115.4) and 12th in defensive rating (112.1). " - "Anthony Davis averaged 24.7 PPG, 12.6 RPG, and 2.4 BPG. " - "LeBron James averaged 25.3 PPG, 7.4 RPG, and 8.1 APG at age 40." + "Christian McCaffrey played 16 games in 2024 with 272 carries for 1,459 rushing yards " + "and 14 rushing TDs. He added 67 receptions for 564 yards and 7 receiving TDs in PPR leagues. " + "His 21.8 fantasy PPG ranked RB1 in season-long formats." ), }, { - "title": "Stephen Curry shooting splits", + "title": "Ja'Marr Chase 2024 target share", "category": "player", "content": ( - "Stephen Curry shot 45.2% from the field, 40.8% from three, and 92.1% from the line " - "in 2024-25. He attempted 11.8 threes per game and led the Warriors with 26.4 PPG. " - "His true shooting percentage was 62.8%." + "Ja'Marr Chase led the NFL with 175 targets in 2024, a 28.4% team target share. " + "He posted 127 receptions, 1,708 yards, and 12 TDs. " + "In PPR scoring he averaged 22.1 fantasy points per game." ), }, { - "title": "Celtics defensive profile", - "category": "team", + "title": "Week 12 RB matchup notes", + "category": "matchup", "content": ( - "Boston held opponents to 108.9 points per 100 possessions, 3rd in the league. " - "Jrue Holiday and Derrick White anchored perimeter defense. " - "Boston forced the 5th-most turnovers per game (15.2)." + "Week 12 featured several run-heavy game scripts. " + "Baltimore allowed 5.1 yards per carry to RBs (4th-most). " + "Derrick Henry projects as an RB1 with 18-22 expected touches against that front. " + "Green Bay limited RB receiving work to 3.8 targets per game over the prior four weeks." ), }, { - "title": "Victor Wembanyama sophomore season", - "category": "player", + "title": "2024 NFL passing yards leaders", + "category": "league", "content": ( - "Victor Wembanyama averaged 22.1 PPG, 10.8 RPG, 3.6 BPG, and 1.2 SPG in 2024-25. " - "San Antonio improved defensively when he was on the floor (+8.2 net rating swing). " - "He shot 48.9% inside the arc and 34.2% from three on low volume." + "Joe Burrow led the NFL with 4,918 passing yards in 2024. " + "Jared Goff (4,629) and Baker Mayfield (4,500) rounded out the top three. " + "League average team passing yards per game was 224.6." ), }, { - "title": "League pace and efficiency leaders", - "category": "league", + "title": "Travis Kelce red-zone usage 2024", + "category": "player", "content": ( - "League average pace was 99.2 possessions per 48 minutes in 2024-25. " - "Indiana played the fastest pace (102.4); New York played the slowest (96.1). " - "Denver led offensive rating (119.8); Charlotte ranked last defensively (118.4 DRtg)." + "Travis Kelce drew 22 red-zone targets in 2024, 8th among tight ends. " + "He scored 5 red-zone TDs on a 22.7% red-zone target share inside the 20. " + "His 12.4 fantasy PPG in half-PPR was TE3 in season-long ranks." ), }, { - "title": "Nikola Jokic playmaking", - "category": "player", + "title": "Buffalo Bills offensive pace 2024", + "category": "team", "content": ( - "Nikola Jokic recorded 9.2 APG with a 42.1% assist rate, highest among centers. " - "He posted a 31.8 PER and 14.2 win shares. Denver's offense was +12.4 per 100 with him on court." + "Buffalo averaged 64.2 plays per game (7th) and 3.04 drives leading to scores per game. " + "Josh Allen accounted for 42 total TDs (32 pass, 10 rush). " + "Bills pass rate over expected was +4.2% in neutral script situations." ), }, { - "title": "Injury report: March 2025", + "title": "Injury report: Week 15 2024", "category": "injury", "content": ( - "Kawhi Leonard missed 18 games with knee management. " - "Joel Embiid was limited to 35 games due to knee soreness. " - "Ja Morant returned in late February after a shoulder strain." + "Tyreek Hill was listed questionable with a wrist sprain but practiced fully Friday. " + "Joe Mixon missed practice Wednesday with an ankle issue, then returned limited Thursday. " + "Mark Andrews was ruled out with an ankle injury before Sunday kickoff." ), }, { - "title": "Trade deadline summary", - "category": "transaction", + "title": "PPR scoring and roster construction", + "category": "league", "content": ( - "At the 2025 deadline, Milwaukee acquired a two-way wing for second-round picks. " - "Phoenix moved a veteran point guard for cap flexibility. " - "No blockbuster deals moved the title odds more than 3% in consensus models." + "Standard PPR awards 1 point per reception. " + "In 2024, WRs with 8+ targets per game outscored WRs below 6 targets by 6.1 PPG on average. " + "Zero-RB builds that waited until rounds 4-5 for a first RB gained +0.8 PPG vs early-RB builds in best-ball data." ), }, ] diff --git a/tests/test_api.py b/tests/test_api.py index fe392b0..ff4ee04 100644 --- a/tests/test_api.py +++ b/tests/test_api.py @@ -51,17 +51,17 @@ def test_get_document_not_found(self) -> None: self.assertEqual(response.status_code, 404) def test_search(self) -> None: - response = self.client.get("/search", params={"q": "Curry", "limit": 5}) + response = self.client.get("/search", params={"q": "Chase", "limit": 5}) self.assertEqual(response.status_code, 200) results = response.json() self.assertTrue(results) - self.assertIn("Curry", results[0]["title"]) + self.assertIn("Chase", results[0]["title"]) def test_categories(self) -> None: response = self.client.get("/categories") self.assertEqual(response.status_code, 200) categories = response.json()["categories"] - self.assertIn("league", categories) + self.assertIn("matchup", categories) def test_write_methods_blocked(self) -> None: for method in ("post", "put", "patch", "delete"): diff --git a/tests/test_db.py b/tests/test_db.py index 9095b62..82fee58 100644 --- a/tests/test_db.py +++ b/tests/test_db.py @@ -23,23 +23,29 @@ def test_get_document_returns_row(self) -> None: self.assertIsNotNone(doc) assert doc is not None self.assertEqual(doc["id"], 1) - self.assertIn("Lakers", doc["title"]) + self.assertIn("McCaffrey", doc["title"]) def test_get_document_missing_returns_none(self) -> None: self.assertIsNone(self.db.get_document(99999)) def test_search_finds_relevant_document(self) -> None: - results = self.db.search_documents("Wembanyama", limit=5) + results = self.db.search_documents("Chase", limit=5) self.assertTrue(results) self.assertTrue( - any("Wembanyama" in doc["title"] or "Wembanyama" in doc["content"] for doc in results) + any("Chase" in doc["title"] or "Chase" in doc["content"] for doc in results) ) - def test_search_empty_query_uses_like_fallback(self) -> None: - results = self.db.search_documents("Lakers", limit=3) + def test_search_team_content(self) -> None: + results = self.db.search_documents("Bills", limit=3) self.assertLessEqual(len(results), 3) def test_list_categories(self) -> None: categories = self.db.list_categories() self.assertIn("player", categories) - self.assertIn("team", categories) + self.assertIn("matchup", categories) + + +if __name__ == "__main__": + import unittest + + unittest.main() diff --git a/tests/test_intent.py b/tests/test_intent.py new file mode 100644 index 0000000..a7e2e3b --- /dev/null +++ b/tests/test_intent.py @@ -0,0 +1,45 @@ +"""Unit tests for prompt intent detection.""" + +from __future__ import annotations + +import unittest + +from rag.intent import PromptIntent, detect_prompt_intent + + +class DetectPromptIntentTests(unittest.TestCase): + def test_factual_player_question_is_definitive(self) -> None: + result = detect_prompt_intent( + "How many targets did Ja'Marr Chase have in 2024?" + ) + self.assertEqual(result.intent, PromptIntent.DEFINITIVE) + + def test_stats_lookup_is_definitive(self) -> None: + result = detect_prompt_intent("Christian McCaffrey 2024 stats") + self.assertEqual(result.intent, PromptIntent.DEFINITIVE) + + def test_passing_leader_question_is_definitive(self) -> None: + result = detect_prompt_intent("Who led the NFL in passing yards in 2024?") + self.assertEqual(result.intent, PromptIntent.DEFINITIVE) + + def test_opinion_question_is_conversational(self) -> None: + result = detect_prompt_intent( + "What do you think about the Bills playoff chances?" + ) + self.assertEqual(result.intent, PromptIntent.CONVERSATIONAL) + + def test_greeting_is_conversational(self) -> None: + result = detect_prompt_intent("Hello, can you help me?") + self.assertEqual(result.intent, PromptIntent.CONVERSATIONAL) + + def test_creative_request_is_conversational(self) -> None: + result = detect_prompt_intent("Write a poem about fantasy football") + self.assertEqual(result.intent, PromptIntent.CONVERSATIONAL) + + def test_empty_prompt_is_conversational(self) -> None: + result = detect_prompt_intent(" ") + self.assertEqual(result.intent, PromptIntent.CONVERSATIONAL) + + +if __name__ == "__main__": + unittest.main() diff --git a/tests/test_rag_engine.py b/tests/test_rag_engine.py index 1768fdd..9f4a6ef 100644 --- a/tests/test_rag_engine.py +++ b/tests/test_rag_engine.py @@ -8,6 +8,7 @@ import httpx from rag.engine import APIError, RAGEngine +from rag.intent import IntentResult, PromptIntent from rag.ollama_client import OllamaClient, OllamaError @@ -37,56 +38,81 @@ def test_format_context_numbered_blocks(self) -> None: self.assertIn("[2] Doc B (player)", context) self.assertIn("Line one.", context) - def test_build_prompt_includes_query_and_context(self) -> None: - prompt = RAGEngine._build_prompt("Who led scoring?", "ctx block") + def test_answer_from_sources_empty(self) -> None: + answer = RAGEngine._answer_from_sources([]) + self.assertIn("No matching records", answer) + + def test_answer_from_sources_includes_content(self) -> None: + sources = [ + { + "title": "Ja'Marr Chase 2024 target share", + "category": "player", + "content": "175 targets", + }, + ] + answer = RAGEngine._answer_from_sources(sources) + self.assertIn("175 targets", answer) + self.assertIn("Chase", answer) + + def test_build_conversational_prompt_includes_query(self) -> None: + prompt = RAGEngine._build_conversational_prompt("Tell me a fun fact") + self.assertIn("Tell me a fun fact", prompt) self.assertIn("StatShift", prompt) - self.assertIn("ctx block", prompt) - self.assertIn("Who led scoring?", prompt) @patch("rag.engine.httpx.get") - def test_ask_returns_rag_result(self, mock_get: MagicMock) -> None: + def test_ask_definitive_uses_api_not_gemma(self, mock_get: MagicMock) -> None: mock_get.return_value = MagicMock( raise_for_status=MagicMock(), json=MagicMock( return_value=[ { "id": 1, - "title": "Curry splits", + "title": "Ja'Marr Chase 2024 target share", "category": "player", - "content": "26.4 PPG", + "content": "175 targets", } ] ), ) - self.mock_ollama.generate.return_value = "Curry averaged 26.4 PPG [1]." + intent = IntentResult(PromptIntent.DEFINITIVE, reason="test") - result = self.engine.ask("How did Curry score?") + result = self.engine.ask("How many targets did Chase have?", intent=intent) - self.assertEqual(result.answer, "Curry averaged 26.4 PPG [1].") + self.assertEqual(result.route, "api") + self.assertIn("175 targets", result.answer) self.assertEqual(len(result.sources), 1) - self.assertIn("Curry splits", result.prompt) - mock_get.assert_called_once_with( - "http://api.test/search", - params={"q": "How did Curry score?", "limit": 2}, - timeout=15.0, - ) + self.assertEqual(result.prompt, "") + mock_get.assert_called_once() + self.mock_ollama.generate.assert_not_called() + + @patch("rag.engine.httpx.get") + def test_ask_conversational_uses_gemma_not_api(self, mock_get: MagicMock) -> None: + self.mock_ollama.generate.return_value = "Fantasy football is a weekly game." + intent = IntentResult(PromptIntent.CONVERSATIONAL, reason="test") + + result = self.engine.ask("Hello!", intent=intent) + + self.assertEqual(result.route, "gemma") + self.assertEqual(result.answer, "Fantasy football is a weekly game.") + self.assertEqual(result.sources, []) + self.assertIn("Hello!", result.prompt) + mock_get.assert_not_called() self.mock_ollama.generate.assert_called_once() @patch("rag.engine.httpx.get") def test_search_api_error_raises(self, mock_get: MagicMock) -> None: mock_get.side_effect = httpx.ConnectError("refused") + intent = IntentResult(PromptIntent.DEFINITIVE, reason="test") with self.assertRaises(APIError): - self.engine.ask("test") + self.engine.ask("How many yards did Burrow throw for?", intent=intent) @patch("rag.engine.httpx.get") def test_ask_ollama_error_wrapped(self, mock_get: MagicMock) -> None: - mock_get.return_value = MagicMock( - raise_for_status=MagicMock(), - json=MagicMock(return_value=[]), - ) self.mock_ollama.generate.side_effect = httpx.ConnectError("down") + intent = IntentResult(PromptIntent.CONVERSATIONAL, reason="test") with self.assertRaises(OllamaError): - self.engine.ask("test") + self.engine.ask("Hi there", intent=intent) + mock_get.assert_not_called() @patch("rag.engine.httpx.get") def test_health_reports_status(self, mock_get: MagicMock) -> None: From d8b522736c9a5386fe3112e48fd47e12c543c4dd Mon Sep 17 00:00:00 2001 From: Joseph Lee <175lee.joseph@gmail.com> Date: Mon, 18 May 2026 19:48:40 -0400 Subject: [PATCH 5/9] Added mock draft feature and unit tests --- .gitignore | 2 + Dockerfile | 24 +- README.md | 24 +- app/mock_draft_tab.py | 273 ++++++++++++++++++ app/streamlit_app.py | 104 ++++--- docker-compose.yml | 11 + draft/__init__.py | 4 + draft/engine.py | 192 +++++++++++++ draft/espn_loader.py | 240 ++++++++++++++++ draft/ffanalytics_loader.py | 415 ++++++++++++++++++++++++++++ draft/models.py | 89 ++++++ draft/player_pool.py | 60 ++++ draft/rpy2_setup.py | 51 ++++ requirements.txt | 1 + scripts/sync_espn_players.py | 24 ++ scripts/sync_ffanalytics_players.py | 24 ++ sdks/espnSDK.py | 142 ++++++++++ tests/test_draft_engine.py | 83 ++++++ tests/test_espn_loader.py | 46 +++ tests/test_ffanalytics_loader.py | 102 +++++++ 20 files changed, 1865 insertions(+), 46 deletions(-) create mode 100644 app/mock_draft_tab.py create mode 100644 draft/__init__.py create mode 100644 draft/engine.py create mode 100644 draft/espn_loader.py create mode 100644 draft/ffanalytics_loader.py create mode 100644 draft/models.py create mode 100644 draft/player_pool.py create mode 100644 draft/rpy2_setup.py create mode 100644 scripts/sync_espn_players.py create mode 100644 scripts/sync_ffanalytics_players.py create mode 100644 sdks/espnSDK.py create mode 100644 tests/test_draft_engine.py create mode 100644 tests/test_espn_loader.py create mode 100644 tests/test_ffanalytics_loader.py diff --git a/.gitignore b/.gitignore index 441ea33..24ed429 100644 --- a/.gitignore +++ b/.gitignore @@ -4,5 +4,7 @@ __pycache__/ .env data/*.db data/*.db-* +data/espn_active_players.json +data/ffanalytics_players.json .DS_Store .streamlit/secrets.toml diff --git a/Dockerfile b/Dockerfile index 602b1e1..24a1d2d 100644 --- a/Dockerfile +++ b/Dockerfile @@ -4,18 +4,38 @@ WORKDIR /app ENV PYTHONDONTWRITEBYTECODE=1 \ PYTHONUNBUFFERED=1 \ - PIP_NO_CACHE_DIR=1 + PIP_NO_CACHE_DIR=1 \ + R_HOME=/usr/lib/R RUN apt-get update \ - && apt-get install -y --no-install-recommends curl \ + && apt-get install -y --no-install-recommends \ + curl \ + git \ + r-base \ + r-base-dev \ + libcurl4-openssl-dev \ + libssl-dev \ + libxml2-dev \ && rm -rf /var/lib/apt/lists/* +# CRAN may not ship ffanalytics for the distro R version; install from GitHub. +RUN Rscript -e '\ + install.packages("remotes", repos = "https://cloud.r-project.org"); \ + remotes::install_github( \ + "FantasyFootballAnalytics/ffanalytics", \ + upgrade = "never", \ + dependencies = TRUE \ + ) \ +' + COPY requirements.txt . RUN pip install -r requirements.txt COPY config.py . COPY api/ api/ COPY rag/ rag/ +COPY draft/ draft/ +COPY sdks/ sdks/ COPY app/ app/ COPY scripts/ scripts/ diff --git a/README.md b/README.md index 333335c..6d4c2c5 100644 --- a/README.md +++ b/README.md @@ -12,6 +12,8 @@ Factual questions (stats, matchups, injuries) are answered from the database. Co - **Docker (recommended):** Docker Desktop or Docker Engine with Compose v2 - **Or local Python 3.11+** and [Ollama](https://ollama.com) with Gemma (`ollama pull gemma2:2b`) +The Docker image includes R, [ffanalytics](https://github.com/FantasyFootballAnalytics/ffanalytics), and `rpy2` for the Mock Draft tab — no separate R install when using Compose. + ## Run with Docker ```bash @@ -77,13 +79,33 @@ Open http://127.0.0.1:8501 and ask about players, weekly matchups, injuries, or If you previously ran an older build with different seed data, delete `data/statshift.db` or run `docker compose down -v` before re-initializing. +## Mock Draft (Docker Compose) + +With the stack running (`docker compose up --build`), open **http://localhost:8501** → **Mock Draft**. The player pool comes from **ffanalytics** (projections for Standard / Half-PPR / PPR plus ADP). R and ffanalytics are already in the `ui` image. + +The cache lives in the **`statshift_data` volume** at `/app/data/ffanalytics_players.json` (24-hour TTL). It survives container restarts until you run `docker compose down -v`. + +**First visit:** opening Mock Draft or clicking **Refresh player pool** triggers a scrape (often several minutes). **Later visits** use the cache. + +Optional — pre-warm the cache before using the UI: + +```bash +# One-off sync service (stack does not need to be up) +docker compose --profile sync run --rm ffanalytics-sync + +# Or, while ui is running +docker compose exec ui python scripts/sync_ffanalytics_players.py +``` + +After rebuilding the image (`docker compose up --build`), run a sync again if ffanalytics or R packages changed. + ## Tests ```bash python -m unittest discover -s tests -v ``` -CI runs the same suite on pull requests and pushes to `main`. +CI runs the same suite on pull requests and pushes to `main` (ffanalytics parsing tests do not call R). ## Configuration diff --git a/app/mock_draft_tab.py b/app/mock_draft_tab.py new file mode 100644 index 0000000..0b12dfc --- /dev/null +++ b/app/mock_draft_tab.py @@ -0,0 +1,273 @@ +"""Streamlit UI for mock fantasy drafts.""" + +from __future__ import annotations + +import streamlit as st + +from draft.engine import MockDraftEngine +from draft.ffanalytics_loader import load_active_players, load_player_cache +from draft.models import ROSTER_SLOTS, ScoringFormat +from draft.player_pool import cache_status, refresh_players +from draft.rpy2_setup import rpy2_session + + +def _init_draft_state() -> None: + if "mock_draft" not in st.session_state: + st.session_state["mock_draft"] = None + if "draft_pool" not in st.session_state: + st.session_state["draft_pool"] = None + + +def _ensure_player_pool(*, force_refresh: bool = False) -> list: + if force_refresh: + with rpy2_session(): + with st.spinner("Loading players from ffanalytics…"): + st.session_state["draft_pool"] = refresh_players(force_refresh=True) + elif st.session_state.get("draft_pool") is None: + # Use JSON cache only on tab open — avoid a long R scrape until user clicks Refresh + st.session_state["draft_pool"] = load_player_cache() or [] + return st.session_state["draft_pool"] + + +def _render_settings(pool_size: int) -> tuple[ScoringFormat, int, int, int] | None: + st.subheader("League settings") + col1, col2, col3, col4 = st.columns(4) + + with col1: + scoring_label = st.selectbox( + "Scoring", + options=[s.value for s in ScoringFormat], + index=0, + ) + with col2: + league_size = st.selectbox("League size", options=[8, 10, 12, 14], index=2) + with col3: + draft_slot = st.number_input( + "Your draft slot", + min_value=1, + max_value=int(league_size), + value=min(5, int(league_size)), + step=1, + ) + with col4: + if pool_size: + max_rounds = max(1, pool_size // max(int(league_size), 1)) + else: + max_rounds = 15 + rounds = st.number_input( + "Rounds", + min_value=1, + max_value=max(1, max_rounds), + value=min(15, max(1, max_rounds)), + step=1, + disabled=pool_size == 0, + ) + + scoring = ScoringFormat(scoring_label) + st.caption( + f"Player pool: **{pool_size}** ranked players (QB/RB/WR/TE/K) · " + f"up to **{max_rounds}** rounds for a **{league_size}**-team league" + ) + return scoring, int(league_size), int(draft_slot), int(rounds) + + +def _render_draft_board(draft: MockDraftEngine) -> None: + st.subheader("Draft board") + rows = [] + for pick in draft.picks: + team_label = ( + f"Team {pick.team_index + 1} (You)" + if pick.team_index == draft.settings.draft_slot - 1 + else f"Team {pick.team_index + 1}" + ) + rows.append( + { + "Overall": pick.overall, + "Round": pick.round, + "Pick": pick.pick_in_round, + "Team": team_label, + "Player": pick.player.name, + "Pos": pick.player.position.value, + "Team abbr": pick.player.team, + "FPG": round( + pick.player.fantasy_points(draft.settings.scoring), 1 + ), + } + ) + if rows: + st.dataframe(rows, use_container_width=True, hide_index=True) + else: + st.caption("No picks yet.") + + +def _render_user_roster(draft: MockDraftEngine) -> None: + st.subheader("Your roster") + roster = draft.user_roster() + if not roster.picks: + st.caption("No players drafted yet.") + return + + rows = [ + { + "Player": p.name, + "Pos": p.position.value, + "Team": p.team, + "FPG": round(p.fantasy_points(draft.settings.scoring), 1), + } + for p in roster.picks + ] + st.dataframe(rows, use_container_width=True, hide_index=True) + + filled = [] + for slot, need in ROSTER_SLOTS.items(): + if slot == "FLEX": + continue + pos_key = slot + have = sum(1 for p in roster.picks if p.position.value == pos_key) + filled.append(f"{slot}: {have}/{need}") + st.caption("Starters · " + " · ".join(filled)) + + +def _render_pick_controls(draft: MockDraftEngine) -> None: + if draft.is_complete: + if draft.pool_exhausted and len(draft.picks) < draft.settings.league_size * draft.requested_rounds: + st.warning( + f"Draft stopped early: player pool ran out after **{len(draft.picks)}** picks " + f"(round {draft.picks[-1].round if draft.picks else 0})." + ) + else: + st.success("Draft complete.") + return + + st.subheader("On the clock") + round_num = draft.current_round + overall = draft.current_overall + team_num = draft.current_team_index + 1 + + if draft.is_user_turn: + st.info(f"Round {round_num} · Pick {overall} — **Your pick**") + ranked = draft.rank_available()[:25] + if not ranked: + st.error("No players left in the pool.") + return + options = { + f"{p.name} ({p.position.value}, {p.team}) — " + f"{p.fantasy_points(draft.settings.scoring):.1f} FPG": p + for p in ranked + } + choice = st.selectbox("Select player", options=list(options.keys())) + if st.button("Draft player", type="primary", use_container_width=True): + draft.make_pick(options[choice]) + draft.run_cpu_picks_until_user() + st.rerun() + else: + st.warning(f"Round {round_num} · Pick {overall} — Team {team_num} is picking…") + if st.button("Simulate to your pick", use_container_width=True): + draft.run_cpu_picks_until_user() + st.rerun() + + +def render_mock_draft_tab() -> None: + _init_draft_state() + st.markdown( + "Configure scoring and league size, then run a **snake mock draft** with " + "**projections and ADP** from [ffanalytics](https://github.com/FantasyFootballAnalytics/ffanalytics) " + "(QB/RB/WR/TE/K)." + ) + + st.caption(cache_status()) + + refresh_col, _ = st.columns([1, 3]) + with refresh_col: + if st.button("Refresh player pool from ffanalytics", use_container_width=True): + try: + with rpy2_session(): + with st.spinner("Scraping projections and ADP via ffanalytics…"): + pool_loaded = load_active_players(force_refresh=True) + refresh_players(force_refresh=True) + st.session_state["draft_pool"] = pool_loaded + if not pool_loaded: + st.error("ffanalytics returned no draftable players.") + else: + st.success(f"Loaded {len(pool_loaded)} players.") + st.rerun() + except Exception as exc: + st.error(f"ffanalytics sync failed: {exc}") + + pool = _ensure_player_pool() + pool_size = len(pool) + + if pool_size == 0: + st.warning( + "No player pool loaded. Click **Refresh player pool from ffanalytics** " + "(first run may take several minutes)." + ) + + settings = _render_settings(pool_size) + if settings is None: + return + + scoring, league_size, draft_slot, rounds = settings + + slot_cols = st.columns([1, 1, 2]) + with slot_cols[0]: + if st.button( + "Start new draft", + type="primary", + use_container_width=True, + disabled=pool_size == 0, + ): + try: + st.session_state["mock_draft"] = MockDraftEngine.create( + scoring=scoring, + league_size=league_size, + draft_slot=draft_slot, + rounds=rounds, + pool=pool, + ) + st.rerun() + except ValueError as exc: + st.error(str(exc)) + with slot_cols[1]: + if st.button("Reset", use_container_width=True): + st.session_state["mock_draft"] = None + st.rerun() + + draft: MockDraftEngine | None = st.session_state.get("mock_draft") + if draft is None: + st.info("Set your parameters and click **Start new draft**.") + st.markdown( + f""" + **Roster slots:** {", ".join(f"{k}×{v}" for k, v in ROSTER_SLOTS.items())} + + **Player source:** ffanalytics projections + ADP (~{pool_size} players). + First load scrapes multiple sites and may take several minutes; cached 24 hours + in the `statshift_data` Docker volume (`data/ffanalytics_players.json`). + """ + ) + return + + meta = draft.settings + rounds_note = f"{meta.rounds} rounds" + if draft.requested_rounds > meta.rounds: + rounds_note += f" (capped from {draft.requested_rounds}; pool limit)" + st.caption( + f"{meta.scoring.value} · {meta.league_size} teams · " + f"slot {meta.draft_slot} · {rounds_note} · " + f"Pick {min(draft.current_overall, draft.total_picks)} of {draft.total_picks} · " + f"{len(draft.available)} players left" + ) + + board_col, roster_col = st.columns([3, 2]) + with board_col: + _render_pick_controls(draft) + _render_draft_board(draft) + with roster_col: + _render_user_roster(draft) + + st.subheader("Best available") + for player in draft.rank_available()[:8]: + st.write( + f"**{player.name}** ({player.position.value}, {player.team}) — " + f"{player.fantasy_points(draft.settings.scoring):.1f} FPG" + ) diff --git a/app/streamlit_app.py b/app/streamlit_app.py index f19a7a9..fb51af5 100644 --- a/app/streamlit_app.py +++ b/app/streamlit_app.py @@ -11,7 +11,9 @@ if str(ROOT) not in sys.path: sys.path.insert(0, str(ROOT)) +from app.mock_draft_tab import render_mock_draft_tab from config import settings +from draft.rpy2_setup import init_rpy2_on_main_thread from rag.engine import RAGEngine from rag.ollama_client import OllamaError @@ -28,6 +30,16 @@ engine = RAGEngine() + +@st.cache_resource +def _rpy2_ready() -> bool: + """Initialize rpy2 once on Streamlit's main thread (required for Mock Draft).""" + init_rpy2_on_main_thread() + return True + + +_rpy2_ready() + with st.sidebar: st.header("System status") if st.button("Check health", use_container_width=True): @@ -56,49 +68,55 @@ st.caption(f"API: {settings.api_base_url}") st.caption(f"Ollama: {settings.ollama_base_url}") -query = st.text_input( - "Ask about players, matchups, injuries, or fantasy scoring", - placeholder="e.g. How many targets did Ja'Marr Chase have in 2024?", -) +ask_tab, draft_tab = st.tabs(["Ask StatShift", "Mock Draft"]) -col1, col2 = st.columns([1, 4]) -with col1: - run = st.button("Run RAG", type="primary", use_container_width=True) -with col2: - show_prompt = st.checkbox("Show prompt sent to Gemma") - -if run and query.strip(): - with st.spinner("Routing your question..."): - try: - result = engine.ask(query.strip()) - st.session_state["last_result"] = result - except OllamaError as exc: - st.error(str(exc)) - except Exception as exc: - st.error(f"RAG failed: {exc}") - -result = st.session_state.get("last_result") -if result: - route_label = "API (database)" if result.route == "api" else "Gemma (chat)" - st.caption( - f"Route: **{route_label}** · intent: `{result.intent.value}` — {result.intent_reason}" +with ask_tab: + query = st.text_input( + "Ask about players, matchups, injuries, or fantasy scoring", + placeholder="e.g. How many targets did Ja'Marr Chase have in 2024?", ) - st.subheader("Answer") - st.markdown(result.answer) - - st.subheader("Retrieved sources") - if not result.sources: - st.caption("No API retrieval for this route.") - for doc in result.sources: - with st.expander(f"[{doc['id']}] {doc['title']} · {doc['category']}"): - st.write(doc["content"]) - - if show_prompt and result.prompt: - st.subheader("Prompt") - st.code(result.prompt) - elif show_prompt and result.route == "api": - st.caption("No Gemma prompt — this answer came directly from API search results.") - -elif run and not query.strip(): - st.warning("Enter a question first.") + col1, col2 = st.columns([1, 4]) + with col1: + run = st.button("Run RAG", type="primary", use_container_width=True) + with col2: + show_prompt = st.checkbox("Show prompt sent to Gemma") + + if run and query.strip(): + with st.spinner("Routing your question..."): + try: + result = engine.ask(query.strip()) + st.session_state["last_result"] = result + except OllamaError as exc: + st.error(str(exc)) + except Exception as exc: + st.error(f"RAG failed: {exc}") + + result = st.session_state.get("last_result") + if result: + route_label = "API (database)" if result.route == "api" else "Gemma (chat)" + st.caption( + f"Route: **{route_label}** · intent: `{result.intent.value}` — {result.intent_reason}" + ) + + st.subheader("Answer") + st.markdown(result.answer) + + st.subheader("Retrieved sources") + if not result.sources: + st.caption("No API retrieval for this route.") + for doc in result.sources: + with st.expander(f"[{doc['id']}] {doc['title']} · {doc['category']}"): + st.write(doc["content"]) + + if show_prompt and result.prompt: + st.subheader("Prompt") + st.code(result.prompt) + elif show_prompt and result.route == "api": + st.caption("No Gemma prompt — this answer came directly from API search results.") + + elif run and not query.strip(): + st.warning("Enter a question first.") + +with draft_tab: + render_mock_draft_tab() diff --git a/docker-compose.yml b/docker-compose.yml index 7a0c7f0..b38dcaf 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -48,6 +48,8 @@ services: API_BASE_URL: http://api:8000 OLLAMA_BASE_URL: http://ollama:11434 OLLAMA_MODEL: gemma2:2b + volumes: + - statshift_data:/app/data ports: - "8501:8501" depends_on: @@ -59,6 +61,15 @@ services: condition: service_completed_successfully restart: unless-stopped + # Optional: pre-warm mock-draft player cache (ffanalytics scrape; can take several minutes) + ffanalytics-sync: + profiles: [sync] + build: . + volumes: + - statshift_data:/app/data + entrypoint: ["python", "scripts/sync_ffanalytics_players.py"] + restart: "no" + volumes: statshift_data: ollama_data: diff --git a/draft/__init__.py b/draft/__init__.py new file mode 100644 index 0000000..303528a --- /dev/null +++ b/draft/__init__.py @@ -0,0 +1,4 @@ +from draft.engine import MockDraftEngine +from draft.models import LeagueSettings, Player, ScoringFormat + +__all__ = ["MockDraftEngine", "LeagueSettings", "Player", "ScoringFormat"] diff --git a/draft/engine.py b/draft/engine.py new file mode 100644 index 0000000..f02f4b6 --- /dev/null +++ b/draft/engine.py @@ -0,0 +1,192 @@ +"""Snake-draft engine for mock fantasy leagues.""" + +from __future__ import annotations + +from dataclasses import dataclass, field + +from draft.models import ( + ROSTER_SLOTS, + DraftPick, + LeagueSettings, + Player, + Position, + ScoringFormat, + TeamRoster, +) +from draft.player_pool import get_players, max_rounds_for_league + + +def _snake_team_index(round_num: int, pick_in_round: int, league_size: int) -> int: + if round_num % 2 == 1: + return pick_in_round - 1 + return league_size - pick_in_round + + +@dataclass +class MockDraftEngine: + settings: LeagueSettings + pool: list[Player] = field(default_factory=list) + picks: list[DraftPick] = field(default_factory=list) + rosters: list[TeamRoster] = field(default_factory=list) + available: list[Player] = field(default_factory=list) + requested_rounds: int = 15 + max_supported_rounds: int = 15 + + def __post_init__(self) -> None: + if not self.pool: + self.pool = get_players() + self.rosters = [TeamRoster(team_index=i) for i in range(self.settings.league_size)] + self.available = sorted( + self.pool, + key=lambda p: p.fantasy_points(self.settings.scoring), + reverse=True, + ) + + @property + def total_picks(self) -> int: + return self.settings.league_size * self.settings.rounds + + @property + def pool_exhausted(self) -> bool: + return not self.available + + @property + def is_complete(self) -> bool: + return len(self.picks) >= self.total_picks or self.pool_exhausted + + @property + def current_overall(self) -> int: + return len(self.picks) + 1 + + @property + def current_round(self) -> int: + return (len(self.picks) // self.settings.league_size) + 1 + + @property + def current_pick_in_round(self) -> int: + return (len(self.picks) % self.settings.league_size) + 1 + + @property + def current_team_index(self) -> int: + return _snake_team_index( + self.current_round, + self.current_pick_in_round, + self.settings.league_size, + ) + + @property + def is_user_turn(self) -> bool: + return ( + not self.is_complete + and self.current_team_index == self.settings.draft_slot - 1 + ) + + def drafted_names(self) -> set[str]: + return {pick.player.name for pick in self.picks} + + def _roster_need_score(self, roster: TeamRoster, player: Player) -> float: + pos = player.position + counts = {p: roster.count(p) for p in Position} + need = 0.0 + + if pos == Position.QB and counts[Position.QB] < ROSTER_SLOTS["QB"]: + need += 3.0 + elif pos == Position.RB and counts[Position.RB] < ROSTER_SLOTS["RB"]: + need += 2.5 + elif pos == Position.WR and counts[Position.WR] < ROSTER_SLOTS["WR"]: + need += 2.5 + elif pos == Position.TE and counts[Position.TE] < ROSTER_SLOTS["TE"]: + need += 2.0 + elif pos in (Position.RB, Position.WR, Position.TE): + starters = ( + min(counts[Position.RB], ROSTER_SLOTS["RB"]) + + min(counts[Position.WR], ROSTER_SLOTS["WR"]) + + min(counts[Position.TE], ROSTER_SLOTS["TE"]) + ) + flex_filled = roster.flex_eligible_count() - starters + if flex_filled < ROSTER_SLOTS["FLEX"]: + need += 1.5 + elif pos == Position.K and counts[Position.K] < ROSTER_SLOTS["K"]: + need += 0.8 + elif pos == Position.DEF and counts[Position.DEF] < ROSTER_SLOTS["DEF"]: + need += 0.8 + else: + need -= 1.0 + + return need + + def rank_available(self, team_index: int | None = None) -> list[Player]: + idx = team_index if team_index is not None else self.current_team_index + roster = self.rosters[idx] + scoring = self.settings.scoring + + def score(player: Player) -> float: + value = player.fantasy_points(scoring) + need = self._roster_need_score(roster, player) + adp_bonus = max(0.0, (80.0 - player.adp) / 80.0) * 0.5 + return value + need * 2.0 + adp_bonus + + return sorted(self.available, key=score, reverse=True) + + def make_pick(self, player: Player) -> DraftPick: + if self.is_complete: + raise RuntimeError("Draft is already complete") + if player.name not in {p.name for p in self.available}: + raise ValueError(f"{player.name} is not available") + + team_index = self.current_team_index + pick = DraftPick( + round=self.current_round, + pick_in_round=self.current_pick_in_round, + overall=self.current_overall, + team_index=team_index, + player=player, + ) + self.picks.append(pick) + self.rosters[team_index].picks.append(player) + self.available = [p for p in self.available if p.name != player.name] + return pick + + def auto_pick(self) -> DraftPick: + ranked = self.rank_available() + if not ranked: + raise RuntimeError("No players available") + return self.make_pick(ranked[0]) + + def run_cpu_picks_until_user(self) -> list[DraftPick]: + made: list[DraftPick] = [] + while not self.is_complete and not self.is_user_turn: + if self.pool_exhausted: + break + made.append(self.auto_pick()) + return made + + def user_roster(self) -> TeamRoster: + return self.rosters[self.settings.draft_slot - 1] + + @staticmethod + def create( + scoring: ScoringFormat, + league_size: int, + draft_slot: int, + rounds: int = 15, + *, + pool: list[Player] | None = None, + ) -> MockDraftEngine: + player_pool = pool if pool is not None else get_players() + if not player_pool: + raise ValueError( + "Player pool is empty. Refresh the player pool from ffanalytics first." + ) + supported = len(player_pool) // max(league_size, 1) + effective_rounds = max(1, min(rounds, supported)) + settings = LeagueSettings( + scoring=scoring, + league_size=league_size, + draft_slot=draft_slot, + rounds=effective_rounds, + ) + engine = MockDraftEngine(settings=settings, pool=player_pool) + engine.requested_rounds = rounds + engine.max_supported_rounds = supported + return engine diff --git a/draft/espn_loader.py b/draft/espn_loader.py new file mode 100644 index 0000000..46b94a4 --- /dev/null +++ b/draft/espn_loader.py @@ -0,0 +1,240 @@ +"""Load active NFL players from ESPN into draft Player models.""" + +from __future__ import annotations + +import json +import re +from concurrent.futures import ThreadPoolExecutor, as_completed +from datetime import datetime, timezone +from pathlib import Path +from typing import Any + +import httpx + +from config import settings +from draft.models import Player, Position +from sdks.espnSDK import ESPNEndpoint + +FANTASY_ABBREV: dict[str, Position] = { + "QB": Position.QB, + "RB": Position.RB, + "WR": Position.WR, + "TE": Position.TE, + "K": Position.K, + "PK": Position.K, + "FB": Position.RB, +} + +POSITION_BASE_FP: dict[Position, float] = { + Position.QB: 17.0, + Position.RB: 11.0, + Position.WR: 10.0, + Position.TE: 8.5, + Position.K: 7.5, +} + +CACHE_PATH = settings.project_root / "data" / "espn_active_players.json" +CACHE_MAX_AGE_HOURS = 24 +_LIST_PAGE_SIZE = 1000 +_RESOLVE_WORKERS = 32 + + +def _cache_path(path: Path | None = None) -> Path: + return path or CACHE_PATH + + +def _team_id_from_ref(ref: str) -> str | None: + match = re.search(r"/teams/(\d+)", ref) + return match.group(1) if match else None + + +def _estimate_fantasy_points(position: Position, experience_years: int) -> tuple[float, float, float]: + base = POSITION_BASE_FP[position] + min(experience_years, 12) * 0.35 + if position in (Position.WR, Position.RB, Position.TE): + return ( + round(base + 1.2, 1), + round(base + 0.6, 1), + round(base, 1), + ) + return (round(base, 1), round(base, 1), round(base, 1)) + + +def parse_athlete_payload( + payload: dict[str, Any], + team_abbreviations: dict[str, str], +) -> Player | None: + pos_data = payload.get("position") or {} + abbrev = (pos_data.get("abbreviation") or "").upper() + position = FANTASY_ABBREV.get(abbrev) + if position is None: + return None + + name = payload.get("displayName") or payload.get("fullName") or "" + if not name: + return None + + experience = payload.get("experience") or {} + years = int(experience.get("years") or 0) + + team = "" + team_ref = payload.get("team") or {} + if isinstance(team_ref, dict): + ref = team_ref.get("$ref", "") + team_id = team_ref.get("id") or _team_id_from_ref(ref) + if team_id: + team = team_abbreviations.get(str(team_id), "") + + fp_ppr, fp_half, fp_std = _estimate_fantasy_points(position, years) + espn_id = str(payload.get("id") or "") + + return Player( + name=name, + position=position, + fp_ppr=fp_ppr, + fp_half=fp_half, + fp_std=fp_std, + team=team, + adp=999.0, + ) + + +def _load_team_abbreviations(http: httpx.Client, timeout: float) -> dict[str, str]: + teams_api = ESPNEndpoint("teams", client=http, timeout=timeout) + mapping: dict[str, str] = {} + for item in teams_api.iter_items(limit=50): + payload = teams_api.resolve_ref(item) + team_id = str(payload.get("id", "")) + abbrev = payload.get("abbreviation") or payload.get("shortDisplayName") or "" + if team_id and abbrev: + mapping[team_id] = abbrev + return mapping + + +def _collect_athlete_refs(api: ESPNEndpoint) -> list[str]: + refs: list[str] = [] + for page in api.iter_pages(limit=_LIST_PAGE_SIZE, params={"active": "true"}): + for item in page.get("items", []): + ref = item.get("$ref") + if ref: + refs.append(ref) + return refs + + +def _resolve_athlete( + ref: str, + team_map: dict[str, str], + http: httpx.Client, +) -> Player | None: + response = http.get(ref) + response.raise_for_status() + return parse_athlete_payload(response.json(), team_map) + + +def fetch_active_players( + *, + timeout: float = 30.0, + max_workers: int = _RESOLVE_WORKERS, +) -> list[Player]: + """Fetch all active NFL players draftable in fantasy (QB/RB/WR/TE/K).""" + with ESPNEndpoint("athletes", timeout=timeout) as api: + team_map = _load_team_abbreviations(api.client, timeout) + refs = _collect_athlete_refs(api) + + players: list[Player] = [] + with ThreadPoolExecutor(max_workers=max_workers) as executor: + futures = { + executor.submit(_resolve_athlete, ref, team_map, api.client): ref + for ref in refs + } + for future in as_completed(futures): + try: + player = future.result() + if player is not None: + players.append(player) + except httpx.HTTPError: + continue + + players.sort(key=lambda p: p.fp_ppr, reverse=True) + for rank, player in enumerate(players, start=1): + player.adp = float(rank) + return players + + +def save_player_cache(players: list[Player], path: Path | None = None) -> Path: + target = _cache_path(path) + target.parent.mkdir(parents=True, exist_ok=True) + payload = { + "fetched_at": datetime.now(timezone.utc).isoformat(), + "count": len(players), + "players": [ + { + "name": p.name, + "position": p.position.value, + "fp_ppr": p.fp_ppr, + "fp_half": p.fp_half, + "fp_std": p.fp_std, + "team": p.team, + "adp": p.adp, + } + for p in players + ], + } + target.write_text(json.dumps(payload, indent=2), encoding="utf-8") + return target + + +def load_player_cache(path: Path | None = None) -> list[Player] | None: + target = _cache_path(path) + if not target.exists(): + return None + try: + payload = json.loads(target.read_text(encoding="utf-8")) + except (json.JSONDecodeError, OSError): + return None + + players: list[Player] = [] + for row in payload.get("players", []): + try: + players.append( + Player( + name=row["name"], + position=Position(row["position"]), + fp_ppr=float(row["fp_ppr"]), + fp_half=float(row["fp_half"]), + fp_std=float(row["fp_std"]), + team=row.get("team", ""), + adp=float(row.get("adp", 999)), + ) + ) + except (KeyError, ValueError): + continue + return players if players else None + + +def cache_is_fresh(path: Path | None = None, max_age_hours: int = CACHE_MAX_AGE_HOURS) -> bool: + target = _cache_path(path) + if not target.exists(): + return False + try: + payload = json.loads(target.read_text(encoding="utf-8")) + fetched_at = datetime.fromisoformat(payload["fetched_at"]) + age_hours = (datetime.now(timezone.utc) - fetched_at).total_seconds() / 3600 + return age_hours < max_age_hours + except (json.JSONDecodeError, OSError, KeyError, ValueError): + return False + + +def load_active_players( + *, + force_refresh: bool = False, + cache_path: Path | None = None, +) -> list[Player]: + path = _cache_path(cache_path) + if not force_refresh and cache_is_fresh(path): + cached = load_player_cache(path) + if cached: + return cached + + players = fetch_active_players() + save_player_cache(players, path) + return players diff --git a/draft/ffanalytics_loader.py b/draft/ffanalytics_loader.py new file mode 100644 index 0000000..478632d --- /dev/null +++ b/draft/ffanalytics_loader.py @@ -0,0 +1,415 @@ +"""Load fantasy player projections and ADP via R ffanalytics (rpy2).""" + +from __future__ import annotations + +import json +from datetime import datetime, timezone +from typing import Any + +from config import settings +from draft.models import Player, Position + +CACHE_PATH = settings.project_root / "data" / "ffanalytics_players.json" +CACHE_MAX_AGE_HOURS = 24 + +DRAFT_POSITIONS = ("QB", "RB", "WR", "TE", "K") +DEFAULT_SCRAPE_SOURCES = ("FantasyPros", "ESPN", "CBS", "Yahoo") +DEFAULT_ADP_SOURCES = ("ESPN", "Yahoo", "CBS", "NFL") + +POSITION_MAP: dict[str, Position] = { + "QB": Position.QB, + "RB": Position.RB, + "WR": Position.WR, + "TE": Position.TE, + "K": Position.K, + "PK": Position.K, +} + + +def _cache_path(path: Any = None) -> Any: + return path or CACHE_PATH + + +def current_nfl_season() -> int: + """Season year used by ffanalytics scrape_data (Mar+ = current calendar year).""" + now = datetime.now() + return now.year if now.month >= 3 else now.year - 1 + + +def _require_rpy2(): + try: + import rpy2.robjects as ro # noqa: F401 + from rpy2.robjects.packages import importr # noqa: F401 + except ImportError as exc: + raise RuntimeError( + "rpy2 is not installed. Run: pip install rpy2" + ) from exc + return ro, importr + + +def _to_python_scalar(value: Any) -> Any: + """Coerce pandas/numpy scalars to plain Python types for merge logic.""" + if value is None: + return None + type_name = type(value).__module__ + if type_name == "numpy": + return value.item() if hasattr(value, "item") else value + try: + if value != value: # NaN + return None + except (TypeError, ValueError): + pass + return value + + +def _r_dataframe_to_records(r_df: Any) -> list[dict[str, Any]]: + import rpy2.robjects as ro + + pdf = ro.conversion.rpy2py(r_df) + if pdf is None or len(pdf) == 0: + return [] + records = pdf.to_dict(orient="records") + return [ + {key: _to_python_scalar(val) for key, val in row.items()} + for row in records + ] + + +def _pick_column(row: dict[str, Any], *candidates: str) -> Any: + for key in candidates: + if key in row and row[key] is not None: + val = row[key] + try: + if val != val: # NaN + continue + except TypeError: + pass + return val + return None + + +def _float_or(value: Any, default: float = 0.0) -> float: + if value is None: + return default + try: + return float(value) + except (TypeError, ValueError): + return default + + +def _player_name_from_row(row: dict[str, Any]) -> str | None: + name = _pick_column(row, "player_name", "player", "name", "Player") + if name: + return str(name).strip() + first = _pick_column(row, "first_name") + last = _pick_column(row, "last_name") + if first or last: + return f"{first or ''} {last or ''}".strip() + return None + + +def _adp_from_row(row: dict[str, Any]) -> float | None: + """ADP from ffanalytics get_adp (avg column or adp_ columns).""" + avg = _pick_column(row, "avg", "average", "mean") + if avg is not None: + val = _float_or(avg, 0.0) + return val if val > 0 else None + values: list[float] = [] + for key, raw in row.items(): + if not str(key).startswith("adp_"): + continue + val = _float_or(raw, 0.0) + if val > 0: + values.append(val) + if not values: + return None + return sum(values) / len(values) + + +def parse_projection_row( + row: dict[str, Any], + *, + points: float, + adp_by_id: dict[str, float], + adp_by_name: dict[str, float], +) -> Player | None: + pos_raw = _pick_column(row, "pos", "position") + if pos_raw is None: + return None + position = POSITION_MAP.get(str(pos_raw).upper()) + if position is None: + return None + + name = _player_name_from_row(row) + if not name: + return None + + team = str(_pick_column(row, "team", "nfl_team") or "").strip() + player_id = _pick_column(row, "id", "player_id") + adp = 999.0 + if player_id is not None: + adp = adp_by_id.get(str(player_id), adp) + if adp >= 999.0: + adp = adp_by_name.get(name.lower(), adp) + + pts = round(points, 1) + return Player( + name=name, + position=position, + fp_ppr=pts, + fp_half=pts, + fp_std=pts, + team=team, + adp=adp, + ) + + +def merge_projection_tables( + std_rows: list[dict[str, Any]], + half_rows: list[dict[str, Any]], + ppr_rows: list[dict[str, Any]], + adp_rows: list[dict[str, Any]], +) -> list[Player]: + """Combine standard, half-PPR, and PPR projection rows into Player models.""" + adp_by_id: dict[str, float] = {} + adp_by_name: dict[str, float] = {} + for row in adp_rows: + pid = _pick_column(row, "id", "player_id") + adp_val = _adp_from_row(row) + if adp_val is None: + continue + if pid is not None: + adp_by_id[str(pid)] = adp_val + name = _player_name_from_row(row) + if name: + adp_by_name[name.lower()] = adp_val + + def index_by_id(rows: list[dict[str, Any]]) -> dict[str, tuple[dict[str, Any], float]]: + out: dict[str, tuple[dict[str, Any], float]] = {} + for row in rows: + pid = _pick_column(row, "id", "player_id") + if pid is None: + continue + pts = _float_or(_pick_column(row, "points", "avg_points", "fpts")) + if pts <= 0: + continue + out[str(pid)] = (row, pts) + return out + + std_idx = index_by_id(std_rows) + half_idx = index_by_id(half_rows) + ppr_idx = index_by_id(ppr_rows) + + all_ids = set(std_idx) | set(half_idx) | set(ppr_idx) + players: list[Player] = [] + + for pid in all_ids: + base_row, _ = std_idx.get(pid) or half_idx.get(pid) or ppr_idx.get(pid) or ({}, 0.0) + if not base_row: + continue + fp_std = std_idx.get(pid, (base_row, 0.0))[1] + fp_half = half_idx.get(pid, (base_row, fp_std))[1] + fp_ppr = ppr_idx.get(pid, (base_row, fp_std))[1] + if fp_std <= 0 and fp_half <= 0 and fp_ppr <= 0: + continue + + player = parse_projection_row( + base_row, + points=fp_ppr or fp_half or fp_std, + adp_by_id=adp_by_id, + adp_by_name=adp_by_name, + ) + if player is None: + continue + player.fp_std = round(fp_std or fp_half or fp_ppr, 1) + player.fp_half = round(fp_half or fp_std or fp_ppr, 1) + player.fp_ppr = round(fp_ppr or fp_half or fp_std, 1) + players.append(player) + + players.sort(key=lambda p: p.adp if p.adp < 999 else 9999) + for rank, player in enumerate(players, start=1): + if player.adp >= 999: + player.adp = float(rank) + return players + + +def fetch_ffanalytics_players( + *, + season: int | None = None, + scrape_sources: tuple[str, ...] = DEFAULT_SCRAPE_SOURCES, + adp_sources: tuple[str, ...] = DEFAULT_ADP_SOURCES, +) -> list[Player]: + """Scrape projections (3 scoring formats) and ADP from ffanalytics.""" + from draft.rpy2_setup import rpy2_session + + with rpy2_session(): + return _fetch_ffanalytics_players_impl( + season=season, + scrape_sources=scrape_sources, + adp_sources=adp_sources, + ) + + +def _run_ffanalytics_pipeline_r( + ro: Any, + *, + season: int, + scrape_sources: tuple[str, ...], + adp_sources: tuple[str, ...], +) -> tuple[Any, Any, Any, Any]: + """ + Run scrape → projections (3 formats) → ADP entirely in R. + + Intermediate objects must not round-trip through rpy2; that drops attributes + (e.g. season/year) and breaks projections_table(). + """ + from rpy2.robjects.vectors import IntVector, StrVector + + ro.globalenv[".statshift_src"] = StrVector(list(scrape_sources)) + ro.globalenv[".statshift_adp_src"] = StrVector(list(adp_sources)) + ro.globalenv[".statshift_season"] = IntVector([season]) + ro.globalenv[".statshift_pos"] = StrVector(list(DRAFT_POSITIONS)) + + ro.r( + """ + { + library(ffanalytics) + build_scoring <- function(rec_pts) { + s <- unserialize(serialize(ffanalytics::scoring, NULL)) + s$rec$rec <- rec_pts + s + } + scraped <- scrape_data( + src = .statshift_src, + pos = .statshift_pos, + season = .statshift_season, + week = 0L + ) + .statshift_proj_std <<- add_player_info( + projections_table(scraped, scoring_rules = build_scoring(0)) + ) + .statshift_proj_half <<- add_player_info( + projections_table(scraped, scoring_rules = build_scoring(0.5)) + ) + .statshift_proj_ppr <<- add_player_info( + projections_table(scraped, scoring_rules = build_scoring(1)) + ) + .statshift_adp <<- get_adp(sources = .statshift_adp_src) + invisible(NULL) + } + """ + ) + return ( + ro.globalenv[".statshift_proj_std"], + ro.globalenv[".statshift_proj_half"], + ro.globalenv[".statshift_proj_ppr"], + ro.globalenv[".statshift_adp"], + ) + + +def _fetch_ffanalytics_players_impl( + *, + season: int | None, + scrape_sources: tuple[str, ...], + adp_sources: tuple[str, ...], +) -> list[Player]: + ro, _importr = _require_rpy2() + season = season or current_nfl_season() + + proj_std, proj_half, proj_ppr, adp_df = _run_ffanalytics_pipeline_r( + ro, + season=season, + scrape_sources=scrape_sources, + adp_sources=adp_sources, + ) + + players = merge_projection_tables( + _r_dataframe_to_records(proj_std), + _r_dataframe_to_records(proj_half), + _r_dataframe_to_records(proj_ppr), + _r_dataframe_to_records(adp_df), + ) + players.sort(key=lambda p: p.fp_ppr, reverse=True) + return players + + +def save_player_cache(players: list[Player], path: Any = None) -> Any: + target = _cache_path(path) + target.parent.mkdir(parents=True, exist_ok=True) + payload = { + "fetched_at": datetime.now(timezone.utc).isoformat(), + "season": current_nfl_season(), + "source": "ffanalytics", + "count": len(players), + "players": [ + { + "name": p.name, + "position": p.position.value, + "fp_ppr": p.fp_ppr, + "fp_half": p.fp_half, + "fp_std": p.fp_std, + "team": p.team, + "adp": p.adp, + } + for p in players + ], + } + target.write_text(json.dumps(payload, indent=2), encoding="utf-8") + return target + + +def load_player_cache(path: Any = None) -> list[Player] | None: + target = _cache_path(path) + if not target.exists(): + return None + try: + payload = json.loads(target.read_text(encoding="utf-8")) + except (json.JSONDecodeError, OSError): + return None + + players: list[Player] = [] + for row in payload.get("players", []): + try: + players.append( + Player( + name=row["name"], + position=Position(row["position"]), + fp_ppr=float(row["fp_ppr"]), + fp_half=float(row["fp_half"]), + fp_std=float(row["fp_std"]), + team=row.get("team", ""), + adp=float(row.get("adp", 999)), + ) + ) + except (KeyError, ValueError): + continue + return players if players else None + + +def cache_is_fresh(path: Any = None, max_age_hours: int = CACHE_MAX_AGE_HOURS) -> bool: + target = _cache_path(path) + if not target.exists(): + return False + try: + payload = json.loads(target.read_text(encoding="utf-8")) + fetched_at = datetime.fromisoformat(payload["fetched_at"]) + age_hours = (datetime.now(timezone.utc) - fetched_at).total_seconds() / 3600 + return age_hours < max_age_hours + except (json.JSONDecodeError, OSError, KeyError, ValueError): + return False + + +def load_active_players( + *, + force_refresh: bool = False, + cache_path: Any = None, +) -> list[Player]: + path = _cache_path(cache_path) + if not force_refresh and cache_is_fresh(path): + cached = load_player_cache(path) + if cached: + return cached + + players = fetch_ffanalytics_players() + save_player_cache(players, path) + return players diff --git a/draft/models.py b/draft/models.py new file mode 100644 index 0000000..dc5d748 --- /dev/null +++ b/draft/models.py @@ -0,0 +1,89 @@ +"""Data models for mock fantasy drafts.""" + +from __future__ import annotations + +from dataclasses import dataclass, field +from enum import Enum + + +class ScoringFormat(str, Enum): + PPR = "PPR" + HALF_PPR = "Half-PPR" + STANDARD = "Standard" + + +class Position(str, Enum): + QB = "QB" + RB = "RB" + WR = "WR" + TE = "TE" + K = "K" + DEF = "DEF" + + +ROSTER_SLOTS: dict[str, int] = { + "QB": 1, + "RB": 2, + "WR": 2, + "TE": 1, + "FLEX": 1, + "K": 1, + "DEF": 1, +} + + +@dataclass(frozen=True) +class LeagueSettings: + scoring: ScoringFormat + league_size: int + draft_slot: int + rounds: int = 15 + + def __post_init__(self) -> None: + if self.league_size < 4 or self.league_size > 16: + raise ValueError("league_size must be between 4 and 16") + if self.draft_slot < 1 or self.draft_slot > self.league_size: + raise ValueError("draft_slot must be between 1 and league_size") + if self.rounds < 1: + raise ValueError("rounds must be at least 1") + + +@dataclass +class Player: + name: str + position: Position + fp_ppr: float + fp_half: float + fp_std: float + team: str = "" + adp: float = 99.0 + + def fantasy_points(self, scoring: ScoringFormat) -> float: + if scoring == ScoringFormat.PPR: + return self.fp_ppr + if scoring == ScoringFormat.HALF_PPR: + return self.fp_half + return self.fp_std + + +@dataclass +class DraftPick: + round: int + pick_in_round: int + overall: int + team_index: int + player: Player + + +@dataclass +class TeamRoster: + team_index: int + picks: list[Player] = field(default_factory=list) + + def count(self, position: Position) -> int: + return sum(1 for p in self.picks if p.position == position) + + def flex_eligible_count(self) -> int: + return sum( + 1 for p in self.picks if p.position in (Position.RB, Position.WR, Position.TE) + ) diff --git a/draft/player_pool.py b/draft/player_pool.py new file mode 100644 index 0000000..b94c41c --- /dev/null +++ b/draft/player_pool.py @@ -0,0 +1,60 @@ +"""Player pool for mock drafts — loaded from ffanalytics via rpy2.""" + +from __future__ import annotations + +from draft.ffanalytics_loader import ( + CACHE_PATH, + cache_is_fresh, + load_active_players, + load_player_cache, +) +from draft.models import Player + +_players: list[Player] | None = None + + +def get_players(*, force_refresh: bool = False) -> list[Player]: + """Return the draftable player pool (cached ffanalytics projections + ADP).""" + global _players + if force_refresh or _players is None: + _players = load_active_players(force_refresh=force_refresh) + return _players + + +def player_pool_size() -> int: + if _players is not None: + return len(_players) + cached = load_player_cache() + if cached: + return len(cached) + return 0 + + +def max_rounds_for_league(league_size: int) -> int: + """Maximum draft rounds supported without running out of players.""" + size = player_pool_size() + if size == 0: + return 15 + return size // max(league_size, 1) + + +# Lazy default for imports; call refresh_players() before draft in UI +PLAYERS: list[Player] = [] + + +def refresh_players(*, force_refresh: bool = False) -> list[Player]: + """Load or refresh pool and update module-level PLAYERS.""" + global PLAYERS, _players + _players = load_active_players(force_refresh=force_refresh) + PLAYERS = _players + return _players + + +def cache_status() -> str: + if cache_is_fresh(): + cached = load_player_cache() + count = len(cached) if cached else 0 + return f"ffanalytics cache ({count} players) at {CACHE_PATH.name}" + if CACHE_PATH.exists(): + return "ffanalytics cache stale — will refresh on next load" + return "No ffanalytics cache — will scrape on first Mock Draft load" diff --git a/draft/rpy2_setup.py b/draft/rpy2_setup.py new file mode 100644 index 0000000..94f1c14 --- /dev/null +++ b/draft/rpy2_setup.py @@ -0,0 +1,51 @@ +"""Initialize rpy2 conversion rules for Streamlit (main thread + context manager).""" + +from __future__ import annotations + +from contextlib import contextmanager +from typing import Any, Iterator + +_converter: Any = None + + +def get_rpy2_converter() -> Any: + """Build (once) the default + pandas conversion bundle.""" + global _converter + if _converter is not None: + return _converter + + from rpy2.robjects import default_converter, numpy2ri, pandas2ri + + _converter = default_converter + numpy2ri.converter + pandas2ri.converter + return _converter + + +def init_rpy2_on_main_thread() -> Any: + """ + Call once from Streamlit's main script thread before any R work. + + Sets global conversion rules and marks this thread as R's init thread. + """ + import rpy2.rinterface_lib.embedded as embedded + from rpy2.robjects import conversion + + if not embedded.isinitialized(): + embedded.set_init_thread() + + converter = get_rpy2_converter() + conversion.set_conversion(converter) + return converter + + +@contextmanager +def rpy2_session() -> Iterator[Any]: + """ + Run rpy2/R code with conversion rules applied. + + Use around every block that calls into R from Streamlit reruns or worker threads. + """ + from rpy2.robjects.conversion import localconverter + + converter = get_rpy2_converter() + with localconverter(converter): + yield converter diff --git a/requirements.txt b/requirements.txt index 92b5e20..a948c94 100644 --- a/requirements.txt +++ b/requirements.txt @@ -4,3 +4,4 @@ httpx>=0.27.0 streamlit>=1.40.0 pydantic>=2.9.0 pydantic-settings>=2.6.0 +rpy2>=3.5.0 diff --git a/scripts/sync_espn_players.py b/scripts/sync_espn_players.py new file mode 100644 index 0000000..fddec1a --- /dev/null +++ b/scripts/sync_espn_players.py @@ -0,0 +1,24 @@ +#!/usr/bin/env python3 +"""Download active NFL players from ESPN and cache for mock drafts.""" + +from __future__ import annotations + +import sys +from pathlib import Path + +ROOT = Path(__file__).resolve().parents[1] +if str(ROOT) not in sys.path: + sys.path.insert(0, str(ROOT)) + +from draft.espn_loader import CACHE_PATH, fetch_active_players, save_player_cache + + +def main() -> None: + print("Fetching active NFL players from ESPN (this may take 1–2 minutes)...") + players = fetch_active_players() + path = save_player_cache(players) + print(f"Saved {len(players)} draftable players to {path}") + + +if __name__ == "__main__": + main() diff --git a/scripts/sync_ffanalytics_players.py b/scripts/sync_ffanalytics_players.py new file mode 100644 index 0000000..a7a1fff --- /dev/null +++ b/scripts/sync_ffanalytics_players.py @@ -0,0 +1,24 @@ +#!/usr/bin/env python3 +"""Scrape ffanalytics projections/ADP and cache for mock drafts.""" + +from __future__ import annotations + +import sys +from pathlib import Path + +ROOT = Path(__file__).resolve().parents[1] +if str(ROOT) not in sys.path: + sys.path.insert(0, str(ROOT)) + +from draft.ffanalytics_loader import CACHE_PATH, fetch_ffanalytics_players, save_player_cache + + +def main() -> None: + print("Scraping projections and ADP via ffanalytics (may take several minutes)...") + players = fetch_ffanalytics_players() + path = save_player_cache(players) + print(f"Saved {len(players)} draftable players to {path}") + + +if __name__ == "__main__": + main() diff --git a/sdks/espnSDK.py b/sdks/espnSDK.py new file mode 100644 index 0000000..b952ce6 --- /dev/null +++ b/sdks/espnSDK.py @@ -0,0 +1,142 @@ +"""Thin client for ESPN's public NFL core API.""" + +from __future__ import annotations + +import platform +import socket +from types import TracebackType +from typing import Any, Iterator + +import httpx + + +class ESPNClient: + """Shared ESPN API settings (no HTTP session).""" + + base_url: str = "https://sports.core.api.espn.com/v2/sports/football/leagues/nfl/" + + def __init__( + self, + timeout: float = 30.0, + host_name: str | None = None, + host_hardware: str | None = None, + ) -> None: + self.host_name = host_name or socket.gethostname() + self.host_hardware = host_hardware or platform.machine() + self.name = f"{self.host_name}_{self.host_hardware}" + self.timeout = timeout + + +class ESPNEndpoint(ESPNClient): + """Call a single ESPN resource with optional query params and pagination.""" + + def __init__( + self, + endpoint: str, + timeout: float = 30.0, + client: httpx.Client | None = None, + host_name: str | None = None, + host_hardware: str | None = None, + ) -> None: + ESPNClient.__init__( + self, + timeout=timeout, + host_name=host_name, + host_hardware=host_hardware, + ) + self.endpoint = endpoint + self._owns_client = client is None + self.client = client or httpx.Client( + timeout=timeout, + follow_redirects=True, + headers={"User-Agent": f"StatShift/{self.name}"}, + ) + + def close(self) -> None: + if self._owns_client: + self.client.close() + + def __enter__(self) -> ESPNEndpoint: + return self + + def __exit__( + self, + exc_type: type[BaseException] | None, + exc_val: BaseException | None, + exc_tb: TracebackType | None, + ) -> None: + self.close() + + def _endpoint_url(self) -> str: + path = self.endpoint.lstrip("/") + if path and not path.endswith("/"): + path = f"{path}/" + return f"{self.base_url}{path}" + + def get( + self, + url: str | None = None, + params: dict[str, Any] | None = None, + ) -> dict[str, Any]: + response = self.client.get(url or self._endpoint_url(), params=params) + response.raise_for_status() + return response.json() + + def get_page( + self, + *, + page: int = 1, + limit: int | None = None, + params: dict[str, Any] | None = None, + ) -> dict[str, Any]: + """Fetch one page. Pass filters via `params` (merged with page/limit).""" + query = dict(params or {}) + query["page"] = page + if limit is not None: + query["limit"] = limit + + return self.get(params=query) + + def iter_pages( + self, + *, + limit: int = 100, + params: dict[str, Any] | None = None, + max_pages: int | None = None, + ) -> Iterator[dict[str, Any]]: + """Yield each paginated JSON response until pageCount is exhausted.""" + page = 1 + pages_fetched = 0 + + while True: + payload = self.get_page(page=page, limit=limit, params=params) + yield payload + + pages_fetched += 1 + if max_pages is not None and pages_fetched >= max_pages: + break + + page_count = payload.get("pageCount", 1) + if page >= page_count: + break + page += 1 + + def iter_items( + self, + *, + limit: int = 100, + params: dict[str, Any] | None = None, + max_pages: int | None = None, + ) -> Iterator[dict[str, Any]]: + """Yield each item from `items` across pages (usually `{$ref: ...}` links).""" + for payload in self.iter_pages( + limit=limit, params=params, max_pages=max_pages + ): + yield from payload.get("items", []) + + def resolve_ref(self, item: dict[str, Any]) -> dict[str, Any]: + """Follow a hypermedia `$ref` link to load full resource JSON.""" + ref = item.get("$ref") + if not ref: + raise ValueError("Item has no $ref field") + return self.get(ref) diff --git a/tests/test_draft_engine.py b/tests/test_draft_engine.py new file mode 100644 index 0000000..a20ad7a --- /dev/null +++ b/tests/test_draft_engine.py @@ -0,0 +1,83 @@ +"""Unit tests for mock draft engine.""" + +from __future__ import annotations + +import unittest + +from draft.engine import MockDraftEngine, _snake_team_index +from draft.models import Player, Position, ScoringFormat + + +def _test_pool() -> list[Player]: + return [ + Player(f"QB {i}", Position.QB, 20.0, 20.0, 20.0, adp=float(i)) + for i in range(1, 5) + ] + [ + Player(f"RB {i}", Position.RB, 15.0, 14.0, 13.0, adp=float(10 + i)) + for i in range(1, 10) + ] + [ + Player(f"WR {i}", Position.WR, 14.0, 13.0, 12.0, adp=float(20 + i)) + for i in range(1, 10) + ] + + +class SnakeDraftTests(unittest.TestCase): + def test_snake_order_round_two_reverses(self) -> None: + self.assertEqual(_snake_team_index(1, 1, 10), 0) + self.assertEqual(_snake_team_index(1, 10, 10), 9) + self.assertEqual(_snake_team_index(2, 1, 10), 9) + self.assertEqual(_snake_team_index(2, 10, 10), 0) + + def test_ppr_ranks_pass_catchers_higher(self) -> None: + pool = _test_pool() + draft = MockDraftEngine.create( + scoring=ScoringFormat.PPR, + league_size=4, + draft_slot=1, + rounds=1, + pool=pool, + ) + wr = next(p for p in draft.rank_available() if p.position == Position.WR) + draft.make_pick(wr) + self.assertEqual(draft.picks[0].player.position, Position.WR) + + def test_auto_pick_reduces_available_pool(self) -> None: + draft = MockDraftEngine.create( + scoring=ScoringFormat.STANDARD, + league_size=4, + draft_slot=4, + rounds=2, + pool=_test_pool(), + ) + draft.run_cpu_picks_until_user() + self.assertEqual(len(draft.picks), 3) + self.assertTrue(draft.is_user_turn) + + def test_create_rejects_empty_pool(self) -> None: + with self.assertRaises(ValueError) as ctx: + MockDraftEngine.create( + scoring=ScoringFormat.PPR, + league_size=12, + draft_slot=1, + rounds=15, + pool=[], + ) + self.assertIn("empty", str(ctx.exception).lower()) + + def test_draft_completes_after_all_rounds(self) -> None: + pool = _test_pool() + draft = MockDraftEngine.create( + scoring=ScoringFormat.HALF_PPR, + league_size=4, + draft_slot=1, + rounds=2, + pool=pool, + ) + while not draft.is_complete: + draft.auto_pick() + self.assertEqual(len(draft.picks), 8) + self.assertEqual(len(draft.available), len(pool) - 8) + + +if __name__ == "__main__": + unittest.main() diff --git a/tests/test_espn_loader.py b/tests/test_espn_loader.py new file mode 100644 index 0000000..c127b0e --- /dev/null +++ b/tests/test_espn_loader.py @@ -0,0 +1,46 @@ +"""Unit tests for ESPN player parsing.""" + +from __future__ import annotations + +import unittest + +from draft.espn_loader import parse_athlete_payload +from draft.models import Position + + +class ParseAthletePayloadTests(unittest.TestCase): + def test_parses_skill_position(self) -> None: + payload = { + "id": "1", + "displayName": "Ja'Marr Chase", + "position": {"abbreviation": "WR"}, + "experience": {"years": 4}, + "team": {"id": "4", "$ref": "http://example/teams/4"}, + } + player = parse_athlete_payload(payload, {"4": "CIN"}) + assert player is not None + self.assertEqual(player.name, "Ja'Marr Chase") + self.assertEqual(player.position, Position.WR) + self.assertEqual(player.team, "CIN") + + def test_skips_offensive_line(self) -> None: + payload = { + "displayName": "Some Lineman", + "position": {"abbreviation": "OT"}, + "experience": {"years": 3}, + } + self.assertIsNone(parse_athlete_payload(payload, {})) + + def test_maps_pk_to_kicker(self) -> None: + payload = { + "displayName": "Justin Tucker", + "position": {"abbreviation": "PK"}, + "experience": {"years": 10}, + } + player = parse_athlete_payload(payload, {}) + assert player is not None + self.assertEqual(player.position, Position.K) + + +if __name__ == "__main__": + unittest.main() diff --git a/tests/test_ffanalytics_loader.py b/tests/test_ffanalytics_loader.py new file mode 100644 index 0000000..3665f65 --- /dev/null +++ b/tests/test_ffanalytics_loader.py @@ -0,0 +1,102 @@ +"""Unit tests for ffanalytics player parsing (no R required).""" + +from __future__ import annotations + +import unittest + +from draft.ffanalytics_loader import ( + merge_projection_tables, + parse_projection_row, + _player_name_from_row, + _adp_from_row, +) +from draft.models import Position + + +class ParseHelpersTests(unittest.TestCase): + def test_player_name_from_first_last(self) -> None: + row = {"first_name": "Josh", "last_name": "Allen", "pos": "QB"} + self.assertEqual(_player_name_from_row(row), "Josh Allen") + + def test_adp_from_source_columns(self) -> None: + row = {"id": "1", "adp_espn": 2.2, "adp_yahoo": 3.0} + self.assertAlmostEqual(_adp_from_row(row), 2.6) + + +class MergeProjectionTablesTests(unittest.TestCase): + def test_merges_scoring_formats_and_adp(self) -> None: + std_rows = [ + { + "id": "1", + "first_name": "Ja'Marr", + "last_name": "Chase", + "pos": "WR", + "team": "CIN", + "points": 18.0, + }, + { + "id": "2", + "first_name": "Christian", + "last_name": "McCaffrey", + "pos": "RB", + "team": "SF", + "points": 20.0, + }, + ] + half_rows = [ + { + "id": "1", + "first_name": "Ja'Marr", + "last_name": "Chase", + "pos": "WR", + "team": "CIN", + "points": 19.0, + }, + { + "id": "2", + "first_name": "Christian", + "last_name": "McCaffrey", + "pos": "RB", + "team": "SF", + "points": 20.5, + }, + ] + ppr_rows = [ + { + "id": "1", + "first_name": "Ja'Marr", + "last_name": "Chase", + "pos": "WR", + "team": "CIN", + "points": 20.0, + }, + { + "id": "2", + "first_name": "Christian", + "last_name": "McCaffrey", + "pos": "RB", + "team": "SF", + "points": 21.0, + }, + ] + adp_rows = [ + {"id": "1", "adp_espn": 1.5}, + {"id": "2", "adp_espn": 2.0}, + ] + players = merge_projection_tables(std_rows, half_rows, ppr_rows, adp_rows) + self.assertEqual(len(players), 2) + chase = next(p for p in players if p.name == "Ja'Marr Chase") + self.assertEqual(chase.position, Position.WR) + self.assertEqual(chase.fp_std, 18.0) + self.assertEqual(chase.fp_half, 19.0) + self.assertEqual(chase.fp_ppr, 20.0) + self.assertEqual(chase.adp, 1.5) + + def test_skips_unknown_positions(self) -> None: + row = {"id": "9", "player_name": "Some Lineman", "pos": "OT", "points": 5.0} + player = parse_projection_row(row, points=5.0, adp_by_id={}, adp_by_name={}) + self.assertIsNone(player) + + +if __name__ == "__main__": + unittest.main() From 3bccff0471efb61d72c3005af3ae4240abc8d5c7 Mon Sep 17 00:00:00 2001 From: Joseph Lee <175lee.joseph@gmail.com> Date: Wed, 20 May 2026 13:19:44 -0400 Subject: [PATCH 6/9] Added load player data feedback and Yahoo, ESPN, Sleeper consensus rankings --- .gitignore | 1 + app/mock_draft_tab.py | 187 +++++++++++++++++++++---------- app/streamlit_app.py | 50 ++------- draft/engine.py | 110 ++++++++++++++++-- draft/ffanalytics_loader.py | 103 +++++++++++++++-- draft/models.py | 18 ++- draft/sleeper_loader.py | 126 +++++++++++++++++++++ tests/test_draft_engine.py | 183 +++++++++++++++++++++++++++++- tests/test_ffanalytics_loader.py | 55 +++++++++ tests/test_sleeper_loader.py | 85 ++++++++++++++ 10 files changed, 795 insertions(+), 123 deletions(-) create mode 100644 draft/sleeper_loader.py create mode 100644 tests/test_sleeper_loader.py diff --git a/.gitignore b/.gitignore index 24ed429..051aa23 100644 --- a/.gitignore +++ b/.gitignore @@ -6,5 +6,6 @@ data/*.db data/*.db-* data/espn_active_players.json data/ffanalytics_players.json +data/sleeper_players.json .DS_Store .streamlit/secrets.toml diff --git a/app/mock_draft_tab.py b/app/mock_draft_tab.py index 0b12dfc..f902ef8 100644 --- a/app/mock_draft_tab.py +++ b/app/mock_draft_tab.py @@ -5,9 +5,9 @@ import streamlit as st from draft.engine import MockDraftEngine -from draft.ffanalytics_loader import load_active_players, load_player_cache -from draft.models import ROSTER_SLOTS, ScoringFormat -from draft.player_pool import cache_status, refresh_players +from draft.ffanalytics_loader import cache_is_fresh, load_active_players, load_player_cache +from draft.models import ROSTER_SLOTS, RankingSource, ScoringFormat +from draft.player_pool import refresh_players from draft.rpy2_setup import rpy2_session @@ -21,7 +21,7 @@ def _init_draft_state() -> None: def _ensure_player_pool(*, force_refresh: bool = False) -> list: if force_refresh: with rpy2_session(): - with st.spinner("Loading players from ffanalytics…"): + with st.spinner("Loading players…"): st.session_state["draft_pool"] = refresh_players(force_refresh=True) elif st.session_state.get("draft_pool") is None: # Use JSON cache only on tab open — avoid a long R scrape until user clicks Refresh @@ -29,9 +29,11 @@ def _ensure_player_pool(*, force_refresh: bool = False) -> list: return st.session_state["draft_pool"] -def _render_settings(pool_size: int) -> tuple[ScoringFormat, int, int, int] | None: +def _render_settings( + pool_size: int, +) -> tuple[ScoringFormat, int, int, int, float, RankingSource] | None: st.subheader("League settings") - col1, col2, col3, col4 = st.columns(4) + col1, col2, col3, col4, col5 = st.columns(5) with col1: scoring_label = st.selectbox( @@ -40,8 +42,16 @@ def _render_settings(pool_size: int) -> tuple[ScoringFormat, int, int, int] | No index=0, ) with col2: - league_size = st.selectbox("League size", options=[8, 10, 12, 14], index=2) + ranking_options = [r.value for r in RankingSource] + ranking_label = st.selectbox( + "Rankings", + options=ranking_options, + index=0, + help="Consensus rankings source for CPU picks and best available.", + ) with col3: + league_size = st.selectbox("League size", options=[8, 10, 12, 14], index=2) + with col4: draft_slot = st.number_input( "Your draft slot", min_value=1, @@ -49,7 +59,7 @@ def _render_settings(pool_size: int) -> tuple[ScoringFormat, int, int, int] | No value=min(5, int(league_size)), step=1, ) - with col4: + with col5: if pool_size: max_rounds = max(1, pool_size // max(int(league_size), 1)) else: @@ -63,12 +73,20 @@ def _render_settings(pool_size: int) -> tuple[ScoringFormat, int, int, int] | No disabled=pool_size == 0, ) - scoring = ScoringFormat(scoring_label) - st.caption( - f"Player pool: **{pool_size}** ranked players (QB/RB/WR/TE/K) · " - f"up to **{max_rounds}** rounds for a **{league_size}**-team league" + cpu_randomness = st.slider( + "Draft unpredictability", + min_value=0.0, + max_value=1.0, + value=0.35, + step=0.05, + help="Higher values make other teams less predictable.", ) - return scoring, int(league_size), int(draft_slot), int(rounds) + + scoring = ScoringFormat(scoring_label) + ranking_source = RankingSource(ranking_label) + if pool_size: + st.caption(f"{pool_size} players · up to {max_rounds} rounds") + return scoring, int(league_size), int(draft_slot), int(rounds), float(cpu_randomness), ranking_source def _render_draft_board(draft: MockDraftEngine) -> None: @@ -150,11 +168,15 @@ def _render_pick_controls(draft: MockDraftEngine) -> None: if not ranked: st.error("No players left in the pool.") return - options = { - f"{p.name} ({p.position.value}, {p.team}) — " - f"{p.fantasy_points(draft.settings.scoring):.1f} FPG": p - for p in ranked - } + options = {} + for p in ranked: + rank = p.rank_for_source(draft.ranking_source) + rank_str = f"#{int(rank)}" if rank < 999 else "—" + label = ( + f"{rank_str} {p.name} ({p.position.value}, {p.team}) — " + f"{p.fantasy_points(draft.settings.scoring):.1f} FPG" + ) + options[label] = p choice = st.selectbox("Select player", options=list(options.keys())) if st.button("Draft player", type="primary", use_container_width=True): draft.make_pick(options[choice]) @@ -169,45 +191,67 @@ def _render_pick_controls(draft: MockDraftEngine) -> None: def render_mock_draft_tab() -> None: _init_draft_state() - st.markdown( - "Configure scoring and league size, then run a **snake mock draft** with " - "**projections and ADP** from [ffanalytics](https://github.com/FantasyFootballAnalytics/ffanalytics) " - "(QB/RB/WR/TE/K)." - ) - st.caption(cache_status()) + # Auto-load players if cache is missing or stale and not already attempted + if "auto_load_attempted" not in st.session_state: + st.session_state["auto_load_attempted"] = False + + if not st.session_state["auto_load_attempted"]: + if not cache_is_fresh(): + st.session_state["auto_load_attempted"] = True + try: + with rpy2_session(): + with st.spinner("Loading player pool…"): + pool_loaded = load_active_players(force_refresh=True) + refresh_players(force_refresh=True) + st.session_state["draft_pool"] = pool_loaded + st.session_state["players_loaded_successfully"] = True + except Exception as exc: + st.error(f"Could not auto-load players: {exc}") + st.session_state["players_loaded_successfully"] = False + else: + # Cache is fresh, just load from cache + st.session_state["auto_load_attempted"] = True + cached = load_player_cache() + if cached: + st.session_state["draft_pool"] = cached + st.session_state["players_loaded_successfully"] = True - refresh_col, _ = st.columns([1, 3]) - with refresh_col: - if st.button("Refresh player pool from ffanalytics", use_container_width=True): + pool = _ensure_player_pool() + pool_size = len(pool) + + load_col, _ = st.columns([1, 3]) + with load_col: + if st.button("Load players", use_container_width=True): try: with rpy2_session(): - with st.spinner("Scraping projections and ADP via ffanalytics…"): + with st.spinner("Loading player pool…"): pool_loaded = load_active_players(force_refresh=True) refresh_players(force_refresh=True) st.session_state["draft_pool"] = pool_loaded + st.session_state["players_loaded_successfully"] = True if not pool_loaded: - st.error("ffanalytics returned no draftable players.") + st.error("No players were returned. Try again in a few minutes.") + st.session_state["players_loaded_successfully"] = False else: - st.success(f"Loaded {len(pool_loaded)} players.") st.rerun() except Exception as exc: - st.error(f"ffanalytics sync failed: {exc}") - - pool = _ensure_player_pool() - pool_size = len(pool) + st.error(f"Could not load players: {exc}") + st.session_state["players_loaded_successfully"] = False + pool = _ensure_player_pool() + pool_size = len(pool) + + if st.session_state.get("players_loaded_successfully"): + st.success("Players successfully loaded!", icon="✅") if pool_size == 0: - st.warning( - "No player pool loaded. Click **Refresh player pool from ffanalytics** " - "(first run may take several minutes)." - ) + st.info("Load players to start a mock draft.") settings = _render_settings(pool_size) if settings is None: return - scoring, league_size, draft_slot, rounds = settings + scoring, league_size, draft_slot, rounds, cpu_randomness, ranking_source = settings slot_cols = st.columns([1, 1, 2]) with slot_cols[0]: @@ -224,6 +268,8 @@ def render_mock_draft_tab() -> None: draft_slot=draft_slot, rounds=rounds, pool=pool, + cpu_randomness=cpu_randomness, + ranking_source=ranking_source, ) st.rerun() except ValueError as exc: @@ -235,27 +281,14 @@ def render_mock_draft_tab() -> None: draft: MockDraftEngine | None = st.session_state.get("mock_draft") if draft is None: - st.info("Set your parameters and click **Start new draft**.") - st.markdown( - f""" - **Roster slots:** {", ".join(f"{k}×{v}" for k, v in ROSTER_SLOTS.items())} - - **Player source:** ffanalytics projections + ADP (~{pool_size} players). - First load scrapes multiple sites and may take several minutes; cached 24 hours - in the `statshift_data` Docker volume (`data/ffanalytics_players.json`). - """ - ) return meta = draft.settings - rounds_note = f"{meta.rounds} rounds" - if draft.requested_rounds > meta.rounds: - rounds_note += f" (capped from {draft.requested_rounds}; pool limit)" st.caption( - f"{meta.scoring.value} · {meta.league_size} teams · " - f"slot {meta.draft_slot} · {rounds_note} · " - f"Pick {min(draft.current_overall, draft.total_picks)} of {draft.total_picks} · " - f"{len(draft.available)} players left" + f"{meta.scoring.value} · {draft.ranking_source.value} rankings · " + f"{meta.league_size} teams · pick {meta.draft_slot} · " + f"round {draft.current_round} · " + f"#{min(draft.current_overall, draft.total_picks)} of {draft.total_picks}" ) board_col, roster_col = st.columns([3, 2]) @@ -267,7 +300,47 @@ def render_mock_draft_tab() -> None: st.subheader("Best available") for player in draft.rank_available()[:8]: + rank = player.rank_for_source(draft.ranking_source) + rank_str = f"#{int(rank)}" if rank < 999 else "—" st.write( f"**{player.name}** ({player.position.value}, {player.team}) — " - f"{player.fantasy_points(draft.settings.scoring):.1f} FPG" + f"{player.fantasy_points(draft.settings.scoring):.1f} FPG · {rank_str}" ) + + _render_monte_carlo_lookahead(draft) + + +def _render_monte_carlo_lookahead(draft: MockDraftEngine) -> None: + if draft.is_complete or draft.pool_exhausted or not draft.is_user_turn: + return + + num_sims = 40 + with st.spinner("Calculating pick outlook…"): + probabilities = draft.simulate_availability_at_next_user_pick(num_sims=num_sims) + + ranked = draft.rank_available()[:10] + if not ranked: + return + + rows = [] + for player in ranked: + pct = probabilities.get(player.name, 0.0) * 100 + rank = player.rank_for_source(draft.ranking_source) + rows.append( + { + "Player": player.name, + "Pos": player.position.value, + "Team": player.team, + "Rank": int(rank) if rank < 999 else "—", + "FPG": round(player.fantasy_points(draft.settings.scoring), 1), + "Avail next pick": f"{pct:.0f}%", + "_pct": pct, + } + ) + + rows.sort(key=lambda r: r["_pct"], reverse=True) + for row in rows: + del row["_pct"] + + st.subheader("Likely available next pick") + st.dataframe(rows, use_container_width=True, hide_index=True) diff --git a/app/streamlit_app.py b/app/streamlit_app.py index fb51af5..f211a24 100644 --- a/app/streamlit_app.py +++ b/app/streamlit_app.py @@ -12,7 +12,6 @@ sys.path.insert(0, str(ROOT)) from app.mock_draft_tab import render_mock_draft_tab -from config import settings from draft.rpy2_setup import init_rpy2_on_main_thread from rag.engine import RAGEngine from rag.ollama_client import OllamaError @@ -24,9 +23,6 @@ ) st.title("StatShift") -st.caption( - "Fantasy football stats — Streamlit → RAG → FastAPI (read-only) → SQLite → Ollama/Gemma" -) engine = RAGEngine() @@ -41,34 +37,14 @@ def _rpy2_ready() -> bool: _rpy2_ready() with st.sidebar: - st.header("System status") - if st.button("Check health", use_container_width=True): + if st.button("Check services", use_container_width=True): st.session_state["health"] = engine.health() health = st.session_state.get("health") if health: - st.write("API", "✅" if health["api_ok"] else "❌", health.get("api_detail", "")) - st.write("Ollama", "✅" if health["ollama_ok"] else "❌") - if health.get("ollama_models"): - st.caption("Models: " + ", ".join(health["ollama_models"][:5])) - st.caption(f"Configured model: {health['configured_model']}") - - st.divider() - st.markdown( - """ - **Docker** - `docker compose up --build` → open http://localhost:8501 - - **Local** - 1. `python scripts/init_db.py` - 2. `uvicorn api.main:app --reload` - 3. `ollama pull gemma2:2b` - 4. `streamlit run app/streamlit_app.py` - """ - ) - st.caption(f"API: {settings.api_base_url}") - st.caption(f"Ollama: {settings.ollama_base_url}") + st.write("Data", "✅" if health["api_ok"] else "❌") + st.write("AI", "✅" if health["ollama_ok"] else "❌") -ask_tab, draft_tab = st.tabs(["Ask StatShift", "Mock Draft"]) +ask_tab, draft_tab = st.tabs(["Ask", "Mock Draft"]) with ask_tab: query = st.text_input( @@ -78,9 +54,9 @@ def _rpy2_ready() -> bool: col1, col2 = st.columns([1, 4]) with col1: - run = st.button("Run RAG", type="primary", use_container_width=True) + run = st.button("Ask", type="primary", use_container_width=True) with col2: - show_prompt = st.checkbox("Show prompt sent to Gemma") + show_prompt = st.checkbox("Show AI prompt") if run and query.strip(): with st.spinner("Routing your question..."): @@ -90,21 +66,15 @@ def _rpy2_ready() -> bool: except OllamaError as exc: st.error(str(exc)) except Exception as exc: - st.error(f"RAG failed: {exc}") + st.error(f"Something went wrong: {exc}") result = st.session_state.get("last_result") if result: - route_label = "API (database)" if result.route == "api" else "Gemma (chat)" - st.caption( - f"Route: **{route_label}** · intent: `{result.intent.value}` — {result.intent_reason}" - ) - st.subheader("Answer") st.markdown(result.answer) - st.subheader("Retrieved sources") - if not result.sources: - st.caption("No API retrieval for this route.") + if result.sources: + st.subheader("Sources") for doc in result.sources: with st.expander(f"[{doc['id']}] {doc['title']} · {doc['category']}"): st.write(doc["content"]) @@ -112,8 +82,6 @@ def _rpy2_ready() -> bool: if show_prompt and result.prompt: st.subheader("Prompt") st.code(result.prompt) - elif show_prompt and result.route == "api": - st.caption("No Gemma prompt — this answer came directly from API search results.") elif run and not query.strip(): st.warning("Enter a question first.") diff --git a/draft/engine.py b/draft/engine.py index f02f4b6..1a654c2 100644 --- a/draft/engine.py +++ b/draft/engine.py @@ -2,6 +2,8 @@ from __future__ import annotations +import math +import random from dataclasses import dataclass, field from draft.models import ( @@ -10,6 +12,7 @@ LeagueSettings, Player, Position, + RankingSource, ScoringFormat, TeamRoster, ) @@ -31,6 +34,13 @@ class MockDraftEngine: available: list[Player] = field(default_factory=list) requested_rounds: int = 15 max_supported_rounds: int = 15 + cpu_randomness: float = 0.35 + cpu_top_k: int = 12 + seed: int | None = None + ranking_source: RankingSource = RankingSource.YAHOO + _rng: random.Random = field( + init=False, repr=False, compare=False, default_factory=random.Random + ) def __post_init__(self) -> None: if not self.pool: @@ -41,6 +51,7 @@ def __post_init__(self) -> None: key=lambda p: p.fantasy_points(self.settings.scoring), reverse=True, ) + self._rng = random.Random(self.seed) @property def total_picks(self) -> int: @@ -115,18 +126,36 @@ def _roster_need_score(self, roster: TeamRoster, player: Player) -> float: return need - def rank_available(self, team_index: int | None = None) -> list[Player]: - idx = team_index if team_index is not None else self.current_team_index - roster = self.rosters[idx] + def _candidate_score(self, player: Player, team_index: int) -> float: + roster = self.rosters[team_index] scoring = self.settings.scoring + value = player.fantasy_points(scoring) + need = self._roster_need_score(roster, player) + rank = player.rank_for_source(self.ranking_source) + rank_bonus = max(0.0, (80.0 - rank) / 80.0) * 0.5 + return value + need * 2.0 + rank_bonus - def score(player: Player) -> float: - value = player.fantasy_points(scoring) - need = self._roster_need_score(roster, player) - adp_bonus = max(0.0, (80.0 - player.adp) / 80.0) * 0.5 - return value + need * 2.0 + adp_bonus + def rank_available(self, team_index: int | None = None) -> list[Player]: + idx = team_index if team_index is not None else self.current_team_index + return sorted( + self.available, + key=lambda p: self._candidate_score(p, idx), + reverse=True, + ) - return sorted(self.available, key=score, reverse=True) + def _sample_cpu_pick(self, candidates: list[Player]) -> Player: + if not candidates: + raise RuntimeError("No candidates available") + if self.cpu_randomness <= 0.0 or len(candidates) == 1: + return candidates[0] + top_k = candidates[: max(1, self.cpu_top_k)] + team_idx = self.current_team_index + scores = [self._candidate_score(p, team_idx) for p in top_k] + top = scores[0] + # Softmax with temperature: 0.5 (near-deterministic) → 5.0 (nearly uniform) + temperature = 0.5 + self.cpu_randomness * 4.5 + weights = [math.exp((s - top) / temperature) for s in scores] + return self._rng.choices(top_k, weights=weights, k=1)[0] def make_pick(self, player: Player) -> DraftPick: if self.is_complete: @@ -151,7 +180,7 @@ def auto_pick(self) -> DraftPick: ranked = self.rank_available() if not ranked: raise RuntimeError("No players available") - return self.make_pick(ranked[0]) + return self.make_pick(self._sample_cpu_pick(ranked)) def run_cpu_picks_until_user(self) -> list[DraftPick]: made: list[DraftPick] = [] @@ -164,6 +193,54 @@ def run_cpu_picks_until_user(self) -> list[DraftPick]: def user_roster(self) -> TeamRoster: return self.rosters[self.settings.draft_slot - 1] + def _clone_for_simulation(self) -> MockDraftEngine: + """Lightweight copy that shares the immutable Player/pool references.""" + clone = MockDraftEngine( + settings=self.settings, + pool=self.pool, + cpu_randomness=self.cpu_randomness, + cpu_top_k=self.cpu_top_k, + ranking_source=self.ranking_source, + ) + clone.picks = list(self.picks) + clone.rosters = [ + TeamRoster(team_index=r.team_index, picks=list(r.picks)) + for r in self.rosters + ] + clone.available = list(self.available) + clone.requested_rounds = self.requested_rounds + clone.max_supported_rounds = self.max_supported_rounds + clone._rng = random.Random() + return clone + + def simulate_availability_at_next_user_pick( + self, *, num_sims: int = 40 + ) -> dict[str, float]: + """ + Monte Carlo: probability each currently-available player is still on the + board when the user is next on the clock. + + Each sim clones the engine and runs CPU picks (with cpu_randomness) until + the user's next turn. If the user is currently on the clock, the sim + treats that as an auto-pick (so the result describes the *following* pick). + """ + if self.is_complete or self.pool_exhausted: + return {p.name: 1.0 for p in self.available} + + counts: dict[str, int] = {p.name: 0 for p in self.available} + + for _ in range(num_sims): + sim = self._clone_for_simulation() + if sim.is_user_turn and not sim.pool_exhausted: + sim.auto_pick() + while not sim.is_complete and not sim.is_user_turn: + sim.auto_pick() + for player in sim.available: + if player.name in counts: + counts[player.name] += 1 + + return {name: count / num_sims for name, count in counts.items()} + @staticmethod def create( scoring: ScoringFormat, @@ -172,6 +249,10 @@ def create( rounds: int = 15, *, pool: list[Player] | None = None, + cpu_randomness: float = 0.35, + cpu_top_k: int = 12, + seed: int | None = None, + ranking_source: RankingSource = RankingSource.YAHOO, ) -> MockDraftEngine: player_pool = pool if pool is not None else get_players() if not player_pool: @@ -186,7 +267,14 @@ def create( draft_slot=draft_slot, rounds=effective_rounds, ) - engine = MockDraftEngine(settings=settings, pool=player_pool) + engine = MockDraftEngine( + settings=settings, + pool=player_pool, + cpu_randomness=cpu_randomness, + cpu_top_k=cpu_top_k, + seed=seed, + ranking_source=ranking_source, + ) engine.requested_rounds = rounds engine.max_supported_rounds = supported return engine diff --git a/draft/ffanalytics_loader.py b/draft/ffanalytics_loader.py index 478632d..6e008ad 100644 --- a/draft/ffanalytics_loader.py +++ b/draft/ffanalytics_loader.py @@ -8,6 +8,7 @@ from config import settings from draft.models import Player, Position +from draft.sleeper_loader import get_sleeper_rankings, lookup_sleeper_rank CACHE_PATH = settings.project_root / "data" / "ffanalytics_players.json" CACHE_MAX_AGE_HOURS = 24 @@ -126,12 +127,31 @@ def _adp_from_row(row: dict[str, Any]) -> float | None: return sum(values) / len(values) +def _per_source_adp_from_row(row: dict[str, Any]) -> dict[str, float]: + """Extract per-source ADP values from ffanalytics get_adp row.""" + result: dict[str, float] = {} + + for key, raw in row.items(): + key_lower = str(key).lower() + if key_lower.startswith("adp_"): + source = key_lower[4:] + val = _float_or(raw, 0.0) + if val > 0: + result[source] = val + + return result + + def parse_projection_row( row: dict[str, Any], *, points: float, adp_by_id: dict[str, float], adp_by_name: dict[str, float], + espn_adp_by_id: dict[str, float] | None = None, + espn_adp_by_name: dict[str, float] | None = None, + yahoo_adp_by_id: dict[str, float] | None = None, + yahoo_adp_by_name: dict[str, float] | None = None, ) -> Player | None: pos_raw = _pick_column(row, "pos", "position") if pos_raw is None: @@ -146,11 +166,26 @@ def parse_projection_row( team = str(_pick_column(row, "team", "nfl_team") or "").strip() player_id = _pick_column(row, "id", "player_id") + pid_str = str(player_id) if player_id is not None else None + name_lower = name.lower() + adp = 999.0 - if player_id is not None: - adp = adp_by_id.get(str(player_id), adp) + if pid_str is not None: + adp = adp_by_id.get(pid_str, adp) if adp >= 999.0: - adp = adp_by_name.get(name.lower(), adp) + adp = adp_by_name.get(name_lower, adp) + + rank_espn = 999.0 + if espn_adp_by_id and pid_str: + rank_espn = espn_adp_by_id.get(pid_str, rank_espn) + if rank_espn >= 999.0 and espn_adp_by_name: + rank_espn = espn_adp_by_name.get(name_lower, rank_espn) + + rank_yahoo = 999.0 + if yahoo_adp_by_id and pid_str: + rank_yahoo = yahoo_adp_by_id.get(pid_str, rank_yahoo) + if rank_yahoo >= 999.0 and yahoo_adp_by_name: + rank_yahoo = yahoo_adp_by_name.get(name_lower, rank_yahoo) pts = round(points, 1) return Player( @@ -161,6 +196,9 @@ def parse_projection_row( fp_std=pts, team=team, adp=adp, + rank_espn=rank_espn, + rank_yahoo=rank_yahoo, + rank_sleeper=999.0, ) @@ -173,16 +211,39 @@ def merge_projection_tables( """Combine standard, half-PPR, and PPR projection rows into Player models.""" adp_by_id: dict[str, float] = {} adp_by_name: dict[str, float] = {} + espn_adp_by_id: dict[str, float] = {} + espn_adp_by_name: dict[str, float] = {} + yahoo_adp_by_id: dict[str, float] = {} + yahoo_adp_by_name: dict[str, float] = {} + for row in adp_rows: pid = _pick_column(row, "id", "player_id") adp_val = _adp_from_row(row) - if adp_val is None: - continue - if pid is not None: - adp_by_id[str(pid)] = adp_val + per_source = _per_source_adp_from_row(row) + + pid_str = str(pid) if pid is not None else None name = _player_name_from_row(row) - if name: - adp_by_name[name.lower()] = adp_val + name_lower = name.lower() if name else None + + if adp_val is not None: + if pid_str: + adp_by_id[pid_str] = adp_val + if name_lower: + adp_by_name[name_lower] = adp_val + + espn_val = per_source.get("espn") + if espn_val is not None: + if pid_str: + espn_adp_by_id[pid_str] = espn_val + if name_lower: + espn_adp_by_name[name_lower] = espn_val + + yahoo_val = per_source.get("yahoo") + if yahoo_val is not None: + if pid_str: + yahoo_adp_by_id[pid_str] = yahoo_val + if name_lower: + yahoo_adp_by_name[name_lower] = yahoo_val def index_by_id(rows: list[dict[str, Any]]) -> dict[str, tuple[dict[str, Any], float]]: out: dict[str, tuple[dict[str, Any], float]] = {} @@ -218,6 +279,10 @@ def index_by_id(rows: list[dict[str, Any]]) -> dict[str, tuple[dict[str, Any], f points=fp_ppr or fp_half or fp_std, adp_by_id=adp_by_id, adp_by_name=adp_by_name, + espn_adp_by_id=espn_adp_by_id, + espn_adp_by_name=espn_adp_by_name, + yahoo_adp_by_id=yahoo_adp_by_id, + yahoo_adp_by_name=yahoo_adp_by_name, ) if player is None: continue @@ -329,10 +394,24 @@ def _fetch_ffanalytics_players_impl( _r_dataframe_to_records(proj_ppr), _r_dataframe_to_records(adp_df), ) + + _populate_sleeper_rankings(players) + players.sort(key=lambda p: p.fp_ppr, reverse=True) return players +def _populate_sleeper_rankings(players: list[Player]) -> None: + """Fetch Sleeper rankings and populate rank_sleeper for each player.""" + try: + sleeper_rankings = get_sleeper_rankings() + except Exception: + return + + for player in players: + player.rank_sleeper = lookup_sleeper_rank(player.name, sleeper_rankings) + + def save_player_cache(players: list[Player], path: Any = None) -> Any: target = _cache_path(path) target.parent.mkdir(parents=True, exist_ok=True) @@ -350,6 +429,9 @@ def save_player_cache(players: list[Player], path: Any = None) -> Any: "fp_std": p.fp_std, "team": p.team, "adp": p.adp, + "rank_espn": p.rank_espn, + "rank_yahoo": p.rank_yahoo, + "rank_sleeper": p.rank_sleeper, } for p in players ], @@ -379,6 +461,9 @@ def load_player_cache(path: Any = None) -> list[Player] | None: fp_std=float(row["fp_std"]), team=row.get("team", ""), adp=float(row.get("adp", 999)), + rank_espn=float(row.get("rank_espn", 999)), + rank_yahoo=float(row.get("rank_yahoo", 999)), + rank_sleeper=float(row.get("rank_sleeper", 999)), ) ) except (KeyError, ValueError): diff --git a/draft/models.py b/draft/models.py index dc5d748..ad6a25c 100644 --- a/draft/models.py +++ b/draft/models.py @@ -12,6 +12,12 @@ class ScoringFormat(str, Enum): STANDARD = "Standard" +class RankingSource(str, Enum): + YAHOO = "Yahoo" + ESPN = "ESPN" + SLEEPER = "Sleeper" + + class Position(str, Enum): QB = "QB" RB = "RB" @@ -56,7 +62,10 @@ class Player: fp_half: float fp_std: float team: str = "" - adp: float = 99.0 + adp: float = 999.0 + rank_espn: float = 999.0 + rank_yahoo: float = 999.0 + rank_sleeper: float = 999.0 def fantasy_points(self, scoring: ScoringFormat) -> float: if scoring == ScoringFormat.PPR: @@ -65,6 +74,13 @@ def fantasy_points(self, scoring: ScoringFormat) -> float: return self.fp_half return self.fp_std + def rank_for_source(self, source: RankingSource) -> float: + if source == RankingSource.ESPN: + return self.rank_espn + if source == RankingSource.YAHOO: + return self.rank_yahoo + return self.rank_sleeper + @dataclass class DraftPick: diff --git a/draft/sleeper_loader.py b/draft/sleeper_loader.py new file mode 100644 index 0000000..960044a --- /dev/null +++ b/draft/sleeper_loader.py @@ -0,0 +1,126 @@ +"""Load player rankings from the Sleeper API.""" + +from __future__ import annotations + +import json +from datetime import datetime, timezone +from typing import Any + +import requests + +from config import settings + +SLEEPER_PLAYERS_URL = "https://api.sleeper.app/v1/players/nfl" +CACHE_PATH = settings.project_root / "data" / "sleeper_players.json" +CACHE_MAX_AGE_HOURS = 24 + + +def _normalize_name(name: str) -> str: + """Normalize player name for matching: lowercase, strip suffixes.""" + name = name.lower().strip() + for suffix in (" jr", " sr", " ii", " iii", " iv", " v"): + if name.endswith(suffix): + name = name[: -len(suffix)].strip() + return name + + +def fetch_sleeper_players() -> dict[str, Any]: + """Fetch all NFL players from Sleeper API.""" + resp = requests.get(SLEEPER_PLAYERS_URL, timeout=30) + resp.raise_for_status() + return resp.json() + + +def parse_sleeper_rankings(raw: dict[str, Any]) -> dict[str, int]: + """ + Extract player name -> search_rank mapping from Sleeper data. + + Returns a dict mapping normalized player names to their search_rank. + Lower search_rank = higher ranked player. + """ + rankings: dict[str, int] = {} + + for player_id, data in raw.items(): + if not isinstance(data, dict): + continue + + position = data.get("position", "") + if position not in ("QB", "RB", "WR", "TE", "K", "DEF"): + continue + + search_rank = data.get("search_rank") + if search_rank is None or not isinstance(search_rank, (int, float)): + continue + + first_name = data.get("first_name", "") or "" + last_name = data.get("last_name", "") or "" + full_name = f"{first_name} {last_name}".strip() + + if not full_name: + continue + + normalized = _normalize_name(full_name) + current = rankings.get(normalized) + if current is None or search_rank < current: + rankings[normalized] = int(search_rank) + + return rankings + + +def save_sleeper_cache(rankings: dict[str, int], path: Any = None) -> Any: + """Save Sleeper rankings to cache file.""" + target = path or CACHE_PATH + target.parent.mkdir(parents=True, exist_ok=True) + + payload = { + "fetched_at": datetime.now(timezone.utc).isoformat(), + "source": "sleeper", + "count": len(rankings), + "rankings": rankings, + } + target.write_text(json.dumps(payload, indent=2), encoding="utf-8") + return target + + +def load_sleeper_cache(path: Any = None) -> dict[str, int] | None: + """Load cached Sleeper rankings if available and fresh.""" + target = path or CACHE_PATH + if not target.exists(): + return None + + try: + payload = json.loads(target.read_text(encoding="utf-8")) + fetched_at = datetime.fromisoformat(payload["fetched_at"]) + age_hours = (datetime.now(timezone.utc) - fetched_at).total_seconds() / 3600 + + if age_hours >= CACHE_MAX_AGE_HOURS: + return None + + return payload.get("rankings", {}) + except (json.JSONDecodeError, OSError, KeyError, ValueError): + return None + + +def get_sleeper_rankings(*, force_refresh: bool = False) -> dict[str, int]: + """ + Get Sleeper search_rank for all NFL players. + + Returns a dict mapping normalized player names to their search_rank. + Uses cached data if available and fresh (< 24 hours old). + """ + if not force_refresh: + cached = load_sleeper_cache() + if cached is not None: + return cached + + raw = fetch_sleeper_players() + rankings = parse_sleeper_rankings(raw) + save_sleeper_cache(rankings) + return rankings + + +def lookup_sleeper_rank(name: str, rankings: dict[str, int]) -> float: + """Look up a player's Sleeper rank by name, returning 999.0 if not found.""" + normalized = _normalize_name(name) + rank = rankings.get(normalized) + return float(rank) if rank is not None else 999.0 diff --git a/tests/test_draft_engine.py b/tests/test_draft_engine.py index a20ad7a..92f9906 100644 --- a/tests/test_draft_engine.py +++ b/tests/test_draft_engine.py @@ -5,18 +5,36 @@ import unittest from draft.engine import MockDraftEngine, _snake_team_index -from draft.models import Player, Position, ScoringFormat +from draft.models import Player, Position, RankingSource, ScoringFormat def _test_pool() -> list[Player]: return [ - Player(f"QB {i}", Position.QB, 20.0, 20.0, 20.0, adp=float(i)) + Player( + f"QB {i}", Position.QB, 20.0, 20.0, 20.0, + adp=float(i), + rank_espn=float(i), + rank_yahoo=float(5 - i) if i <= 4 else 999.0, + rank_sleeper=float(i * 2), + ) for i in range(1, 5) ] + [ - Player(f"RB {i}", Position.RB, 15.0, 14.0, 13.0, adp=float(10 + i)) + Player( + f"RB {i}", Position.RB, 15.0, 14.0, 13.0, + adp=float(10 + i), + rank_espn=float(10 + i), + rank_yahoo=float(20 - i), + rank_sleeper=float(5 + i), + ) for i in range(1, 10) ] + [ - Player(f"WR {i}", Position.WR, 14.0, 13.0, 12.0, adp=float(20 + i)) + Player( + f"WR {i}", Position.WR, 14.0, 13.0, 12.0, + adp=float(20 + i), + rank_espn=float(20 + i), + rank_yahoo=float(10 + i), + rank_sleeper=float(15 + i), + ) for i in range(1, 10) ] @@ -79,5 +97,162 @@ def test_draft_completes_after_all_rounds(self) -> None: self.assertEqual(len(draft.available), len(pool) - 8) +class MonteCarloDraftTests(unittest.TestCase): + def _run_full(self, *, cpu_randomness: float, seed: int | None) -> list[str]: + draft = MockDraftEngine.create( + scoring=ScoringFormat.PPR, + league_size=4, + draft_slot=2, + rounds=3, + pool=_test_pool(), + cpu_randomness=cpu_randomness, + seed=seed, + ) + while not draft.is_complete: + draft.auto_pick() + return [p.player.name for p in draft.picks] + + def test_randomness_zero_is_deterministic(self) -> None: + # seed should be irrelevant when randomness is off + a = self._run_full(cpu_randomness=0.0, seed=None) + b = self._run_full(cpu_randomness=0.0, seed=999) + self.assertEqual(a, b) + + def test_seed_reproduces_random_draft(self) -> None: + a = self._run_full(cpu_randomness=0.6, seed=42) + b = self._run_full(cpu_randomness=0.6, seed=42) + self.assertEqual(a, b) + + def test_different_seeds_diverge(self) -> None: + a = self._run_full(cpu_randomness=0.8, seed=1) + b = self._run_full(cpu_randomness=0.8, seed=2) + self.assertNotEqual(a, b) + + def test_simulate_availability_returns_probabilities(self) -> None: + draft = MockDraftEngine.create( + scoring=ScoringFormat.PPR, + league_size=4, + draft_slot=2, + rounds=2, + pool=_test_pool(), + cpu_randomness=0.5, + seed=7, + ) + draft.run_cpu_picks_until_user() + self.assertTrue(draft.is_user_turn) + + probs = draft.simulate_availability_at_next_user_pick(num_sims=10) + + self.assertEqual(set(probs.keys()), {p.name for p in draft.available}) + self.assertTrue(all(0.0 <= v <= 1.0 for v in probs.values())) + + def test_simulate_does_not_mutate_live_draft(self) -> None: + draft = MockDraftEngine.create( + scoring=ScoringFormat.PPR, + league_size=4, + draft_slot=2, + rounds=2, + pool=_test_pool(), + cpu_randomness=0.4, + seed=11, + ) + draft.run_cpu_picks_until_user() + picks_before = len(draft.picks) + available_before = [p.name for p in draft.available] + + draft.simulate_availability_at_next_user_pick(num_sims=5) + + self.assertEqual(len(draft.picks), picks_before) + self.assertEqual([p.name for p in draft.available], available_before) + + +class RankingSourceTests(unittest.TestCase): + def test_player_rank_for_source_espn(self) -> None: + player = Player( + "Test Player", Position.QB, 20.0, 20.0, 20.0, + rank_espn=5.0, rank_yahoo=10.0, rank_sleeper=15.0, + ) + self.assertEqual(player.rank_for_source(RankingSource.ESPN), 5.0) + + def test_player_rank_for_source_yahoo(self) -> None: + player = Player( + "Test Player", Position.QB, 20.0, 20.0, 20.0, + rank_espn=5.0, rank_yahoo=10.0, rank_sleeper=15.0, + ) + self.assertEqual(player.rank_for_source(RankingSource.YAHOO), 10.0) + + def test_player_rank_for_source_sleeper(self) -> None: + player = Player( + "Test Player", Position.QB, 20.0, 20.0, 20.0, + rank_espn=5.0, rank_yahoo=10.0, rank_sleeper=15.0, + ) + self.assertEqual(player.rank_for_source(RankingSource.SLEEPER), 15.0) + + def test_different_ranking_sources_produce_different_orders(self) -> None: + pool = _test_pool() + + draft_yahoo = MockDraftEngine.create( + scoring=ScoringFormat.PPR, + league_size=4, + draft_slot=2, + rounds=2, + pool=pool, + cpu_randomness=0.0, + ranking_source=RankingSource.YAHOO, + ) + draft_espn = MockDraftEngine.create( + scoring=ScoringFormat.PPR, + league_size=4, + draft_slot=2, + rounds=2, + pool=pool, + cpu_randomness=0.0, + ranking_source=RankingSource.ESPN, + ) + + while not draft_yahoo.is_complete: + draft_yahoo.auto_pick() + while not draft_espn.is_complete: + draft_espn.auto_pick() + + yahoo_picks = [p.player.name for p in draft_yahoo.picks] + espn_picks = [p.player.name for p in draft_espn.picks] + + self.assertNotEqual(yahoo_picks, espn_picks) + + def test_default_ranking_source_is_yahoo(self) -> None: + draft = MockDraftEngine.create( + scoring=ScoringFormat.PPR, + league_size=4, + draft_slot=1, + rounds=1, + pool=_test_pool(), + ) + self.assertEqual(draft.ranking_source, RankingSource.YAHOO) + + def test_ranking_source_passed_to_create(self) -> None: + draft = MockDraftEngine.create( + scoring=ScoringFormat.PPR, + league_size=4, + draft_slot=1, + rounds=1, + pool=_test_pool(), + ranking_source=RankingSource.SLEEPER, + ) + self.assertEqual(draft.ranking_source, RankingSource.SLEEPER) + + def test_clone_preserves_ranking_source(self) -> None: + draft = MockDraftEngine.create( + scoring=ScoringFormat.PPR, + league_size=4, + draft_slot=2, + rounds=2, + pool=_test_pool(), + ranking_source=RankingSource.ESPN, + ) + clone = draft._clone_for_simulation() + self.assertEqual(clone.ranking_source, RankingSource.ESPN) + + if __name__ == "__main__": unittest.main() diff --git a/tests/test_ffanalytics_loader.py b/tests/test_ffanalytics_loader.py index 3665f65..64a7a37 100644 --- a/tests/test_ffanalytics_loader.py +++ b/tests/test_ffanalytics_loader.py @@ -9,6 +9,7 @@ parse_projection_row, _player_name_from_row, _adp_from_row, + _per_source_adp_from_row, ) from draft.models import Position @@ -22,6 +23,19 @@ def test_adp_from_source_columns(self) -> None: row = {"id": "1", "adp_espn": 2.2, "adp_yahoo": 3.0} self.assertAlmostEqual(_adp_from_row(row), 2.6) + def test_per_source_adp_from_row(self) -> None: + row = {"id": "1", "adp_espn": 5.0, "adp_yahoo": 8.0, "adp_cbs": 6.0} + result = _per_source_adp_from_row(row) + self.assertEqual(result["espn"], 5.0) + self.assertEqual(result["yahoo"], 8.0) + self.assertEqual(result["cbs"], 6.0) + + def test_per_source_adp_ignores_zero_values(self) -> None: + row = {"id": "1", "adp_espn": 5.0, "adp_yahoo": 0.0} + result = _per_source_adp_from_row(row) + self.assertEqual(result["espn"], 5.0) + self.assertNotIn("yahoo", result) + class MergeProjectionTablesTests(unittest.TestCase): def test_merges_scoring_formats_and_adp(self) -> None: @@ -97,6 +111,47 @@ def test_skips_unknown_positions(self) -> None: player = parse_projection_row(row, points=5.0, adp_by_id={}, adp_by_name={}) self.assertIsNone(player) + def test_populates_per_source_rankings(self) -> None: + std_rows = [ + { + "id": "1", + "first_name": "Patrick", + "last_name": "Mahomes", + "pos": "QB", + "team": "KC", + "points": 25.0, + }, + ] + half_rows = [ + { + "id": "1", + "first_name": "Patrick", + "last_name": "Mahomes", + "pos": "QB", + "team": "KC", + "points": 25.0, + }, + ] + ppr_rows = [ + { + "id": "1", + "first_name": "Patrick", + "last_name": "Mahomes", + "pos": "QB", + "team": "KC", + "points": 25.0, + }, + ] + adp_rows = [ + {"id": "1", "adp_espn": 10.0, "adp_yahoo": 12.0}, + ] + players = merge_projection_tables(std_rows, half_rows, ppr_rows, adp_rows) + self.assertEqual(len(players), 1) + mahomes = players[0] + self.assertEqual(mahomes.rank_espn, 10.0) + self.assertEqual(mahomes.rank_yahoo, 12.0) + self.assertEqual(mahomes.rank_sleeper, 999.0) + if __name__ == "__main__": unittest.main() diff --git a/tests/test_sleeper_loader.py b/tests/test_sleeper_loader.py new file mode 100644 index 0000000..a896d2d --- /dev/null +++ b/tests/test_sleeper_loader.py @@ -0,0 +1,85 @@ +"""Unit tests for Sleeper loader.""" + +from __future__ import annotations + +import unittest + +from draft.sleeper_loader import _normalize_name, lookup_sleeper_rank, parse_sleeper_rankings + + +class SleeperLoaderTests(unittest.TestCase): + def test_normalize_name_lowercase(self) -> None: + self.assertEqual(_normalize_name("Patrick Mahomes"), "patrick mahomes") + + def test_normalize_name_strips_jr(self) -> None: + self.assertEqual(_normalize_name("Marvin Harrison Jr"), "marvin harrison") + + def test_normalize_name_strips_sr(self) -> None: + self.assertEqual(_normalize_name("John Smith Sr"), "john smith") + + def test_normalize_name_strips_ii(self) -> None: + self.assertEqual(_normalize_name("Robert Griffin II"), "robert griffin") + + def test_normalize_name_strips_iii(self) -> None: + self.assertEqual(_normalize_name("Robert Griffin III"), "robert griffin") + + def test_parse_sleeper_rankings_extracts_search_rank(self) -> None: + raw = { + "1234": { + "first_name": "Patrick", + "last_name": "Mahomes", + "position": "QB", + "search_rank": 5, + }, + "5678": { + "first_name": "Travis", + "last_name": "Kelce", + "position": "TE", + "search_rank": 12, + }, + } + rankings = parse_sleeper_rankings(raw) + self.assertEqual(rankings["patrick mahomes"], 5) + self.assertEqual(rankings["travis kelce"], 12) + + def test_parse_sleeper_rankings_ignores_non_skill_positions(self) -> None: + raw = { + "1234": { + "first_name": "Andy", + "last_name": "Reid", + "position": "HC", + "search_rank": 100, + }, + } + rankings = parse_sleeper_rankings(raw) + self.assertNotIn("andy reid", rankings) + + def test_parse_sleeper_rankings_handles_missing_search_rank(self) -> None: + raw = { + "1234": { + "first_name": "Patrick", + "last_name": "Mahomes", + "position": "QB", + }, + } + rankings = parse_sleeper_rankings(raw) + self.assertNotIn("patrick mahomes", rankings) + + def test_lookup_sleeper_rank_found(self) -> None: + rankings = {"patrick mahomes": 5} + rank = lookup_sleeper_rank("Patrick Mahomes", rankings) + self.assertEqual(rank, 5.0) + + def test_lookup_sleeper_rank_not_found(self) -> None: + rankings = {"patrick mahomes": 5} + rank = lookup_sleeper_rank("Unknown Player", rankings) + self.assertEqual(rank, 999.0) + + def test_lookup_sleeper_rank_with_suffix(self) -> None: + rankings = {"marvin harrison": 3} + rank = lookup_sleeper_rank("Marvin Harrison Jr", rankings) + self.assertEqual(rank, 3.0) + + +if __name__ == "__main__": + unittest.main() From f20a65ea6bf88b9e44bdddecf9223d0576ef1710 Mon Sep 17 00:00:00 2001 From: Joseph Lee <175lee.joseph@gmail.com> Date: Wed, 20 May 2026 17:40:52 -0400 Subject: [PATCH 7/9] Add "Research" tab, ability to switch between LLMs, moved tabs from "Home" tab to left panel --- app/mock_draft_tab.py | 256 +++++++++++++---- app/research_tab.py | 401 ++++++++++++++++++++++++++ app/streamlit_app.py | 76 ++--- app/views/ask_page.py | 66 +++++ app/views/home_page.py | 11 + app/views/research_page.py | 11 + docker-compose.yml | 6 +- draft/engine.py | 66 +++++ draft/models.py | 34 +++ rag/engine.py | 5 +- rag/ollama_client.py | 51 ++-- requirements.txt | 4 + scripts/pull-ollama-models.sh | 20 ++ sdks/distribution_fit.py | 92 ++++++ sdks/espn_player_loader.py | 524 ++++++++++++++++++++++++++++++++++ 15 files changed, 1493 insertions(+), 130 deletions(-) create mode 100644 app/research_tab.py create mode 100644 app/views/ask_page.py create mode 100644 app/views/home_page.py create mode 100644 app/views/research_page.py create mode 100755 scripts/pull-ollama-models.sh create mode 100644 sdks/distribution_fit.py create mode 100644 sdks/espn_player_loader.py diff --git a/app/mock_draft_tab.py b/app/mock_draft_tab.py index f902ef8..9b0415c 100644 --- a/app/mock_draft_tab.py +++ b/app/mock_draft_tab.py @@ -3,10 +3,11 @@ from __future__ import annotations import streamlit as st +from streamlit_autorefresh import st_autorefresh -from draft.engine import MockDraftEngine +from draft.engine import MockDraftEngine, _snake_team_index from draft.ffanalytics_loader import cache_is_fresh, load_active_players, load_player_cache -from draft.models import ROSTER_SLOTS, RankingSource, ScoringFormat +from draft.models import POSITION_COLORS, ROSTER_SLOTS, Position, RankingSource, ScoringFormat from draft.player_pool import refresh_players from draft.rpy2_setup import rpy2_session @@ -29,11 +30,69 @@ def _ensure_player_pool(*, force_refresh: bool = False) -> list: return st.session_state["draft_pool"] +def _position_colors(position: str) -> tuple[str, str]: + """Return (background, text) hex colors for a position.""" + return POSITION_COLORS.get(position, ("#374151", "#FFFFFF")) + + +def _truncate_name(name: str, max_len: int = 14) -> str: + return name if len(name) <= max_len else name[: max_len - 1] + "…" + + +def _player_pick_html( + name: str, + position: str, + team: str = "", + *, + highlight: bool = False, +) -> str: + """HTML for a Sleeper-style colored pick cell.""" + bg, fg = _position_colors(position) + border = "2px solid #FBBF24" if highlight else "1px solid rgba(255,255,255,0.15)" + team_line = f'
{position}' + if team: + team_line += f" · {team}" + team_line += "
" + return ( + f'
' + f'
{_truncate_name(name)}
' + f"{team_line}
" + ) + + +def _on_clock_html(text: str, *, is_user: bool) -> str: + bg = "#F59E0B" if is_user else "#4B5563" + return ( + f'
' + f"{text}
" + ) + + +def _empty_cell_html(round_num: int) -> str: + return ( + f'
R{round_num}
' + ) + + +def _render_position_legend() -> None: + chips = [] + for pos in ("QB", "RB", "WR", "TE", "K", "DEF"): + bg, fg = _position_colors(pos) + chips.append( + f'{pos}' + ) + st.markdown("".join(chips), unsafe_allow_html=True) + + def _render_settings( pool_size: int, -) -> tuple[ScoringFormat, int, int, int, float, RankingSource] | None: +) -> tuple[ScoringFormat, int, int, int, float, RankingSource, str] | None: st.subheader("League settings") - col1, col2, col3, col4, col5 = st.columns(5) + col1, col2, col3, col4, col5, col6 = st.columns(6) with col1: scoring_label = st.selectbox( @@ -72,6 +131,12 @@ def _render_settings( step=1, disabled=pool_size == 0, ) + with col6: + user_team_name = st.text_input( + "Your team name", + value="My Team", + max_chars=25, + ) cpu_randomness = st.slider( "Draft unpredictability", @@ -86,36 +151,70 @@ def _render_settings( ranking_source = RankingSource(ranking_label) if pool_size: st.caption(f"{pool_size} players · up to {max_rounds} rounds") - return scoring, int(league_size), int(draft_slot), int(rounds), float(cpu_randomness), ranking_source + return scoring, int(league_size), int(draft_slot), int(rounds), float(cpu_randomness), ranking_source, user_team_name -def _render_draft_board(draft: MockDraftEngine) -> None: - st.subheader("Draft board") - rows = [] +def _render_sleeper_draft_board(draft: MockDraftEngine) -> None: + """Render Sleeper-style grid board (teams as columns, rounds as rows).""" + st.subheader("Draft Board") + _render_position_legend() + + num_teams = draft.settings.league_size + num_rounds = draft.settings.rounds + user_team_idx = draft.settings.draft_slot - 1 + + pick_grid: dict[tuple[int, int], tuple[str, str, str]] = {} for pick in draft.picks: - team_label = ( - f"Team {pick.team_index + 1} (You)" - if pick.team_index == draft.settings.draft_slot - 1 - else f"Team {pick.team_index + 1}" + pick_grid[(pick.round, pick.team_index)] = ( + pick.player.name, + pick.player.position.value, + pick.player.team, ) - rows.append( - { - "Overall": pick.overall, - "Round": pick.round, - "Pick": pick.pick_in_round, - "Team": team_label, - "Player": pick.player.name, - "Pos": pick.player.position.value, - "Team abbr": pick.player.team, - "FPG": round( - pick.player.fantasy_points(draft.settings.scoring), 1 - ), - } - ) - if rows: - st.dataframe(rows, use_container_width=True, hide_index=True) - else: - st.caption("No picks yet.") + + current_round = draft.current_round + current_team = draft.current_team_index if not draft.is_complete else -1 + + header_cols = st.columns(num_teams) + for i, col in enumerate(header_cols): + team_name = draft.get_team_name(i) + short_name = _truncate_name(team_name, 16) + if i == user_team_idx: + col.markdown(f"**{short_name}** 🏈") + else: + col.markdown(f"**{short_name}**") + + for round_num in range(1, num_rounds + 1): + row_cols = st.columns(num_teams) + + for pick_in_round in range(1, num_teams + 1): + team_idx = _snake_team_index(round_num, pick_in_round, num_teams) + col = row_cols[team_idx] + + pick_data = pick_grid.get((round_num, team_idx)) + is_current_pick = round_num == current_round and team_idx == current_team + is_user_team = team_idx == user_team_idx + + if pick_data: + name, position, team = pick_data + html = _player_pick_html( + name, + position, + team, + highlight=is_user_team, + ) + col.markdown(html, unsafe_allow_html=True) + elif is_current_pick: + remaining = draft.time_remaining() + if draft.is_user_turn: + text = f"⏱️ YOUR PICK ({remaining}s)" + else: + text = f"⏱️ On Clock ({remaining}s)" + col.markdown( + _on_clock_html(text, is_user=draft.is_user_turn), + unsafe_allow_html=True, + ) + else: + col.markdown(_empty_cell_html(round_num), unsafe_allow_html=True) def _render_user_roster(draft: MockDraftEngine) -> None: @@ -125,16 +224,24 @@ def _render_user_roster(draft: MockDraftEngine) -> None: st.caption("No players drafted yet.") return - rows = [ - { - "Player": p.name, - "Pos": p.position.value, - "Team": p.team, - "FPG": round(p.fantasy_points(draft.settings.scoring), 1), - } - for p in roster.picks - ] - st.dataframe(rows, use_container_width=True, hide_index=True) + pos_order = {p: i for i, p in enumerate(Position)} + sorted_picks = sorted(roster.picks, key=lambda p: pos_order.get(p.position, 99)) + + for p in sorted_picks: + fpg = round(p.fantasy_points(draft.settings.scoring), 1) + bg, fg = _position_colors(p.position.value) + st.markdown( + f'
' + f'
' + f"{p.position.value}
" + f'
' + f'
{p.name}
' + f'
{p.team} · {fpg} FPG
' + f"
", + unsafe_allow_html=True, + ) filled = [] for slot, need in ROSTER_SLOTS.items(): @@ -146,6 +253,28 @@ def _render_user_roster(draft: MockDraftEngine) -> None: st.caption("Starters · " + " · ".join(filled)) +def _render_pick_timer(draft: MockDraftEngine) -> None: + """Render the countdown timer for the current pick.""" + remaining = draft.time_remaining() + + if remaining <= 10: + st.error(f"⏱️ **{remaining}** seconds remaining!") + elif remaining <= 20: + st.warning(f"⏱️ **{remaining}** seconds remaining") + else: + st.info(f"⏱️ **{remaining}** seconds remaining") + + +def _handle_timer_expiration(draft: MockDraftEngine) -> bool: + """Auto-draft for the team on the clock when their 30s expires. One pick per timeout.""" + if draft.is_timer_expired() and not draft.is_complete: + draft.auto_pick() + if not draft.is_complete: + draft.start_pick_timer() + return True + return False + + def _render_pick_controls(draft: MockDraftEngine) -> None: if draft.is_complete: if draft.pool_exhausted and len(draft.picks) < draft.settings.league_size * draft.requested_rounds: @@ -154,16 +283,22 @@ def _render_pick_controls(draft: MockDraftEngine) -> None: f"(round {draft.picks[-1].round if draft.picks else 0})." ) else: - st.success("Draft complete.") + st.success("Draft complete!") return + if _handle_timer_expiration(draft): + st.rerun() + st.subheader("On the clock") round_num = draft.current_round overall = draft.current_overall - team_num = draft.current_team_index + 1 + team_name = draft.get_team_name(draft.current_team_index) + + _render_pick_timer(draft) + st.caption(f"Each team has **{draft.pick_time_limit}** seconds to pick.") if draft.is_user_turn: - st.info(f"Round {round_num} · Pick {overall} — **Your pick**") + st.info(f"Round {round_num} · Pick {overall} — **{team_name} (You)**") ranked = draft.rank_available()[:25] if not ranked: st.error("No players left in the pool.") @@ -180,12 +315,19 @@ def _render_pick_controls(draft: MockDraftEngine) -> None: choice = st.selectbox("Select player", options=list(options.keys())) if st.button("Draft player", type="primary", use_container_width=True): draft.make_pick(options[choice]) - draft.run_cpu_picks_until_user() + if not draft.is_complete: + draft.start_pick_timer() st.rerun() else: - st.warning(f"Round {round_num} · Pick {overall} — Team {team_num} is picking…") - if st.button("Simulate to your pick", use_container_width=True): - draft.run_cpu_picks_until_user() + st.warning( + f"Round {round_num} · Pick {overall} — **{team_name}** is on the clock " + f"({draft.time_remaining()}s left)" + ) + if st.button("Skip to your pick", use_container_width=True): + while not draft.is_complete and not draft.is_user_turn: + draft.auto_pick() + if not draft.is_complete: + draft.start_pick_timer() st.rerun() @@ -251,7 +393,7 @@ def render_mock_draft_tab() -> None: if settings is None: return - scoring, league_size, draft_slot, rounds, cpu_randomness, ranking_source = settings + scoring, league_size, draft_slot, rounds, cpu_randomness, ranking_source, user_team_name = settings slot_cols = st.columns([1, 1, 2]) with slot_cols[0]: @@ -270,6 +412,7 @@ def render_mock_draft_tab() -> None: pool=pool, cpu_randomness=cpu_randomness, ranking_source=ranking_source, + user_team_name=user_team_name, ) st.rerun() except ValueError as exc: @@ -283,6 +426,8 @@ def render_mock_draft_tab() -> None: if draft is None: return + st_autorefresh(interval=1000, key="draft_timer_refresh") + meta = draft.settings st.caption( f"{meta.scoring.value} · {draft.ranking_source.value} rankings · " @@ -291,13 +436,18 @@ def render_mock_draft_tab() -> None: f"#{min(draft.current_overall, draft.total_picks)} of {draft.total_picks}" ) - board_col, roster_col = st.columns([3, 2]) - with board_col: - _render_pick_controls(draft) - _render_draft_board(draft) + _render_pick_controls(draft) + + st.divider() + + _render_sleeper_draft_board(draft) + + st.divider() + + roster_col, best_col = st.columns([1, 1]) with roster_col: _render_user_roster(draft) - + with best_col: st.subheader("Best available") for player in draft.rank_available()[:8]: rank = player.rank_for_source(draft.ranking_source) diff --git a/app/research_tab.py b/app/research_tab.py new file mode 100644 index 0000000..fb75d41 --- /dev/null +++ b/app/research_tab.py @@ -0,0 +1,401 @@ +"""Streamlit UI for NFL player research.""" + +from __future__ import annotations + +import matplotlib.pyplot as plt +import numpy as np +import streamlit as st + +from sdks.distribution_fit import fit_best_distribution, format_params_string +from sdks.espn_player_loader import ( + CombineMetrics, + GameLogEntry, + InjuryInfo, + NewsArticle, + PlayerProfile, + SeasonStats, + calculate_fantasy_points, + get_available_seasons, + get_player_combine, + get_player_gamelog, + get_player_injuries, + get_player_news, + get_player_profile, + get_player_seasons, + get_player_stats, + search_players, +) + + +def _init_research_state() -> None: + if "research_selected_player" not in st.session_state: + st.session_state["research_selected_player"] = None + if "research_search_results" not in st.session_state: + st.session_state["research_search_results"] = [] + + +def _render_search() -> None: + """Render the player search section.""" + st.subheader("Search Players") + + col1, col2 = st.columns([4, 1]) + with col1: + query = st.text_input( + "Enter player name", + placeholder="e.g. Patrick Mahomes", + label_visibility="collapsed", + ) + with col2: + search_clicked = st.button("Search", type="primary", use_container_width=True) + + if search_clicked and query.strip(): + with st.spinner("Searching..."): + results = search_players(query.strip()) + st.session_state["research_search_results"] = results + if not results: + st.warning("No players found. Try a different search term.") + + results = st.session_state.get("research_search_results", []) + if results: + st.caption(f"Found {len(results)} active player(s)") + + for player in results: + img_col, info_col, btn_col = st.columns([1, 5, 1]) + with img_col: + headshot = player.get("headshot", "") + if headshot: + st.image(headshot, width=72) + else: + st.markdown("—") + with info_col: + display = f"**{player['name']}** — {player['position']}, {player['team']}" + st.markdown(display) + with btn_col: + if st.button("Select", key=f"select_{player['id']}", use_container_width=True): + st.session_state["research_selected_player"] = player["id"] + st.rerun() + + +def _render_profile_card(profile: PlayerProfile) -> None: + """Render the player profile card.""" + st.divider() + + col1, col2 = st.columns([1, 3]) + + with col1: + if profile.headshot_url: + st.image(profile.headshot_url, width=150) + else: + st.markdown("*No photo available*") + + with col2: + st.markdown(f"## {profile.name}") + st.markdown(f"**{profile.position}** | #{profile.jersey} | {profile.team}") + + info_col1, info_col2, info_col3 = st.columns(3) + with info_col1: + st.metric("Height", profile.height) + st.metric("College", profile.college) + with info_col2: + st.metric("Weight", profile.weight) + st.metric("Draft", profile.draft_info) + with info_col3: + if profile.age: + st.metric("Age", profile.age) + st.metric("Experience", f"{profile.experience} yrs") + + status_color = "green" if profile.status == "Active" else "orange" + st.markdown(f"**Status:** :{status_color}[{profile.status}]") + + +def _create_timeseries_chart(gamelog: list[GameLogEntry], scoring_key: str, scoring_label: str) -> plt.Figure: + """Create time series chart of fantasy points by week.""" + fig, ax = plt.subplots(figsize=(6, 3.5)) + + if not gamelog: + ax.text(0.5, 0.5, "No data available", ha="center", va="center", transform=ax.transAxes) + ax.set_xlabel("Week") + ax.set_ylabel("Fantasy Points") + return fig + + weeks = [] + points = [] + + for game in gamelog: + fp = calculate_fantasy_points(game, scoring_key) + weeks.append(game.week) + points.append(fp) + + if weeks: + ax.plot(weeks, points, marker='o', linestyle='-', linewidth=2, markersize=6, + color='#2E86AB', markerfacecolor='#2E86AB', markeredgecolor='white', markeredgewidth=1.5) + ax.fill_between(weeks, points, alpha=0.2, color='#2E86AB') + + max_week = max(weeks) + ax.set_xlim(0.5, max_week + 0.5) + ax.set_xticks(range(1, max_week + 1)) + + if points: + y_max = max(points) * 1.1 if max(points) > 0 else 10 + ax.set_ylim(0, y_max) + + ax.set_xlabel("Week", fontsize=10) + ax.set_ylabel(f"Fantasy Points ({scoring_label})", fontsize=10) + ax.set_title("Weekly Performance", fontsize=11, fontweight='bold') + ax.grid(alpha=0.3, linestyle='--', linewidth=0.5) + ax.spines['top'].set_visible(False) + ax.spines['right'].set_visible(False) + plt.tight_layout() + + return fig + + +def _create_histogram_with_fit(fp_data: list[float], scoring_label: str) -> plt.Figure: + """Create histogram with fitted distribution overlay.""" + nonzero = [x for x in fp_data if x > 0] + + fig, ax = plt.subplots(figsize=(6, 3.5)) + + if not nonzero: + ax.text(0.5, 0.5, "No data available", ha="center", va="center", transform=ax.transAxes) + ax.set_xlabel("Fantasy Points") + ax.set_ylabel("Density") + return fig + + ax.hist(nonzero, bins="auto", density=True, alpha=0.7, color="steelblue", edgecolor="white", label="Data") + + if len(nonzero) >= 5: + dist_name, params, pdf_fn = fit_best_distribution(nonzero) + x = np.linspace(min(nonzero), max(nonzero), 100) + ax.plot(x, pdf_fn(x), "r-", lw=2, label=dist_name) + + param_str = format_params_string(params) + ax.set_title(f"{dist_name} Distribution\n({param_str})", fontsize=10, fontweight='bold') + else: + ax.set_title(f"Fantasy Points Distribution\n(Need 5+ games for model fit, have {len(nonzero)})", fontsize=9, fontweight='bold') + + ax.set_xlabel(f"Fantasy Points ({scoring_label})", fontsize=10) + ax.set_ylabel("Density", fontsize=10) + ax.legend(loc="upper right") + ax.grid(alpha=0.3, linestyle='--', linewidth=0.5) + ax.spines['top'].set_visible(False) + ax.spines['right'].set_visible(False) + plt.tight_layout() + + return fig + + +def _build_stats_table(gamelog: list[GameLogEntry], scoring_key: str) -> list[dict]: + """Build stats table rows from game log.""" + rows = [] + for game in gamelog: + fp = calculate_fantasy_points(game, scoring_key) + rows.append({ + "Week": game.week, + "Pass Yds": game.passing_yards, + "Pass TD": game.passing_tds, + "INT": game.interceptions, + "Rush Yds": game.rushing_yards, + "Rush TD": game.rushing_tds, + "Rec": game.receptions, + "Rec Yds": game.receiving_yards, + "Rec TD": game.receiving_tds, + "Fum": game.fumbles_lost, + "FP": fp, + }) + return rows + + +def _render_stats_tab(player_id: str) -> None: + """Render the statistics sub-tab with season checkboxes and histogram.""" + scoring_options = ["PPR", "Half-PPR", "Standard"] + scoring_label = st.selectbox("Scoring Format", options=scoring_options, index=0) + scoring_key = scoring_label.lower().replace("-", "_") + + with st.spinner("Loading available seasons..."): + seasons = get_player_seasons(player_id) + + if not seasons: + st.info("No season data available for this player.") + return + + selected_seasons = st.multiselect( + "Select Seasons", + options=seasons, + default=[seasons[0]] if seasons else [], + ) + + if not selected_seasons: + st.info("Select one or more seasons above to view stats.") + return + + for season in selected_seasons: + st.subheader(f"{season} Season") + + with st.spinner(f"Loading {season} game log..."): + gamelog = get_player_gamelog(player_id, season) + + if not gamelog: + st.warning(f"No game data available for {season}.") + continue + + fp_list = [calculate_fantasy_points(g, scoring_key) for g in gamelog] + + col_table, col_charts = st.columns(2) + + with col_table: + st.caption(f"Games: {len(gamelog)}") + rows = _build_stats_table(gamelog, scoring_key) + st.dataframe(rows, use_container_width=True, hide_index=True) + + with col_charts: + nonzero_count = len([x for x in fp_list if x > 0]) + st.caption(f"Games with stats: {nonzero_count}") + + fig_ts = _create_timeseries_chart(gamelog, scoring_key, scoring_label) + st.pyplot(fig_ts) + plt.close(fig_ts) + + fig_hist = _create_histogram_with_fit(fp_list, scoring_label) + st.pyplot(fig_hist) + plt.close(fig_hist) + + st.divider() + + +def _render_combine_tab(player_id: str) -> None: + """Render the combine metrics sub-tab.""" + with st.spinner("Loading combine data..."): + combine = get_player_combine(player_id) + + if not combine: + st.info("No combine data available for this player.") + return + + if combine.year: + st.caption(f"NFL Combine {combine.year}") + + col1, col2 = st.columns(2) + + with col1: + st.metric("40-Yard Dash", f"{combine.forty_yard}s" if combine.forty_yard else "—") + st.metric("Bench Press", f"{combine.bench_press} reps" if combine.bench_press else "—") + st.metric("3-Cone Drill", f"{combine.three_cone}s" if combine.three_cone else "—") + + with col2: + st.metric("Vertical Jump", f'{combine.vertical_jump}"' if combine.vertical_jump else "—") + st.metric("Broad Jump", f'{combine.broad_jump}"' if combine.broad_jump else "—") + st.metric("20-Yard Shuttle", f"{combine.shuttle}s" if combine.shuttle else "—") + + all_none = all([ + combine.forty_yard is None, + combine.vertical_jump is None, + combine.bench_press is None, + combine.broad_jump is None, + combine.three_cone is None, + combine.shuttle is None, + ]) + + if all_none: + st.info("Combine metrics not available. Player may not have participated in the NFL Combine or data is not publicly available.") + + +def _render_injuries_tab(player_id: str) -> None: + """Render the injuries sub-tab.""" + with st.spinner("Loading injury data..."): + injuries = get_player_injuries(player_id) + + if not injuries: + st.success("No injuries reported.") + return + + for injury in injuries: + status_color = "green" if injury.status == "Active" else "red" + + st.markdown(f"### :{status_color}[{injury.status}]") + + if injury.injury_type and injury.injury_type != "—": + st.markdown(f"**Type:** {injury.injury_type}") + + if injury.details and injury.details != "—": + st.markdown(f"**Details:** {injury.details}") + + if injury.date and injury.date != "—": + st.caption(f"Date: {injury.date}") + + st.divider() + + +def _render_news_tab(player_id: str) -> None: + """Render the news sub-tab.""" + with st.spinner("Loading news..."): + articles = get_player_news(player_id) + + if not articles: + st.info("No recent news articles found.") + return + + for article in articles: + col1, col2 = st.columns([1, 4]) + + with col1: + if article.image_url: + st.image(article.image_url, width=120) + + with col2: + if article.link: + st.markdown(f"### [{article.headline}]({article.link})") + else: + st.markdown(f"### {article.headline}") + + if article.description: + st.markdown(article.description[:200] + "..." if len(article.description) > 200 else article.description) + + st.caption(article.published) + + st.divider() + + +def render_research_tab() -> None: + """Main entry point for the Research tab.""" + _init_research_state() + + _render_search() + + player_id = st.session_state.get("research_selected_player") + + if not player_id: + st.info("Search for a player above to view their profile, stats, and news.") + return + + with st.spinner("Loading player profile..."): + profile = get_player_profile(player_id) + + if not profile: + st.error("Could not load player profile. Please try again.") + st.session_state["research_selected_player"] = None + return + + if st.button("← Back to Search"): + st.session_state["research_selected_player"] = None + st.rerun() + + _render_profile_card(profile) + + st.divider() + + stats_tab, combine_tab, injuries_tab, news_tab = st.tabs([ + "Stats", "Combine", "Injuries", "News" + ]) + + with stats_tab: + _render_stats_tab(player_id) + + with combine_tab: + _render_combine_tab(player_id) + + with injuries_tab: + _render_injuries_tab(player_id) + + with news_tab: + _render_news_tab(player_id) diff --git a/app/streamlit_app.py b/app/streamlit_app.py index f211a24..318cb9b 100644 --- a/app/streamlit_app.py +++ b/app/streamlit_app.py @@ -11,10 +11,7 @@ if str(ROOT) not in sys.path: sys.path.insert(0, str(ROOT)) -from app.mock_draft_tab import render_mock_draft_tab from draft.rpy2_setup import init_rpy2_on_main_thread -from rag.engine import RAGEngine -from rag.ollama_client import OllamaError st.set_page_config( page_title="StatShift", @@ -22,10 +19,6 @@ layout="wide", ) -st.title("StatShift") - -engine = RAGEngine() - @st.cache_resource def _rpy2_ready() -> bool: @@ -36,55 +29,24 @@ def _rpy2_ready() -> bool: _rpy2_ready() -with st.sidebar: - if st.button("Check services", use_container_width=True): - st.session_state["health"] = engine.health() - health = st.session_state.get("health") - if health: - st.write("Data", "✅" if health["api_ok"] else "❌") - st.write("AI", "✅" if health["ollama_ok"] else "❌") - -ask_tab, draft_tab = st.tabs(["Ask", "Mock Draft"]) - -with ask_tab: - query = st.text_input( - "Ask about players, matchups, injuries, or fantasy scoring", - placeholder="e.g. How many targets did Ja'Marr Chase have in 2024?", - ) - - col1, col2 = st.columns([1, 4]) - with col1: - run = st.button("Ask", type="primary", use_container_width=True) - with col2: - show_prompt = st.checkbox("Show AI prompt") - - if run and query.strip(): - with st.spinner("Routing your question..."): - try: - result = engine.ask(query.strip()) - st.session_state["last_result"] = result - except OllamaError as exc: - st.error(str(exc)) - except Exception as exc: - st.error(f"Something went wrong: {exc}") +VIEWS_DIR = Path(__file__).parent / "views" - result = st.session_state.get("last_result") - if result: - st.subheader("Answer") - st.markdown(result.answer) - - if result.sources: - st.subheader("Sources") - for doc in result.sources: - with st.expander(f"[{doc['id']}] {doc['title']} · {doc['category']}"): - st.write(doc["content"]) - - if show_prompt and result.prompt: - st.subheader("Prompt") - st.code(result.prompt) - - elif run and not query.strip(): - st.warning("Enter a question first.") +home_page = st.Page( + str(VIEWS_DIR / "home_page.py"), + title="Home", + icon="🏠", + default=True, +) +ask_page = st.Page( + str(VIEWS_DIR / "ask_page.py"), + title="Ask", + icon="💬", +) +research_page = st.Page( + str(VIEWS_DIR / "research_page.py"), + title="Research", + icon="🔍", +) -with draft_tab: - render_mock_draft_tab() +pg = st.navigation([home_page, ask_page, research_page]) +pg.run() diff --git a/app/views/ask_page.py b/app/views/ask_page.py new file mode 100644 index 0000000..4a073a3 --- /dev/null +++ b/app/views/ask_page.py @@ -0,0 +1,66 @@ +"""Ask page - RAG-powered Q&A.""" + +from __future__ import annotations + +import streamlit as st + +from rag.engine import RAGEngine +from rag.ollama_client import OllamaClient, OllamaError + +st.title("StatShift") + +# Model selection +model_options = { + "Gemma 2": "gemma2:latest", + "Llama 3": "llama3:latest", +} + +model_col, _ = st.columns([1, 3]) +with model_col: + selected_model_name = st.selectbox( + "AI Model", + options=list(model_options.keys()), + index=0, + ) +selected_model = model_options[selected_model_name] + +query = st.text_input( + "Ask about players, matchups, injuries, or fantasy scoring", + placeholder="e.g. How many targets did Ja'Marr Chase have in 2024?", +) + +col1, col2 = st.columns([1, 4]) +with col1: + run = st.button("Ask", type="primary", use_container_width=True) +with col2: + show_prompt = st.checkbox("Show AI prompt") + +if run and query.strip(): + with st.spinner("Routing your question..."): + try: + ollama_client = OllamaClient(model=selected_model) + engine = RAGEngine(ollama=ollama_client) + result = engine.ask(query.strip()) + st.session_state["last_result"] = result + except OllamaError as exc: + st.error(str(exc)) + except Exception as exc: + st.error(f"Something went wrong: {exc}") + +result = st.session_state.get("last_result") +if result: + st.subheader("Answer") + st.markdown(result.answer) + + if result.sources: + st.subheader("Sources") + for doc in result.sources: + with st.expander(f"[{doc['id']}] {doc['title']} · {doc['category']}"): + st.write(doc["content"]) + + if show_prompt and result.prompt: + st.subheader("Prompt") + st.code(result.prompt) + +elif run and not query.strip(): + st.warning("Enter a question first.") diff --git a/app/views/home_page.py b/app/views/home_page.py new file mode 100644 index 0000000..098fd2a --- /dev/null +++ b/app/views/home_page.py @@ -0,0 +1,11 @@ +"""Home page - Mock Draft.""" + +from __future__ import annotations + +import streamlit as st + +from app.mock_draft_tab import render_mock_draft_tab + +st.title("StatShift") + +render_mock_draft_tab() diff --git a/app/views/research_page.py b/app/views/research_page.py new file mode 100644 index 0000000..8ee3821 --- /dev/null +++ b/app/views/research_page.py @@ -0,0 +1,11 @@ +"""Research page - NFL player research.""" + +from __future__ import annotations + +import streamlit as st + +from app.research_tab import render_research_tab + +st.title("StatShift") + +render_research_tab() diff --git a/docker-compose.yml b/docker-compose.yml index b38dcaf..8739a6f 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -38,7 +38,9 @@ services: condition: service_healthy environment: OLLAMA_HOST: http://ollama:11434 - entrypoint: ["ollama", "pull", "gemma2:2b"] + volumes: + - ./scripts:/scripts:ro + entrypoint: ["/bin/sh", "/scripts/pull-ollama-models.sh"] restart: "no" ui: @@ -47,7 +49,7 @@ services: environment: API_BASE_URL: http://api:8000 OLLAMA_BASE_URL: http://ollama:11434 - OLLAMA_MODEL: gemma2:2b + OLLAMA_MODEL: gemma2:latest volumes: - statshift_data:/app/data ports: diff --git a/draft/engine.py b/draft/engine.py index 1a654c2..832719b 100644 --- a/draft/engine.py +++ b/draft/engine.py @@ -4,10 +4,12 @@ import math import random +import time from dataclasses import dataclass, field from draft.models import ( ROSTER_SLOTS, + TEAM_NAME_POOL, DraftPick, LeagueSettings, Player, @@ -19,6 +21,30 @@ from draft.player_pool import get_players, max_rounds_for_league +def _generate_team_names( + league_size: int, + user_team_name: str = "My Team", + user_slot: int = 1, + rng: random.Random | None = None, +) -> list[str]: + """Generate team names with user's name at their draft slot.""" + rng = rng or random.Random() + cpu_names = rng.sample(TEAM_NAME_POOL, min(league_size - 1, len(TEAM_NAME_POOL))) + + names: list[str] = [] + cpu_idx = 0 + for i in range(league_size): + if i == user_slot - 1: + names.append(user_team_name) + else: + if cpu_idx < len(cpu_names): + names.append(cpu_names[cpu_idx]) + cpu_idx += 1 + else: + names.append(f"Team {i + 1}") + return names + + def _snake_team_index(round_num: int, pick_in_round: int, league_size: int) -> int: if round_num % 2 == 1: return pick_in_round - 1 @@ -38,6 +64,10 @@ class MockDraftEngine: cpu_top_k: int = 12 seed: int | None = None ranking_source: RankingSource = RankingSource.YAHOO + team_names: list[str] = field(default_factory=list) + user_team_name: str = "My Team" + pick_time_limit: int = 30 + pick_start_time: float | None = None _rng: random.Random = field( init=False, repr=False, compare=False, default_factory=random.Random ) @@ -52,6 +82,13 @@ def __post_init__(self) -> None: reverse=True, ) self._rng = random.Random(self.seed) + if not self.team_names: + self.team_names = _generate_team_names( + self.settings.league_size, + self.user_team_name, + self.settings.draft_slot, + self._rng, + ) @property def total_picks(self) -> int: @@ -92,6 +129,27 @@ def is_user_turn(self) -> bool: and self.current_team_index == self.settings.draft_slot - 1 ) + def start_pick_timer(self) -> None: + """Start or reset the pick timer for the current pick.""" + self.pick_start_time = time.time() + + def time_remaining(self) -> int: + """Return seconds remaining on the pick clock.""" + if self.pick_start_time is None: + return self.pick_time_limit + elapsed = time.time() - self.pick_start_time + return max(0, self.pick_time_limit - int(elapsed)) + + def is_timer_expired(self) -> bool: + """Check if the pick timer has expired.""" + return self.pick_start_time is not None and self.time_remaining() <= 0 + + def get_team_name(self, team_index: int) -> str: + """Get the display name for a team.""" + if 0 <= team_index < len(self.team_names): + return self.team_names[team_index] + return f"Team {team_index + 1}" + def drafted_names(self) -> set[str]: return {pick.player.name for pick in self.picks} @@ -201,6 +259,9 @@ def _clone_for_simulation(self) -> MockDraftEngine: cpu_randomness=self.cpu_randomness, cpu_top_k=self.cpu_top_k, ranking_source=self.ranking_source, + team_names=self.team_names, + user_team_name=self.user_team_name, + pick_time_limit=self.pick_time_limit, ) clone.picks = list(self.picks) clone.rosters = [ @@ -253,6 +314,8 @@ def create( cpu_top_k: int = 12, seed: int | None = None, ranking_source: RankingSource = RankingSource.YAHOO, + user_team_name: str = "My Team", + pick_time_limit: int = 30, ) -> MockDraftEngine: player_pool = pool if pool is not None else get_players() if not player_pool: @@ -274,7 +337,10 @@ def create( cpu_top_k=cpu_top_k, seed=seed, ranking_source=ranking_source, + user_team_name=user_team_name, + pick_time_limit=pick_time_limit, ) engine.requested_rounds = rounds engine.max_supported_rounds = supported + engine.start_pick_timer() return engine diff --git a/draft/models.py b/draft/models.py index ad6a25c..bedcf04 100644 --- a/draft/models.py +++ b/draft/models.py @@ -27,6 +27,17 @@ class Position(str, Enum): DEF = "DEF" +# Sleeper-style position colors: (background, text) +POSITION_COLORS: dict[str, tuple[str, str]] = { + "QB": ("#523C87", "#FFFFFF"), + "RB": ("#2E7D5E", "#FFFFFF"), + "WR": ("#2D7A4F", "#FFFFFF"), + "TE": ("#C75724", "#FFFFFF"), + "K": ("#6B6B6B", "#FFFFFF"), + "DEF": ("#C72424", "#FFFFFF"), +} + + ROSTER_SLOTS: dict[str, int] = { "QB": 1, "RB": 2, @@ -37,6 +48,29 @@ class Position(str, Enum): "DEF": 1, } +TEAM_NAME_POOL: list[str] = [ + "Gridiron Gladiators", + "Sunday Scaries", + "The Algorithm", + "Waiver Wire Wizards", + "Trade Block Party", + "Bench Warmers", + "Fantasy Fiends", + "TD Chasers", + "Point Projectors", + "Sleeper Agents", + "Red Zone Rockets", + "Fourth Quarter Comeback", + "Bye Week Blues", + "Commissioner's Picks", + "Draft Day Disasters", + "Playoff Bound", + "Injury Reserve", + "Monday Night Mayhem", + "Touchdown Titans", + "Roster Roulette", +] + @dataclass(frozen=True) class LeagueSettings: diff --git a/rag/engine.py b/rag/engine.py index 2051d9c..05ab737 100644 --- a/rag/engine.py +++ b/rag/engine.py @@ -104,9 +104,12 @@ def ask(self, query: str, *, intent: IntentResult | None = None) -> RAGResult: prompt = self._build_conversational_prompt(query) try: answer = self.ollama.generate(prompt) + except OllamaError: + # Re-raise OllamaError as-is (already has specific message) + raise except httpx.HTTPError as exc: raise OllamaError( - "Could not reach Ollama. Start it locally and ensure Gemma is pulled." + f"Network error: {type(exc).__name__} - {str(exc)}" ) from exc return RAGResult( answer=answer, diff --git a/rag/ollama_client.py b/rag/ollama_client.py index 136814d..a139ac4 100644 --- a/rag/ollama_client.py +++ b/rag/ollama_client.py @@ -16,7 +16,7 @@ def __init__( self, base_url: str | None = None, model: str | None = None, - timeout: float = 120.0, + timeout: float = 300.0, # Increased to 5 minutes for initial model load ) -> None: self.base_url = (base_url or settings.ollama_base_url).rstrip("/") self.model = model or settings.ollama_model @@ -37,20 +37,37 @@ def list_models(self) -> list[str]: return [m["name"] for m in payload.get("models", [])] def generate(self, prompt: str, *, temperature: float = 0.2) -> str: - response = httpx.post( - f"{self.base_url}/api/generate", - json={ - "model": self.model, - "prompt": prompt, - "stream": False, - "options": {"temperature": temperature}, - }, - timeout=self.timeout, - ) - if response.status_code == 404: - raise OllamaError( - f"Model '{self.model}' not found. Pull it with: ollama pull {self.model}" + try: + response = httpx.post( + f"{self.base_url}/api/generate", + json={ + "model": self.model, + "prompt": prompt, + "stream": False, + "options": {"temperature": temperature}, + }, + timeout=self.timeout, ) - response.raise_for_status() - data = response.json() - return (data.get("response") or "").strip() + if response.status_code == 404: + raise OllamaError( + f"Model '{self.model}' not found. Pull it with: ollama pull {self.model}" + ) + response.raise_for_status() + data = response.json() + return (data.get("response") or "").strip() + except httpx.ConnectError as e: + raise OllamaError( + f"Cannot connect to Ollama at {self.base_url}. Error: {str(e)}" + ) from e + except httpx.TimeoutException as e: + raise OllamaError( + f"Timeout connecting to Ollama at {self.base_url}. Error: {str(e)}" + ) from e + except httpx.HTTPStatusError as e: + raise OllamaError( + f"HTTP error from Ollama (status {e.response.status_code}): {str(e)}" + ) from e + except httpx.HTTPError as e: + raise OllamaError( + f"Network error connecting to Ollama at {self.base_url}: {type(e).__name__} - {str(e)}" + ) from e diff --git a/requirements.txt b/requirements.txt index a948c94..44d7d33 100644 --- a/requirements.txt +++ b/requirements.txt @@ -2,6 +2,10 @@ fastapi>=0.115.0 uvicorn[standard]>=0.32.0 httpx>=0.27.0 streamlit>=1.40.0 +streamlit-autorefresh>=1.0.0 pydantic>=2.9.0 pydantic-settings>=2.6.0 rpy2>=3.5.0 +scipy>=1.11.0 +matplotlib>=3.8.0 +numpy>=1.26.0 diff --git a/scripts/pull-ollama-models.sh b/scripts/pull-ollama-models.sh new file mode 100755 index 0000000..d119222 --- /dev/null +++ b/scripts/pull-ollama-models.sh @@ -0,0 +1,20 @@ +#!/bin/sh +# Pull Ollama models for StatShift (skips models that are already cached) + +set -e + +ensure_model() { + model_name="$1" + if ollama list | awk '{print $1}' | grep -Fxq "$model_name"; then + echo "✓ $model_name is already cached, skipping download." + else + echo "↓ Pulling $model_name..." + ollama pull "$model_name" + echo "✓ $model_name pulled successfully." + fi +} + +ensure_model "gemma2:latest" +ensure_model "llama3:latest" + +echo "All models ready!" diff --git a/sdks/distribution_fit.py b/sdks/distribution_fit.py new file mode 100644 index 0000000..9d89967 --- /dev/null +++ b/sdks/distribution_fit.py @@ -0,0 +1,92 @@ +"""Distribution fitting for fantasy points analysis.""" + +from __future__ import annotations + +from typing import Any, Callable + +import numpy as np +from scipy import stats + + +CANDIDATE_DISTRIBUTIONS: list[tuple[str, Any]] = [ + ("Normal", stats.norm), + ("Gamma", stats.gamma), + ("Log-Normal", stats.lognorm), + ("Weibull", stats.weibull_min), +] + + +def fit_best_distribution( + data: list[float], +) -> tuple[str, dict[str, float], Callable[[np.ndarray], np.ndarray]]: + """Fit candidate distributions and return best fit by AIC. + + Args: + data: List of fantasy point values (non-zero values recommended) + + Returns: + Tuple of (distribution_name, parameters_dict, pdf_function) + """ + arr = np.array([x for x in data if x > 0]) + + if len(arr) < 5: + mean = float(np.mean(arr)) if len(arr) > 0 else 0.0 + std = float(np.std(arr)) if len(arr) > 1 else 1.0 + return ( + "Normal", + {"mean": mean, "std": std}, + lambda x, m=mean, s=max(std, 0.1): stats.norm.pdf(x, loc=m, scale=s), + ) + + best_name = "Normal" + best_params: dict[str, float] = {} + best_pdf: Callable[[np.ndarray], np.ndarray] = lambda x: stats.norm.pdf(x) + best_aic = float("inf") + + for name, dist in CANDIDATE_DISTRIBUTIONS: + try: + if name == "Normal": + params = dist.fit(arr) + loc, scale = params + pdf_fn = lambda x, d=dist, p=params: d.pdf(x, *p) + param_dict = {"mean": loc, "std": scale} + + elif name == "Gamma": + params = dist.fit(arr, floc=0) + a, loc, scale = params + pdf_fn = lambda x, d=dist, p=params: d.pdf(x, *p) + param_dict = {"shape": a, "scale": scale} + + elif name == "Log-Normal": + params = dist.fit(arr, floc=0) + s, loc, scale = params + pdf_fn = lambda x, d=dist, p=params: d.pdf(x, *p) + param_dict = {"s": s, "scale": scale} + + elif name == "Weibull": + params = dist.fit(arr, floc=0) + c, loc, scale = params + pdf_fn = lambda x, d=dist, p=params: d.pdf(x, *p) + param_dict = {"c": c, "scale": scale} + else: + continue + + log_likelihood = np.sum(np.log(dist.pdf(arr, *params) + 1e-10)) + k = len(params) + aic = 2 * k - 2 * log_likelihood + + if aic < best_aic: + best_aic = aic + best_name = name + best_params = param_dict + best_pdf = pdf_fn + + except Exception: + continue + + return best_name, best_params, best_pdf + + +def format_params_string(params: dict[str, float]) -> str: + """Format parameters dictionary as a readable string.""" + return ", ".join(f"{k}={v:.2f}" for k, v in params.items()) diff --git a/sdks/espn_player_loader.py b/sdks/espn_player_loader.py new file mode 100644 index 0000000..066d297 --- /dev/null +++ b/sdks/espn_player_loader.py @@ -0,0 +1,524 @@ +"""ESPN API client for player research data.""" + +from __future__ import annotations + +from dataclasses import dataclass, field +from datetime import datetime +from typing import Any + +import httpx + +CORE_API_BASE = "https://sports.core.api.espn.com/v2/sports/football/leagues/nfl" +SITE_API_BASE = "https://site.api.espn.com/apis/site/v2/sports/football/nfl" +COMMON_SEARCH_URL = "https://site.api.espn.com/apis/common/v3/search" +TIMEOUT = 30.0 + + +@dataclass +class PlayerProfile: + id: str + name: str + position: str + team: str + jersey: str + height: str + weight: str + age: int | None + birth_date: str + college: str + draft_info: str + experience: int + headshot_url: str + status: str + + +@dataclass +class SeasonStats: + season: int + games_played: int + stats: dict[str, Any] = field(default_factory=dict) + + +@dataclass +class CombineMetrics: + year: int | None + forty_yard: float | None + vertical_jump: float | None + bench_press: int | None + broad_jump: float | None + three_cone: float | None + shuttle: float | None + + +@dataclass +class InjuryInfo: + status: str + injury_type: str + details: str + date: str + + +@dataclass +class NewsArticle: + headline: str + description: str + published: str + link: str + image_url: str | None + + +@dataclass +class GameLogEntry: + """Single game stats for fantasy point calculation.""" + week: int + opponent: str + result: str + passing_yards: int + passing_tds: int + interceptions: int + rushing_yards: int + rushing_tds: int + receptions: int + receiving_yards: int + receiving_tds: int + fumbles_lost: int + + +def _get_json(url: str, params: dict[str, Any] | None = None) -> dict[str, Any]: + """Make a GET request and return JSON response.""" + with httpx.Client(timeout=TIMEOUT, follow_redirects=True) as client: + response = client.get(url, params=params) + response.raise_for_status() + return response.json() + + +def _safe_get(data: dict, *keys: str, default: Any = None) -> Any: + """Safely navigate nested dict keys.""" + for key in keys: + if not isinstance(data, dict): + return default + data = data.get(key, default) + if data is None: + return default + return data + + +def _format_height(inches: int | None) -> str: + """Convert height in inches to feet'inches format.""" + if not inches: + return "—" + feet = inches // 12 + remaining = inches % 12 + return f"{feet}'{remaining}\"" + + +def _format_weight(pounds: int | None) -> str: + """Format weight with lbs suffix.""" + if not pounds: + return "—" + return f"{pounds} lbs" + + +def _team_from_search_item(item: dict[str, Any]) -> str: + for rel in item.get("teamRelationships", []) or []: + if rel.get("type") == "team": + return ( + rel.get("displayName") + or _safe_get(rel, "core", "displayName", default="Free Agent") + ) + return "Free Agent" + + +def _headshot_from_search_item(item: dict[str, Any]) -> str: + headshot = item.get("headshot") + if isinstance(headshot, dict): + return headshot.get("href", "") or "" + player_id = item.get("id") + if player_id: + return f"https://a.espncdn.com/i/headshots/nfl/players/full/{player_id}.png" + return "" + + +def _position_for_player_id(player_id: str) -> str: + try: + data = _get_json(f"{CORE_API_BASE}/athletes/{player_id}") + return _safe_get(data, "position", "abbreviation", default="—") + except httpx.HTTPError: + return "—" + + +def search_players(query: str, limit: int = 10) -> list[dict[str, Any]]: + """Search active NFL players by name via ESPN common search API.""" + if not query or len(query.strip()) < 2: + return [] + + try: + data = _get_json( + COMMON_SEARCH_URL, + params={ + "query": query.strip(), + "limit": max(limit * 3, 25), + "type": "player", + "sport": "football", + "league": "nfl", + }, + ) + except httpx.HTTPError: + return [] + + results: list[dict[str, Any]] = [] + + for item in data.get("items", []): + if len(results) >= limit: + break + if not item.get("isActive") or item.get("isRetired"): + continue + + player_id = str(item.get("id", "")) + if not player_id: + continue + + results.append({ + "id": player_id, + "name": item.get("displayName", "Unknown"), + "position": _position_for_player_id(player_id), + "team": _team_from_search_item(item), + "headshot": _headshot_from_search_item(item), + "active": True, + "status": "Active", + }) + + return results + + +def get_player_profile(player_id: str) -> PlayerProfile | None: + """Get detailed player profile information.""" + url = f"{CORE_API_BASE}/athletes/{player_id}" + + try: + data = _get_json(url) + except httpx.HTTPError: + return None + + height_inches = data.get("height") + weight_pounds = data.get("weight") + + birth_date = data.get("dateOfBirth", "") + age = None + if birth_date: + try: + dob = datetime.fromisoformat(birth_date.replace("Z", "+00:00")) + age = (datetime.now(dob.tzinfo) - dob).days // 365 + except (ValueError, TypeError): + pass + + draft = data.get("draft", {}) + draft_info = "—" + if draft: + year = draft.get("year", "") + round_num = draft.get("round", "") + pick = draft.get("selection", "") + if year and round_num and pick: + draft_info = f"{year} Round {round_num}, Pick {pick}" + + return PlayerProfile( + id=str(data.get("id", "")), + name=data.get("displayName", "Unknown"), + position=_safe_get(data, "position", "displayName", default="—"), + team=_safe_get(data, "team", "displayName", default="Free Agent"), + jersey=str(data.get("jersey", "—")), + height=_format_height(height_inches), + weight=_format_weight(weight_pounds), + age=age, + birth_date=birth_date[:10] if birth_date else "—", + college=_safe_get(data, "college", "name", default="—"), + draft_info=draft_info, + experience=data.get("experience", {}).get("years", 0), + headshot_url=_safe_get(data, "headshot", "href", default=""), + status=_safe_get(data, "status", "name", default="Active"), + ) + + +def get_player_stats(player_id: str, season: int | None = None) -> list[SeasonStats]: + """Get player statistics for a season or career.""" + url = f"{CORE_API_BASE}/athletes/{player_id}/statistics" + + try: + data = _get_json(url) + except httpx.HTTPError: + return [] + + results = [] + splits = data.get("splits", {}) + categories = splits.get("categories", []) + + if not categories: + return [] + + stats_dict: dict[str, Any] = {} + games_played = 0 + + for category in categories: + cat_name = category.get("displayName", "") + for stat in category.get("stats", []): + stat_name = stat.get("displayName", stat.get("name", "")) + stat_value = stat.get("displayValue", stat.get("value", "—")) + if stat_name: + stats_dict[f"{cat_name} - {stat_name}"] = stat_value + if stat_name.lower() in ("games played", "gp"): + try: + games_played = int(stat_value) + except (ValueError, TypeError): + pass + + current_year = datetime.now().year + results.append(SeasonStats( + season=season or current_year, + games_played=games_played, + stats=stats_dict, + )) + + return results + + +def get_player_combine(player_id: str) -> CombineMetrics | None: + """Get NFL Combine metrics for a player.""" + profile = get_player_profile(player_id) + if not profile: + return None + + url = f"{CORE_API_BASE}/athletes/{player_id}" + + try: + data = _get_json(url) + except httpx.HTTPError: + return CombineMetrics( + year=None, + forty_yard=None, + vertical_jump=None, + bench_press=None, + broad_jump=None, + three_cone=None, + shuttle=None, + ) + + draft = data.get("draft", {}) + combine_year = draft.get("year") if draft else None + + return CombineMetrics( + year=combine_year, + forty_yard=None, + vertical_jump=None, + bench_press=None, + broad_jump=None, + three_cone=None, + shuttle=None, + ) + + +def get_player_injuries(player_id: str) -> list[InjuryInfo]: + """Get injury information for a player.""" + url = f"{CORE_API_BASE}/athletes/{player_id}" + + try: + data = _get_json(url) + except httpx.HTTPError: + return [] + + injuries = data.get("injuries", []) + results = [] + + for injury in injuries: + results.append(InjuryInfo( + status=_safe_get(injury, "status", "type", default="Unknown"), + injury_type=injury.get("type", {}).get("text", "—"), + details=injury.get("details", {}).get("detail", "—"), + date=injury.get("date", "—")[:10] if injury.get("date") else "—", + )) + + status_info = data.get("status", {}) + if status_info and not injuries: + results.append(InjuryInfo( + status=status_info.get("name", "Active"), + injury_type="—", + details="No current injuries reported", + date="—", + )) + + return results if results else [InjuryInfo( + status="Active", + injury_type="—", + details="No injury information available", + date="—", + )] + + +def get_player_news(player_id: str, limit: int = 10) -> list[NewsArticle]: + """Get recent news articles about a player.""" + url = f"{SITE_API_BASE}/news" + + try: + data = _get_json(url, params={"player": player_id, "limit": limit}) + except httpx.HTTPError: + return [] + + results = [] + articles = data.get("articles", []) + + for article in articles[:limit]: + published = article.get("published", "") + if published: + try: + dt = datetime.fromisoformat(published.replace("Z", "+00:00")) + published = dt.strftime("%b %d, %Y") + except (ValueError, TypeError): + published = published[:10] + + images = article.get("images", []) + image_url = images[0].get("url") if images else None + + links = article.get("links", {}) + web_link = links.get("web", {}).get("href", "") + + results.append(NewsArticle( + headline=article.get("headline", "No headline"), + description=article.get("description", ""), + published=published, + link=web_link, + image_url=image_url, + )) + + return results + + +def get_available_seasons(player_id: str) -> list[int]: + """Get list of seasons with available statistics for a player.""" + current_year = datetime.now().year + return list(range(current_year, current_year - 5, -1)) + + +def get_player_seasons(player_id: str) -> list[int]: + """Fetch actual seasons with game data from ESPN statisticslog.""" + url = f"{CORE_API_BASE}/athletes/{player_id}/statisticslog" + + try: + data = _get_json(url) + except httpx.HTTPError: + return [] + + seasons: list[int] = [] + entries = data.get("entries", []) + + for entry in entries: + season_ref = _safe_get(entry, "season", "$ref", default="") + if season_ref and "/seasons/" in season_ref: + try: + year_str = season_ref.split("/seasons/")[1].split("?")[0] + year = int(year_str) + if year not in seasons: + seasons.append(year) + except (IndexError, ValueError): + continue + + seasons.sort(reverse=True) + return seasons + + +def _extract_stat_value(categories: list[dict], category_name: str, stat_name: str) -> float: + """Extract a specific stat value from ESPN categories structure.""" + for cat in categories: + if cat.get("name") == category_name: + for stat in cat.get("stats", []): + if stat.get("name") == stat_name: + try: + return float(stat.get("value", 0)) + except (TypeError, ValueError): + return 0.0 + return 0.0 + + +def get_player_gamelog(player_id: str, season: int) -> list[GameLogEntry]: + """Fetch week-by-week game stats for a season.""" + url = f"{CORE_API_BASE}/seasons/{season}/athletes/{player_id}/eventlog" + + try: + data = _get_json(url) + except httpx.HTTPError: + return [] + + events = data.get("events", {}) + items = events.get("items", []) + + results: list[GameLogEntry] = [] + + for idx, item in enumerate(items): + if not item.get("played"): + continue + + stats_ref = _safe_get(item, "statistics", "$ref", default="") + if not stats_ref: + continue + + try: + stats_data = _get_json(stats_ref) + except httpx.HTTPError: + continue + + categories = _safe_get(stats_data, "splits", "categories", default=[]) + if not categories: + continue + + entry = GameLogEntry( + week=idx + 1, + opponent="", + result="", + passing_yards=int(_extract_stat_value(categories, "passing", "passingYards")), + passing_tds=int(_extract_stat_value(categories, "passing", "passingTouchdowns")), + interceptions=int(_extract_stat_value(categories, "passing", "interceptions")), + rushing_yards=int(_extract_stat_value(categories, "rushing", "rushingYards")), + rushing_tds=int(_extract_stat_value(categories, "rushing", "rushingTouchdowns")), + receptions=int(_extract_stat_value(categories, "receiving", "receptions")), + receiving_yards=int(_extract_stat_value(categories, "receiving", "receivingYards")), + receiving_tds=int(_extract_stat_value(categories, "receiving", "receivingTouchdowns")), + fumbles_lost=int(_extract_stat_value(categories, "general", "fumblesLost")), + ) + results.append(entry) + + return results + + +def calculate_fantasy_points(game: GameLogEntry, scoring: str) -> float: + """Calculate fantasy points for a game. + + Args: + game: GameLogEntry with stats + scoring: 'ppr', 'half_ppr', or 'standard' + + Returns: + Fantasy points as float + """ + points = 0.0 + + # Passing: 0.04 per yard, 4 per TD, -2 per INT + points += game.passing_yards * 0.04 + points += game.passing_tds * 4 + points -= game.interceptions * 2 + + # Rushing: 0.1 per yard, 6 per TD + points += game.rushing_yards * 0.1 + points += game.rushing_tds * 6 + + # Receiving: 0.1 per yard, 6 per TD + points += game.receiving_yards * 0.1 + points += game.receiving_tds * 6 + + # Reception bonus based on scoring format + if scoring == "ppr": + points += game.receptions * 1.0 + elif scoring == "half_ppr": + points += game.receptions * 0.5 + + # Fumbles lost: -2 + points -= game.fumbles_lost * 2 + + return round(points, 2) From da39152a6f44de92a2eddf4876b04a438de174fc Mon Sep 17 00:00:00 2001 From: Joseph Lee <175lee.joseph@gmail.com> Date: Wed, 20 May 2026 18:58:34 -0400 Subject: [PATCH 8/9] Added college stats, bug fixes --- app/mock_draft_tab.py | 41 +++---- app/research_tab.py | 96 ++++++++++++++--- app/streamlit_app.py | 2 + app/ui_styles.py | 68 ++++++++++++ sdks/espn_player_loader.py | 168 ++++++++++++++++++++++++++++- tests/test_espn_player_research.py | 62 +++++++++++ 6 files changed, 401 insertions(+), 36 deletions(-) create mode 100644 app/ui_styles.py create mode 100644 tests/test_espn_player_research.py diff --git a/app/mock_draft_tab.py b/app/mock_draft_tab.py index 9b0415c..2f549eb 100644 --- a/app/mock_draft_tab.py +++ b/app/mock_draft_tab.py @@ -2,6 +2,8 @@ from __future__ import annotations +import html + import streamlit as st from streamlit_autorefresh import st_autorefresh @@ -35,10 +37,6 @@ def _position_colors(position: str) -> tuple[str, str]: return POSITION_COLORS.get(position, ("#374151", "#FFFFFF")) -def _truncate_name(name: str, max_len: int = 14) -> str: - return name if len(name) <= max_len else name[: max_len - 1] + "…" - - def _player_pick_html( name: str, position: str, @@ -49,14 +47,17 @@ def _player_pick_html( """HTML for a Sleeper-style colored pick cell.""" bg, fg = _position_colors(position) border = "2px solid #FBBF24" if highlight else "1px solid rgba(255,255,255,0.15)" - team_line = f'
{position}' - if team: - team_line += f" · {team}" + safe_name = html.escape(name) + safe_pos = html.escape(position) + safe_team = html.escape(team) if team else "" + team_line = f'
{safe_pos}' + if safe_team: + team_line += f" · {safe_team}" team_line += "
" return ( - f'
' - f'
{_truncate_name(name)}
' + f'
' + f'
{safe_name}
' f"{team_line}
" ) @@ -64,9 +65,9 @@ def _player_pick_html( def _on_clock_html(text: str, *, is_user: bool) -> str: bg = "#F59E0B" if is_user else "#4B5563" return ( - f'
' - f"{text}
" + f'
' + f"{html.escape(text)}
" ) @@ -177,11 +178,11 @@ def _render_sleeper_draft_board(draft: MockDraftEngine) -> None: header_cols = st.columns(num_teams) for i, col in enumerate(header_cols): team_name = draft.get_team_name(i) - short_name = _truncate_name(team_name, 16) - if i == user_team_idx: - col.markdown(f"**{short_name}** 🏈") - else: - col.markdown(f"**{short_name}**") + suffix = " 🏈" if i == user_team_idx else "" + col.markdown( + f'
{html.escape(team_name)}{suffix}
', + unsafe_allow_html=True, + ) for round_num in range(1, num_rounds + 1): row_cols = st.columns(num_teams) @@ -196,13 +197,13 @@ def _render_sleeper_draft_board(draft: MockDraftEngine) -> None: if pick_data: name, position, team = pick_data - html = _player_pick_html( + pick_html = _player_pick_html( name, position, team, highlight=is_user_team, ) - col.markdown(html, unsafe_allow_html=True) + col.markdown(pick_html, unsafe_allow_html=True) elif is_current_pick: remaining = draft.time_remaining() if draft.is_user_turn: diff --git a/app/research_tab.py b/app/research_tab.py index fb75d41..1e97eaa 100644 --- a/app/research_tab.py +++ b/app/research_tab.py @@ -2,12 +2,15 @@ from __future__ import annotations +import html + import matplotlib.pyplot as plt import numpy as np import streamlit as st from sdks.distribution_fit import fit_best_distribution, format_params_string from sdks.espn_player_loader import ( + CollegeSeasonStats, CombineMetrics, GameLogEntry, InjuryInfo, @@ -16,6 +19,7 @@ SeasonStats, calculate_fantasy_points, get_available_seasons, + get_player_college_stats, get_player_combine, get_player_gamelog, get_player_injuries, @@ -23,6 +27,7 @@ get_player_profile, get_player_seasons, get_player_stats, + is_upcoming_rookie, search_players, ) @@ -37,17 +42,23 @@ def _init_research_state() -> None: def _render_search() -> None: """Render the player search section.""" st.subheader("Search Players") - - col1, col2 = st.columns([4, 1]) - with col1: - query = st.text_input( - "Enter player name", - placeholder="e.g. Patrick Mahomes", - label_visibility="collapsed", - ) - with col2: - search_clicked = st.button("Search", type="primary", use_container_width=True) - + + if not st.session_state.get("research_selected_player"): + st.info("Search for a player below to view their profile, stats, and news.") + + with st.form("research_player_search", clear_on_submit=False): + col1, col2 = st.columns([4, 1]) + with col1: + query = st.text_input( + "Enter player name", + placeholder="e.g. Patrick Mahomes", + label_visibility="collapsed", + ) + with col2: + search_clicked = st.form_submit_button( + "Search", type="primary", use_container_width=True + ) + if search_clicked and query.strip(): with st.spinner("Searching..."): results = search_players(query.strip()) @@ -56,7 +67,7 @@ def _render_search() -> None: st.warning("No players found. Try a different search term.") results = st.session_state.get("research_search_results", []) - if results: + if results and not st.session_state.get("research_selected_player"): st.caption(f"Found {len(results)} active player(s)") for player in results: @@ -73,9 +84,53 @@ def _render_search() -> None: with btn_col: if st.button("Select", key=f"select_{player['id']}", use_container_width=True): st.session_state["research_selected_player"] = player["id"] + st.session_state["research_search_results"] = [] st.rerun() +def _college_stats_to_rows(seasons: list[CollegeSeasonStats]) -> list[dict]: + """Convert college season stats to dataframe rows, omitting empty columns.""" + columns = [ + ("Season", "season"), + ("Team", "team"), + ("GP", "games_played"), + ("Cmp", "completions"), + ("Pass Yds", "passing_yards"), + ("Pass TD", "passing_tds"), + ("INT", "interceptions"), + ("Rush Att", "rush_attempts"), + ("Rush Yds", "rushing_yards"), + ("Rush TD", "rushing_tds"), + ("Rec", "receptions"), + ("Rec Yds", "receiving_yards"), + ("Rec TD", "receiving_tds"), + ] + raw_rows = [] + for season in seasons: + raw_rows.append({label: getattr(season, attr) for label, attr in columns}) + + active_labels = ["Season"] + for label, _ in columns[1:]: + if any(row.get(label, "—") not in ("—", "0", "0.0", "") for row in raw_rows): + active_labels.append(label) + + return [{label: row[label] for label in active_labels} for row in raw_rows] + + +def _render_college_stats(profile: PlayerProfile) -> None: + """Show college stats table for pre-season rookies.""" + with st.spinner("Loading college stats..."): + seasons = get_player_college_stats(profile.id) + + if not seasons: + st.info("No college statistics available for this player.") + return + + st.subheader("College stats") + rows = _college_stats_to_rows(seasons) + st.dataframe(rows, use_container_width=True, hide_index=True) + + def _render_profile_card(profile: PlayerProfile) -> None: """Render the player profile card.""" st.divider() @@ -89,7 +144,11 @@ def _render_profile_card(profile: PlayerProfile) -> None: st.markdown("*No photo available*") with col2: - st.markdown(f"## {profile.name}") + rookie = is_upcoming_rookie(profile.draft_year) + name_line = f"## {profile.name}" + if rookie: + name_line += ' Rookie' + st.markdown(name_line, unsafe_allow_html=True) st.markdown(f"**{profile.position}** | #{profile.jersey} | {profile.team}") info_col1, info_col2, info_col3 = st.columns(3) @@ -107,6 +166,9 @@ def _render_profile_card(profile: PlayerProfile) -> None: status_color = "green" if profile.status == "Active" else "orange" st.markdown(f"**Status:** :{status_color}[{profile.status}]") + if is_upcoming_rookie(profile.draft_year): + _render_college_stats(profile) + def _create_timeseries_chart(gamelog: list[GameLogEntry], scoring_key: str, scoring_label: str) -> plt.Figure: """Create time series chart of fantasy points by week.""" @@ -349,7 +411,11 @@ def _render_news_tab(player_id: str) -> None: st.markdown(f"### {article.headline}") if article.description: - st.markdown(article.description[:200] + "..." if len(article.description) > 200 else article.description) + st.markdown( + f'

' + f"{html.escape(article.description)}

", + unsafe_allow_html=True, + ) st.caption(article.published) @@ -365,7 +431,6 @@ def render_research_tab() -> None: player_id = st.session_state.get("research_selected_player") if not player_id: - st.info("Search for a player above to view their profile, stats, and news.") return with st.spinner("Loading player profile..."): @@ -378,6 +443,7 @@ def render_research_tab() -> None: if st.button("← Back to Search"): st.session_state["research_selected_player"] = None + st.session_state["research_search_results"] = [] st.rerun() _render_profile_card(profile) diff --git a/app/streamlit_app.py b/app/streamlit_app.py index 318cb9b..475a5a9 100644 --- a/app/streamlit_app.py +++ b/app/streamlit_app.py @@ -11,6 +11,7 @@ if str(ROOT) not in sys.path: sys.path.insert(0, str(ROOT)) +from app.ui_styles import inject_global_styles from draft.rpy2_setup import init_rpy2_on_main_thread st.set_page_config( @@ -28,6 +29,7 @@ def _rpy2_ready() -> bool: _rpy2_ready() +inject_global_styles() VIEWS_DIR = Path(__file__).parent / "views" diff --git a/app/ui_styles.py b/app/ui_styles.py new file mode 100644 index 0000000..83b79b1 --- /dev/null +++ b/app/ui_styles.py @@ -0,0 +1,68 @@ +"""Shared Streamlit UI styles for readable, non-truncated text.""" + +from __future__ import annotations + +import streamlit as st + + +def inject_global_styles() -> None: + """Apply CSS so long labels and values wrap instead of showing ellipsis.""" + st.markdown( + """ + + """, + unsafe_allow_html=True, + ) diff --git a/sdks/espn_player_loader.py b/sdks/espn_player_loader.py index 066d297..e6f5a10 100644 --- a/sdks/espn_player_loader.py +++ b/sdks/espn_player_loader.py @@ -9,10 +9,25 @@ import httpx CORE_API_BASE = "https://sports.core.api.espn.com/v2/sports/football/leagues/nfl" +CFB_API_BASE = "https://sports.core.api.espn.com/v2/sports/football/leagues/college-football" SITE_API_BASE = "https://site.api.espn.com/apis/site/v2/sports/football/nfl" COMMON_SEARCH_URL = "https://site.api.espn.com/apis/common/v3/search" TIMEOUT = 30.0 +_COLLEGE_STAT_FIELDS: dict[str, str] = { + "gamesPlayed": "games_played", + "completions": "completions", + "netPassingYards": "passing_yards", + "passingTouchdowns": "passing_tds", + "interceptions": "interceptions", + "rushingAttempts": "rush_attempts", + "rushingYards": "rushing_yards", + "rushingTouchdowns": "rushing_tds", + "receptions": "receptions", + "receivingYards": "receiving_yards", + "receivingTouchdowns": "receiving_tds", +} + @dataclass class PlayerProfile: @@ -27,11 +42,29 @@ class PlayerProfile: birth_date: str college: str draft_info: str + draft_year: int | None experience: int headshot_url: str status: str +@dataclass +class CollegeSeasonStats: + season: int + team: str + games_played: str + completions: str + passing_yards: str + passing_tds: str + interceptions: str + rush_attempts: str + rushing_yards: str + rushing_tds: str + receptions: str + receiving_yards: str + receiving_tds: str + + @dataclass class SeasonStats: season: int @@ -119,6 +152,81 @@ def _format_weight(pounds: int | None) -> str: return f"{pounds} lbs" +def nfl_season_has_started(season_year: int, *, now: datetime | None = None) -> bool: + """Return True if the NFL regular season for season_year has begun.""" + now = now or datetime.now() + if now.year > season_year: + return True + if now.year < season_year: + return False + return now.month >= 9 + + +def is_upcoming_rookie(draft_year: int | None, *, now: datetime | None = None) -> bool: + """True when drafted this calendar year and that NFL season has not started.""" + now = now or datetime.now() + if draft_year is None: + return False + return draft_year == now.year and not nfl_season_has_started(draft_year, now=now) + + +def _resolve_ref_field(ref_obj: Any, *, name_keys: tuple[str, ...] = ("name", "displayName", "abbreviation")) -> str: + """Resolve a display name from an embedded object or ESPN $ref.""" + if not ref_obj: + return "—" + if isinstance(ref_obj, dict): + for key in name_keys: + value = ref_obj.get(key) + if value: + return str(value) + ref_url = ref_obj.get("$ref") + if ref_url: + try: + data = _get_json(ref_url) + except httpx.HTTPError: + return "—" + for key in name_keys: + value = data.get(key) + if value: + return str(value) + return "—" + + +def _stat_display_value(categories: list[dict], stat_name: str) -> str: + for cat in categories: + for stat in cat.get("stats", []): + if stat.get("name") == stat_name: + return str(stat.get("displayValue", stat.get("value", "—"))) + return "—" + + +def _empty_college_season(season: int, team: str = "") -> CollegeSeasonStats: + return CollegeSeasonStats( + season=season, + team=team, + games_played="—", + completions="—", + passing_yards="—", + passing_tds="—", + interceptions="—", + rush_attempts="—", + rushing_yards="—", + rushing_tds="—", + receptions="—", + receiving_yards="—", + receiving_tds="—", + ) + + +def _college_season_from_categories(season: int, team: str, categories: list[dict]) -> CollegeSeasonStats: + row = _empty_college_season(season, team) + for stat_name, attr in _COLLEGE_STAT_FIELDS.items(): + value = _stat_display_value(categories, stat_name) + if value != "—": + setattr(row, attr, value) + return row + + def _team_from_search_item(item: dict[str, Any]) -> str: for rel in item.get("teamRelationships", []) or []: if rel.get("type") == "team": @@ -214,7 +322,9 @@ def get_player_profile(player_id: str) -> PlayerProfile | None: draft = data.get("draft", {}) draft_info = "—" + draft_year: int | None = None if draft: + draft_year = draft.get("year") year = draft.get("year", "") round_num = draft.get("round", "") pick = draft.get("selection", "") @@ -231,14 +341,70 @@ def get_player_profile(player_id: str) -> PlayerProfile | None: weight=_format_weight(weight_pounds), age=age, birth_date=birth_date[:10] if birth_date else "—", - college=_safe_get(data, "college", "name", default="—"), + college=_resolve_ref_field(data.get("college")), draft_info=draft_info, + draft_year=int(draft_year) if draft_year else None, experience=data.get("experience", {}).get("years", 0), headshot_url=_safe_get(data, "headshot", "href", default=""), status=_safe_get(data, "status", "name", default="Active"), ) +def get_player_college_stats(player_id: str) -> list[CollegeSeasonStats]: + """Fetch season-by-season college statistics for an NFL player.""" + try: + athlete = _get_json(f"{CORE_API_BASE}/athletes/{player_id}") + except httpx.HTTPError: + return [] + + college_athlete_ref = _safe_get(athlete, "collegeAthlete", "$ref", default="") + if not college_athlete_ref: + return [] + + athlete_id = college_athlete_ref.rstrip("/").split("/athletes/")[-1].split("?")[0] + try: + log_data = _get_json(f"{CFB_API_BASE}/athletes/{athlete_id}/statisticslog") + except httpx.HTTPError: + return [] + + seasons: list[CollegeSeasonStats] = [] + for entry in log_data.get("entries", []): + season_ref = _safe_get(entry, "season", "$ref", default="") + if not season_ref or "/seasons/" not in season_ref: + continue + try: + season_year = int(season_ref.split("/seasons/")[1].split("?")[0]) + except (IndexError, ValueError): + continue + + stats_ref = "" + team_name = "" + for stat_entry in entry.get("statistics", []): + if stat_entry.get("type") == "total": + stats_ref = _safe_get(stat_entry, "statistics", "$ref", default="") + elif stat_entry.get("type") == "team" and not team_name: + team_ref = _safe_get(stat_entry, "team", "$ref", default="") + if team_ref: + team_name = _resolve_ref_field({"$ref": team_ref}, name_keys=("abbreviation", "displayName", "name")) + + if not stats_ref: + continue + + try: + stats_data = _get_json(stats_ref) + except httpx.HTTPError: + continue + + categories = _safe_get(stats_data, "splits", "categories", default=[]) + if not categories: + continue + + seasons.append(_college_season_from_categories(season_year, team_name, categories)) + + seasons.sort(key=lambda s: s.season, reverse=True) + return seasons + + def get_player_stats(player_id: str, season: int | None = None) -> list[SeasonStats]: """Get player statistics for a season or career.""" url = f"{CORE_API_BASE}/athletes/{player_id}/statistics" diff --git a/tests/test_espn_player_research.py b/tests/test_espn_player_research.py new file mode 100644 index 0000000..a9d99da --- /dev/null +++ b/tests/test_espn_player_research.py @@ -0,0 +1,62 @@ +"""Tests for ESPN player research helpers.""" + +from __future__ import annotations + +import unittest +from datetime import datetime + +from sdks.espn_player_loader import ( + _college_season_from_categories, + is_upcoming_rookie, + nfl_season_has_started, +) + + +class NflSeasonTests(unittest.TestCase): + def test_season_not_started_before_september(self) -> None: + now = datetime(2026, 5, 20) + self.assertFalse(nfl_season_has_started(2026, now=now)) + + def test_season_started_in_september(self) -> None: + now = datetime(2026, 9, 10) + self.assertTrue(nfl_season_has_started(2026, now=now)) + + def test_rookie_when_drafted_this_year_pre_season(self) -> None: + now = datetime(2026, 5, 20) + self.assertTrue(is_upcoming_rookie(2026, now=now)) + + def test_not_rookie_when_drafted_prior_year(self) -> None: + now = datetime(2026, 5, 20) + self.assertFalse(is_upcoming_rookie(2025, now=now)) + + def test_not_rookie_after_season_starts(self) -> None: + now = datetime(2026, 9, 10) + self.assertFalse(is_upcoming_rookie(2026, now=now)) + + +class CollegeStatsParsingTests(unittest.TestCase): + def test_parses_categories_into_season_row(self) -> None: + categories = [ + { + "name": "general", + "stats": [{"name": "gamesPlayed", "displayValue": "12"}], + }, + { + "name": "passing", + "stats": [ + {"name": "completions", "displayValue": "250"}, + {"name": "netPassingYards", "displayValue": "3,000"}, + {"name": "passingTouchdowns", "displayValue": "25"}, + {"name": "interceptions", "displayValue": "5"}, + ], + }, + ] + row = _college_season_from_categories(2024, "MIA", categories) + self.assertEqual(row.season, 2024) + self.assertEqual(row.team, "MIA") + self.assertEqual(row.games_played, "12") + self.assertEqual(row.passing_yards, "3,000") + + +if __name__ == "__main__": + unittest.main() From ebc4de84b3742c13bd54907c7a04afcb0d54645f Mon Sep 17 00:00:00 2001 From: Joseph Lee <175lee.joseph@gmail.com> Date: Wed, 20 May 2026 19:10:48 -0400 Subject: [PATCH 9/9] Github Action: Install R in test env --- .github/workflows/tests.yml | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index fd1a415..c45f823 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -13,6 +13,11 @@ jobs: - name: Checkout uses: actions/checkout@v4 + - name: Install R + run: | + sudo apt-get update + sudo apt-get install -y r-base r-base-dev + - name: Set up Python uses: actions/setup-python@v5 with: