-
Notifications
You must be signed in to change notification settings - Fork 0
feat: implement discovery domain with record search, feature catalog, and feature search #79
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
2da7928
958d004
95c4a41
cdd4b42
3f1a07e
d6f3665
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,130 @@ | ||
| """Discovery API routes — search and filter records and features.""" | ||
|
|
||
| from typing import Any | ||
|
|
||
| from dishka.integrations.fastapi import DishkaRoute, FromDishka | ||
| from fastapi import APIRouter | ||
| from pydantic import BaseModel, Field | ||
|
|
||
| from osa.domain.discovery.model.value import ( | ||
| Filter, | ||
| SortOrder, | ||
| ) | ||
| from osa.domain.discovery.query.get_feature_catalog import ( | ||
| GetFeatureCatalog, | ||
| GetFeatureCatalogHandler, | ||
| GetFeatureCatalogResult, | ||
| ) | ||
| from osa.domain.discovery.query.search_features import ( | ||
| SearchFeatures, | ||
| SearchFeaturesHandler, | ||
| SearchFeaturesResult, | ||
| ) | ||
| from osa.domain.discovery.query.search_records import ( | ||
| SearchRecords, | ||
| SearchRecordsHandler, | ||
| SearchRecordsResult, | ||
| ) | ||
|
|
||
# Router for the discovery endpoints: routes registered on it are served under
# the "/discovery" prefix, tagged "discovery", and use DishkaRoute as their
# route class (the handlers in this module take FromDishka[...] parameters).
router = APIRouter(prefix="/discovery", tags=["discovery"], route_class=DishkaRoute)
|
|
||
|
|
||
| # ── Request / Response models ── | ||
|
|
||
|
|
||
class RecordSearchRequest(BaseModel):
    """Request body for the record search endpoint.

    Every field has a default, so an empty JSON object is a valid request.
    """

    # Filter conditions to apply; no filters when omitted.
    filters: list[Filter] = []
    # Optional query string; None when omitted.
    q: str | None = None
    # Name of the field to sort by.
    sort: str = "published_at"
    # Sort direction; descending unless the client says otherwise.
    order: SortOrder = SortOrder.DESC
    # Pagination cursor; None when omitted.
    cursor: str | None = None
    # Page size, validated to lie between 1 and 100 inclusive.
    limit: int = Field(20, ge=1, le=100)
|
|
||
|
|
||
class RecordSearchResponse(BaseModel):
    """Response body for the record search endpoint."""

    # One dictionary per returned record.
    results: list[dict[str, Any]]
    # Cursor string for this page; required in the payload but may be None.
    cursor: str | None
    # Boolean flag reported alongside the cursor.
    has_more: bool
|
|
||
|
|
||
class FeatureCatalogResponse(BaseModel):
    """Response body for the feature catalog endpoint."""

    # One dictionary per feature table.
    tables: list[dict[str, Any]]
|
|
||
|
|
||
class FeatureSearchRequest(BaseModel):
    """Request body for searching rows of a single feature table.

    Every field has a default, so an empty JSON object is a valid request.
    """

    # Filter conditions to apply; no filters when omitted.
    filters: list[Filter] = []
    # Optional record SRN, given as a plain string; None when omitted.
    record_srn: str | None = None
    # Name of the column to sort by.
    sort: str = "id"
    # Sort direction; descending unless the client says otherwise.
    order: SortOrder = SortOrder.DESC
    # Pagination cursor; None when omitted.
    cursor: str | None = None
    # Page size, validated to lie between 1 and 100 inclusive.
    limit: int = Field(50, ge=1, le=100)
|
|
||
|
|
||
class FeatureSearchResponse(BaseModel):
    """Response body for the feature row search endpoint."""

    # One dictionary per returned feature row.
    rows: list[dict[str, Any]]
    # Cursor string for this page; required in the payload but may be None.
    cursor: str | None
    # Boolean flag reported alongside the cursor.
    has_more: bool
|
|
||
|
|
||
| # ── Routes ── | ||
|
|
||
|
|
||
@router.post("/records")
async def search_records(
    body: RecordSearchRequest,
    handler: FromDishka[SearchRecordsHandler],
) -> RecordSearchResponse:
    """Search and filter published records.

    Translates the request body into a SearchRecords query, runs it through
    the injected handler, and copies the handler's result into the response
    model.
    """
    query = SearchRecords(
        filters=body.filters,
        q=body.q,
        sort=body.sort,
        order=body.order,
        cursor=body.cursor,
        limit=body.limit,
    )
    outcome: SearchRecordsResult = await handler.run(query)
    return RecordSearchResponse(
        results=outcome.results,
        cursor=outcome.cursor,
        has_more=outcome.has_more,
    )
|
|
||
|
|
||
@router.get("/features")
async def get_feature_catalog(
    handler: FromDishka[GetFeatureCatalogHandler],
) -> FeatureCatalogResponse:
    """List available feature tables with column schemas and record counts.

    Runs a parameterless GetFeatureCatalog query through the injected handler
    and wraps the returned tables in the response model.
    """
    query = GetFeatureCatalog()
    catalog: GetFeatureCatalogResult = await handler.run(query)
    return FeatureCatalogResponse(tables=catalog.tables)
|
|
||
|
|
||
@router.post("/features/{hook_name}")
async def search_features(
    hook_name: str,
    body: FeatureSearchRequest,
    handler: FromDishka[SearchFeaturesHandler],
) -> FeatureSearchResponse:
    """Query and filter rows in a specific feature table.

    The table is selected by the ``hook_name`` path parameter; the remaining
    search options come from the request body. The query is run through the
    injected handler and its result is copied into the response model.
    """
    query = SearchFeatures(
        hook_name=hook_name,
        filters=body.filters,
        record_srn=body.record_srn,
        sort=body.sort,
        order=body.order,
        cursor=body.cursor,
        limit=body.limit,
    )
    outcome: SearchFeaturesResult = await handler.run(query)
    return FeatureSearchResponse(
        rows=outcome.rows,
        cursor=outcome.cursor,
        has_more=outcome.has_more,
    )
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1 @@ | ||
| """Discovery domain — read-only search and filter API for records and features.""" |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,100 @@ | ||
| """Discovery domain value objects — filters, cursors, result types.""" | ||
|
|
||
| from __future__ import annotations | ||
|
|
||
| import base64 | ||
| import json | ||
| from datetime import datetime | ||
| from enum import StrEnum | ||
| from typing import Any | ||
|
|
||
| from pydantic import BaseModel | ||
|
|
||
| from osa.domain.semantics.model.value import FieldType | ||
| from osa.domain.shared.model.srn import RecordSRN | ||
|
|
||
|
|
||
| class FilterOperator(StrEnum): | ||
| EQ = "eq" | ||
| CONTAINS = "contains" | ||
| GTE = "gte" | ||
| LTE = "lte" | ||
|
|
||
|
|
||
| class SortOrder(StrEnum): | ||
| ASC = "asc" | ||
| DESC = "desc" | ||
|
|
||
|
|
||
class Filter(BaseModel):
    """A single filter condition: a field name, an operator, and a value."""

    # Name of the field the condition applies to.
    field: str
    # Comparison operator to use.
    operator: FilterOperator
    # Operand for the comparison; a string, number, or boolean.
    value: str | float | bool
|
|
||
|
|
||
# Filter operators accepted for each field type.
VALID_OPERATORS: dict[FieldType, set[FilterOperator]] = {
    # Text-like types: EQ or CONTAINS.
    FieldType.TEXT: {FilterOperator.EQ, FilterOperator.CONTAINS},
    FieldType.URL: {FilterOperator.EQ, FilterOperator.CONTAINS},
    # Number and date types: EQ plus the GTE / LTE bounds.
    FieldType.NUMBER: {FilterOperator.EQ, FilterOperator.GTE, FilterOperator.LTE},
    FieldType.DATE: {FilterOperator.EQ, FilterOperator.GTE, FilterOperator.LTE},
    # Boolean and term types: EQ only.
    FieldType.BOOLEAN: {FilterOperator.EQ},
    FieldType.TERM: {FilterOperator.EQ},
}
|
|
||
|
|
||
def encode_cursor(sort_value: Any, id_value: Any) -> str:
    """Serialise a cursor as URL-safe base64 over a JSON object.

    Args:
        sort_value: Stored under the ``"s"`` key; must be JSON-serialisable.
        id_value: Stored under the ``"id"`` key; must be JSON-serialisable.

    Returns:
        The base64 text (URL-safe alphabet) of the JSON document.

    Raises:
        TypeError: If either value cannot be serialised by ``json.dumps``.
    """
    document = json.dumps({"s": sort_value, "id": id_value})
    token = base64.urlsafe_b64encode(document.encode())
    return token.decode()
|
|
||
|
|
||
def decode_cursor(cursor: str) -> dict[str, Any]:
    """Parse a URL-safe base64 JSON cursor back into a dictionary.

    Args:
        cursor: The encoded cursor text.

    Returns:
        The decoded JSON object, which contains both ``"s"`` and ``"id"``.

    Raises:
        ValueError: If the text cannot be base64-decoded or parsed as JSON,
            or if the decoded value is not an object holding both keys.
    """
    try:
        data = json.loads(base64.urlsafe_b64decode(cursor.encode()))
    except Exception as exc:
        # Any decoding failure is reported uniformly as a ValueError.
        raise ValueError(f"Malformed cursor: {exc}") from exc

    # Short-circuits on the isinstance check, so non-dict payloads are never
    # probed with ``in``.
    has_required_keys = isinstance(data, dict) and "s" in data and "id" in data
    if not has_required_keys:
        raise ValueError("Cursor must contain 's' and 'id' keys")
    return data
|
Comment on lines
+51
to
+60
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.

**Cursor type validation missing — non-integer […]**

The […] `{"s": 1, "id": "not_a_number"}`

When this cursor is decoded and used in the keyset predicate, SQLAlchemy attempts to bind the string value to a […]

The same issue applies to the […]

**Fix:** Add type validation after decoding:

```python
def decode_cursor(cursor: str) -> dict[str, Any]:
    """Decode a base64 JSON cursor. Raises ValueError on malformed input."""
    try:
        raw = base64.urlsafe_b64decode(cursor.encode())
        data = json.loads(raw)
    except Exception as exc:
        raise ValueError(f"Malformed cursor: {exc}") from exc
    if not isinstance(data, dict) or "s" not in data or "id" not in data:
        raise ValueError("Cursor must contain 's' and 'id' keys")
    # Validate types
    if not isinstance(data.get("id"), int):
        raise ValueError("Cursor 'id' must be an integer")
    return data
```

In the feature search handler, also validate that […] |
||
|
|
||
|
|
||
class RecordSummary(BaseModel):
    """One record as returned by a record search."""

    # SRN identifying the record.
    srn: RecordSRN
    # Publication timestamp of the record.
    published_at: datetime
    # Metadata mapping for the record; keys are strings, values are untyped.
    metadata: dict[str, Any]
|
|
||
|
|
||
class RecordSearchResult(BaseModel):
    """A page of record search results plus its pagination state."""

    # Record summaries on this page.
    results: list[RecordSummary]
    # Cursor string for this page; required but may be None.
    cursor: str | None
    # Boolean flag reported alongside the cursor.
    has_more: bool
|
|
||
|
|
||
class ColumnInfo(BaseModel):
    """Schema information for a single column."""

    # Column name.
    name: str
    # Column type, carried as a plain string.
    type: str
    # Whether the column is marked as required.
    required: bool
|
|
||
|
|
||
class FeatureCatalogEntry(BaseModel):
    """Catalog entry for one feature table: hook name, columns, record count."""

    # Hook name the table is looked up by.
    hook_name: str
    # Column schemas of the table.
    columns: list[ColumnInfo]
    # Record count reported for the table.
    record_count: int
|
|
||
|
|
||
class FeatureCatalog(BaseModel):
    """The full feature catalog: a list of feature table entries."""

    # One entry per feature table.
    tables: list[FeatureCatalogEntry]
|
|
||
|
|
||
class FeatureRow(BaseModel):
    """One row returned by a feature search."""

    # Integer identifier of the row.
    row_id: int
    # SRN of the record associated with the row.
    record_srn: RecordSRN
    # Row payload; keys are strings, values are untyped.
    data: dict[str, Any]
|
|
||
|
|
||
class FeatureSearchResult(BaseModel):
    """A page of feature rows plus its pagination state."""

    # Feature rows on this page.
    rows: list[FeatureRow]
    # Cursor string for this page; required but may be None.
    cursor: str | None
    # Boolean flag reported alongside the cursor.
    has_more: bool
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,17 @@ | ||
| """FieldDefinitionReader port — cross-domain read port for schema field lookups.""" | ||
|
|
||
| from __future__ import annotations | ||
|
|
||
| from typing import TYPE_CHECKING, Protocol | ||
|
|
||
| if TYPE_CHECKING: | ||
| from osa.domain.semantics.model.value import FieldType | ||
|
|
||
|
|
||
class FieldDefinitionReader(Protocol):
    """Structural interface for looking up schema field types."""

    async def get_all_field_types(self) -> dict[str, FieldType]:
        """Return one field_name -> FieldType mapping spanning all schemas.

        Raises:
            ValidationError: If the same field name is declared with
                conflicting types in different schemas.
        """
        ...
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,56 @@ | ||
| """DiscoveryReadStore port — read-only access to records and feature data.""" | ||
|
|
||
| from __future__ import annotations | ||
|
|
||
| from typing import TYPE_CHECKING, Protocol | ||
|
|
||
| if TYPE_CHECKING: | ||
| from osa.domain.discovery.model.value import ( | ||
| FeatureCatalogEntry, | ||
| FeatureRow, | ||
| Filter, | ||
| RecordSummary, | ||
| SortOrder, | ||
| ) | ||
| from osa.domain.semantics.model.value import FieldType | ||
| from osa.domain.shared.model.srn import RecordSRN | ||
|
|
||
|
|
||
class DiscoveryReadStore(Protocol):
    """Structural interface for read-only access to records and feature data."""

    async def search_records(
        self,
        filters: list[Filter],
        text_fields: list[str],
        q: str | None,
        sort: str,
        order: SortOrder,
        cursor: dict | None,
        limit: int,
        field_types: dict[str, FieldType] | None = None,
    ) -> list[RecordSummary]:
        """Search and filter published records.

        ``cursor`` is an already-decoded cursor dictionary (or None), and
        ``field_types`` is an optional field name -> FieldType mapping.
        """
        ...

    async def get_feature_catalog(self) -> list[FeatureCatalogEntry]:
        """List all feature tables with column schemas and record counts."""
        ...

    async def get_feature_table_schema(self, hook_name: str) -> FeatureCatalogEntry | None:
        """Look up a single feature table's schema by hook name.

        Returns:
            The matching catalog entry, or None if the hook_name is not found.
        """
        ...

    async def search_features(
        self,
        hook_name: str,
        filters: list[Filter],
        record_srn: RecordSRN | None,
        sort: str,
        order: SortOrder,
        cursor: dict | None,
        limit: int,
    ) -> list[FeatureRow]:
        """Search and filter feature rows.

        ``cursor`` is an already-decoded cursor dictionary (or None).
        """
        ...
Uh oh!
There was an error while loading. Please reload this page.