diff --git a/README.md b/README.md index 2df2642..aaeb5a6 100644 --- a/README.md +++ b/README.md @@ -1,9 +1,9 @@ flowrun ======= -`flowrun` is a lightweight async DAG orchestrator for small to medium ETL pipelines. -It is designed for low operational overhead and low complexity workloads (for example -Polars pipelines, API ingest + transform + load jobs, or periodic data sync tasks). +`flowrun` is a compact DAG execution engine for small to medium ETL jobs. +It is designed for local, code-first workflows such as API ingest -> Polars transform +-> validation/quarantine -> sink, plus sequential micro-batch data sync jobs. Core ideas: @@ -11,12 +11,48 @@ Core ideas: - Keep runtime dependency-free: stdlib-based implementation. - Keep behavior explicit: retries, timeouts, skip semantics, run reports. +## Positioning + +`flowrun` is a good fit when your workflow lives inside one Python process, +the DAG is declared in code, and you want a small execution layer around ETL +functions rather than a full workflow platform. + +It is not positioned as a durable scheduler, distributed orchestrator, or +policy-heavy control plane. If you need persistent workers, cron scheduling, +cross-process recovery guarantees, dynamic scaling, or extensive execution +policies, you should use a heavier system. + +## Strengths + +- Clear fit for API -> transform -> validate -> load pipelines. +- Works well with Polars-style business logic and thin orchestration wrappers. +- Small API surface and low operational overhead. +- Explicit execution model: retries, DAG validation, run reports, hooks, resume, and subgraph runs. +- Good match for sequential micro-batch jobs where context such as `batch_id`, `source`, or `window` matters. + +## Tradeoffs + +- In-process execution only; no distributed workers or durable queueing. +- No built-in scheduling layer; run triggering belongs outside the framework. 
+- Recovery is scoped to stored run state in the current process, not a full external orchestration backend. +- Retry behavior is intentionally simple; API-specific backoff and resilience policies belong in user code. +- Best for low-to-moderate workflow complexity, not platform-scale orchestration. + ## Installation ```bash pip install flowrun-dag ``` +Optional example dependencies: + +```bash +pip install "flowrun-dag[examples]" +``` + +This installs the libraries used by the example workflows, including Polars and +Pandera's Polars integration. + > The import name remains `flowrun`: > ```python > import flowrun @@ -28,6 +64,7 @@ For development: git clone https://github.com/Mg30/flowrun.git cd flowrun uv sync --group dev +uv sync --group dev --extra examples uv run pytest -q ``` @@ -47,19 +84,21 @@ class Deps: source_path: str -@engine.task(name="extract", dag="daily_etl") +# Task names default to the Python function name. Use name="daily_extract" +# only when you need an explicit alias or a stable external task name. +@engine.task(dag="daily_etl") def extract(context: RunContext[Deps]) -> list[dict]: # In real jobs, read from file/API/db return [{"id": 1, "amount": 10}, {"id": 2, "amount": 15}] -@engine.task(name="transform", dag="daily_etl", deps=[extract]) +@engine.task(dag="daily_etl", deps=[extract]) def transform(extract: list[dict]) -> dict[str, int]: total = sum(row["amount"] for row in extract) return {"rows": len(extract), "total": total} -@engine.task(name="load", dag="daily_etl", deps=[transform]) +@engine.task(dag="daily_etl", deps=[transform]) def load(transform: dict[str, int]) -> str: # Persist results return f"loaded rows={transform['rows']} total={transform['total']}" @@ -79,7 +118,7 @@ asyncio.run(main()) ## Concepts -- Task: Python callable registered with `@engine.task(...)` or `@task(...)`. +- Task: Python callable registered with `@engine.task(...)`. - DAG: namespace (`dag="name"`) plus dependency edges between tasks. 
- Run: one execution instance of a DAG (`run_id`). - State store: tracks run/task status, timing, errors, and results. @@ -112,7 +151,7 @@ Parameters: - `max_parallel`: max concurrent scheduled tasks, must be `>= 1`. - `logger`: optional `logging.Logger` used across components. - `hooks`: optional list of `RunHook` handlers. -- `state_store`: optional custom state store (`StateStoreProtocol`). +- `state_store`: optional custom in-memory state store instance. Returns: configured `Engine`. @@ -121,6 +160,7 @@ Returns: configured `Engine`. Run control: - `await engine.run_once(dag_name, context=None) -> str` +- `await engine.run_many(dag_name, contexts) -> list[str]` - `await engine.resume(run_id, from_tasks=None, context=None) -> str` - `await engine.run_subgraph(dag_name, targets, context=None) -> str` @@ -145,7 +185,7 @@ Resource lifecycle: Preferred style (bound to engine registry): ```python -@engine.task(name="task_a", dag="etl", deps=[...], timeout_s=30.0, retries=1, retain_result=True) +@engine.task(name="task_a", dag="etl", deps=[...], retries=1) def task_a(...): ... ``` @@ -154,10 +194,17 @@ Arguments: - `name`: optional, defaults to function name. - `dag`: DAG namespace for selection via `run_once(dag_name)`. -- `deps`: list of task names or decorated task callables. -- `timeout_s`: per-attempt timeout (`None` disables timeout). +- `deps`: optional list of task names or decorated task callables. When omitted, + required parameter names that match already-registered task names are inferred. +- `timeout_s`: per-attempt timeout for async tasks (`None` disables timeout). - `retries`: retry count after failures. -- `retain_result`: if `False`, clear result from state when safe. + +For synchronous tasks, configure timeouts in the client you call inside the task. +`flowrun` intentionally rejects framework-level timeouts for sync callables because +thread-based timeouts cannot safely stop side effects. 
+ +Use explicit `deps=` when you need `upstream`, dependency aliases, non-identifier +task names, or forward references to tasks registered later. Avoid repeating `dag=...` with a DAG-scoped container: @@ -168,7 +215,7 @@ etl = engine.dag("daily_etl") def extract() -> list[int]: return [1, 2, 3] -@etl.task(name="sum_values", deps=[extract]) +@etl.task(name="sum_values") def sum_values(extract: list[int]) -> int: return sum(extract) @@ -178,40 +225,28 @@ run_id = await etl.run_once() Available on the scope: - `etl.task(...)` -- `etl.task_template(...)` - `await etl.run_once(context=None)` +- `await etl.run_many(contexts)` - `await etl.run_subgraph(targets, context=None)` - `etl.validate()`, `etl.display()`, `etl.list_tasks()` -Also available as global decorator: - -```python -from flowrun import task, TaskRegistry - -registry = TaskRegistry() -token = registry.activate() - -@task -def my_task(): - return 1 - -TaskRegistry.deactivate(token) -``` - -Notes: - -- `@task(...)`, `@task`, and `@task("name", ...)` are supported. -- If using global `@task`, provide `registry=...` or activate one. - ### Dependency result injection -Named dependency injection: +Named dependency injection with inferred dependencies: ```python @engine.task(name="extract", dag="etl") def extract() -> list[int]: return [1, 2, 3] +@engine.task(name="sum_values", dag="etl") +def sum_values(extract: list[int]) -> int: + return sum(extract) +``` + +Explicit dependencies remain available when you prefer the edges in the decorator: + +```python @engine.task(name="sum_values", dag="etl", deps=[extract]) def sum_values(extract: list[int]) -> int: return sum(extract) @@ -241,19 +276,44 @@ def pull(context: RunContext[Deps]) -> dict: return {"base": context.api_base} ``` -### Task templates +`RunContext` can also carry an ambient deadline or cancellation event when a task +needs to pass timeouts into a client or stop cooperatively at a safe checkpoint. 
+ +```python +import threading + +cancel_event = threading.Event() +ctx = RunContext(Deps(api_base="https://api.example.com")) +ctx = ctx.with_deadline_s(30.0).with_cancel_event(cancel_event) + +@engine.task(name="pull", dag="etl") +def pull(context: RunContext[Deps]) -> dict: + context.raise_if_cancelled() + timeout_s = context.time_remaining_s() or 10.0 + return call_api(context.api_base, timeout=timeout_s) +``` + +This is optional. Most tasks do not need these helpers. + +### Run metadata -Register parameterized task variants. +Attach lightweight reporting metadata to a run through `RunContext`. ```python -def fetch_table(*, table: str) -> str: - return f"select * from {table}" +ctx = RunContext(Deps(api_base="https://api.example.com")).with_metadata( + batch_id=42, + source="users_api", + window="2026-04-01", +) -tpl = engine.task_template(fetch_table, dag="etl") -tpl.bind("fetch_users", table="users") -tpl.bind("fetch_orders", table="orders") +run_id = await engine.run_once("etl", context=ctx) +report = engine.get_run_report(run_id) +print(report["metadata"]) # {"batch_id": 42, "source": "users_api", ...} ``` +This is useful for ETL-style identifiers such as batch ids, partitions, sources, +or time windows without adding more orchestration parameters. + ## Execution Semantics ### DAG scoping and unknown DAG behavior @@ -269,6 +329,7 @@ Build-time validation catches: - Missing dependencies. - Cross-DAG dependencies. - Cycles. +- Required task parameters that do not match an inferred or explicit dependency, `RunContext`, or `upstream`. Missing dependency errors include close-match suggestions when available. @@ -279,15 +340,9 @@ Missing dependency errors include close-match suggestions when available. ### Timeouts -- Applied per attempt. +- Applied per attempt for async tasks. - Async tasks use `asyncio.wait_for`. -- Sync tasks run in executor and are awaited with timeout. - -### Result retention - -- `retain_result=True` (default): keep result in state. 
-- `retain_result=False`: clear result once all downstream consumers are launched/done. -- Useful to reduce memory when passing larger intermediate objects. +- Sync tasks do not support framework-level timeouts; use client/library timeouts inside the task. ## Run Report Format @@ -297,6 +352,7 @@ Missing dependency errors include close-match suggestions when available. { "run_id": "...", "dag_name": "...", + "metadata": {"batch_id": 42, "source": "users_api"}, "created_at": 0.0, "finished_at": 0.0, "status": "SUCCESS", # SUCCESS | FAILED | RUNNING @@ -353,27 +409,94 @@ In-memory (default): - `StateStore` / `InMemoryStateStore` - Fast, process-local, ephemeral. -SQLite persistent backend: - -- `SqliteStateStore(db_path, serializer=..., cache_ttl_s=None, recover=False)` -- Persists run and task state. -- Optional crash recovery marks orphaned `RUNNING` tasks as failed. - -Serialization options for persisted results: - -- `JsonSerializer` (default for SQLite backend) -- `PickleSerializer` -- custom `ResultSerializer` implementation - ## Practical ETL Patterns ### Small Polars pipeline pattern - Keep each task focused (`extract`, `transform`, `load`). -- Set `retain_result=False` on large intermediate transforms. - Use `retries` on flaky IO tasks, not pure transforms. - Keep `max_parallel` modest for predictable resource use. +### Sequential micro-batch pattern + +When chunks are fetched outside the DAG, run the full DAG once per chunk in a +sequential loop. This is a micro-batch pattern, not end-to-end streaming. 
+ +```python +import asyncio +from dataclasses import dataclass + +from flowrun import RunContext, build_default_engine + +engine = build_default_engine(max_workers=4, max_parallel=2) +etl = engine.dag("users") + + +@dataclass(frozen=True) +class ChunkDeps: + chunk_index: int + rows: list[dict[str, int]] + + +@etl.task() +def input_chunk(context: RunContext[ChunkDeps]) -> list[dict[str, int]]: + return context.rows + + +@etl.task(deps=[input_chunk]) +def transform_chunk(input_chunk: list[dict[str, int]]) -> dict[str, int]: + return { + "rows": len(input_chunk), + "total": sum(row["value"] for row in input_chunk), + } + + +@etl.task(deps=[transform_chunk]) +def load_chunk(transform_chunk: dict[str, int]) -> str: + return f"loaded rows={transform_chunk['rows']} total={transform_chunk['total']}" + + +async def chunk_contexts(): + for chunk_index in range(3): + rows = [{"value": chunk_index * 10 + offset} for offset in range(3)] + yield RunContext(ChunkDeps(chunk_index=chunk_index, rows=rows)).with_metadata( + batch_id=chunk_index, + source="users_api", + ) + + +async def main() -> None: + async with engine: + etl.validate() + run_ids = await etl.run_many(chunk_contexts()) + print(run_ids) + + +asyncio.run(main()) +``` + +This keeps chunk fetching outside the DAG while preserving plain task boundaries +inside the graph. + +### Layered Polars workflow pattern + +For teams that need clearer structure, keep undecorated business functions in one +layer and add a thin Flowrun orchestration layer on top. 
+ +Recommended split: + +- async extraction functions that fetch raw endpoint payloads +- pure Polars functions that normalise each dataset independently +- Pandera validation functions that split validated and rejected rows +- quarantine sink functions for rejected rows +- a pure join/aggregation function that combines the processed frames +- a plain sink function +- small task wrappers that call those functions and express orchestration only + +See `examples/polars_workflow_demo.py` for a concrete example with two fake API +endpoints fetched in parallel, separate Polars processing branches, schema +validation with quarantine, a join step, and a fake sink. + ### Re-run from a checkpoint task ```python @@ -415,12 +538,10 @@ Top-level exports in `flowrun`: - `Engine`, `build_default_engine` -- `RunContext` -- `task`, `task_template`, `TaskSpec`, `TaskRegistry` +- `RunContext`, `RunCancelledError` +- `TaskSpec`, `TaskRegistry` - `SchedulerConfig` - `RunHook`, `fn_hook` -- `StateStore`, `InMemoryStateStore`, `StateStoreProtocol` -- `SqliteStateStore` -- `JsonSerializer`, `PickleSerializer`, `ResultSerializer` +- `StateStore`, `InMemoryStateStore` ## License diff --git a/examples/demo.py b/examples/demo.py index 88ca364..1e55631 100644 --- a/examples/demo.py +++ b/examples/demo.py @@ -64,11 +64,11 @@ class ProcessDataResult(TypedDict): version: int +# Task names default to the function name. Use name="fetch_api_v2" only if the +# orchestration name should stay stable while the Python function is renamed. 
@engine.task( - name="fetch_api", dag="demo_dag", deps=[], - timeout_s=5.0, retries=1, ) def fetch_api(ctx: RunContext[DemoDeps]): @@ -82,7 +82,6 @@ def fetch_api(ctx: RunContext[DemoDeps]): @engine.task( - name="fetch_metadata", dag="demo_dag", deps=[], timeout_s=5.0, @@ -95,11 +94,8 @@ async def fetch_metadata(): @engine.task( - name="process_data", dag="demo_dag", deps=[fetch_api, fetch_metadata], - timeout_s=10.0, - retain_result=False, # free intermediate memory after consumers finish ) def process_data(fetch_api: FetchApiResult, fetch_metadata: FetchMetadataResult) -> ProcessDataResult: """Pretend to transform upstream results into a final data artifact.""" @@ -115,10 +111,8 @@ def process_data(fetch_api: FetchApiResult, fetch_metadata: FetchMetadataResult) @engine.task( - name="store_results", dag="demo_dag", deps=[process_data], - timeout_s=10.0, ) def store_results(process_data: ProcessDataResult) -> str: """Fake persistence step that stores the processed result.""" diff --git a/examples/micro_batch_demo.py b/examples/micro_batch_demo.py new file mode 100644 index 0000000..607f326 --- /dev/null +++ b/examples/micro_batch_demo.py @@ -0,0 +1,86 @@ +import asyncio +import logging +from dataclasses import dataclass +from typing import TypedDict + +from flowrun import RunContext, build_default_engine + +logging.basicConfig(level=logging.INFO, format="%(asctime)s %(name)-22s %(levelname)-7s %(message)s") +logger = logging.getLogger("micro_batch_demo") + +engine = build_default_engine(max_workers=4, max_parallel=2, logger=logger) +etl = engine.dag("micro_batch_demo") + + +@dataclass(frozen=True) +class ChunkDeps: + """Per-chunk dependencies passed into one DAG run.""" + + chunk_index: int + rows: list[dict[str, int]] + + +class InputChunkResult(TypedDict): + """Structured payload produced by the input adapter task.""" + + chunk_index: int + rows: list[dict[str, int]] + + +# Task names default to the Python function name. 
Use name="chunk_input_v2" +# only when you need an alias or a stable orchestration name during refactors. +@etl.task() +def input_chunk(context: RunContext[ChunkDeps]) -> InputChunkResult: + """Expose the current chunk from the run context as normal task input.""" + return { + "chunk_index": context.chunk_index, + "rows": context.rows, + } + + +@etl.task(deps=[input_chunk]) +def transform_chunk(input_chunk: InputChunkResult) -> dict[str, int]: + """Summarise the current chunk without knowing about orchestration.""" + rows = input_chunk["rows"] + chunk_index = input_chunk["chunk_index"] + return { + "chunk_index": chunk_index, + "rows": len(rows), + "total": sum(row["value"] for row in rows), + } + + +@etl.task(deps=[transform_chunk]) +def load_chunk(transform_chunk: dict[str, int]) -> str: + """Return a fake sink result for the processed chunk.""" + return ( + f"chunk={transform_chunk['chunk_index']} loaded rows={transform_chunk['rows']} total={transform_chunk['total']}" + ) + + +async def fetch_chunk_contexts(): + """Yield fake chunk contexts from an async source outside the DAG.""" + for chunk_index in range(3): + await asyncio.sleep(0.1) + rows = [{"value": chunk_index * 10 + offset} for offset in range(3)] + yield RunContext(ChunkDeps(chunk_index=chunk_index, rows=rows)).with_metadata( + batch_id=chunk_index, + source="demo_chunks", + ) + + +async def main() -> None: + """Run the same DAG once per chunk from the async source.""" + async with engine: + etl.validate() + run_ids = await etl.run_many(fetch_chunk_contexts()) + + print("=== MICRO-BATCH RUNS ===") + for run_id in run_ids: + report = engine.get_run_report(run_id) + batch_id = report["metadata"]["batch_id"] + print(f"batch={batch_id} {run_id}: {report['tasks']['load_chunk']['result']}") + + +if __name__ == "__main__": + asyncio.run(main()) diff --git a/examples/polars_workflow_demo.py b/examples/polars_workflow_demo.py new file mode 100644 index 0000000..1240d35 --- /dev/null +++ 
b/examples/polars_workflow_demo.py @@ -0,0 +1,357 @@ +"""Layered Flowrun example with Polars validation, quarantine, and orchestration.""" + +import asyncio +import logging +import os +from dataclasses import dataclass +from datetime import date +from typing import TypedDict, cast + +import pandera.polars as pa +import polars as pl +from pandera.errors import SchemaErrors +from pandera.typing.polars import DataFrame, Series + +from flowrun import RunContext, build_default_engine, fn_hook + +logging.basicConfig(level=logging.INFO, format="%(asctime)s %(name)-22s %(levelname)-7s %(message)s") +logger = logging.getLogger("polars_workflow_demo") + +# Small example hook: surface quarantine outputs and mark the end of the run. +polars_hook = fn_hook( + on_task_success=lambda e: print(f"[hook] {e.task_name}: {e.result}") + if e.task_name in {"quarantine_users", "quarantine_orders"} + else None, + on_dag_end=lambda e: print(f"[hook] DAG {e.dag_name} finished run_id={e.run_id}"), +) + +engine = build_default_engine(max_workers=4, max_parallel=3, logger=logger, hooks=[polars_hook]) +etl = engine.dag("polars_workflow_demo") + + +@dataclass(frozen=True) +class ApiDeps: + """Runtime dependencies shared by the fake API tasks.""" + + api_base: str + auth_token: str + + +class UserRecord(TypedDict): + """Raw payload row returned by the fake users endpoint.""" + + user_id: int | None + country: str | None + segment: str | None + is_active: bool | None + + +class OrderRecord(TypedDict): + """Raw payload row returned by the fake orders endpoint.""" + + order_id: int | None + user_id: int | None + amount: float | None + status: str | None + + +@dataclass(frozen=True) +class ValidationSplit[SchemaModel: pa.DataFrameModel]: + """Validated and rejected rows produced by one schema check.""" + + validated: DataFrame[SchemaModel] + rejected: pl.DataFrame + + +class UsersSchema(pa.DataFrameModel): + """Validated users schema used after the users quality gate.""" + + user_id: Series[int] = 
pa.Field(gt=0) + country: Series[str] = pa.Field(isin=["FR", "DE", "ES"]) + segment: Series[str] = pa.Field(isin=["enterprise", "mid_market", "startup"]) + is_active: Series[bool] + + +class OrdersSchema(pa.DataFrameModel): + """Validated orders schema used after the orders quality gate.""" + + order_id: Series[int] = pa.Field(gt=0) + user_id: Series[int] = pa.Field(gt=0) + amount: Series[float] = pa.Field(gt=0) + status: Series[str] = pa.Field(isin=["paid", "cancelled"]) + + +class ActiveUsersSchema(pa.DataFrameModel): + """Projected active-users schema used by the summary join.""" + + user_id: Series[int] = pa.Field(gt=0) + country: Series[str] = pa.Field(isin=["FR", "DE", "ES"]) + segment: Series[str] = pa.Field(isin=["enterprise", "mid_market", "startup"]) + + +class PaidOrdersSchema(pa.DataFrameModel): + """Projected paid-orders schema used by the summary join.""" + + order_id: Series[int] = pa.Field(gt=0) + user_id: Series[int] = pa.Field(gt=0) + amount: Series[float] = pa.Field(gt=0) + + +class SalesSummarySchema(pa.DataFrameModel): + """Aggregated sales summary schema produced by the workflow.""" + + country: Series[str] = pa.Field(isin=["FR", "DE", "ES"]) + segment: Series[str] = pa.Field(isin=["enterprise", "mid_market", "startup"]) + orders: Series[int] = pa.Field(ge=0) + revenue: Series[float] = pa.Field(ge=0) + customers: Series[int] = pa.Field(ge=0) + + +async def fetch_users_records(*, api_base: str, auth_token: str) -> list[UserRecord]: + """Return fake users data after simulating remote latency.""" + del auth_token + await asyncio.sleep(0.2) + logger.info("Fetched users from %s/users", api_base) + return [ + {"user_id": 1, "country": "fr", "segment": "enterprise", "is_active": True}, + {"user_id": 2, "country": "de", "segment": "mid_market", "is_active": True}, + {"user_id": 3, "country": "zz", "segment": "startup", "is_active": True}, + {"user_id": 4, "country": "fr", "segment": None, "is_active": True}, + {"user_id": 4, "country": "es", 
"segment": "enterprise", "is_active": True}, + ] + + +async def fetch_orders_records(*, api_base: str, auth_token: str) -> list[OrderRecord]: + """Return fake orders data after simulating remote latency.""" + del auth_token + await asyncio.sleep(0.2) + logger.info("Fetched orders from %s/orders", api_base) + return [ + {"order_id": 101, "user_id": 1, "amount": 120.0, "status": "PAID"}, + {"order_id": 102, "user_id": 1, "amount": 80.0, "status": "PAID"}, + {"order_id": 103, "user_id": 2, "amount": 50.0, "status": "CANCELLED"}, + {"order_id": 104, "user_id": 2, "amount": -10.0, "status": "PAID"}, + {"order_id": 105, "user_id": None, "amount": 25.0, "status": "PAID"}, + {"order_id": 104, "user_id": 4, "amount": 210.0, "status": "PAID"}, + ] + + +def normalize_users(records: list[UserRecord]) -> pl.DataFrame: + """Convert raw users payloads into a clean users DataFrame.""" + return ( + pl.DataFrame(records) + .with_columns( + pl.col("country").str.to_uppercase(), + pl.col("segment").str.to_lowercase(), + ) + .select(["user_id", "country", "segment", "is_active"]) + .sort("user_id") + ) + + +def normalize_orders(records: list[OrderRecord]) -> pl.DataFrame: + """Convert raw orders payloads into a clean orders DataFrame.""" + return ( + pl.DataFrame(records) + .with_columns(pl.col("status").str.to_lowercase()) + .select(["order_id", "user_id", "amount", "status"]) + .sort("order_id") + ) + + +def validate_frame[SchemaModel: pa.DataFrameModel]( + df: pl.DataFrame, + schema: type[SchemaModel], + *, + business_object: str, +) -> ValidationSplit[SchemaModel]: + """Validate a frame and route bad rows into a quarantine-ready DataFrame.""" + indexed_df = df.with_row_index("index") + metadata_exprs = [ + pl.lit("fake_api").alias("source_system"), + pl.lit(business_object).alias("business_object"), + pl.lit(date.today()).alias("ingestion_date"), + ] + + try: + validated_df = schema.validate(df, lazy=True).with_columns(*metadata_exprs) + rejected_df = df.head(0).with_columns( + 
pl.lit(None, dtype=pl.List(pl.String)).alias("rejection_reason"), + *metadata_exprs, + ) + return ValidationSplit(validated=cast(DataFrame[SchemaModel], validated_df), rejected=rejected_df) + except SchemaErrors as exc: + rejection_map = ( + exc.failure_cases.with_columns( + pl.col("index").cast(pl.UInt32), + pl.concat_str( + [ + pl.col("column").fill_null("__dataframe__"), + pl.col("check").fill_null("schema_error"), + ], + separator=": ", + ).alias("rejection_reason"), + ) + .group_by("index") + .agg(pl.col("rejection_reason")) + ) + + validated_df = ( + indexed_df.join(rejection_map.select("index"), on="index", how="anti") + .drop("index") + .with_columns(*metadata_exprs) + ) + rejected_df = ( + indexed_df.join(rejection_map, on="index", how="inner").drop("index").with_columns(*metadata_exprs) + ) + return ValidationSplit(validated=cast(DataFrame[SchemaModel], validated_df), rejected=rejected_df) + + +def select_active_users(users_df: DataFrame[UsersSchema]) -> DataFrame[ActiveUsersSchema]: + """Keep only active validated users for downstream joins.""" + result = users_df.filter(pl.col("is_active")).select(["user_id", "country", "segment"]) + return cast(DataFrame[ActiveUsersSchema], result) + + +def select_paid_orders(orders_df: DataFrame[OrdersSchema]) -> DataFrame[PaidOrdersSchema]: + """Keep only paid validated orders for downstream joins.""" + result = orders_df.filter(pl.col("status") == "paid").select(["order_id", "user_id", "amount"]) + return cast(DataFrame[PaidOrdersSchema], result) + + +def build_sales_summary( + users_df: DataFrame[ActiveUsersSchema], + orders_df: DataFrame[PaidOrdersSchema], +) -> DataFrame[SalesSummarySchema]: + """Join processed users and orders, then aggregate a small sales summary.""" + result = ( + orders_df.join(users_df, on="user_id", how="inner") + .group_by(["country", "segment"]) + .agg( + pl.len().alias("orders"), + pl.col("amount").sum().alias("revenue"), + pl.col("user_id").n_unique().alias("customers"), + ) + 
.sort(["country", "segment"]) + ) + return cast(DataFrame[SalesSummarySchema], result) + + +def fake_sink(summary_df: DataFrame[SalesSummarySchema]) -> str: + """Pretend to persist the summary and return a sink location.""" + total_revenue = float(summary_df["revenue"].sum()) if summary_df.height else 0.0 + return f"sink://sales-summary?rows={summary_df.height}&revenue={total_revenue:.2f}" + + +def fake_quarantine_sink(rejected_df: pl.DataFrame, *, quarantine_name: str) -> str: + """Pretend to persist rejected rows into a quarantine location.""" + return f"quarantine://{quarantine_name}?rows={rejected_df.height}" + + +# Task names default to the function name. Pass name="users_extract_v2" only +# when you want a task name that differs from the Python symbol. +@etl.task(timeout_s=3.0) +async def fetch_users_raw(context: RunContext[ApiDeps]) -> list[UserRecord]: + """Thin orchestration wrapper for the users endpoint.""" + return await fetch_users_records(api_base=context.api_base, auth_token=context.auth_token) + + +@etl.task(timeout_s=3.0) +async def fetch_orders_raw(context: RunContext[ApiDeps]) -> list[OrderRecord]: + """Thin orchestration wrapper for the orders endpoint.""" + return await fetch_orders_records(api_base=context.api_base, auth_token=context.auth_token) + + +# Users branch: infer dependency edges from required parameter names. 
+@etl.task() +def prepare_users(fetch_users_raw: list[UserRecord]) -> pl.DataFrame: + """Thin orchestration wrapper around the users normalisation function.""" + return normalize_users(fetch_users_raw) + + +@etl.task() +def validate_users(prepare_users: pl.DataFrame) -> ValidationSplit[UsersSchema]: + """Thin orchestration wrapper around the users schema validation function.""" + return validate_frame(prepare_users, UsersSchema, business_object="users") + + +@etl.task() +def active_users(validate_users: ValidationSplit[UsersSchema]) -> DataFrame[ActiveUsersSchema]: + """Thin orchestration wrapper around the active-users filter.""" + return select_active_users(validate_users.validated) + + +@etl.task() +def quarantine_users(validate_users: ValidationSplit[UsersSchema]) -> str: + """Thin orchestration wrapper around the users quarantine sink.""" + return fake_quarantine_sink(validate_users.rejected, quarantine_name="users") + + +# Orders branch: keep explicit deps when you want graph edges declared in the decorator. 
+@etl.task(deps=[fetch_orders_raw]) +def prepare_orders(fetch_orders_raw: list[OrderRecord]) -> pl.DataFrame: + """Thin orchestration wrapper around the orders normalisation function.""" + return normalize_orders(fetch_orders_raw) + + +@etl.task(deps=[prepare_orders]) +def validate_orders(prepare_orders: pl.DataFrame) -> ValidationSplit[OrdersSchema]: + """Thin orchestration wrapper around the orders schema validation function.""" + return validate_frame(prepare_orders, OrdersSchema, business_object="orders") + + +@etl.task(deps=[validate_orders]) +def paid_orders(validate_orders: ValidationSplit[OrdersSchema]) -> DataFrame[PaidOrdersSchema]: + """Thin orchestration wrapper around the paid-orders filter.""" + return select_paid_orders(validate_orders.validated) + + +@etl.task(deps=[validate_orders]) +def quarantine_orders(validate_orders: ValidationSplit[OrdersSchema]) -> str: + """Thin orchestration wrapper around the orders quarantine sink.""" + return fake_quarantine_sink(validate_orders.rejected, quarantine_name="orders") + + +@etl.task(deps=[active_users, paid_orders]) +def build_summary( + active_users: DataFrame[ActiveUsersSchema], + paid_orders: DataFrame[PaidOrdersSchema], +) -> DataFrame[SalesSummarySchema]: + """Thin orchestration wrapper around the join and aggregation logic.""" + return build_sales_summary(active_users, paid_orders) + + +@etl.task(deps=[build_summary]) +def sink_summary(build_summary: DataFrame[SalesSummarySchema]) -> str: + """Thin orchestration wrapper around the sink function.""" + return fake_sink(build_summary) + + +async def main() -> None: + """Run the layered Polars workflow once and print the final outputs.""" + demo_token = os.environ.get("FLOWRUN_DEMO_TOKEN", "demo-token") + context = RunContext(ApiDeps(api_base="https://fake.api.local", auth_token=demo_token)).with_metadata( + source="fake_api", + pipeline="sales_summary", + batch_date=str(date.today()), + ) + + async with engine: + etl.validate() + print(etl.display()) + 
run_id = await etl.run_once(context=context) + report = engine.get_run_report(run_id) + + print("\n=== FINAL SUMMARY ===") + print(report["tasks"]["build_summary"]["result"]) + print("\n=== RUN METADATA ===") + print(report["metadata"]) + print("\n=== SINK RESULT ===") + print(report["tasks"]["sink_summary"]["result"]) + print("\n=== QUARANTINE RESULTS ===") + print(report["tasks"]["quarantine_users"]["result"]) + print(report["tasks"]["quarantine_orders"]["result"]) + + +if __name__ == "__main__": + asyncio.run(main()) diff --git a/pyproject.toml b/pyproject.toml index 0f5cbbc..ce96083 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "flowrun-dag" -version = "0.1.0" +version = "1.0.0" description = "A lightweight async DAG orchestrator for small to medium ETL pipelines" readme = "README.md" license = { text = "MIT" } @@ -22,6 +22,10 @@ classifiers = [ Homepage = "https://github.com/Mg30/flowrun" Repository = "https://github.com/Mg30/flowrun" Issues = "https://github.com/Mg30/flowrun/issues" + +[project.optional-dependencies] +examples = ["polars>=1.39.3", "pandera[polars]>=0.30.1"] + [dependency-groups] dev = ["pytest>=8.4.2", "pytest-asyncio>=1.2.0", "ruff>=0.14.2"] diff --git a/src/flowrun/__init__.py b/src/flowrun/__init__.py index 068ea54..c143eb2 100644 --- a/src/flowrun/__init__.py +++ b/src/flowrun/__init__.py @@ -1,31 +1,23 @@ """flowrun — a lightweight async DAG runner.""" -from flowrun.context import RunContext +from flowrun.context import RunCancelledError, RunContext from flowrun.engine import DagScope, Engine, build_default_engine from flowrun.hooks import RunHook, fn_hook from flowrun.scheduler import SchedulerConfig -from flowrun.serialization import JsonSerializer, PickleSerializer, ResultSerializer -from flowrun.sqlite_store import SqliteStateStore -from flowrun.state import InMemoryStateStore, StateStore, StateStoreProtocol -from flowrun.task import TaskRegistry, TaskSpec, task, task_template +from flowrun.state 
import InMemoryStateStore, StateStore +from flowrun.task import TaskRegistry, TaskSpec __all__ = [ "Engine", "DagScope", "InMemoryStateStore", - "JsonSerializer", - "PickleSerializer", - "ResultSerializer", + "RunCancelledError", "RunContext", "RunHook", "SchedulerConfig", - "SqliteStateStore", "StateStore", - "StateStoreProtocol", "TaskRegistry", "TaskSpec", "build_default_engine", "fn_hook", - "task", - "task_template", ] diff --git a/src/flowrun/context.py b/src/flowrun/context.py index 0d5bf61..25f9a62 100644 --- a/src/flowrun/context.py +++ b/src/flowrun/context.py @@ -1,8 +1,14 @@ +import threading +import time from collections.abc import Mapping -from dataclasses import dataclass +from dataclasses import dataclass, field, replace from typing import Any +class RunCancelledError(RuntimeError): + """Raised when a task checks a cancelled RunContext.""" + + @dataclass(frozen=True, slots=True) class RunContext[DepsT]: """Container that exposes user-defined dependencies to task functions. @@ -13,6 +19,66 @@ class RunContext[DepsT]: """ deps: DepsT + metadata: Mapping[str, Any] = field(default_factory=dict) + _deadline_monotonic_s: float | None = field(default=None, repr=False, compare=False) + _cancel_event: threading.Event | None = field(default=None, repr=False, compare=False) + + def with_metadata(self, metadata: Mapping[str, Any] | None = None, /, **entries: Any) -> "RunContext[DepsT]": + """Return a copy with merged run metadata for reporting and tracing. + + This is useful for ETL-style identifiers such as ``batch_id``, + ``window_start``, ``source``, or ``partition``. + """ + merged = dict(self.metadata) + if metadata is not None: + merged.update(metadata) + if entries: + merged.update(entries) + if merged == dict(self.metadata): + return self + return replace(self, metadata=merged) + + def with_deadline_s(self, timeout_s: float | None) -> "RunContext[DepsT]": + """Return a copy with a deadline derived from now + *timeout_s*. 
+ + When a deadline already exists, the earliest deadline wins. + """ + if timeout_s is None: + return self + deadline_monotonic_s = time.monotonic() + max(timeout_s, 0.0) + return self._with_deadline_monotonic(deadline_monotonic_s) + + def with_cancel_event(self, cancel_event: threading.Event | None) -> "RunContext[DepsT]": + """Return a copy that checks *cancel_event* for cooperative cancellation.""" + if cancel_event is self._cancel_event: + return self + return replace(self, _cancel_event=cancel_event) + + def has_deadline(self) -> bool: + """Return True when this context carries an active deadline.""" + return self._deadline_monotonic_s is not None + + def time_remaining_s(self) -> float | None: + """Return seconds remaining until the deadline, or None when unset.""" + if self._deadline_monotonic_s is None: + return None + return max(0.0, self._deadline_monotonic_s - time.monotonic()) + + def deadline_exceeded(self) -> bool: + """Return True when the context deadline has elapsed.""" + remaining_s = self.time_remaining_s() + return remaining_s is not None and remaining_s <= 0.0 + + def cancelled(self) -> bool: + """Return True when the deadline elapsed or the cancel event was set.""" + return self.deadline_exceeded() or (self._cancel_event.is_set() if self._cancel_event is not None else False) + + def raise_if_cancelled(self) -> None: + """Raise when the context deadline elapsed or cancellation was requested.""" + if self.deadline_exceeded(): + raise TimeoutError("RunContext deadline exceeded.") + if self._cancel_event is not None and self._cancel_event.is_set(): + raise RunCancelledError("RunContext was cancelled.") def __getattr__(self, item: str) -> Any: """Delegate attribute access to the wrapped dependency bundle. 
@@ -28,3 +94,16 @@ def __getattr__(self, item: str) -> Any: if isinstance(self.deps, Mapping) and item in self.deps: return self.deps[item] raise AttributeError(item) from exc + + def _with_deadline_monotonic(self, deadline_monotonic_s: float | None) -> "RunContext[DepsT]": + """Return a copy using the earlier of the current and provided deadlines.""" + merged_deadline = deadline_monotonic_s + if self._deadline_monotonic_s is not None and merged_deadline is not None: + merged_deadline = min(self._deadline_monotonic_s, merged_deadline) + elif self._deadline_monotonic_s is not None: + merged_deadline = self._deadline_monotonic_s + + if merged_deadline == self._deadline_monotonic_s: + return self + + return replace(self, _deadline_monotonic_s=merged_deadline) diff --git a/src/flowrun/engine.py b/src/flowrun/engine.py index 0dcf7a5..c1fb1de 100644 --- a/src/flowrun/engine.py +++ b/src/flowrun/engine.py @@ -1,7 +1,7 @@ import concurrent.futures import logging import uuid -from collections.abc import Callable, Sequence +from collections.abc import AsyncIterable, AsyncIterator, Callable, Iterable, Sequence from types import TracebackType from typing import Any, Self @@ -10,14 +10,26 @@ from flowrun.executor import TaskExecutor from flowrun.hooks import RunHook from flowrun.scheduler import Scheduler, SchedulerConfig -from flowrun.state import RunRecord, StateStore, StateStoreProtocol +from flowrun.state import RunRecord, StateStore from flowrun.task import TaskRegistry from flowrun.task import task as task_decorator -from flowrun.task import task_template as task_template_factory _default_logger = logging.getLogger("flowrun") +async def _iterate_contexts( + contexts: AsyncIterable[RunContext[Any] | None] | Iterable[RunContext[Any] | None], +) -> AsyncIterator[RunContext[Any] | None]: + """Yield contexts from either a sync or async source.""" + if isinstance(contexts, AsyncIterable): + async for context in contexts: + yield context + return + + for context in contexts: + 
yield context + + class DagScope: """DAG-scoped API facade to avoid repeating ``dag=...`` on every task.""" @@ -35,9 +47,8 @@ def task( self, name: str | None = None, deps: Sequence[str | Callable[..., Any]] | None = None, - timeout_s: float | None = 30.0, + timeout_s: float | None = None, retries: int = 0, - retain_result: bool = True, ): """Return ``@task`` decorator bound to this scope's DAG.""" return self._engine.task( @@ -45,22 +56,6 @@ def task( deps=deps, timeout_s=timeout_s, retries=retries, - retain_result=retain_result, - dag=self._dag_name, - ) - - def task_template( - self, - func: Callable[..., Any], - *, - deps: Sequence[str | Callable[..., Any]] | None = None, - timeout_s: float | None = 30.0, - ): - """Create a task template bound to this scope's DAG.""" - return self._engine.task_template( - func, - deps=deps, - timeout_s=timeout_s, dag=self._dag_name, ) @@ -68,6 +63,13 @@ async def run_once(self, context: RunContext[Any] | None = None) -> str: """Run this DAG once.""" return await self._engine.run_once(self._dag_name, context=context) + async def run_many( + self, + contexts: AsyncIterable[RunContext[Any] | None] | Iterable[RunContext[Any] | None], + ) -> list[str]: + """Run this DAG once per context, sequentially, returning run ids in order.""" + return await self._engine.run_many(self._dag_name, contexts) + async def run_subgraph( self, targets: list[str], @@ -107,7 +109,7 @@ class Engine: def __init__( self, registry: TaskRegistry, - state_store: StateStoreProtocol, + state_store: StateStore, scheduler: Scheduler, *, _owns_pool: concurrent.futures.Executor | None = None, @@ -182,6 +184,23 @@ async def run_once(self, dag_name: str, context: RunContext[Any] | None = None) self._log.info("Finished DAG %r run_id=%s", dag_name, run_id) return run_id + async def run_many( + self, + dag_name: str, + contexts: AsyncIterable[RunContext[Any] | None] | Iterable[RunContext[Any] | None], + ) -> list[str]: + """Run a DAG once per context, sequentially, 
returning run ids in source order. + + This is intended for sequential micro-batch loops where the batch source + lives outside the DAG and each batch should execute the full graph. + """ + self.validate(dag_name) + + run_ids: list[str] = [] + async for context in _iterate_contexts(contexts): + run_ids.append(await self.run_once(dag_name, context=context)) + return run_ids + async def resume( self, run_id: str, @@ -226,6 +245,7 @@ async def resume( dag_name=prev.dag_name, task_names=dag.nodes, reset_tasks=reset_tasks, + metadata=context.metadata if context is not None and context.metadata else prev.metadata, ) self._log.info( @@ -354,6 +374,7 @@ def get_run_report(self, run_id: str) -> dict[str, Any]: return { "run_id": rec.run_id, "dag_name": rec.dag_name, + "metadata": dict(rec.metadata), "created_at": rec.created_at, "finished_at": rec.finished_at, "status": run_status, @@ -398,9 +419,8 @@ def task( self, name: str | None = None, deps: Sequence[str | Callable[..., Any]] | None = None, - timeout_s: float | None = 30.0, + timeout_s: float | None = None, retries: int = 0, - retain_result: bool = True, dag: str | None = None, ): """Return a ``@task`` decorator bound to this engine's registry.""" @@ -409,24 +429,6 @@ def task( deps=deps, timeout_s=timeout_s, retries=retries, - retain_result=retain_result, - dag=dag, - registry=self._registry, - ) - - def task_template( - self, - func: Callable[..., Any], - *, - deps: Sequence[str | Callable[..., Any]] | None = None, - timeout_s: float | None = 30.0, - dag: str | None = None, - ): - """Create a task template bound to this engine's registry.""" - return task_template_factory( - func, - deps=deps, - timeout_s=timeout_s, dag=dag, registry=self._registry, ) @@ -439,7 +441,7 @@ def build_default_engine( max_parallel: int = 4, logger: logging.Logger | None = None, hooks: list[RunHook] | None = None, - state_store: StateStoreProtocol | None = None, + state_store: StateStore | None = None, ) -> Engine: """Convenience 
constructor that wires up all components into a ready-to-use Engine. @@ -460,13 +462,13 @@ def build_default_engine( ``logging.getLogger('flowrun')``. hooks : list[RunHook] | None Optional lifecycle hooks forwarded to the scheduler. - state_store : StateStoreProtocol | None - Optional persistent state store (e.g. ``SqliteStateStore``). When - omitted an in-memory ``StateStore`` is used. + state_store : StateStore | None + Optional custom state store. When omitted an in-memory ``StateStore`` + is used. """ log = logger or _default_logger registry = TaskRegistry() - actual_store: StateStoreProtocol = state_store if state_store is not None else StateStore() + actual_store = state_store if state_store is not None else StateStore() owns_pool: concurrent.futures.Executor | None = None if executor is None: pool = concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) diff --git a/src/flowrun/executor.py b/src/flowrun/executor.py index 67426d0..9cadd8e 100644 --- a/src/flowrun/executor.py +++ b/src/flowrun/executor.py @@ -113,9 +113,16 @@ async def run_once( if spec.requires_context and context is None: raise RuntimeError(f"Task '{spec.name}' requires a RunContext but none was provided.") + if timeout_s is not None and not spec.is_async(): + raise RuntimeError( + f"Task '{spec.name}' is synchronous and cannot use timeout_s. " + "Thread-based timeouts cannot safely stop blocking work. " + "Use an async task or configure timeouts in the client you call inside the task." + ) + args: tuple[Any, ...] 
= () if context is not None and spec.accepts_context: - args = (context,) + args = (context.with_deadline_s(timeout_s),) kwargs: dict[str, Any] = {} if spec.accepts_upstream: diff --git a/src/flowrun/scheduler.py b/src/flowrun/scheduler.py index 93dd578..f9da63b 100644 --- a/src/flowrun/scheduler.py +++ b/src/flowrun/scheduler.py @@ -18,7 +18,7 @@ TaskStartEvent, TaskSuccessEvent, ) -from flowrun.state import StateStoreProtocol +from flowrun.state import StateStore from flowrun.task import TaskRegistry _default_logger = logging.getLogger("flowrun.scheduler") @@ -53,7 +53,7 @@ class Scheduler: def __init__( self, registry: TaskRegistry, - state_store: StateStoreProtocol, + state_store: StateStore, executor: TaskExecutor, config: SchedulerConfig, *, @@ -90,7 +90,8 @@ async def run_dag_once(self, dag: DAG, context: RunContext[Any] | None = None, * """ if run_id is None: run_id = str(uuid.uuid4()) - self._state.create_run(run_id, dag.name, dag.nodes) + metadata = context.metadata if context is not None else None + self._state.create_run(run_id, dag.name, dag.nodes, metadata=metadata) self._hooks.emit("on_dag_start", DagStartEvent(run_id=run_id, dag_name=dag.name)) inflight: dict[str, asyncio.Task] = {} @@ -191,9 +192,6 @@ async def run_dag_once(self, dag: DAG, context: RunContext[Any] | None = None, * # 5. mark SKIPPED tasks whose parents permanently FAILED self._mark_skipped_blocked(run_id, dag) - # 6. 
release memory for non-retained results whose consumers are all launched/done - self._release_non_retained(run_id, dag) - self._state.finalize_run_if_done(run_id) # loop again until nothing left @@ -266,22 +264,3 @@ def _mark_skipped_blocked(self, run_id: str, dag: DAG) -> None: reason="UPSTREAM_FAILED", ), ) - - def _release_non_retained(self, run_id: str, dag: DAG) -> None: - """Clear results for tasks with ``retain_result=False`` once all dependents are done/launched.""" - runrec = self._state.get_run(run_id) - # Build children map (task -> list of tasks that depend on it) - children: dict[str, list[str]] = {node: [] for node in dag.nodes} - for child, parents in dag.edges.items(): - for parent in parents: - children.setdefault(parent, []).append(child) - - for tname, trec in runrec.tasks.items(): - if trec.status != "SUCCESS" or trec.result is None: - continue - spec = self._registry.get(tname) - if spec.retain_result: - continue - # Release if all children are no longer PENDING (launched, done, or skipped) - if all(runrec.tasks[c].status != "PENDING" for c in children.get(tname, [])): - self._state.clear_result(run_id, tname) diff --git a/src/flowrun/serialization.py b/src/flowrun/serialization.py deleted file mode 100644 index 212fb4d..0000000 --- a/src/flowrun/serialization.py +++ /dev/null @@ -1,55 +0,0 @@ -"""Pluggable result serializers for persistent state backends.""" - -from __future__ import annotations - -import json -import pickle -from typing import Any, Protocol - - -class ResultSerializer(Protocol): - """Structural interface for serializing task results to/from bytes.""" - - def serialize(self, obj: Any) -> bytes: - """Serialize *obj* to bytes.""" - ... - - def deserialize(self, data: bytes) -> Any: - """Deserialize *data* back to a Python object.""" - ... - - -class JsonSerializer: - """Serialize results as JSON (UTF-8). - - Works out of the box for dicts, lists, strings, numbers, bools, and None. 
- Raises ``TypeError`` for non-JSON-serializable types. - """ - - def serialize(self, obj: Any) -> bytes: - """Encode *obj* as a compact JSON byte string.""" - return json.dumps(obj, separators=(",", ":")).encode() - - def deserialize(self, data: bytes) -> Any: - """Decode *data* from JSON bytes back to a Python object.""" - return json.loads(data) - - -class PickleSerializer: - """Serialize results using Python's ``pickle`` protocol. - - Handles arbitrary Python objects but the data is opaque and - version-sensitive. - """ - - def __init__(self, protocol: int = pickle.HIGHEST_PROTOCOL) -> None: - """Initialise with the given pickle *protocol* version.""" - self._protocol = protocol - - def serialize(self, obj: Any) -> bytes: - """Pickle *obj* using the configured protocol.""" - return pickle.dumps(obj, protocol=self._protocol) - - def deserialize(self, data: bytes) -> Any: - """Unpickle *data* and return the original Python object.""" - return pickle.loads(data) # noqa: S301 diff --git a/src/flowrun/sqlite_store.py b/src/flowrun/sqlite_store.py deleted file mode 100644 index a8019a4..0000000 --- a/src/flowrun/sqlite_store.py +++ /dev/null @@ -1,374 +0,0 @@ -"""SQLite-backed state store with write-through cache, optional TTL, and opt-in crash recovery.""" - -from __future__ import annotations - -import logging -import sqlite3 -import time -from pathlib import Path -from typing import Any - -from flowrun.serialization import JsonSerializer, ResultSerializer -from flowrun.state import RunRecord, TaskRunRecord, TaskStatus - -_log = logging.getLogger("flowrun.sqlite_store") - -_SCHEMA = """\ -CREATE TABLE IF NOT EXISTS runs ( - run_id TEXT PRIMARY KEY, - dag_name TEXT NOT NULL, - created_at REAL NOT NULL, - finished_at REAL -); - -CREATE TABLE IF NOT EXISTS task_runs ( - run_id TEXT NOT NULL, - task_name TEXT NOT NULL, - attempt INTEGER NOT NULL DEFAULT 0, - status TEXT NOT NULL DEFAULT 'PENDING', - started_at REAL, - finished_at REAL, - error TEXT, - result BLOB, - 
PRIMARY KEY (run_id, task_name), - FOREIGN KEY (run_id) REFERENCES runs(run_id) -); -""" - - -class SqliteStateStore: - """Persistent state store backed by SQLite. - - Reads go through an in-memory write-through cache so the scheduler's - tight loop stays fast. Every mutation is written to both the cache - *and* SQLite in the same call. - - Parameters - ---------- - db_path : str | Path - Path to the SQLite database file, or ``":memory:"`` for a - transient in-memory database useful in tests. - serializer : ResultSerializer | None - Strategy for serializing / deserializing task results stored in - the ``result BLOB`` column. Defaults to :class:`JsonSerializer`. - cache_ttl_s : float | None - When set, cached ``RunRecord`` objects that have not been accessed - for longer than *cache_ttl_s* seconds may be evicted from memory. - They remain in SQLite and will be reloaded on demand. ``None`` - (the default) keeps entries cached indefinitely. - recover : bool - When ``True`` the constructor scans for orphaned ``RUNNING`` tasks - (left behind by a crashed process) and resets them to ``FAILED`` - with ``error='PROCESS_CRASH'``. Defaults to ``False``. - """ - - # Re-use the same valid-transition map as the in-memory store. 
- _VALID_TRANSITIONS: dict[TaskStatus, set[TaskStatus]] = { - "PENDING": {"RUNNING", "SKIPPED"}, - "RUNNING": {"SUCCESS", "FAILED"}, - "SUCCESS": set(), - "FAILED": {"PENDING"}, - "SKIPPED": set(), - } - - def __init__( - self, - db_path: str | Path, - *, - serializer: ResultSerializer | None = None, - cache_ttl_s: float | None = None, - recover: bool = False, - ) -> None: - """Open (or create) the SQLite database at *db_path* and prepare the schema.""" - self._serializer: ResultSerializer = serializer or JsonSerializer() - self._cache_ttl_s = cache_ttl_s - - self._conn = sqlite3.connect( - str(db_path), - check_same_thread=False, - ) - self._conn.execute("PRAGMA journal_mode=WAL") - self._conn.execute("PRAGMA busy_timeout=5000") - self._conn.executescript(_SCHEMA) - - # Cache: run_id -> (RunRecord, last_access_time) - self._cache: dict[str, tuple[RunRecord, float]] = {} - - if recover: - self._recover_crashed() - - def close(self) -> None: - """Close the underlying SQLite connection.""" - self._conn.close() - - # ------------------------------------------------------------------ - # Public API (satisfies StateStoreProtocol) - # ------------------------------------------------------------------ - - def create_run(self, run_id: str, dag_name: str, task_names: list[str]) -> RunRecord: - """Create and persist a brand-new run record.""" - rec = RunRecord( - run_id=run_id, - dag_name=dag_name, - tasks={t: TaskRunRecord(task_name=t) for t in task_names}, - ) - self._insert_run(rec) - self._cache_put(rec) - return rec - - def create_resumed_run( - self, - run_id: str, - prev_run_id: str, - dag_name: str, - task_names: list[str], - reset_tasks: set[str] | None = None, - ) -> RunRecord: - """Create a new run that inherits successful task results from *prev_run_id*.""" - prev = self.get_run(prev_run_id) - reset_tasks = reset_tasks or set() - rec = RunRecord(run_id=run_id, dag_name=dag_name, tasks={}) - for t in task_names: - prev_task = prev.tasks.get(t) - if prev_task and 
prev_task.status == "SUCCESS" and t not in reset_tasks: - rec.tasks[t] = TaskRunRecord( - task_name=t, - attempt=prev_task.attempt, - status="SUCCESS", - started_at=prev_task.started_at, - finished_at=prev_task.finished_at, - result=prev_task.result, - ) - else: - rec.tasks[t] = TaskRunRecord(task_name=t) - self._insert_run(rec) - self._cache_put(rec) - return rec - - def get_run(self, run_id: str) -> RunRecord: - """Return the RunRecord for *run_id*, loading from SQLite on a cache miss.""" - hit = self._cache.get(run_id) - if hit is not None: - rec, _ = hit - self._cache[run_id] = (rec, time.time()) - return rec - # Cache miss — load from SQLite - rec = self._load_run(run_id) - self._cache_put(rec) - return rec - - def mark_running(self, run_id: str, task_name: str) -> None: - """Transition *task_name* to RUNNING and record its start time.""" - tr = self._transition(run_id, task_name, "RUNNING") - tr.started_at = time.time() - tr.attempt += 1 - self._persist_task(run_id, tr) - - def mark_success(self, run_id: str, task_name: str, result: object) -> None: - """Transition *task_name* to SUCCESS and store its *result*.""" - tr = self._transition(run_id, task_name, "SUCCESS") - tr.result = result - tr.finished_at = time.time() - self._persist_task(run_id, tr) - - def mark_failed(self, run_id: str, task_name: str, err: str) -> None: - """Transition *task_name* to FAILED and record the error message.""" - tr = self._transition(run_id, task_name, "FAILED") - tr.error = err - tr.finished_at = time.time() - self._persist_task(run_id, tr) - - def mark_skipped(self, run_id: str, task_name: str, reason: str) -> None: - """Transition *task_name* to SKIPPED and record the skip *reason*.""" - tr = self._transition(run_id, task_name, "SKIPPED") - tr.error = reason - tr.finished_at = time.time() - self._persist_task(run_id, tr) - - def mark_retry(self, run_id: str, task_name: str) -> None: - """Reset *task_name* back to PENDING so it can be retried.""" - tr = 
self._transition(run_id, task_name, "PENDING") - tr.error = None - tr.finished_at = None - self._persist_task(run_id, tr) - - def clear_result(self, run_id: str, task_name: str) -> None: - """Remove the stored result for *task_name* in *run_id*.""" - rec = self.get_run(run_id) - rec.tasks[task_name].result = None - self._conn.execute( - "UPDATE task_runs SET result = NULL WHERE run_id = ? AND task_name = ?", - (run_id, task_name), - ) - self._conn.commit() - - def finalize_run_if_done(self, run_id: str) -> None: - """Set the run's finished_at timestamp once all tasks have reached a terminal state.""" - rec = self.get_run(run_id) - if rec.finished_at is not None: - return - if all(t.status in ("SUCCESS", "FAILED", "SKIPPED") for t in rec.tasks.values()): - rec.finished_at = time.time() - self._conn.execute( - "UPDATE runs SET finished_at = ? WHERE run_id = ?", - (rec.finished_at, run_id), - ) - self._conn.commit() - - # ------------------------------------------------------------------ - # Extra helpers (not in Protocol — SQLite-specific) - # ------------------------------------------------------------------ - - def list_runs(self, dag_name: str | None = None) -> list[RunRecord]: - """Return stored runs, optionally filtered by DAG name. - - Loads runs from SQLite (does **not** require them to be cached). - """ - if dag_name is not None: - rows = self._conn.execute( - "SELECT run_id FROM runs WHERE dag_name = ? 
ORDER BY created_at", - (dag_name,), - ).fetchall() - else: - rows = self._conn.execute( - "SELECT run_id FROM runs ORDER BY created_at", - ).fetchall() - return [self.get_run(row[0]) for row in rows] - - # ------------------------------------------------------------------ - # Internal — state machine - # ------------------------------------------------------------------ - - def _transition(self, run_id: str, task_name: str, target: TaskStatus) -> TaskRunRecord: - rec = self.get_run(run_id) - tr = rec.tasks[task_name] - allowed = self._VALID_TRANSITIONS.get(tr.status, set()) - if target not in allowed: - raise RuntimeError(f"Invalid state transition for task {task_name!r}: {tr.status} -> {target}") - tr.status = target - return tr - - # ------------------------------------------------------------------ - # Internal — SQLite persistence - # ------------------------------------------------------------------ - - def _insert_run(self, rec: RunRecord) -> None: - """INSERT a full RunRecord (run + all task rows) inside a transaction.""" - cur = self._conn.cursor() - try: - cur.execute("BEGIN") - cur.execute( - "INSERT INTO runs (run_id, dag_name, created_at, finished_at) VALUES (?, ?, ?, ?)", - (rec.run_id, rec.dag_name, rec.created_at, rec.finished_at), - ) - for tr in rec.tasks.values(): - result_blob = self._serialize_result(tr.result) - cur.execute( - "INSERT INTO task_runs (run_id, task_name, attempt, status, started_at, finished_at, error, result) " - "VALUES (?, ?, ?, ?, ?, ?, ?, ?)", - ( - rec.run_id, - tr.task_name, - tr.attempt, - tr.status, - tr.started_at, - tr.finished_at, - tr.error, - result_blob, - ), - ) - cur.execute("COMMIT") - except Exception: - cur.execute("ROLLBACK") - raise - - def _persist_task(self, run_id: str, tr: TaskRunRecord) -> None: - """Persist a single task record mutation to SQLite.""" - result_blob = self._serialize_result(tr.result) - self._conn.execute( - "UPDATE task_runs SET attempt=?, status=?, started_at=?, finished_at=?, 
error=?, result=? " - "WHERE run_id=? AND task_name=?", - (tr.attempt, tr.status, tr.started_at, tr.finished_at, tr.error, result_blob, run_id, tr.task_name), - ) - self._conn.commit() - - def _load_run(self, run_id: str) -> RunRecord: - """Load a RunRecord from SQLite. Raises ``KeyError`` if missing.""" - row = self._conn.execute( - "SELECT run_id, dag_name, created_at, finished_at FROM runs WHERE run_id = ?", - (run_id,), - ).fetchone() - if row is None: - raise KeyError(f"Run {run_id!r} not found") - rec = RunRecord(run_id=row[0], dag_name=row[1], created_at=row[2], finished_at=row[3], tasks={}) - task_rows = self._conn.execute( - "SELECT task_name, attempt, status, started_at, finished_at, error, result FROM task_runs WHERE run_id = ?", - (run_id,), - ).fetchall() - for tr_row in task_rows: - result = self._deserialize_result(tr_row[6]) - rec.tasks[tr_row[0]] = TaskRunRecord( - task_name=tr_row[0], - attempt=tr_row[1], - status=tr_row[2], - started_at=tr_row[3], - finished_at=tr_row[4], - error=tr_row[5], - result=result, - ) - return rec - - # ------------------------------------------------------------------ - # Internal — serialization helpers - # ------------------------------------------------------------------ - - def _serialize_result(self, obj: Any) -> bytes | None: - if obj is None: - return None - return self._serializer.serialize(obj) - - def _deserialize_result(self, data: bytes | None) -> Any: - if data is None: - return None - return self._serializer.deserialize(data) - - # ------------------------------------------------------------------ - # Internal — cache management - # ------------------------------------------------------------------ - - def _cache_put(self, rec: RunRecord) -> None: - self._cache[rec.run_id] = (rec, time.time()) - self._maybe_evict() - - def _maybe_evict(self) -> None: - """Evict cache entries older than *cache_ttl_s* (if configured).""" - if self._cache_ttl_s is None: - return - now = time.time() - stale = [rid for rid, 
(_, ts) in self._cache.items() if (now - ts) > self._cache_ttl_s] - for rid in stale: - del self._cache[rid] - - # ------------------------------------------------------------------ - # Internal — crash recovery - # ------------------------------------------------------------------ - - def _recover_crashed(self) -> None: - """Reset orphaned RUNNING tasks to FAILED with error 'PROCESS_CRASH'.""" - now = time.time() - cur = self._conn.execute("SELECT run_id, task_name FROM task_runs WHERE status = 'RUNNING'") - rows = cur.fetchall() - if not rows: - return - _log.info("Recovering %d orphaned RUNNING task(s)", len(rows)) - for run_id, task_name in rows: - self._conn.execute( - "UPDATE task_runs SET status='FAILED', error='PROCESS_CRASH', finished_at=? " - "WHERE run_id=? AND task_name=?", - (now, run_id, task_name), - ) - self._conn.commit() - # Invalidate any cached entries for affected runs so they are reloaded. - affected_run_ids = {r[0] for r in rows} - for rid in affected_run_ids: - self._cache.pop(rid, None) diff --git a/src/flowrun/state.py b/src/flowrun/state.py index bfbbd7c..68671a7 100644 --- a/src/flowrun/state.py +++ b/src/flowrun/state.py @@ -1,8 +1,9 @@ from __future__ import annotations import time +from collections.abc import Mapping from dataclasses import dataclass, field -from typing import Literal, Protocol, runtime_checkable +from typing import Any, Literal TaskStatus = Literal[ "PENDING", @@ -52,63 +53,12 @@ class RunRecord: run_id: str dag_name: str + metadata: dict[str, Any] = field(default_factory=dict) tasks: dict[str, TaskRunRecord] = field(default_factory=dict) created_at: float = field(default_factory=time.time) finished_at: float | None = None -@runtime_checkable -class StateStoreProtocol(Protocol): - """Structural interface every state-store backend must satisfy.""" - - def create_run(self, run_id: str, dag_name: str, task_names: list[str]) -> RunRecord: - """Create and persist a brand-new run record for *dag_name*.""" - ... 
- - def create_resumed_run( - self, - run_id: str, - prev_run_id: str, - dag_name: str, - task_names: list[str], - reset_tasks: set[str] | None = None, - ) -> RunRecord: - """Create a new run that inherits successful results from *prev_run_id*.""" - ... - - def get_run(self, run_id: str) -> RunRecord: - """Return the RunRecord for *run_id*.""" - ... - - def mark_running(self, run_id: str, task_name: str) -> None: - """Transition *task_name* to RUNNING.""" - ... - - def mark_success(self, run_id: str, task_name: str, result: object) -> None: - """Transition *task_name* to SUCCESS and store *result*.""" - ... - - def mark_failed(self, run_id: str, task_name: str, err: str) -> None: - """Transition *task_name* to FAILED with the given *err* message.""" - ... - - def mark_skipped(self, run_id: str, task_name: str, reason: str) -> None: - """Transition *task_name* to SKIPPED with a skip *reason*.""" - ... - - def mark_retry(self, run_id: str, task_name: str) -> None: - """Reset *task_name* to PENDING so it can be retried.""" - ... - - def clear_result(self, run_id: str, task_name: str) -> None: - """Remove the stored result for *task_name* in *run_id*.""" - ... - - def finalize_run_if_done(self, run_id: str) -> None: - """Set the run's finished_at timestamp once all tasks are in a terminal state.""" - ... - - class InMemoryStateStore: """ In-memory implementation holding runs and task states. @@ -119,7 +69,13 @@ def __init__(self) -> None: """Initialize the state store with an empty run registry.""" self._runs: dict[str, RunRecord] = {} - def create_run(self, run_id: str, dag_name: str, task_names: list[str]) -> RunRecord: + def create_run( + self, + run_id: str, + dag_name: str, + task_names: list[str], + metadata: Mapping[str, Any] | None = None, + ) -> RunRecord: """Create a new RunRecord and initialize TaskRunRecord entries for each task. 
Parameters @@ -139,6 +95,7 @@ def create_run(self, run_id: str, dag_name: str, task_names: list[str]) -> RunRe rec = RunRecord( run_id=run_id, dag_name=dag_name, + metadata=dict(metadata or {}), tasks={t: TaskRunRecord(task_name=t) for t in task_names}, ) self._runs[run_id] = rec @@ -151,6 +108,7 @@ def create_resumed_run( dag_name: str, task_names: list[str], reset_tasks: set[str] | None = None, + metadata: Mapping[str, Any] | None = None, ) -> RunRecord: """Create a new run pre-populated with SUCCESS results from a previous run. @@ -179,7 +137,7 @@ def create_resumed_run( """ prev = self._runs[prev_run_id] reset_tasks = reset_tasks or set() - rec = RunRecord(run_id=run_id, dag_name=dag_name, tasks={}) + rec = RunRecord(run_id=run_id, dag_name=dag_name, metadata=dict(metadata or prev.metadata), tasks={}) for t in task_names: prev_task = prev.tasks.get(t) if prev_task and prev_task.status == "SUCCESS" and t not in reset_tasks: @@ -317,18 +275,6 @@ def mark_retry(self, run_id: str, task_name: str) -> None: tr.error = None tr.finished_at = None - def clear_result(self, run_id: str, task_name: str) -> None: - """Drop the stored result for a task to free memory. - - Parameters - ---------- - run_id : str - Identifier of the run. - task_name : str - Name of the task whose result to drop. - """ - self._runs[run_id].tasks[task_name].result = None - def finalize_run_if_done(self, run_id: str) -> None: """Set the run finished timestamp when all tasks reached a terminal state. 
diff --git a/src/flowrun/task.py b/src/flowrun/task.py index 9632442..320654b 100644 --- a/src/flowrun/task.py +++ b/src/flowrun/task.py @@ -1,5 +1,3 @@ -import contextvars -import functools import inspect import types from collections.abc import Callable, Iterator, Mapping, Sequence @@ -8,11 +6,6 @@ from flowrun.context import RunContext -_active_registry: contextvars.ContextVar["TaskRegistry | None"] = contextvars.ContextVar( - "flowrun_active_registry", - default=None, -) - @dataclass(frozen=True) class TaskSpec: @@ -27,7 +20,7 @@ class TaskSpec: deps : list[str] List of task names this task depends on. timeout_s : float | None - Timeout in seconds for task execution, or None for no timeout. + Timeout in seconds for async task execution, or None for no timeout. accepts_context : bool True when the task function signature allows a positional RunContext argument. requires_context : bool @@ -51,9 +44,8 @@ class TaskSpec: name: str func: Callable[..., Any] deps: list[str] = field(default_factory=list) - timeout_s: float | None = 30.0 + timeout_s: float | None = None retries: int = 0 - retain_result: bool = True dag: str | None = None accepts_context: bool = False requires_context: bool = False @@ -73,45 +65,13 @@ class TaskRegistry: """Registry that maps task names to `TaskSpec` objects. Supports the standard collection protocol (`in`, `len`, iteration, - subscript) and a `contextvars`-based activation model that is safe - across async tasks and threads. + subscript). """ def __init__(self) -> None: """Initialize an empty task registry.""" self._tasks: dict[str, TaskSpec] = {} - # ---- active-registry management (contextvars, async-safe) ---- - - @staticmethod - def active() -> "TaskRegistry": - """Return the currently active registry. - - Raises - ------ - LookupError - If no registry has been activated. - """ - reg = _active_registry.get() - if reg is None: - raise LookupError( - "No active TaskRegistry; pass registry=... 
to @task or activate one with `registry.activate()`." - ) - return reg - - def activate(self) -> contextvars.Token["TaskRegistry | None"]: - """Make this registry the active one and return a reset token. - - The token can be passed to `deactivate()` to restore the previous - registry, or used directly with `contextvars.ContextVar.reset`. - """ - return _active_registry.set(self) - - @staticmethod - def deactivate(token: contextvars.Token["TaskRegistry | None"]) -> None: - """Restore the previous active registry from a token.""" - _active_registry.reset(token) - # ---- collection protocol ---- def register(self, spec: TaskSpec) -> None: @@ -122,6 +82,7 @@ def register(self, spec: TaskSpec) -> None: ValueError If a task with the same name is already registered. """ + _validate_task_spec(spec) if spec.name in self._tasks: raise ValueError(f"Duplicate task name: {spec.name!r}") self._tasks[spec.name] = spec @@ -242,6 +203,26 @@ def _accepts_upstream(callable_obj: Callable[..., Any]) -> bool: ) +def _infer_required_dep_names(callable_obj: Callable[..., Any], registry: TaskRegistry) -> list[str]: + """Infer dependency names from required parameters that match registered tasks.""" + sig = inspect.signature(callable_obj) + inferred: list[str] = [] + for param in sig.parameters.values(): + if param.kind in (inspect.Parameter.VAR_POSITIONAL, inspect.Parameter.VAR_KEYWORD): + continue + if param.name in {"self", "cls"}: + continue + if param.default is not inspect._empty: + continue + if _annotation_is_run_context(param.annotation): + continue + if param.name == "upstream": + continue + if param.name in registry: + inferred.append(param.name) + return inferred + + def _accepted_named_deps(callable_obj: Callable[..., Any], dep_names: list[str]) -> list[str]: """Return the subset of *dep_names* that appear as parameter names in *callable_obj*.""" sig = inspect.signature(callable_obj) @@ -258,14 +239,56 @@ def _accepted_named_deps(callable_obj: Callable[..., Any], dep_names: 
list[str]) ] +def _unsatisfied_required_params(callable_obj: Callable[..., Any], dep_names: Sequence[str]) -> list[str]: + """Return required parameters that flowrun cannot satisfy for *callable_obj*.""" + sig = inspect.signature(callable_obj) + unsatisfied: list[str] = [] + for param in sig.parameters.values(): + if param.kind in (inspect.Parameter.VAR_POSITIONAL, inspect.Parameter.VAR_KEYWORD): + continue + if param.name in {"self", "cls"}: + continue + if param.default is not inspect._empty: + continue + if _annotation_is_run_context(param.annotation): + continue + if param.name == "upstream": + continue + if param.name in dep_names: + continue + unsatisfied.append(param.name) + return unsatisfied + + +def _validate_task_spec(spec: TaskSpec) -> None: + """Reject task configurations that would fail late or behave unsafely.""" + if spec.timeout_s is not None and not spec.is_async(): + raise ValueError( + f"Task {spec.name!r} is synchronous and cannot use timeout_s. " + "Thread-based timeouts cannot safely stop blocking work. " + "Use an async task or configure timeouts in the client you call inside the task." + ) + + unsatisfied = _unsatisfied_required_params(spec.func, spec.deps) + if unsatisfied: + deps_display = ", ".join(spec.deps) if spec.deps else "(none)" + raise ValueError( + f"Task {spec.name!r} has required parameters that flowrun cannot provide: {', '.join(unsatisfied)}. " + f"Available dependency names: {deps_display}. " + "Required parameters must either be annotated as RunContext, named 'upstream', " + "or exactly match a dependency name. When deps is omitted, flowrun only infers already-registered " + "task names from required parameters. If you use dependency names that are not valid Python " + "identifiers, consume them through the upstream mapping or rename the task." 
+ ) + + def task( _func: Callable[..., Any] | str | None = None, *, name: str | None = None, deps: Sequence[str | Callable[..., Any]] | None = None, - timeout_s: float | None = 30.0, + timeout_s: float | None = None, retries: int = 0, - retain_result: bool = True, dag: str | None = None, registry: TaskRegistry | None = None, ): @@ -276,26 +299,23 @@ def task( name : str | None Optional explicit name; defaults to ``func.__name__``. deps : Sequence[str | Callable] | None - Task dependencies (names or previously-decorated callables). + Task dependencies (names or previously-decorated callables). When omitted, + required parameter names that match already-registered task names are inferred. timeout_s : float | None - Timeout in seconds, or ``None`` for no timeout. + Per-attempt timeout for async tasks, or ``None`` for no timeout. retries : int Number of times to retry on failure (0 = no retries). - retain_result : bool - When False, the result is cleared from state once all downstream - consumers have been launched, freeing memory for large payloads. dag : str | None Optional DAG namespace used by ``Engine.run_once(dag_name=...)`` to select only tasks belonging to that DAG. registry : TaskRegistry | None - Registry to register with. When omitted, falls back to - ``TaskRegistry.active()``. + Registry to register with. Required when using ``task(...)`` directly. """ if registry is None: - registry = TaskRegistry.active() + raise TypeError("task(...): registry= is required. Prefer engine.task(...) 
or etl.task(...).") def wrapper(func: Callable[..., Any]): - dep_names = _normalize_deps(deps) + dep_names = _normalize_deps(deps) if deps is not None else _infer_required_dep_names(func, registry) ctx_accepts, ctx_requires = _context_signature_flags(func) has_upstream = _accepts_upstream(func) named = [] if has_upstream else _accepted_named_deps(func, dep_names) @@ -306,7 +326,6 @@ def wrapper(func: Callable[..., Any]): deps=dep_names, timeout_s=timeout_s, retries=retries, - retain_result=retain_result, dag=dag, accepts_context=ctx_accepts, requires_context=ctx_requires, @@ -326,58 +345,3 @@ def wrapper(func: Callable[..., Any]): if _func is None: return wrapper return wrapper(_func) - - -@dataclass(frozen=True) -class TaskTemplate: - """Reusable template for registering many parameterized tasks. - - This is intended for cases where you want to register the *same* task - implementation multiple times under different names with some arguments - pre-bound (e.g. one task per API endpoint). - """ - - func: Callable[..., Any] - deps: Sequence[str | Callable[..., Any]] | None = None - timeout_s: float | None = 30.0 - dag: str | None = None - registry: TaskRegistry | None = None - - def bind(self, name: str, /, *args: Any, **kwargs: Any) -> Callable[..., Any]: - """Register a new task by binding arguments into the template callable. - - Parameters - ---------- - name : str - Name for the registered task. - *args, **kwargs - Arguments to pre-bind into the underlying task callable. - These are applied via ``functools.partial``. - - Returns - ------- - Callable[..., Any] - The bound callable that was registered. - """ - bound = functools.partial(self.func, *args, **kwargs) - # Reuse the existing @task decorator machinery so dependency normalization - # and signature-based flags (context/upstream) stay consistent. 
- return task( - name=name, - deps=self.deps, - timeout_s=self.timeout_s, - dag=self.dag, - registry=self.registry, - )(bound) - - -def task_template( - func: Callable[..., Any], - *, - deps: Sequence[str | Callable[..., Any]] | None = None, - timeout_s: float | None = 30.0, - dag: str | None = None, - registry: TaskRegistry | None = None, -) -> TaskTemplate: - """Create a TaskTemplate for registering parameterized instances of a task.""" - return TaskTemplate(func=func, deps=deps, timeout_s=timeout_s, dag=dag, registry=registry) diff --git a/tests/test_new_features.py b/tests/test_new_features.py index ccab80a..83ec024 100644 --- a/tests/test_new_features.py +++ b/tests/test_new_features.py @@ -1,9 +1,11 @@ -"""Tests for the new features: retries, retain_result, engine context manager.""" +"""Tests for the new features: retries and engine context manager.""" +import asyncio from typing import cast import pytest +from flowrun.context import RunContext from flowrun.dag import DAG from flowrun.engine import Engine, build_default_engine from flowrun.executor import ExecutionResult, TaskExecutor @@ -109,74 +111,6 @@ async def test_scheduler_retries_do_not_skip_children_prematurely(): assert rec.tasks["child"].status == "SUCCESS" -# --------------------------------------------------------------------------- -# retain_result tests -# --------------------------------------------------------------------------- - - -class TrackingExecutor: - """Executor that returns canned results and records calls.""" - - def __init__(self, results: dict[str, object]) -> None: - self._results = results - self.calls: list[str] = [] - - async def run_once(self, spec, timeout_s, context, upstream_results): - self.calls.append(spec.name) - return ExecutionResult(ok=True, result=self._results.get(spec.name), duration_s=0.01) - - -@pytest.mark.asyncio -async def test_retain_result_false_clears_after_consumers_launch(): - """A task with retain_result=False should have its result cleared after 
dependents are done.""" - registry = TaskRegistry() - registry.register(TaskSpec(name="big_df", func=lambda: None, retain_result=False)) - registry.register(TaskSpec(name="consumer", func=lambda: None, deps=["big_df"])) - - state_store = StateStore() - executor = TrackingExecutor({"big_df": "large-payload", "consumer": "done"}) - scheduler = Scheduler( - registry, - state_store, - cast(TaskExecutor, executor), - SchedulerConfig(max_parallel=2), - ) - dag = DAG( - name="mem_dag", - nodes=["big_df", "consumer"], - edges={"big_df": [], "consumer": ["big_df"]}, - ) - - run_id = await scheduler.run_dag_once(dag) - rec = state_store.get_run(run_id) - - assert rec.tasks["big_df"].status == "SUCCESS" - assert rec.tasks["big_df"].result is None # cleared - assert rec.tasks["consumer"].result == "done" - - -@pytest.mark.asyncio -async def test_retain_result_true_keeps_result(): - """The default retain_result=True should keep the result in state.""" - registry = TaskRegistry() - registry.register(TaskSpec(name="keep_me", func=lambda: None, retain_result=True)) - - state_store = StateStore() - executor = TrackingExecutor({"keep_me": "important"}) - scheduler = Scheduler( - registry, - state_store, - cast(TaskExecutor, executor), - SchedulerConfig(max_parallel=2), - ) - dag = DAG(name="keep_dag", nodes=["keep_me"], edges={"keep_me": []}) - - run_id = await scheduler.run_dag_once(dag) - rec = state_store.get_run(run_id) - - assert rec.tasks["keep_me"].result == "important" - - # --------------------------------------------------------------------------- # Engine context manager tests # --------------------------------------------------------------------------- @@ -320,18 +254,21 @@ def transform(extract: str) -> str: @pytest.mark.asyncio -async def test_engine_dag_scope_supports_templates_and_subgraph(): +async def test_engine_dag_scope_supports_factory_registered_tasks_and_subgraph(): engine = build_default_engine(max_workers=2, max_parallel=2) etl = engine.dag("templated") - 
def fetch(*, table: str) -> str: - return table + def bind_fetch(*, name: str, table: str): + @etl.task(name=name) + def fetch() -> str: + return table - tpl = etl.task_template(fetch) - tpl.bind("fetch_users", table="users") - tpl.bind("fetch_orders", table="orders") + return fetch - @etl.task(name="combine", deps=["fetch_users", "fetch_orders"]) + fetch_users = bind_fetch(name="fetch_users", table="users") + fetch_orders = bind_fetch(name="fetch_orders", table="orders") + + @etl.task(name="combine", deps=[fetch_users, fetch_orders]) def combine(fetch_users: str, fetch_orders: str) -> str: return f"{fetch_users}+{fetch_orders}" @@ -345,6 +282,119 @@ def combine(fetch_users: str, fetch_orders: str) -> str: assert set(report["tasks"].keys()) == {"fetch_users", "fetch_orders", "combine"} +@pytest.mark.asyncio +async def test_run_context_metadata_is_reported(): + engine = build_default_engine(max_workers=2, max_parallel=2) + + @engine.task(dag="metadata_demo") + def extract(context: RunContext[dict[str, int]]) -> int: + return context.value + + context = RunContext({"value": 3}).with_metadata(batch_id=7, source="api_users") + + async with engine: + run_id = await engine.run_once("metadata_demo", context=context) + report = engine.get_run_report(run_id) + + assert report["metadata"] == {"batch_id": 7, "source": "api_users"} + + +@pytest.mark.asyncio +async def test_run_many_reports_context_metadata_per_run(): + engine = build_default_engine(max_workers=2, max_parallel=2) + seen: list[tuple[str, int]] = [] + + @engine.task(name="input_chunk", dag="micro_batch") + def input_chunk(context: RunContext[dict[str, int]]) -> dict[str, int]: + seen.append(("input", context.batch_id)) + return {"batch_id": context.batch_id, "value": context.value} + + @engine.task(name="double", dag="micro_batch", deps=[input_chunk]) + def double(input_chunk: dict[str, int]) -> int: + seen.append(("double", input_chunk["batch_id"])) + return input_chunk["value"] * 2 + + contexts = [ + 
RunContext({"batch_id": 1, "value": 3}).with_metadata(batch_id=1, source="users"), + RunContext({"batch_id": 2, "value": 5}).with_metadata(batch_id=2, source="users"), + ] + + async with engine: + run_ids = await engine.run_many("micro_batch", contexts) + reports = [engine.get_run_report(run_id) for run_id in run_ids] + + assert len(run_ids) == 2 + assert [report["metadata"]["batch_id"] for report in reports] == [1, 2] + assert [report["tasks"]["double"]["result"] for report in reports] == [6, 10] + assert seen == [("input", 1), ("double", 1), ("input", 2), ("double", 2)] + + +@pytest.mark.asyncio +async def test_engine_run_many_supports_iterable_contexts_sequentially(): + engine = build_default_engine(max_workers=2, max_parallel=2) + seen: list[tuple[str, int]] = [] + + @engine.task(name="input_chunk", dag="micro_batch") + def input_chunk(context: RunContext[dict[str, int]]) -> dict[str, int]: + seen.append(("input", context.batch_id)) + return {"batch_id": context.batch_id, "value": context.value} + + @engine.task(name="double", dag="micro_batch", deps=[input_chunk]) + def double(input_chunk: dict[str, int]) -> int: + seen.append(("double", input_chunk["batch_id"])) + return input_chunk["value"] * 2 + + contexts = [ + RunContext({"batch_id": 1, "value": 3}), + RunContext({"batch_id": 2, "value": 5}), + ] + + async with engine: + run_ids = await engine.run_many("micro_batch", contexts) + reports = [engine.get_run_report(run_id) for run_id in run_ids] + + assert len(run_ids) == 2 + assert [report["tasks"]["double"]["result"] for report in reports] == [6, 10] + assert seen == [("input", 1), ("double", 1), ("input", 2), ("double", 2)] + + +@pytest.mark.asyncio +async def test_engine_dag_scope_run_many_supports_async_iterables(): + engine = build_default_engine(max_workers=2, max_parallel=2) + etl = engine.dag("micro_batch_scope") + seen: list[tuple[str, int]] = [] + + @etl.task(name="input_chunk") + def input_chunk(context: RunContext[dict[str, int]]) -> dict[str, 
int]: + seen.append(("input", context.batch_id)) + return {"batch_id": context.batch_id, "value": context.value} + + @etl.task(name="double", deps=[input_chunk]) + def double(input_chunk: dict[str, int]) -> int: + seen.append(("double", input_chunk["batch_id"])) + return input_chunk["value"] * 2 + + async def contexts(): + for batch_id, value in [(1, 3), (2, 5), (3, 7)]: + await asyncio.sleep(0) + yield RunContext({"batch_id": batch_id, "value": value}) + + async with engine: + run_ids = await etl.run_many(contexts()) + reports = [engine.get_run_report(run_id) for run_id in run_ids] + + assert len(run_ids) == 3 + assert [report["tasks"]["double"]["result"] for report in reports] == [6, 10, 14] + assert seen == [ + ("input", 1), + ("double", 1), + ("input", 2), + ("double", 2), + ("input", 3), + ("double", 3), + ] + + # --------------------------------------------------------------------------- # State machine: retry transition # --------------------------------------------------------------------------- @@ -360,15 +410,3 @@ def test_state_mark_retry_resets_to_pending(state_store): rec = state_store.get_run("r1") assert rec.tasks["t1"].status == "PENDING" assert rec.tasks["t1"].error is None - - -def test_state_clear_result(state_store): - """clear_result should set the result field to None.""" - state_store.create_run("r1", "dag", ["t1"]) - state_store.mark_running("r1", "t1") - state_store.mark_success("r1", "t1", result="big-data") - state_store.clear_result("r1", "t1") - - rec = state_store.get_run("r1") - assert rec.tasks["t1"].status == "SUCCESS" - assert rec.tasks["t1"].result is None diff --git a/tests/test_partial_dag.py b/tests/test_partial_dag.py index cb53c2f..198eb7a 100644 --- a/tests/test_partial_dag.py +++ b/tests/test_partial_dag.py @@ -189,8 +189,6 @@ async def test_engine_resume_skips_successful_tasks(): from flowrun.engine import Engine registry = _build_diamond_registry() - tok = registry.activate() - state = StateStore() # Build a first run 
where A+B succeeded but C failed @@ -221,8 +219,6 @@ async def test_engine_resume_skips_successful_tasks(): rec = state.get_run(new_run_id) assert all(rec.tasks[t].status == "SUCCESS" for t in ["A", "B", "C", "D"]) - TaskRegistry.deactivate(tok) - @pytest.mark.asyncio async def test_engine_resume_from_tasks(): @@ -230,7 +226,6 @@ async def test_engine_resume_from_tasks(): from flowrun.engine import Engine registry = _build_diamond_registry() - tok = registry.activate() state = StateStore() # All tasks succeeded in the original run @@ -259,8 +254,6 @@ async def test_engine_resume_from_tasks(): assert rec.tasks["B"].result == "b-v2" # re-executed assert rec.tasks["D"].result == "d-v2" # re-executed - TaskRegistry.deactivate(tok) - # --------------------------------------------------------------------------- # Engine.run_subgraph (integration) @@ -273,7 +266,6 @@ async def test_engine_run_subgraph(): from flowrun.engine import Engine registry = _build_diamond_registry() - tok = registry.activate() state = StateStore() executor = DummyExecutor( @@ -295,5 +287,3 @@ async def test_engine_run_subgraph(): assert "D" not in rec.tasks assert rec.tasks["A"].status == "SUCCESS" assert rec.tasks["B"].status == "SUCCESS" - - TaskRegistry.deactivate(tok) diff --git a/tests/test_registry.py b/tests/test_registry.py index 91f639e..ad29e44 100644 --- a/tests/test_registry.py +++ b/tests/test_registry.py @@ -1,8 +1,7 @@ -"""Tests for the redesigned TaskRegistry.""" +"""Tests for TaskRegistry collection behavior.""" import pytest -from flowrun.engine import build_default_engine from flowrun.task import TaskRegistry, TaskSpec @@ -80,41 +79,3 @@ def test_registry_repr(): reg = TaskRegistry() reg.register(TaskSpec(name="alpha", func=lambda: None)) assert "alpha" in repr(reg) - - -# ---- activate / deactivate ---- - - -def test_registry_activate_and_active(): - reg = TaskRegistry() - token = reg.activate() - try: - assert TaskRegistry.active() is reg - finally: - 
TaskRegistry.deactivate(token) - - -def test_registry_active_raises_when_none(): - """Ensure active() raises if no registry has been activated in this context.""" - from flowrun.task import _active_registry - - token = _active_registry.set(None) - try: - with pytest.raises(LookupError, match="No active"): - TaskRegistry.active() - finally: - _active_registry.reset(token) - - -def test_build_default_engine_does_not_activate_registry_globally(): - """Engine construction should not leak a global active registry.""" - from flowrun.task import _active_registry - - token = _active_registry.set(None) - try: - engine = build_default_engine() - engine.close() - with pytest.raises(LookupError, match="No active"): - TaskRegistry.active() - finally: - _active_registry.reset(token) diff --git a/tests/test_run_context.py b/tests/test_run_context.py index 105b1da..c09ee7a 100644 --- a/tests/test_run_context.py +++ b/tests/test_run_context.py @@ -1,6 +1,10 @@ +import threading +import time from dataclasses import dataclass -from flowrun.context import RunContext +import pytest + +from flowrun.context import RunCancelledError, RunContext def test_run_context_delegates_attribute_access(): @@ -27,3 +31,39 @@ def test_run_context_missing_attribute_raises(): except AttributeError: return raise AssertionError("AttributeError not raised for unknown attribute") + + +def test_run_context_deadline_helpers(): + ctx = RunContext(deps={"existing": 1}).with_deadline_s(0.02) + + assert ctx.has_deadline() is True + remaining_s = ctx.time_remaining_s() + assert remaining_s is not None + assert 0.0 < remaining_s <= 0.02 + + time.sleep(0.03) + + assert ctx.deadline_exceeded() is True + assert ctx.cancelled() is True + with pytest.raises(TimeoutError, match="deadline exceeded"): + ctx.raise_if_cancelled() + + +def test_run_context_cancel_event_helpers(): + cancel_event = threading.Event() + ctx = RunContext(deps={"existing": 1}).with_cancel_event(cancel_event) + + assert ctx.cancelled() is False + + 
cancel_event.set() + + assert ctx.cancelled() is True + with pytest.raises(RunCancelledError, match="cancelled"): + ctx.raise_if_cancelled() + + +def test_run_context_keeps_earliest_deadline(): + ctx = RunContext(deps={"existing": 1}).with_deadline_s(0.02) + widened = ctx.with_deadline_s(0.2) + + assert widened is ctx diff --git a/tests/test_sqlite_store.py b/tests/test_sqlite_store.py deleted file mode 100644 index b502baf..0000000 --- a/tests/test_sqlite_store.py +++ /dev/null @@ -1,487 +0,0 @@ -"""Tests for SqliteStateStore: persistence, TTL, recovery, serializers.""" - -from __future__ import annotations - -import time -from typing import cast - -import pytest - -from flowrun.dag import DAG -from flowrun.executor import ExecutionResult, TaskExecutor -from flowrun.scheduler import Scheduler, SchedulerConfig -from flowrun.serialization import JsonSerializer, PickleSerializer -from flowrun.sqlite_store import SqliteStateStore -from flowrun.state import InMemoryStateStore, StateStoreProtocol -from flowrun.task import TaskRegistry, TaskSpec - - -# --------------------------------------------------------------------------- -# Helpers -# --------------------------------------------------------------------------- - - -class DummyExecutor: - """Fake executor returning pre-configured results.""" - - def __init__(self, outcomes: dict[str, ExecutionResult]) -> None: - self._outcomes = outcomes - self.calls: list[str] = [] - - async def run_once(self, spec, timeout_s, context, upstream_results): - self.calls.append(spec.name) - return self._outcomes[spec.name] - - -DIAMOND_DAG = DAG( - name="diamond", - nodes=["A", "B", "C", "D"], - edges={"A": [], "B": ["A"], "C": ["A"], "D": ["B", "C"]}, -) - - -def _diamond_registry() -> TaskRegistry: - reg = TaskRegistry() - reg.register(TaskSpec(name="A", func=lambda: None)) - reg.register(TaskSpec(name="B", func=lambda: None, deps=["A"])) - reg.register(TaskSpec(name="C", func=lambda: None, deps=["A"])) - 
reg.register(TaskSpec(name="D", func=lambda: None, deps=["B", "C"])) - return reg - - -# --------------------------------------------------------------------------- -# Protocol conformance -# --------------------------------------------------------------------------- - - -class TestProtocolConformance: - def test_sqlite_store_satisfies_protocol(self): - store = SqliteStateStore(":memory:") - assert isinstance(store, StateStoreProtocol) - store.close() - - def test_inmemory_store_satisfies_protocol(self): - store = InMemoryStateStore() - assert isinstance(store, StateStoreProtocol) - - -# --------------------------------------------------------------------------- -# Basic state transitions (mirrors test_state_transitions.py) -# --------------------------------------------------------------------------- - - -class TestSqliteStateTransitions: - def _store(self) -> SqliteStateStore: - return SqliteStateStore(":memory:") - - def test_create_and_get_run(self): - s = self._store() - s.create_run("r1", "dag", ["a", "b"]) - rec = s.get_run("r1") - assert rec.run_id == "r1" - assert set(rec.tasks.keys()) == {"a", "b"} - assert rec.tasks["a"].status == "PENDING" - s.close() - - def test_valid_transition_pending_running_success(self): - s = self._store() - s.create_run("r1", "dag", ["t"]) - s.mark_running("r1", "t") - assert s.get_run("r1").tasks["t"].status == "RUNNING" - s.mark_success("r1", "t", {"val": 42}) - assert s.get_run("r1").tasks["t"].status == "SUCCESS" - assert s.get_run("r1").tasks["t"].result == {"val": 42} - s.close() - - def test_valid_transition_pending_running_failed(self): - s = self._store() - s.create_run("r1", "dag", ["t"]) - s.mark_running("r1", "t") - s.mark_failed("r1", "t", "boom") - tr = s.get_run("r1").tasks["t"] - assert tr.status == "FAILED" - assert tr.error == "boom" - s.close() - - def test_valid_transition_pending_skipped(self): - s = self._store() - s.create_run("r1", "dag", ["t"]) - s.mark_skipped("r1", "t", "reason") - tr = 
s.get_run("r1").tasks["t"] - assert tr.status == "SKIPPED" - assert tr.error == "reason" - s.close() - - def test_retry_resets_to_pending(self): - s = self._store() - s.create_run("r1", "dag", ["t"]) - s.mark_running("r1", "t") - s.mark_failed("r1", "t", "err") - s.mark_retry("r1", "t") - tr = s.get_run("r1").tasks["t"] - assert tr.status == "PENDING" - assert tr.error is None - s.close() - - def test_invalid_transition_raises(self): - s = self._store() - s.create_run("r1", "dag", ["t"]) - with pytest.raises(RuntimeError, match="Invalid state transition"): - s.mark_success("r1", "t", None) - s.close() - - def test_finalize_run_if_done(self): - s = self._store() - s.create_run("r1", "dag", ["a", "b"]) - s.mark_running("r1", "a") - s.mark_success("r1", "a", None) - s.finalize_run_if_done("r1") - assert s.get_run("r1").finished_at is None # b still PENDING - - s.mark_skipped("r1", "b", "skip") - s.finalize_run_if_done("r1") - assert s.get_run("r1").finished_at is not None - s.close() - - def test_clear_result(self): - s = self._store() - s.create_run("r1", "dag", ["t"]) - s.mark_running("r1", "t") - s.mark_success("r1", "t", "data") - assert s.get_run("r1").tasks["t"].result == "data" - s.clear_result("r1", "t") - assert s.get_run("r1").tasks["t"].result is None - s.close() - - def test_get_missing_run_raises(self): - s = self._store() - with pytest.raises(KeyError): - s.get_run("nope") - s.close() - - -# --------------------------------------------------------------------------- -# create_resumed_run -# --------------------------------------------------------------------------- - - -class TestSqliteResumedRun: - def test_copies_success_resets_failed(self): - s = SqliteStateStore(":memory:") - s.create_run("r1", "dag", ["A", "B", "C"]) - s.mark_running("r1", "A") - s.mark_success("r1", "A", "a-result") - s.mark_running("r1", "B") - s.mark_failed("r1", "B", "boom") - - s.create_resumed_run("r2", "r1", "dag", ["A", "B", "C"]) - r2 = s.get_run("r2") - assert 
r2.tasks["A"].status == "SUCCESS" - assert r2.tasks["A"].result == "a-result" - assert r2.tasks["B"].status == "PENDING" - assert r2.tasks["C"].status == "PENDING" - s.close() - - def test_reset_tasks_forces_pending(self): - s = SqliteStateStore(":memory:") - s.create_run("r1", "dag", ["A", "B"]) - s.mark_running("r1", "A") - s.mark_success("r1", "A", "a") - s.mark_running("r1", "B") - s.mark_success("r1", "B", "b") - - s.create_resumed_run("r2", "r1", "dag", ["A", "B"], reset_tasks={"B"}) - r2 = s.get_run("r2") - assert r2.tasks["A"].status == "SUCCESS" - assert r2.tasks["B"].status == "PENDING" - s.close() - - -# --------------------------------------------------------------------------- -# File persistence — survives close + reopen -# --------------------------------------------------------------------------- - - -class TestFilePersistence: - def test_data_survives_reopen(self, tmp_path): - db = tmp_path / "test.db" - s1 = SqliteStateStore(db) - s1.create_run("r1", "dag", ["t"]) - s1.mark_running("r1", "t") - s1.mark_success("r1", "t", {"key": "value"}) - s1.close() - - s2 = SqliteStateStore(db) - rec = s2.get_run("r1") - assert rec.tasks["t"].status == "SUCCESS" - assert rec.tasks["t"].result == {"key": "value"} - s2.close() - - def test_resumed_run_survives_reopen(self, tmp_path): - db = tmp_path / "test.db" - s1 = SqliteStateStore(db) - s1.create_run("r1", "dag", ["A", "B"]) - s1.mark_running("r1", "A") - s1.mark_success("r1", "A", "a-ok") - s1.mark_running("r1", "B") - s1.mark_failed("r1", "B", "boom") - s1.close() - - s2 = SqliteStateStore(db) - s2.create_resumed_run("r2", "r1", "dag", ["A", "B"]) - r2 = s2.get_run("r2") - assert r2.tasks["A"].status == "SUCCESS" - assert r2.tasks["B"].status == "PENDING" - s2.close() - - -# --------------------------------------------------------------------------- -# Crash recovery -# --------------------------------------------------------------------------- - - -class TestCrashRecovery: - def 
test_recover_resets_running_to_failed(self, tmp_path): - db = tmp_path / "test.db" - s1 = SqliteStateStore(db) - s1.create_run("r1", "dag", ["A", "B"]) - s1.mark_running("r1", "A") - s1.mark_success("r1", "A", "ok") - s1.mark_running("r1", "B") # simulate crash: B stays RUNNING - s1.close() - - # Reopen with recovery - s2 = SqliteStateStore(db, recover=True) - rec = s2.get_run("r1") - assert rec.tasks["A"].status == "SUCCESS" # untouched - assert rec.tasks["B"].status == "FAILED" - assert rec.tasks["B"].error == "PROCESS_CRASH" - s2.close() - - def test_no_recover_leaves_running(self, tmp_path): - db = tmp_path / "test.db" - s1 = SqliteStateStore(db) - s1.create_run("r1", "dag", ["t"]) - s1.mark_running("r1", "t") - s1.close() - - # Reopen WITHOUT recovery - s2 = SqliteStateStore(db, recover=False) - rec = s2.get_run("r1") - assert rec.tasks["t"].status == "RUNNING" - s2.close() - - -# --------------------------------------------------------------------------- -# Cache TTL -# --------------------------------------------------------------------------- - - -class TestCacheTTL: - def test_eviction_removes_stale_entries(self): - s = SqliteStateStore(":memory:", cache_ttl_s=0.05) - s.create_run("r1", "dag", ["t"]) - s.create_run("r2", "dag", ["t"]) - assert "r1" in s._cache - assert "r2" in s._cache - - # Manually backdate r1's access time - rec, _ = s._cache["r1"] - s._cache["r1"] = (rec, time.time() - 1.0) - - # Touch r2 to refresh it, which also triggers eviction - s.get_run("r2") - s._maybe_evict() - - assert "r1" not in s._cache # evicted from cache - assert "r2" in s._cache # still fresh - - # r1 is still loadable from SQLite - rec = s.get_run("r1") - assert rec.run_id == "r1" - assert "r1" in s._cache # re-cached on access - s.close() - - def test_no_ttl_keeps_everything(self): - s = SqliteStateStore(":memory:", cache_ttl_s=None) - s.create_run("r1", "dag", ["t"]) - # Backdate heavily - rec, _ = s._cache["r1"] - s._cache["r1"] = (rec, time.time() - 9999) - 
s._maybe_evict() - assert "r1" in s._cache - s.close() - - -# --------------------------------------------------------------------------- -# Serializer variants -# --------------------------------------------------------------------------- - - -class TestSerializers: - def test_json_roundtrip(self, tmp_path): - db = tmp_path / "test.db" - s1 = SqliteStateStore(db, serializer=JsonSerializer()) - s1.create_run("r1", "dag", ["t"]) - s1.mark_running("r1", "t") - s1.mark_success("r1", "t", {"nums": [1, 2, 3], "flag": True}) - s1.close() - - s2 = SqliteStateStore(db, serializer=JsonSerializer()) - result = s2.get_run("r1").tasks["t"].result - assert result == {"nums": [1, 2, 3], "flag": True} - s2.close() - - def test_pickle_roundtrip(self, tmp_path): - db = tmp_path / "test.db" - s1 = SqliteStateStore(db, serializer=PickleSerializer()) - custom_obj = {"set_data": {1, 2, 3}} # sets are not JSON-serializable - s1.create_run("r1", "dag", ["t"]) - s1.mark_running("r1", "t") - s1.mark_success("r1", "t", custom_obj) - s1.close() - - s2 = SqliteStateStore(db, serializer=PickleSerializer()) - result = s2.get_run("r1").tasks["t"].result - assert result == {"set_data": {1, 2, 3}} - s2.close() - - def test_none_result_roundtrip(self): - s = SqliteStateStore(":memory:") - s.create_run("r1", "dag", ["t"]) - s.mark_running("r1", "t") - s.mark_success("r1", "t", None) - assert s.get_run("r1").tasks["t"].result is None - s.close() - - -# --------------------------------------------------------------------------- -# list_runs -# --------------------------------------------------------------------------- - - -class TestListRuns: - def test_list_all(self): - s = SqliteStateStore(":memory:") - s.create_run("r1", "dag_a", ["t"]) - s.create_run("r2", "dag_b", ["t"]) - runs = s.list_runs() - assert [r.run_id for r in runs] == ["r1", "r2"] - s.close() - - def test_filter_by_dag_name(self): - s = SqliteStateStore(":memory:") - s.create_run("r1", "dag_a", ["t"]) - s.create_run("r2", "dag_b", 
["t"]) - s.create_run("r3", "dag_a", ["t"]) - runs = s.list_runs(dag_name="dag_a") - assert [r.run_id for r in runs] == ["r1", "r3"] - s.close() - - -# --------------------------------------------------------------------------- -# Integration: scheduler with SqliteStateStore -# --------------------------------------------------------------------------- - - -@pytest.mark.asyncio -async def test_scheduler_with_sqlite_store(): - """Full DAG run with SqliteStateStore works identically to InMemoryStateStore.""" - registry = _diamond_registry() - state = SqliteStateStore(":memory:") - executor = DummyExecutor( - { - "A": ExecutionResult(ok=True, result="a", duration_s=0.01), - "B": ExecutionResult(ok=True, result="b", duration_s=0.01), - "C": ExecutionResult(ok=True, result="c", duration_s=0.01), - "D": ExecutionResult(ok=True, result="d", duration_s=0.01), - } - ) - scheduler = Scheduler(registry, state, cast(TaskExecutor, executor), SchedulerConfig(max_parallel=4)) - run_id = await scheduler.run_dag_once(DIAMOND_DAG) - rec = state.get_run(run_id) - assert all(rec.tasks[t].status == "SUCCESS" for t in ["A", "B", "C", "D"]) - assert rec.finished_at is not None - state.close() - - -@pytest.mark.asyncio -async def test_resume_with_sqlite_store(): - """Resume flow works end-to-end with SqliteStateStore.""" - from flowrun.engine import Engine - - registry = _diamond_registry() - tok = registry.activate() - state = SqliteStateStore(":memory:") - - # First run: A succeeds, B fails, C+D skipped - state.create_run("old", "diamond", ["A", "B", "C", "D"]) - state.mark_running("old", "A") - state.mark_success("old", "A", "a-ok") - state.mark_running("old", "B") - state.mark_failed("old", "B", "boom") - state.mark_skipped("old", "C", "UPSTREAM_FAILED") - state.mark_skipped("old", "D", "UPSTREAM_FAILED") - - executor = DummyExecutor( - { - "B": ExecutionResult(ok=True, result="b-ok", duration_s=0.01), - "C": ExecutionResult(ok=True, result="c-ok", duration_s=0.01), - "D": 
ExecutionResult(ok=True, result="d-ok", duration_s=0.01), - } - ) - scheduler = Scheduler(registry, state, cast(TaskExecutor, executor), SchedulerConfig(max_parallel=4)) - engine = Engine(registry, state, scheduler) - - new_run_id = await engine.resume("old") - - assert "A" not in executor.calls - assert set(executor.calls) == {"B", "C", "D"} - rec = state.get_run(new_run_id) - assert all(rec.tasks[t].status == "SUCCESS" for t in ["A", "B", "C", "D"]) - - TaskRegistry.deactivate(tok) - state.close() - - -@pytest.mark.asyncio -async def test_crash_recovery_then_resume(tmp_path): - """Simulate a crash mid-run, then recover and resume.""" - from flowrun.engine import Engine - - db = tmp_path / "test.db" - registry = _diamond_registry() - tok = registry.activate() - - # Phase 1: start a run, A succeeds, B starts (then "crash") - s1 = SqliteStateStore(db) - s1.create_run("r1", "diamond", ["A", "B", "C", "D"]) - s1.mark_running("r1", "A") - s1.mark_success("r1", "A", "a-ok") - s1.mark_running("r1", "B") # B is RUNNING when we "crash" - s1.close() - - # Phase 2: reopen with recovery, B becomes FAILED - s2 = SqliteStateStore(db, recover=True) - rec = s2.get_run("r1") - assert rec.tasks["B"].status == "FAILED" - assert rec.tasks["B"].error == "PROCESS_CRASH" - - # Phase 3: resume — A preserved, B+C+D re-run - executor = DummyExecutor( - { - "B": ExecutionResult(ok=True, result="b-ok", duration_s=0.01), - "C": ExecutionResult(ok=True, result="c-ok", duration_s=0.01), - "D": ExecutionResult(ok=True, result="d-ok", duration_s=0.01), - } - ) - scheduler = Scheduler(registry, s2, cast(TaskExecutor, executor), SchedulerConfig(max_parallel=4)) - engine = Engine(registry, s2, scheduler) - - new_run_id = await engine.resume("r1") - assert "A" not in executor.calls - assert set(executor.calls) == {"B", "C", "D"} - assert all(s2.get_run(new_run_id).tasks[t].status == "SUCCESS" for t in ["A", "B", "C", "D"]) - - TaskRegistry.deactivate(tok) - s2.close() diff --git 
a/tests/test_task_decorator.py b/tests/test_task_decorator.py index 8178b2a..ab76002 100644 --- a/tests/test_task_decorator.py +++ b/tests/test_task_decorator.py @@ -1,19 +1,17 @@ +import pytest + from flowrun.context import RunContext -from flowrun.task import TaskRegistry, task, task_template +from flowrun.task import TaskRegistry, task -def test_task_decorator_supports_bare_usage(): +def test_task_decorator_requires_explicit_registry(): registry = TaskRegistry() - token = registry.activate() - try: + + with pytest.raises(TypeError, match="registry= is required"): + @task def bare() -> int: return 1 - finally: - TaskRegistry.deactivate(token) - - spec = registry.get("bare") - assert spec.func is bare def test_task_decorator_supports_positional_name(): @@ -57,38 +55,48 @@ def consumer() -> int: assert registry.get("producer").deps == [] -def test_task_template_registers_parameterized_tasks_and_preserves_upstream_acceptance(): +def test_task_decorator_infers_dependencies_from_required_parameter_names(): registry = TaskRegistry() - def fetch(*, value: str, upstream=None) -> str: - return value + @task(name="producer", registry=registry) + def producer() -> int: + return 1 + + @task(registry=registry) + def consumer(producer: int) -> int: + return producer + 1 + + spec = registry.get("consumer") + + assert spec.deps == ["producer"] + assert spec.named_deps == ["producer"] - tpl = task_template(fetch, registry=registry) - tpl.bind("fetch_alpha", value="alpha") - tpl.bind("fetch_beta", value="beta") - spec_a = registry.get("fetch_alpha") - spec_b = registry.get("fetch_beta") +def test_task_decorator_inference_requires_registered_dependency_names(): + registry = TaskRegistry() + + with pytest.raises(ValueError, match="already-registered task names"): - assert spec_a.func() == "alpha" - assert spec_b.func() == "beta" - assert spec_a.accepts_upstream is True - assert spec_b.accepts_upstream is True + @task(name="consumer", registry=registry) + def consumer(producer: int) 
-> int: + return producer -def test_task_template_bound_callables_can_be_used_directly_as_dependencies(): +def test_task_decorator_rejects_unsatisfied_required_parameters(): registry = TaskRegistry() - def fetch(*, value: str) -> str: - return value + with pytest.raises(ValueError, match="cannot provide"): - tpl = task_template(fetch, registry=registry) - fetch_alpha = tpl.bind("fetch_alpha", value="alpha") - fetch_beta = tpl.bind("fetch_beta", value="beta") + @task(name="consumer", deps=["fetch-users"], registry=registry) + def consumer(fetch_users: int) -> int: + return fetch_users - @task(name="combine", deps=[fetch_alpha, fetch_beta], registry=registry) - def combine(fetch_alpha: str, fetch_beta: str) -> str: - return f"{fetch_alpha}+{fetch_beta}" - spec = registry.get("combine") - assert spec.deps == ["fetch_alpha", "fetch_beta"] +def test_task_decorator_rejects_sync_timeouts(): + registry = TaskRegistry() + + with pytest.raises(ValueError, match="cannot use timeout_s"): + + @task(name="sync_task", timeout_s=1.0, registry=registry) + def sync_task() -> int: + return 1 diff --git a/tests/test_task_executor.py b/tests/test_task_executor.py index 2e85233..d844b26 100644 --- a/tests/test_task_executor.py +++ b/tests/test_task_executor.py @@ -22,7 +22,7 @@ def sync_task(context) -> int: spec = TaskSpec( name="sync", func=sync_task, - timeout_s=1.0, + timeout_s=None, accepts_context=True, requires_context=True, ) @@ -46,7 +46,7 @@ def sync_task(context) -> int: spec = TaskSpec( name="needs_ctx", func=sync_task, - timeout_s=1.0, + timeout_s=None, accepts_context=True, requires_context=True, ) @@ -91,7 +91,7 @@ def child(upstream): spec = TaskSpec( name="child", func=child, - timeout_s=1.0, + timeout_s=None, accepts_upstream=True, ) @@ -115,7 +115,7 @@ def child(root: int) -> int: spec = TaskSpec( name="child", func=child, - timeout_s=1.0, + timeout_s=None, named_deps=["root"], ) @@ -128,3 +128,58 @@ def child(root: int) -> int: assert result.ok is True assert 
result.result == 3 + + +@pytest.mark.asyncio +async def test_task_executor_derives_deadline_on_context(): + captured: dict[str, float | None] = {} + + class Deps: + pass + + ctx = RunContext(Deps()) + + async def async_task(context: RunContext[Deps]) -> float: + captured["remaining"] = context.time_remaining_s() + return captured["remaining"] or -1.0 + + spec = TaskSpec( + name="timed_async", + func=async_task, + timeout_s=0.05, + accepts_context=True, + requires_context=True, + ) + + thread_pool = concurrent.futures.ThreadPoolExecutor(max_workers=1) + try: + executor = TaskExecutor(executor=thread_pool) + result = await executor.run_once(spec, spec.timeout_s, ctx) + finally: + thread_pool.shutdown(wait=True) + + assert result.ok is True + assert captured["remaining"] is not None + assert 0.0 < captured["remaining"] <= 0.05 + + +@pytest.mark.asyncio +async def test_task_executor_rejects_sync_timeouts_without_running_task(): + side_effects: list[str] = [] + + def slow_sync_task() -> str: + side_effects.append("ran") + return "done" + + spec = TaskSpec(name="slow_sync", func=slow_sync_task, timeout_s=0.01) + + thread_pool = concurrent.futures.ThreadPoolExecutor(max_workers=1) + try: + executor = TaskExecutor(executor=thread_pool) + result = await executor.run_once(spec, spec.timeout_s, None) + finally: + thread_pool.shutdown(wait=True) + + assert result.ok is False + assert "cannot use timeout_s" in (result.error or "") + assert side_effects == [] diff --git a/uv.lock b/uv.lock index 3f85ff5..f1568f8 100644 --- a/uv.lock +++ b/uv.lock @@ -2,6 +2,15 @@ version = 1 revision = 2 requires-python = ">=3.12" +[[package]] +name = "annotated-types" +version = "0.7.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/ee/67/531ea369ba64dcff5ec9c3402f9f51bf748cec26dde048a2f973a4eea7f5/annotated_types-0.7.0.tar.gz", hash = "sha256:aff07c09a53a08bc8cfccb9c85b05f1aa9a2a6f23728d790723543408344ce89", size = 16081, 
upload-time = "2024-05-20T21:33:25.928Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/78/b6/6307fbef88d9b5ee7421e68d78a9f162e0da4900bc5f5793f6d3d0e34fb8/annotated_types-0.7.0-py3-none-any.whl", hash = "sha256:1f02e8b43a8fbbc3f3e0d4f0f4bfc8131bcb4eebe8849b8e5c773f3a1c582a53", size = 13643, upload-time = "2024-05-20T21:33:24.1Z" }, +] + [[package]] name = "colorama" version = "0.4.6" @@ -12,10 +21,16 @@ wheels = [ ] [[package]] -name = "flowrun" +name = "flowrun-dag" version = "0.1.0" source = { editable = "." } +[package.optional-dependencies] +examples = [ + { name = "pandera", extra = ["polars"] }, + { name = "polars" }, +] + [package.dev-dependencies] dev = [ { name = "pytest" }, @@ -24,6 +39,11 @@ dev = [ ] [package.metadata] +requires-dist = [ + { name = "pandera", extras = ["polars"], marker = "extra == 'examples'", specifier = ">=0.30.1" }, + { name = "polars", marker = "extra == 'examples'", specifier = ">=1.39.3" }, +] +provides-extras = ["examples"] [package.metadata.requires-dev] dev = [ @@ -41,6 +61,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/cb/b1/3846dd7f199d53cb17f49cba7e651e9ce294d8497c8c150530ed11865bb8/iniconfig-2.3.0-py3-none-any.whl", hash = "sha256:f631c04d2c48c52b84d0d0549c99ff3859c98df65b3101406327ecc7d53fbf12", size = 7484, upload-time = "2025-10-18T21:55:41.639Z" }, ] +[[package]] +name = "mypy-extensions" +version = "1.1.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/a2/6e/371856a3fb9d31ca8dac321cda606860fa4548858c0cc45d9d1d4ca2628b/mypy_extensions-1.1.0.tar.gz", hash = "sha256:52e68efc3284861e772bbcd66823fde5ae21fd2fdb51c62a211403730b916558", size = 6343, upload-time = "2025-04-22T14:54:24.164Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/79/7b/2c79738432f5c924bef5071f933bcc9efd0473bac3b4aa584a6f7c1c8df8/mypy_extensions-1.1.0-py3-none-any.whl", hash = 
"sha256:1be4cccdb0f2482337c4743e60421de3a356cd97508abadd57d47403e94f5505", size = 4963, upload-time = "2025-04-22T14:54:22.983Z" }, +] + [[package]] name = "packaging" version = "25.0" @@ -50,6 +79,27 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/20/12/38679034af332785aac8774540895e234f4d07f7545804097de4b666afd8/packaging-25.0-py3-none-any.whl", hash = "sha256:29572ef2b1f17581046b3a2227d5c611fb25ec70ca1ba8554b24b0e69331a484", size = 66469, upload-time = "2025-04-19T11:48:57.875Z" }, ] +[[package]] +name = "pandera" +version = "0.30.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "packaging" }, + { name = "pydantic" }, + { name = "typeguard" }, + { name = "typing-extensions" }, + { name = "typing-inspect" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/09/82/e5c312159bba3220e0ba2a3f30d2f89c44ab611d5b4d2655f952caad22f0/pandera-0.30.1.tar.gz", hash = "sha256:84af217d96dd6541026b75e273c06c5ce70bb54f3a63c8b0a1f371935e24460d", size = 592585, upload-time = "2026-03-18T00:58:58.277Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/37/17/4c89d26ba4f6fb7fc5d3c7f3558aaf4b1e4d843b855e01300a86876987cf/pandera-0.30.1-py3-none-any.whl", hash = "sha256:910656a8c1e10a9759b57dd58ac9dd16298e64baaaae476f1c2c40ee326fb263", size = 303587, upload-time = "2026-03-18T00:58:56.993Z" }, +] + +[package.optional-dependencies] +polars = [ + { name = "polars" }, +] + [[package]] name = "pluggy" version = "1.6.0" @@ -59,6 +109,120 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/54/20/4d324d65cc6d9205fabedc306948156824eb9f0ee1633355a8f7ec5c66bf/pluggy-1.6.0-py3-none-any.whl", hash = "sha256:e920276dd6813095e9377c0bc5566d94c932c33b27a3e3945d8389c374dd4746", size = 20538, upload-time = "2025-05-15T12:30:06.134Z" }, ] +[[package]] +name = "polars" +version = "1.39.3" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "polars-runtime-32" }, +] +sdist = { url = 
"https://files.pythonhosted.org/packages/93/ab/f19e592fce9e000da49c96bf35e77cef67f9cb4b040bfa538a2764c0263e/polars-1.39.3.tar.gz", hash = "sha256:2e016c7f3e8d14fa777ef86fe0477cec6c67023a20ba4c94d6e8431eefe4a63c", size = 728987, upload-time = "2026-03-20T11:16:24.836Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/b4/db/08f4ca10c5018813e7e0b59e4472302328b3d2ab1512f5a2157a814540e0/polars-1.39.3-py3-none-any.whl", hash = "sha256:c2b955ccc0a08a2bc9259785decf3d5c007b489b523bf2390cf21cec2bb82a56", size = 823985, upload-time = "2026-03-20T11:14:23.619Z" }, +] + +[[package]] +name = "polars-runtime-32" +version = "1.39.3" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/17/39/c8688696bc22b6c501e3b82ef3be10e543c07a785af5660f30997cd22dd2/polars_runtime_32-1.39.3.tar.gz", hash = "sha256:c728e4f469cafab501947585f36311b8fb222d3e934c6209e83791e0df20b29d", size = 2872335, upload-time = "2026-03-20T11:16:26.581Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/3b/74/1b41205f7368c9375ab1dea91178eaa20435fe3eff036390a53a7660b416/polars_runtime_32-1.39.3-cp310-abi3-macosx_10_12_x86_64.whl", hash = "sha256:425c0b220b573fa097b4042edff73114cc6d23432a21dfd2dc41adf329d7d2e9", size = 45273243, upload-time = "2026-03-20T11:14:26.691Z" }, + { url = "https://files.pythonhosted.org/packages/90/bf/297716b3095fe719be20fcf7af1d2b6ab069c38199bbace2469608a69b3a/polars_runtime_32-1.39.3-cp310-abi3-macosx_11_0_arm64.whl", hash = "sha256:ef5884711e3c617d7dc93519a7d038e242f5741cfe5fe9afd32d58845d86c562", size = 40842924, upload-time = "2026-03-20T11:14:31.154Z" }, + { url = "https://files.pythonhosted.org/packages/3d/3e/e65236d9d0d9babfa0ecba593413c06530fca60a8feb8f66243aa5dba92e/polars_runtime_32-1.39.3-cp310-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:06b47f535eb1f97a9a1e5b0053ef50db3a4276e241178e37bbb1a38b1fa53b14", size = 43220650, upload-time = "2026-03-20T11:14:35.458Z" }, + { 
url = "https://files.pythonhosted.org/packages/b0/15/fc3e43f3fdf3f20b7dfb5abe871ab6162cf8fb4aeabf4cfad822d5dc4c79/polars_runtime_32-1.39.3-cp310-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8bc9e13dc1d2e828331f2fe8ccbc9757554dc4933a8d3e85e906b988178f95ed", size = 46877498, upload-time = "2026-03-20T11:14:40.14Z" }, + { url = "https://files.pythonhosted.org/packages/3c/81/bd5f895919e32c6ab0a7786cd0c0ca961cb03152c47c3645808b54383f31/polars_runtime_32-1.39.3-cp310-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:363d49e3a3e638fc943e2b9887940300a7d06789930855a178a4727949259dc2", size = 43380176, upload-time = "2026-03-20T11:14:45.566Z" }, + { url = "https://files.pythonhosted.org/packages/7a/3e/c86433c3b5ec0315bdfc7640d0c15d41f1216c0103a0eab9a9b5147d6c4c/polars_runtime_32-1.39.3-cp310-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:7c206bdcc7bc62ea038d6adea8e44b02f0e675e0191a54c810703b4895208ea4", size = 46485933, upload-time = "2026-03-20T11:14:51.155Z" }, + { url = "https://files.pythonhosted.org/packages/54/ce/200b310cf91f98e652eb6ea09fdb3a9718aa0293ebf113dce325797c8572/polars_runtime_32-1.39.3-cp310-abi3-win_amd64.whl", hash = "sha256:d66ca522517554a883446957539c40dc7b75eb0c2220357fb28bc8940d305339", size = 46995458, upload-time = "2026-03-20T11:14:56.074Z" }, + { url = "https://files.pythonhosted.org/packages/da/76/2d48927e0aa2abbdde08cbf4a2536883b73277d47fbeca95e952de86df34/polars_runtime_32-1.39.3-cp310-abi3-win_arm64.whl", hash = "sha256:f49f51461de63f13e5dd4eb080421c8f23f856945f3f8bd5b2b1f59da52c2860", size = 41857648, upload-time = "2026-03-20T11:15:01.142Z" }, +] + +[[package]] +name = "pydantic" +version = "2.12.5" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "annotated-types" }, + { name = "pydantic-core" }, + { name = "typing-extensions" }, + { name = "typing-inspection" }, +] +sdist = { url = 
"https://files.pythonhosted.org/packages/69/44/36f1a6e523abc58ae5f928898e4aca2e0ea509b5aa6f6f392a5d882be928/pydantic-2.12.5.tar.gz", hash = "sha256:4d351024c75c0f085a9febbb665ce8c0c6ec5d30e903bdb6394b7ede26aebb49", size = 821591, upload-time = "2025-11-26T15:11:46.471Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/5a/87/b70ad306ebb6f9b585f114d0ac2137d792b48be34d732d60e597c2f8465a/pydantic-2.12.5-py3-none-any.whl", hash = "sha256:e561593fccf61e8a20fc46dfc2dfe075b8be7d0188df33f221ad1f0139180f9d", size = 463580, upload-time = "2025-11-26T15:11:44.605Z" }, +] + +[[package]] +name = "pydantic-core" +version = "2.41.5" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "typing-extensions" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/71/70/23b021c950c2addd24ec408e9ab05d59b035b39d97cdc1130e1bce647bb6/pydantic_core-2.41.5.tar.gz", hash = "sha256:08daa51ea16ad373ffd5e7606252cc32f07bc72b28284b6bc9c6df804816476e", size = 460952, upload-time = "2025-11-04T13:43:49.098Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/5f/5d/5f6c63eebb5afee93bcaae4ce9a898f3373ca23df3ccaef086d0233a35a7/pydantic_core-2.41.5-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:f41a7489d32336dbf2199c8c0a215390a751c5b014c2c1c5366e817202e9cdf7", size = 2110990, upload-time = "2025-11-04T13:39:58.079Z" }, + { url = "https://files.pythonhosted.org/packages/aa/32/9c2e8ccb57c01111e0fd091f236c7b371c1bccea0fa85247ac55b1e2b6b6/pydantic_core-2.41.5-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:070259a8818988b9a84a449a2a7337c7f430a22acc0859c6b110aa7212a6d9c0", size = 1896003, upload-time = "2025-11-04T13:39:59.956Z" }, + { url = "https://files.pythonhosted.org/packages/68/b8/a01b53cb0e59139fbc9e4fda3e9724ede8de279097179be4ff31f1abb65a/pydantic_core-2.41.5-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e96cea19e34778f8d59fe40775a7a574d95816eb150850a85a7a4c8f4b94ac69", size = 1919200, 
upload-time = "2025-11-04T13:40:02.241Z" }, + { url = "https://files.pythonhosted.org/packages/38/de/8c36b5198a29bdaade07b5985e80a233a5ac27137846f3bc2d3b40a47360/pydantic_core-2.41.5-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:ed2e99c456e3fadd05c991f8f437ef902e00eedf34320ba2b0842bd1c3ca3a75", size = 2052578, upload-time = "2025-11-04T13:40:04.401Z" }, + { url = "https://files.pythonhosted.org/packages/00/b5/0e8e4b5b081eac6cb3dbb7e60a65907549a1ce035a724368c330112adfdd/pydantic_core-2.41.5-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:65840751b72fbfd82c3c640cff9284545342a4f1eb1586ad0636955b261b0b05", size = 2208504, upload-time = "2025-11-04T13:40:06.072Z" }, + { url = "https://files.pythonhosted.org/packages/77/56/87a61aad59c7c5b9dc8caad5a41a5545cba3810c3e828708b3d7404f6cef/pydantic_core-2.41.5-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:e536c98a7626a98feb2d3eaf75944ef6f3dbee447e1f841eae16f2f0a72d8ddc", size = 2335816, upload-time = "2025-11-04T13:40:07.835Z" }, + { url = "https://files.pythonhosted.org/packages/0d/76/941cc9f73529988688a665a5c0ecff1112b3d95ab48f81db5f7606f522d3/pydantic_core-2.41.5-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:eceb81a8d74f9267ef4081e246ffd6d129da5d87e37a77c9bde550cb04870c1c", size = 2075366, upload-time = "2025-11-04T13:40:09.804Z" }, + { url = "https://files.pythonhosted.org/packages/d3/43/ebef01f69baa07a482844faaa0a591bad1ef129253ffd0cdaa9d8a7f72d3/pydantic_core-2.41.5-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:d38548150c39b74aeeb0ce8ee1d8e82696f4a4e16ddc6de7b1d8823f7de4b9b5", size = 2171698, upload-time = "2025-11-04T13:40:12.004Z" }, + { url = "https://files.pythonhosted.org/packages/b1/87/41f3202e4193e3bacfc2c065fab7706ebe81af46a83d3e27605029c1f5a6/pydantic_core-2.41.5-cp312-cp312-musllinux_1_1_aarch64.whl", hash = 
"sha256:c23e27686783f60290e36827f9c626e63154b82b116d7fe9adba1fda36da706c", size = 2132603, upload-time = "2025-11-04T13:40:13.868Z" }, + { url = "https://files.pythonhosted.org/packages/49/7d/4c00df99cb12070b6bccdef4a195255e6020a550d572768d92cc54dba91a/pydantic_core-2.41.5-cp312-cp312-musllinux_1_1_armv7l.whl", hash = "sha256:482c982f814460eabe1d3bb0adfdc583387bd4691ef00b90575ca0d2b6fe2294", size = 2329591, upload-time = "2025-11-04T13:40:15.672Z" }, + { url = "https://files.pythonhosted.org/packages/cc/6a/ebf4b1d65d458f3cda6a7335d141305dfa19bdc61140a884d165a8a1bbc7/pydantic_core-2.41.5-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:bfea2a5f0b4d8d43adf9d7b8bf019fb46fdd10a2e5cde477fbcb9d1fa08c68e1", size = 2319068, upload-time = "2025-11-04T13:40:17.532Z" }, + { url = "https://files.pythonhosted.org/packages/49/3b/774f2b5cd4192d5ab75870ce4381fd89cf218af999515baf07e7206753f0/pydantic_core-2.41.5-cp312-cp312-win32.whl", hash = "sha256:b74557b16e390ec12dca509bce9264c3bbd128f8a2c376eaa68003d7f327276d", size = 1985908, upload-time = "2025-11-04T13:40:19.309Z" }, + { url = "https://files.pythonhosted.org/packages/86/45/00173a033c801cacf67c190fef088789394feaf88a98a7035b0e40d53dc9/pydantic_core-2.41.5-cp312-cp312-win_amd64.whl", hash = "sha256:1962293292865bca8e54702b08a4f26da73adc83dd1fcf26fbc875b35d81c815", size = 2020145, upload-time = "2025-11-04T13:40:21.548Z" }, + { url = "https://files.pythonhosted.org/packages/f9/22/91fbc821fa6d261b376a3f73809f907cec5ca6025642c463d3488aad22fb/pydantic_core-2.41.5-cp312-cp312-win_arm64.whl", hash = "sha256:1746d4a3d9a794cacae06a5eaaccb4b8643a131d45fbc9af23e353dc0a5ba5c3", size = 1976179, upload-time = "2025-11-04T13:40:23.393Z" }, + { url = "https://files.pythonhosted.org/packages/87/06/8806241ff1f70d9939f9af039c6c35f2360cf16e93c2ca76f184e76b1564/pydantic_core-2.41.5-cp313-cp313-macosx_10_12_x86_64.whl", hash = "sha256:941103c9be18ac8daf7b7adca8228f8ed6bb7a1849020f643b3a14d15b1924d9", size = 2120403, upload-time = 
"2025-11-04T13:40:25.248Z" }, + { url = "https://files.pythonhosted.org/packages/94/02/abfa0e0bda67faa65fef1c84971c7e45928e108fe24333c81f3bfe35d5f5/pydantic_core-2.41.5-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:112e305c3314f40c93998e567879e887a3160bb8689ef3d2c04b6cc62c33ac34", size = 1896206, upload-time = "2025-11-04T13:40:27.099Z" }, + { url = "https://files.pythonhosted.org/packages/15/df/a4c740c0943e93e6500f9eb23f4ca7ec9bf71b19e608ae5b579678c8d02f/pydantic_core-2.41.5-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0cbaad15cb0c90aa221d43c00e77bb33c93e8d36e0bf74760cd00e732d10a6a0", size = 1919307, upload-time = "2025-11-04T13:40:29.806Z" }, + { url = "https://files.pythonhosted.org/packages/9a/e3/6324802931ae1d123528988e0e86587c2072ac2e5394b4bc2bc34b61ff6e/pydantic_core-2.41.5-cp313-cp313-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:03ca43e12fab6023fc79d28ca6b39b05f794ad08ec2feccc59a339b02f2b3d33", size = 2063258, upload-time = "2025-11-04T13:40:33.544Z" }, + { url = "https://files.pythonhosted.org/packages/c9/d4/2230d7151d4957dd79c3044ea26346c148c98fbf0ee6ebd41056f2d62ab5/pydantic_core-2.41.5-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:dc799088c08fa04e43144b164feb0c13f9a0bc40503f8df3e9fde58a3c0c101e", size = 2214917, upload-time = "2025-11-04T13:40:35.479Z" }, + { url = "https://files.pythonhosted.org/packages/e6/9f/eaac5df17a3672fef0081b6c1bb0b82b33ee89aa5cec0d7b05f52fd4a1fa/pydantic_core-2.41.5-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:97aeba56665b4c3235a0e52b2c2f5ae9cd071b8a8310ad27bddb3f7fb30e9aa2", size = 2332186, upload-time = "2025-11-04T13:40:37.436Z" }, + { url = "https://files.pythonhosted.org/packages/cf/4e/35a80cae583a37cf15604b44240e45c05e04e86f9cfd766623149297e971/pydantic_core-2.41.5-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:406bf18d345822d6c21366031003612b9c77b3e29ffdb0f612367352aab7d586", 
size = 2073164, upload-time = "2025-11-04T13:40:40.289Z" }, + { url = "https://files.pythonhosted.org/packages/bf/e3/f6e262673c6140dd3305d144d032f7bd5f7497d3871c1428521f19f9efa2/pydantic_core-2.41.5-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:b93590ae81f7010dbe380cdeab6f515902ebcbefe0b9327cc4804d74e93ae69d", size = 2179146, upload-time = "2025-11-04T13:40:42.809Z" }, + { url = "https://files.pythonhosted.org/packages/75/c7/20bd7fc05f0c6ea2056a4565c6f36f8968c0924f19b7d97bbfea55780e73/pydantic_core-2.41.5-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:01a3d0ab748ee531f4ea6c3e48ad9dac84ddba4b0d82291f87248f2f9de8d740", size = 2137788, upload-time = "2025-11-04T13:40:44.752Z" }, + { url = "https://files.pythonhosted.org/packages/3a/8d/34318ef985c45196e004bc46c6eab2eda437e744c124ef0dbe1ff2c9d06b/pydantic_core-2.41.5-cp313-cp313-musllinux_1_1_armv7l.whl", hash = "sha256:6561e94ba9dacc9c61bce40e2d6bdc3bfaa0259d3ff36ace3b1e6901936d2e3e", size = 2340133, upload-time = "2025-11-04T13:40:46.66Z" }, + { url = "https://files.pythonhosted.org/packages/9c/59/013626bf8c78a5a5d9350d12e7697d3d4de951a75565496abd40ccd46bee/pydantic_core-2.41.5-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:915c3d10f81bec3a74fbd4faebe8391013ba61e5a1a8d48c4455b923bdda7858", size = 2324852, upload-time = "2025-11-04T13:40:48.575Z" }, + { url = "https://files.pythonhosted.org/packages/1a/d9/c248c103856f807ef70c18a4f986693a46a8ffe1602e5d361485da502d20/pydantic_core-2.41.5-cp313-cp313-win32.whl", hash = "sha256:650ae77860b45cfa6e2cdafc42618ceafab3a2d9a3811fcfbd3bbf8ac3c40d36", size = 1994679, upload-time = "2025-11-04T13:40:50.619Z" }, + { url = "https://files.pythonhosted.org/packages/9e/8b/341991b158ddab181cff136acd2552c9f35bd30380422a639c0671e99a91/pydantic_core-2.41.5-cp313-cp313-win_amd64.whl", hash = "sha256:79ec52ec461e99e13791ec6508c722742ad745571f234ea6255bed38c6480f11", size = 2019766, upload-time = "2025-11-04T13:40:52.631Z" }, + { url = 
"https://files.pythonhosted.org/packages/73/7d/f2f9db34af103bea3e09735bb40b021788a5e834c81eedb541991badf8f5/pydantic_core-2.41.5-cp313-cp313-win_arm64.whl", hash = "sha256:3f84d5c1b4ab906093bdc1ff10484838aca54ef08de4afa9de0f5f14d69639cd", size = 1981005, upload-time = "2025-11-04T13:40:54.734Z" }, + { url = "https://files.pythonhosted.org/packages/ea/28/46b7c5c9635ae96ea0fbb779e271a38129df2550f763937659ee6c5dbc65/pydantic_core-2.41.5-cp314-cp314-macosx_10_12_x86_64.whl", hash = "sha256:3f37a19d7ebcdd20b96485056ba9e8b304e27d9904d233d7b1015db320e51f0a", size = 2119622, upload-time = "2025-11-04T13:40:56.68Z" }, + { url = "https://files.pythonhosted.org/packages/74/1a/145646e5687e8d9a1e8d09acb278c8535ebe9e972e1f162ed338a622f193/pydantic_core-2.41.5-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:1d1d9764366c73f996edd17abb6d9d7649a7eb690006ab6adbda117717099b14", size = 1891725, upload-time = "2025-11-04T13:40:58.807Z" }, + { url = "https://files.pythonhosted.org/packages/23/04/e89c29e267b8060b40dca97bfc64a19b2a3cf99018167ea1677d96368273/pydantic_core-2.41.5-cp314-cp314-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:25e1c2af0fce638d5f1988b686f3b3ea8cd7de5f244ca147c777769e798a9cd1", size = 1915040, upload-time = "2025-11-04T13:41:00.853Z" }, + { url = "https://files.pythonhosted.org/packages/84/a3/15a82ac7bd97992a82257f777b3583d3e84bdb06ba6858f745daa2ec8a85/pydantic_core-2.41.5-cp314-cp314-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:506d766a8727beef16b7adaeb8ee6217c64fc813646b424d0804d67c16eddb66", size = 2063691, upload-time = "2025-11-04T13:41:03.504Z" }, + { url = "https://files.pythonhosted.org/packages/74/9b/0046701313c6ef08c0c1cf0e028c67c770a4e1275ca73131563c5f2a310a/pydantic_core-2.41.5-cp314-cp314-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:4819fa52133c9aa3c387b3328f25c1facc356491e6135b459f1de698ff64d869", size = 2213897, upload-time = "2025-11-04T13:41:05.804Z" }, + { url = 
"https://files.pythonhosted.org/packages/8a/cd/6bac76ecd1b27e75a95ca3a9a559c643b3afcd2dd62086d4b7a32a18b169/pydantic_core-2.41.5-cp314-cp314-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:2b761d210c9ea91feda40d25b4efe82a1707da2ef62901466a42492c028553a2", size = 2333302, upload-time = "2025-11-04T13:41:07.809Z" }, + { url = "https://files.pythonhosted.org/packages/4c/d2/ef2074dc020dd6e109611a8be4449b98cd25e1b9b8a303c2f0fca2f2bcf7/pydantic_core-2.41.5-cp314-cp314-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:22f0fb8c1c583a3b6f24df2470833b40207e907b90c928cc8d3594b76f874375", size = 2064877, upload-time = "2025-11-04T13:41:09.827Z" }, + { url = "https://files.pythonhosted.org/packages/18/66/e9db17a9a763d72f03de903883c057b2592c09509ccfe468187f2a2eef29/pydantic_core-2.41.5-cp314-cp314-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:2782c870e99878c634505236d81e5443092fba820f0373997ff75f90f68cd553", size = 2180680, upload-time = "2025-11-04T13:41:12.379Z" }, + { url = "https://files.pythonhosted.org/packages/d3/9e/3ce66cebb929f3ced22be85d4c2399b8e85b622db77dad36b73c5387f8f8/pydantic_core-2.41.5-cp314-cp314-musllinux_1_1_aarch64.whl", hash = "sha256:0177272f88ab8312479336e1d777f6b124537d47f2123f89cb37e0accea97f90", size = 2138960, upload-time = "2025-11-04T13:41:14.627Z" }, + { url = "https://files.pythonhosted.org/packages/a6/62/205a998f4327d2079326b01abee48e502ea739d174f0a89295c481a2272e/pydantic_core-2.41.5-cp314-cp314-musllinux_1_1_armv7l.whl", hash = "sha256:63510af5e38f8955b8ee5687740d6ebf7c2a0886d15a6d65c32814613681bc07", size = 2339102, upload-time = "2025-11-04T13:41:16.868Z" }, + { url = "https://files.pythonhosted.org/packages/3c/0d/f05e79471e889d74d3d88f5bd20d0ed189ad94c2423d81ff8d0000aab4ff/pydantic_core-2.41.5-cp314-cp314-musllinux_1_1_x86_64.whl", hash = "sha256:e56ba91f47764cc14f1daacd723e3e82d1a89d783f0f5afe9c364b8bb491ccdb", size = 2326039, upload-time = "2025-11-04T13:41:18.934Z" }, + { url = 
"https://files.pythonhosted.org/packages/ec/e1/e08a6208bb100da7e0c4b288eed624a703f4d129bde2da475721a80cab32/pydantic_core-2.41.5-cp314-cp314-win32.whl", hash = "sha256:aec5cf2fd867b4ff45b9959f8b20ea3993fc93e63c7363fe6851424c8a7e7c23", size = 1995126, upload-time = "2025-11-04T13:41:21.418Z" }, + { url = "https://files.pythonhosted.org/packages/48/5d/56ba7b24e9557f99c9237e29f5c09913c81eeb2f3217e40e922353668092/pydantic_core-2.41.5-cp314-cp314-win_amd64.whl", hash = "sha256:8e7c86f27c585ef37c35e56a96363ab8de4e549a95512445b85c96d3e2f7c1bf", size = 2015489, upload-time = "2025-11-04T13:41:24.076Z" }, + { url = "https://files.pythonhosted.org/packages/4e/bb/f7a190991ec9e3e0ba22e4993d8755bbc4a32925c0b5b42775c03e8148f9/pydantic_core-2.41.5-cp314-cp314-win_arm64.whl", hash = "sha256:e672ba74fbc2dc8eea59fb6d4aed6845e6905fc2a8afe93175d94a83ba2a01a0", size = 1977288, upload-time = "2025-11-04T13:41:26.33Z" }, + { url = "https://files.pythonhosted.org/packages/92/ed/77542d0c51538e32e15afe7899d79efce4b81eee631d99850edc2f5e9349/pydantic_core-2.41.5-cp314-cp314t-macosx_10_12_x86_64.whl", hash = "sha256:8566def80554c3faa0e65ac30ab0932b9e3a5cd7f8323764303d468e5c37595a", size = 2120255, upload-time = "2025-11-04T13:41:28.569Z" }, + { url = "https://files.pythonhosted.org/packages/bb/3d/6913dde84d5be21e284439676168b28d8bbba5600d838b9dca99de0fad71/pydantic_core-2.41.5-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:b80aa5095cd3109962a298ce14110ae16b8c1aece8b72f9dafe81cf597ad80b3", size = 1863760, upload-time = "2025-11-04T13:41:31.055Z" }, + { url = "https://files.pythonhosted.org/packages/5a/f0/e5e6b99d4191da102f2b0eb9687aaa7f5bea5d9964071a84effc3e40f997/pydantic_core-2.41.5-cp314-cp314t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3006c3dd9ba34b0c094c544c6006cc79e87d8612999f1a5d43b769b89181f23c", size = 1878092, upload-time = "2025-11-04T13:41:33.21Z" }, + { url = 
"https://files.pythonhosted.org/packages/71/48/36fb760642d568925953bcc8116455513d6e34c4beaa37544118c36aba6d/pydantic_core-2.41.5-cp314-cp314t-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:72f6c8b11857a856bcfa48c86f5368439f74453563f951e473514579d44aa612", size = 2053385, upload-time = "2025-11-04T13:41:35.508Z" }, + { url = "https://files.pythonhosted.org/packages/20/25/92dc684dd8eb75a234bc1c764b4210cf2646479d54b47bf46061657292a8/pydantic_core-2.41.5-cp314-cp314t-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:5cb1b2f9742240e4bb26b652a5aeb840aa4b417c7748b6f8387927bc6e45e40d", size = 2218832, upload-time = "2025-11-04T13:41:37.732Z" }, + { url = "https://files.pythonhosted.org/packages/e2/09/f53e0b05023d3e30357d82eb35835d0f6340ca344720a4599cd663dca599/pydantic_core-2.41.5-cp314-cp314t-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:bd3d54f38609ff308209bd43acea66061494157703364ae40c951f83ba99a1a9", size = 2327585, upload-time = "2025-11-04T13:41:40Z" }, + { url = "https://files.pythonhosted.org/packages/aa/4e/2ae1aa85d6af35a39b236b1b1641de73f5a6ac4d5a7509f77b814885760c/pydantic_core-2.41.5-cp314-cp314t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2ff4321e56e879ee8d2a879501c8e469414d948f4aba74a2d4593184eb326660", size = 2041078, upload-time = "2025-11-04T13:41:42.323Z" }, + { url = "https://files.pythonhosted.org/packages/cd/13/2e215f17f0ef326fc72afe94776edb77525142c693767fc347ed6288728d/pydantic_core-2.41.5-cp314-cp314t-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:d0d2568a8c11bf8225044aa94409e21da0cb09dcdafe9ecd10250b2baad531a9", size = 2173914, upload-time = "2025-11-04T13:41:45.221Z" }, + { url = "https://files.pythonhosted.org/packages/02/7a/f999a6dcbcd0e5660bc348a3991c8915ce6599f4f2c6ac22f01d7a10816c/pydantic_core-2.41.5-cp314-cp314t-musllinux_1_1_aarch64.whl", hash = "sha256:a39455728aabd58ceabb03c90e12f71fd30fa69615760a075b9fec596456ccc3", size = 2129560, upload-time = 
"2025-11-04T13:41:47.474Z" }, + { url = "https://files.pythonhosted.org/packages/3a/b1/6c990ac65e3b4c079a4fb9f5b05f5b013afa0f4ed6780a3dd236d2cbdc64/pydantic_core-2.41.5-cp314-cp314t-musllinux_1_1_armv7l.whl", hash = "sha256:239edca560d05757817c13dc17c50766136d21f7cd0fac50295499ae24f90fdf", size = 2329244, upload-time = "2025-11-04T13:41:49.992Z" }, + { url = "https://files.pythonhosted.org/packages/d9/02/3c562f3a51afd4d88fff8dffb1771b30cfdfd79befd9883ee094f5b6c0d8/pydantic_core-2.41.5-cp314-cp314t-musllinux_1_1_x86_64.whl", hash = "sha256:2a5e06546e19f24c6a96a129142a75cee553cc018ffee48a460059b1185f4470", size = 2331955, upload-time = "2025-11-04T13:41:54.079Z" }, + { url = "https://files.pythonhosted.org/packages/5c/96/5fb7d8c3c17bc8c62fdb031c47d77a1af698f1d7a406b0f79aaa1338f9ad/pydantic_core-2.41.5-cp314-cp314t-win32.whl", hash = "sha256:b4ececa40ac28afa90871c2cc2b9ffd2ff0bf749380fbdf57d165fd23da353aa", size = 1988906, upload-time = "2025-11-04T13:41:56.606Z" }, + { url = "https://files.pythonhosted.org/packages/22/ed/182129d83032702912c2e2d8bbe33c036f342cc735737064668585dac28f/pydantic_core-2.41.5-cp314-cp314t-win_amd64.whl", hash = "sha256:80aa89cad80b32a912a65332f64a4450ed00966111b6615ca6816153d3585a8c", size = 1981607, upload-time = "2025-11-04T13:41:58.889Z" }, + { url = "https://files.pythonhosted.org/packages/9f/ed/068e41660b832bb0b1aa5b58011dea2a3fe0ba7861ff38c4d4904c1c1a99/pydantic_core-2.41.5-cp314-cp314t-win_arm64.whl", hash = "sha256:35b44f37a3199f771c3eaa53051bc8a70cd7b54f333531c59e29fd4db5d15008", size = 1974769, upload-time = "2025-11-04T13:42:01.186Z" }, + { url = "https://files.pythonhosted.org/packages/09/32/59b0c7e63e277fa7911c2fc70ccfb45ce4b98991e7ef37110663437005af/pydantic_core-2.41.5-graalpy312-graalpy250_312_native-macosx_10_12_x86_64.whl", hash = "sha256:7da7087d756b19037bc2c06edc6c170eeef3c3bafcb8f532ff17d64dc427adfd", size = 2110495, upload-time = "2025-11-04T13:42:49.689Z" }, + { url = 
"https://files.pythonhosted.org/packages/aa/81/05e400037eaf55ad400bcd318c05bb345b57e708887f07ddb2d20e3f0e98/pydantic_core-2.41.5-graalpy312-graalpy250_312_native-macosx_11_0_arm64.whl", hash = "sha256:aabf5777b5c8ca26f7824cb4a120a740c9588ed58df9b2d196ce92fba42ff8dc", size = 1915388, upload-time = "2025-11-04T13:42:52.215Z" }, + { url = "https://files.pythonhosted.org/packages/6e/0d/e3549b2399f71d56476b77dbf3cf8937cec5cd70536bdc0e374a421d0599/pydantic_core-2.41.5-graalpy312-graalpy250_312_native-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c007fe8a43d43b3969e8469004e9845944f1a80e6acd47c150856bb87f230c56", size = 1942879, upload-time = "2025-11-04T13:42:56.483Z" }, + { url = "https://files.pythonhosted.org/packages/f7/07/34573da085946b6a313d7c42f82f16e8920bfd730665de2d11c0c37a74b5/pydantic_core-2.41.5-graalpy312-graalpy250_312_native-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:76d0819de158cd855d1cbb8fcafdf6f5cf1eb8e470abe056d5d161106e38062b", size = 2139017, upload-time = "2025-11-04T13:42:59.471Z" }, +] + [[package]] name = "pygments" version = "2.19.2" @@ -123,6 +287,18 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/2e/5d/aa883766f8ef9ffbe6aa24f7192fb71632f31a30e77eb39aa2b0dc4290ac/ruff-0.14.2-py3-none-win_arm64.whl", hash = "sha256:ea9d635e83ba21569fbacda7e78afbfeb94911c9434aff06192d9bc23fd5495a", size = 12554956, upload-time = "2025-10-23T19:36:58.714Z" }, ] +[[package]] +name = "typeguard" +version = "4.5.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "typing-extensions" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/2b/e8/66e25efcc18542d58706ce4e50415710593721aae26e794ab1dec34fb66f/typeguard-4.5.1.tar.gz", hash = "sha256:f6f8ecbbc819c9bc749983cc67c02391e16a9b43b8b27f15dc70ed7c4a007274", size = 80121, upload-time = "2026-02-19T16:09:03.392Z" } +wheels = [ + { url = 
"https://files.pythonhosted.org/packages/91/88/b55b3117287a8540b76dbdd87733808d4d01c8067a3b339408c250bb3600/typeguard-4.5.1-py3-none-any.whl", hash = "sha256:44d2bf329d49a244110a090b55f5f91aa82d9a9834ebfd30bcc73651e4a8cc40", size = 36745, upload-time = "2026-02-19T16:09:01.6Z" }, +] + [[package]] name = "typing-extensions" version = "4.15.0" @@ -131,3 +307,28 @@ sdist = { url = "https://files.pythonhosted.org/packages/72/94/1a15dd82efb362ac8 wheels = [ { url = "https://files.pythonhosted.org/packages/18/67/36e9267722cc04a6b9f15c7f3441c2363321a3ea07da7ae0c0707beb2a9c/typing_extensions-4.15.0-py3-none-any.whl", hash = "sha256:f0fa19c6845758ab08074a0cfa8b7aecb71c999ca73d62883bc25cc018c4e548", size = 44614, upload-time = "2025-08-25T13:49:24.86Z" }, ] + +[[package]] +name = "typing-inspect" +version = "0.9.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "mypy-extensions" }, + { name = "typing-extensions" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/dc/74/1789779d91f1961fa9438e9a8710cdae6bd138c80d7303996933d117264a/typing_inspect-0.9.0.tar.gz", hash = "sha256:b23fc42ff6f6ef6954e4852c1fb512cdd18dbea03134f91f856a95ccc9461f78", size = 13825, upload-time = "2023-05-24T20:25:47.612Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/65/f3/107a22063bf27bdccf2024833d3445f4eea42b2e598abfbd46f6a63b6cb0/typing_inspect-0.9.0-py3-none-any.whl", hash = "sha256:9ee6fc59062311ef8547596ab6b955e1b8aa46242d854bfc78f4f6b0eff35f9f", size = 8827, upload-time = "2023-05-24T20:25:45.287Z" }, +] + +[[package]] +name = "typing-inspection" +version = "0.4.2" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "typing-extensions" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/55/e3/70399cb7dd41c10ac53367ae42139cf4b1ca5f36bb3dc6c9d33acdb43655/typing_inspection-0.4.2.tar.gz", hash = "sha256:ba561c48a67c5958007083d386c3295464928b01faa735ab8547c5692e87f464", size = 75949, upload-time = 
"2025-10-01T02:14:41.687Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/dc/9b/47798a6c91d8bdb567fe2698fe81e0c6b7cb7ef4d13da4114b41d239f65d/typing_inspection-0.4.2-py3-none-any.whl", hash = "sha256:4ed1cacbdc298c220f1bd249ed5287caa16f34d44ef4e9c3d0cbad5b521545e7", size = 14611, upload-time = "2025-10-01T02:14:40.154Z" }, +]