diff --git a/airflow-core/docs/authoring-and-scheduling/assets.rst b/airflow-core/docs/authoring-and-scheduling/assets.rst index 74a22b65e5187..4adc86a84c0ab 100644 --- a/airflow-core/docs/authoring-and-scheduling/assets.rst +++ b/airflow-core/docs/authoring-and-scheduling/assets.rst @@ -623,7 +623,9 @@ partition match can be produced, so the downstream Dag is not triggered for that key. Inside partitioned Dag runs, access the resolved partition through -``dag_run.partition_key``. +``dag_run.partition_key``. For date-shaped partitions, the underlying +``datetime`` is also available as ``dag_run.partition_date``, so +templates can use ``{{ partition_date | ds }}``. You can also trigger a DagRun manually with a partition key (for example, through the Trigger Dag window in the UI, or through the REST API by diff --git a/airflow-core/docs/migrations-ref.rst b/airflow-core/docs/migrations-ref.rst index 82f32c8a2fdd9..034b95a29e5a7 100644 --- a/airflow-core/docs/migrations-ref.rst +++ b/airflow-core/docs/migrations-ref.rst @@ -39,7 +39,10 @@ Here's the list of all the Database Migrations that are executed via when you ru +-------------------------+------------------+-------------------+--------------------------------------------------------------+ | Revision ID | Revises ID | Airflow Version | Description | +=========================+==================+===================+==============================================================+ -| ``acc215baed80`` (head) | ``a1b2c3d4e5f6`` | ``3.3.0`` | Add team_name to trigger table. | +| ``d2f4e1b3c5a7`` (head) | ``acc215baed80`` | ``3.3.0`` | Add partition_date to asset_event and | +| | | | asset_partition_dag_run. | ++-------------------------+------------------+-------------------+--------------------------------------------------------------+ +| ``acc215baed80`` | ``a1b2c3d4e5f6`` | ``3.3.0`` | Add team_name to trigger table. 
| +-------------------------+------------------+-------------------+--------------------------------------------------------------+ | ``a1b2c3d4e5f6`` | ``a7f3b2c1d4e5`` | ``3.3.0`` | Add version_data to dag_version. | +-------------------------+------------------+-------------------+--------------------------------------------------------------+ diff --git a/airflow-core/docs/templates-ref.rst b/airflow-core/docs/templates-ref.rst index b1609ff0c943e..aeb907dbe470f 100644 --- a/airflow-core/docs/templates-ref.rst +++ b/airflow-core/docs/templates-ref.rst @@ -87,6 +87,9 @@ Variable Type Description | is enabled in ``airflow.cfg``. ``{{ partition_key }}`` str | None | The partition key from the current :class:`~airflow.models.dagrun.DagRun`. | Returns ``None`` if no partition key was set. Added in version 3.3.0. +``{{ partition_date }}`` datetime | None | The partition datetime from the current :class:`~airflow.models.dagrun.DagRun`. + | Use ``{{ partition_date | ds }}`` and related filters for formatting. + | Returns ``None`` if no partition date was set. Added in version 3.3.0. ``{{ var.value }}`` Airflow variables. See `Airflow Variables in Templates`_ below. ``{{ var.json }}`` Airflow variables. See `Airflow Variables in Templates`_ below. ``{{ conn }}`` Airflow connections. See `Airflow Connections in Templates`_ below. diff --git a/airflow-core/newsfragments/67285.feature.rst b/airflow-core/newsfragments/67285.feature.rst new file mode 100644 index 0000000000000..2c4370d5ff63a --- /dev/null +++ b/airflow-core/newsfragments/67285.feature.rst @@ -0,0 +1 @@ +Propagate ``partition_date`` from producer DagRuns to consumers of partitioned assets, so date-shaped partitions are available in consumer task templates. 
diff --git a/airflow-core/src/airflow/api_fastapi/core_api/datamodels/dag_run.py b/airflow-core/src/airflow/api_fastapi/core_api/datamodels/dag_run.py index 97b37fdaae5ea..0663a2db1db72 100644 --- a/airflow-core/src/airflow/api_fastapi/core_api/datamodels/dag_run.py +++ b/airflow-core/src/airflow/api_fastapi/core_api/datamodels/dag_run.py @@ -107,6 +107,7 @@ class DAGRunResponse(BaseModel): bundle_version: str | None dag_display_name: str = Field(validation_alias=AliasPath("dag_model", "dag_display_name")) partition_key: str | None + partition_date: datetime | None class DAGRunCollectionResponse(BaseModel): diff --git a/airflow-core/src/airflow/api_fastapi/core_api/openapi/v2-rest-api-generated.yaml b/airflow-core/src/airflow/api_fastapi/core_api/openapi/v2-rest-api-generated.yaml index 618356a1ce793..93d88dec15f80 100644 --- a/airflow-core/src/airflow/api_fastapi/core_api/openapi/v2-rest-api-generated.yaml +++ b/airflow-core/src/airflow/api_fastapi/core_api/openapi/v2-rest-api-generated.yaml @@ -13189,6 +13189,12 @@ components: - type: string - type: 'null' title: Partition Key + partition_date: + anyOf: + - type: string + format: date-time + - type: 'null' + title: Partition Date type: object required: - dag_run_id @@ -13212,6 +13218,7 @@ components: - bundle_version - dag_display_name - partition_key + - partition_date title: DAGRunResponse description: Dag Run serializer for responses. 
DAGRunsBatchBody: diff --git a/airflow-core/src/airflow/api_fastapi/execution_api/datamodels/asset_event.py b/airflow-core/src/airflow/api_fastapi/execution_api/datamodels/asset_event.py index f6c3ce826698b..d9808c34f8cb3 100644 --- a/airflow-core/src/airflow/api_fastapi/execution_api/datamodels/asset_event.py +++ b/airflow-core/src/airflow/api_fastapi/execution_api/datamodels/asset_event.py @@ -37,6 +37,7 @@ class DagRunAssetReference(StrictBaseModel): data_interval_start: datetime | None data_interval_end: datetime | None partition_key: str | None + partition_date: datetime | None class AssetEventResponse(BaseModel): @@ -54,6 +55,7 @@ class AssetEventResponse(BaseModel): source_run_id: str | None = None source_map_index: int | None = None partition_key: str | None = None + partition_date: datetime | None = None class AssetEventsResponse(BaseModel): diff --git a/airflow-core/src/airflow/api_fastapi/execution_api/datamodels/taskinstance.py b/airflow-core/src/airflow/api_fastapi/execution_api/datamodels/taskinstance.py index a0d9739080118..7ec7eb543a298 100644 --- a/airflow-core/src/airflow/api_fastapi/execution_api/datamodels/taskinstance.py +++ b/airflow-core/src/airflow/api_fastapi/execution_api/datamodels/taskinstance.py @@ -307,6 +307,7 @@ class DagRun(StrictBaseModel): triggering_user_name: str | None = None consumed_asset_events: list[AssetEventDagRunReference] partition_key: str | None + partition_date: UtcDateTime | None = None note: str | None = None team_name: str | None = None diff --git a/airflow-core/src/airflow/api_fastapi/execution_api/routes/asset_events.py b/airflow-core/src/airflow/api_fastapi/execution_api/routes/asset_events.py index d0ecc3d3adaf2..2f4633801cbef 100644 --- a/airflow-core/src/airflow/api_fastapi/execution_api/routes/asset_events.py +++ b/airflow-core/src/airflow/api_fastapi/execution_api/routes/asset_events.py @@ -66,6 +66,7 @@ def _get_asset_events_through_sql_clauses( source_run_id=event.source_run_id, 
source_map_index=event.source_map_index, partition_key=event.partition_key, + partition_date=event.partition_date, ) for event in asset_events ] diff --git a/airflow-core/src/airflow/api_fastapi/execution_api/versions/__init__.py b/airflow-core/src/airflow/api_fastapi/execution_api/versions/__init__.py index ab995da52d062..ea9629963bcbf 100644 --- a/airflow-core/src/airflow/api_fastapi/execution_api/versions/__init__.py +++ b/airflow-core/src/airflow/api_fastapi/execution_api/versions/__init__.py @@ -46,11 +46,14 @@ AddStateEndpoints, AddTeamNameField, ) -from airflow.api_fastapi.execution_api.versions.v2026_06_30 import AddVariableKeysEndpoint +from airflow.api_fastapi.execution_api.versions.v2026_06_30 import ( + AddPartitionDateField, + AddVariableKeysEndpoint, +) bundle = VersionBundle( HeadVersion(), - Version("2026-06-30", AddVariableKeysEndpoint), + Version("2026-06-30", AddVariableKeysEndpoint, AddPartitionDateField), Version( "2026-06-16", AddRetryPolicyFields, diff --git a/airflow-core/src/airflow/api_fastapi/execution_api/versions/v2026_06_30.py b/airflow-core/src/airflow/api_fastapi/execution_api/versions/v2026_06_30.py index 0bc300a499837..376b108ecbf97 100644 --- a/airflow-core/src/airflow/api_fastapi/execution_api/versions/v2026_06_30.py +++ b/airflow-core/src/airflow/api_fastapi/execution_api/versions/v2026_06_30.py @@ -17,7 +17,17 @@ from __future__ import annotations -from cadwyn import VersionChange, endpoint +from cadwyn import ResponseInfo, VersionChange, convert_response_to_previous_version_for, endpoint, schema + +from airflow.api_fastapi.execution_api.datamodels.asset_event import ( + AssetEventResponse, + AssetEventsResponse, + DagRunAssetReference, +) +from airflow.api_fastapi.execution_api.datamodels.taskinstance import ( + DagRun, + TIRunContext, +) class AddVariableKeysEndpoint(VersionChange): @@ -26,3 +36,30 @@ class AddVariableKeysEndpoint(VersionChange): description = __doc__ instructions_to_migrate_to_previous_version = 
(endpoint("/variables/keys", ["GET"]).didnt_exist,) + + +class AddPartitionDateField(VersionChange): + """Expose the producer's partition datetime on the execution API so consumer tasks can template it.""" + + description = __doc__ + + instructions_to_migrate_to_previous_version = ( + schema(DagRun).field("partition_date").didnt_exist, + schema(AssetEventResponse).field("partition_date").didnt_exist, + schema(DagRunAssetReference).field("partition_date").didnt_exist, + ) + + @convert_response_to_previous_version_for(TIRunContext) # type: ignore[arg-type] + def remove_partition_date_from_dag_run(response: ResponseInfo) -> None: # type: ignore[misc] + """Strip ``partition_date`` from the nested ``dag_run`` payload for older clients.""" + if "dag_run" in response.body and isinstance(response.body["dag_run"], dict): + response.body["dag_run"].pop("partition_date", None) + + @convert_response_to_previous_version_for(AssetEventsResponse) # type: ignore[arg-type] + def remove_partition_date_from_asset_events(response: ResponseInfo) -> None: # type: ignore[misc] + """Strip ``partition_date`` from each asset event and its ``created_dagruns`` references.""" + events = response.body["asset_events"] + for elem in events: + elem.pop("partition_date", None) + for dag_ref in elem.get("created_dagruns", []): + dag_ref.pop("partition_date", None) diff --git a/airflow-core/src/airflow/assets/manager.py b/airflow-core/src/airflow/assets/manager.py index 7c12c31f979d6..030deded64e87 100644 --- a/airflow-core/src/airflow/assets/manager.py +++ b/airflow-core/src/airflow/assets/manager.py @@ -47,11 +47,14 @@ from airflow.utils.sqlalchemy import get_dialect_name, with_row_locks if TYPE_CHECKING: + from datetime import datetime + from sqlalchemy.orm.session import Session from airflow.models.dag import DagModel from airflow.models.serialized_dag import SerializedDagModel from airflow.models.taskinstance import TaskInstance + from airflow.partition_mappers.base import PartitionMapper from 
airflow.serialization.definitions.assets import ( SerializedAsset, SerializedAssetAlias, @@ -62,6 +65,41 @@ log = structlog.get_logger(__name__) +def _compute_target_partition_date( + *, + mapper: PartitionMapper, + source_partition_key: str, + source_partition_date: datetime | None, +) -> datetime | None: + """ + Derive the consumer's ``partition_date`` from the partition mapper. + + Computed once at APDR creation and stored on the row, so the consumer + DagRun's ``partition_date`` is locked to the mapper output at the time + the source event was queued — later mapper code or config changes do + not retroactively shift the date. + + - ``IdentityMapper``: passes the source ``partition_date`` through. + - ``_BaseTemporalMapper`` subclasses (``StartOf*Mapper``): re-parse the + source key with the mapper's ``input_format`` and apply ``normalize``. + - All other mappers: ``None``. + """ + from airflow.partition_mappers.identity import IdentityMapper + from airflow.partition_mappers.temporal import _BaseTemporalMapper + + if isinstance(mapper, IdentityMapper): + return source_partition_date + if isinstance(mapper, _BaseTemporalMapper): + try: + return mapper.to_downstream_normalized(source_partition_key) + except Exception: + # to_downstream() already succeeded for the same input at the + # call site, so a failure here means a custom subclass overrides + # to_downstream() or has a non-deterministic normalize(). Stay defensive. 
+ return None + return None + + @contextmanager def _lock_asset_model( *, @@ -253,6 +291,7 @@ def register_asset_change( source_alias_names: Collection[str] = (), session: Session, partition_key: str | None = None, + partition_date: datetime | None = None, source_is_api: bool = False, api_user_teams: set[str] | None = None, **kwargs, @@ -301,6 +340,7 @@ def register_asset_change( "asset_id": asset_model.id, "extra": extra or {}, "partition_key": partition_key, + "partition_date": partition_date, } if task_instance: event_kwargs.update( @@ -365,6 +405,7 @@ def register_asset_change( source_map_index=asset_event.source_map_index, source_aliases=[aam.to_serialized() for aam in asset_alias_models], partition_key=partition_key, + partition_date=partition_date, ) ) @@ -524,11 +565,10 @@ def _queue_partitioned_dags( if (asset_model := session.scalar(select(AssetModel).where(AssetModel.id == asset_id))) is None: raise RuntimeError(f"Could not find asset for asset_id={asset_id}") + mapper = timetable.get_partition_mapper(name=asset_model.name, uri=asset_model.uri) try: # We'll need to catch every possible exception happen when mapping partition_key. - target_key = timetable.get_partition_mapper( - name=asset_model.name, uri=asset_model.uri - ).to_downstream(partition_key) + target_key = mapper.to_downstream(partition_key) except Exception as err: log.exception( "Could not map partition key for asset in target Dag. " @@ -564,9 +604,19 @@ def _queue_partitioned_dags( target_keys = [target_key] del target_key + # Compute the target partition_date once per (mapper, source_key). + # to_downstream already succeeded above, so to_downstream_normalized + # on the same input is expected to succeed for temporal mappers. 
+ target_partition_date: datetime | None = _compute_target_partition_date( + mapper=mapper, + source_partition_key=partition_key, + source_partition_date=event.partition_date, + ) + for target_key in target_keys: apdr = cls._get_or_create_apdr( target_key=target_key, + target_partition_date=target_partition_date, target_dag=target_dag, asset_id=asset_id, session=session, @@ -586,6 +636,7 @@ def _get_or_create_apdr( cls, *, target_key: str, + target_partition_date: datetime | None, target_dag: SerializedDagModel, asset_id: int, session: Session, @@ -598,6 +649,13 @@ def _get_or_create_apdr( This leads to the unintended outcome of having two APDRs created instead of one. To resolve this, we add a mutex lock to AssetModel for PostgreSQL and MySQL and use AssetPartitionDagRunMutexLock table for SQLite. + + When an existing pending APDR is returned, its stored ``partition_date`` (set by the first + event that queued it) is kept. If a later event resolves to the same ``target_key`` with a + different ``target_partition_date`` — possible when two source assets use different + timezone-configured mappers that happen to format to the same string — we log a warning + and let the first event win, since the consumer DagRun has already been semantically + committed to the first datetime. 
""" with _lock_asset_model(session=session, asset_id=asset_id): latest_apdr: AssetPartitionDagRun | None = session.scalar( @@ -610,6 +668,15 @@ def _get_or_create_apdr( .limit(1) ) if latest_apdr and latest_apdr.created_dag_run_id is None: + if latest_apdr.partition_date != target_partition_date: + log.warning( + "Existing pending APDR has partition_date that differs from " + "the newly computed one; keeping the first value (first-event-wins).", + target_dag_id=target_dag.dag_id, + target_key=target_key, + existing_partition_date=latest_apdr.partition_date, + incoming_partition_date=target_partition_date, + ) cls.logger().debug( "Existing APDR found for key %s dag_id %s", target_key, @@ -622,6 +689,7 @@ def _get_or_create_apdr( target_dag_id=target_dag.dag_id, created_dag_run_id=None, partition_key=target_key, + partition_date=target_partition_date, ) session.add(apdr) session.flush() diff --git a/airflow-core/src/airflow/example_dags/example_asset_partition.py b/airflow-core/src/airflow/example_dags/example_asset_partition.py index 4edff9127ce0e..77ca047ad3934 100644 --- a/airflow-core/src/airflow/example_dags/example_asset_partition.py +++ b/airflow-core/src/airflow/example_dags/example_asset_partition.py @@ -94,7 +94,7 @@ def combine_player_stats(dag_run=None): """Merge the aligned hourly partitions into a combined dataset.""" if TYPE_CHECKING: assert dag_run - print(dag_run.partition_key) + print(dag_run.partition_key, dag_run.partition_date) combine_player_stats() diff --git a/airflow-core/src/airflow/jobs/scheduler_job_runner.py b/airflow-core/src/airflow/jobs/scheduler_job_runner.py index 224659c4c4d99..261b088a64561 100644 --- a/airflow-core/src/airflow/jobs/scheduler_job_runner.py +++ b/airflow-core/src/airflow/jobs/scheduler_job_runner.py @@ -1908,6 +1908,7 @@ def _create_dagruns_for_partitioned_asset_dags(self, session: Session) -> set[st logical_date=None, data_interval=None, partition_key=apdr.partition_key, + partition_date=apdr.partition_date, 
run_after=run_after, run_type=DagRunType.ASSET_TRIGGERED, triggered_by=DagRunTriggeredByType.ASSET, diff --git a/airflow-core/src/airflow/listeners/types.py b/airflow-core/src/airflow/listeners/types.py index 120b8ef503a6e..fa9ebdafaca0f 100644 --- a/airflow-core/src/airflow/listeners/types.py +++ b/airflow-core/src/airflow/listeners/types.py @@ -23,6 +23,8 @@ import attrs if TYPE_CHECKING: + from datetime import datetime + from pydantic import JsonValue from airflow.serialization.definitions.assets import SerializedAsset, SerializedAssetAlias @@ -40,3 +42,4 @@ class AssetEvent: source_map_index: int | None source_aliases: list[SerializedAssetAlias] partition_key: str | None + partition_date: datetime | None = None diff --git a/airflow-core/src/airflow/migrations/versions/0117_3_3_0_add_partition_date_to_asset_event.py b/airflow-core/src/airflow/migrations/versions/0117_3_3_0_add_partition_date_to_asset_event.py new file mode 100644 index 0000000000000..98110ab541320 --- /dev/null +++ b/airflow-core/src/airflow/migrations/versions/0117_3_3_0_add_partition_date_to_asset_event.py @@ -0,0 +1,59 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +""" +Add partition_date to asset_event and asset_partition_dag_run. 
+ +The new columns sit next to ``partition_key``: ``asset_event.partition_date`` +holds the source datetime and ``asset_partition_dag_run.partition_date`` holds the +target datetime, frozen at APDR creation time so the consumer DagRun's ``partition_date`` +is consistent with the partition mapper that produced its ``partition_key``. + +Revision ID: d2f4e1b3c5a7 +Revises: acc215baed80 +Create Date: 2026-05-21 09:00:00.000000 +""" + +from __future__ import annotations + +import sqlalchemy as sa +from alembic import op + +from airflow.utils.sqlalchemy import UtcDateTime + +revision = "d2f4e1b3c5a7" +down_revision = "acc215baed80" +branch_labels = None +depends_on = None +airflow_version = "3.3.0" + + +def upgrade(): + """Add partition_date columns to asset_event and asset_partition_dag_run.""" + with op.batch_alter_table("asset_event", schema=None) as batch_op: + batch_op.add_column(sa.Column("partition_date", UtcDateTime, nullable=True)) + with op.batch_alter_table("asset_partition_dag_run", schema=None) as batch_op: + batch_op.add_column(sa.Column("partition_date", UtcDateTime, nullable=True)) + + +def downgrade(): + """Remove partition_date columns from asset_event and asset_partition_dag_run.""" + with op.batch_alter_table("asset_partition_dag_run", schema=None) as batch_op: + batch_op.drop_column("partition_date") + with op.batch_alter_table("asset_event", schema=None) as batch_op: + batch_op.drop_column("partition_date") diff --git a/airflow-core/src/airflow/models/asset.py b/airflow-core/src/airflow/models/asset.py index d07fc876b1798..26e9aa84ef145 100644 --- a/airflow-core/src/airflow/models/asset.py +++ b/airflow-core/src/airflow/models/asset.py @@ -804,6 +804,7 @@ class AssetEvent(Base): :param source_map_index: the map_index of the TI which updated the asset :param timestamp: the time the event was logged :param partition_key: the key for the partition associated with event, if applicable + :param partition_date: the datetime of the partition associated with event, if applicable We use relationships 
instead of foreign keys so that asset events are not deleted even if the foreign key object is. @@ -818,6 +819,7 @@ class AssetEvent(Base): source_map_index: Mapped[int | None] = mapped_column(Integer, nullable=True, server_default="-1") timestamp: Mapped[datetime] = mapped_column(UtcDateTime, default=timezone.utcnow, nullable=False) partition_key: Mapped[str | None] = mapped_column(StringID(), nullable=True) + partition_date: Mapped[datetime | None] = mapped_column(UtcDateTime, nullable=True) __tablename__ = "asset_event" __table_args__ = ( @@ -916,6 +918,7 @@ class AssetPartitionDagRun(Base): target_dag_id: Mapped[str | None] = mapped_column(StringID(), nullable=False) created_dag_run_id: Mapped[int | None] = mapped_column(Integer(), nullable=True) partition_key: Mapped[str | None] = mapped_column(StringID(), nullable=False) + partition_date: Mapped[datetime | None] = mapped_column(UtcDateTime, nullable=True) created_at: Mapped[datetime] = mapped_column(UtcDateTime, default=timezone.utcnow, nullable=False) updated_at: Mapped[datetime] = mapped_column( UtcDateTime, default=timezone.utcnow, onupdate=timezone.utcnow, nullable=False diff --git a/airflow-core/src/airflow/models/dagrun.py b/airflow-core/src/airflow/models/dagrun.py index 36ed309feb0b7..92d7019e03746 100644 --- a/airflow-core/src/airflow/models/dagrun.py +++ b/airflow-core/src/airflow/models/dagrun.py @@ -432,6 +432,7 @@ def dag_run_data(self) -> DRDataModel: conf=self.conf, consumed_asset_events=[], partition_key=self.partition_key, + partition_date=self.partition_date, ) @property @@ -1081,6 +1082,8 @@ def _emit_dagrun_span(self, state: DagRunState): attributes["airflow.dag_run.logical_date"] = str(self.logical_date) if self.partition_key: attributes["airflow.dag_run.partition_key"] = str(self.partition_key) + if self.partition_date: + attributes["airflow.dag_run.partition_date"] = self.partition_date.isoformat() # TODO: make the empty parent context optional. 
Default should be to # nest the dag run span under the currently active parent span (by # omitting `context` here); only use the empty `context.Context()` to diff --git a/airflow-core/src/airflow/models/taskinstance.py b/airflow-core/src/airflow/models/taskinstance.py index 9cc8d6a627140..716316d38b7ae 100644 --- a/airflow-core/src/airflow/models/taskinstance.py +++ b/airflow-core/src/airflow/models/taskinstance.py @@ -1524,6 +1524,7 @@ def register_asset_changes_in_db( if len(runtime_pks) == 1 and ti.dag_run.partition_key is None: ti.dag_run.partition_key = next(iter(runtime_pks)) dag_run_partition_key = ti.dag_run.partition_key + dag_run_partition_date = ti.dag_run.partition_date asset_keys = { SerializedAssetUniqueKey(o.name, o.uri) @@ -1559,15 +1560,31 @@ def _register(am: AssetModel, key: SerializedAssetUniqueKey) -> None: asset=am, extra=None, partition_key=dag_run_partition_key, + partition_date=dag_run_partition_date, session=session, ) return for payload in payloads_for_asset: + # Drop partition_date when the payload's partition_key diverges + # from the DagRun's — the run-level date refers to the run-level + # key and would mis-label this event. 
+ if payload.partition_key == dag_run_partition_key: + payload_partition_date = dag_run_partition_date + else: + payload_partition_date = None + if dag_run_partition_date is not None: + ti.log.debug( + "Task-emitted partition_key %r differs from DagRun partition_key %r; " + "AssetEvent partition_date will be None.", + payload.partition_key, + dag_run_partition_key, + ) asset_manager.register_asset_change( task_instance=ti, asset=am, extra=payload.extra, partition_key=payload.partition_key, + partition_date=payload_partition_date, session=session, ) @@ -1651,6 +1668,7 @@ def _asset_event_extras_from_aliases() -> dict[tuple[SerializedAssetUniqueKey, s source_alias_names=event_aliase_names, extra=asset_event_extra, partition_key=dag_run_partition_key, + partition_date=dag_run_partition_date, session=session, ) if event is None: @@ -1663,6 +1681,7 @@ def _asset_event_extras_from_aliases() -> dict[tuple[SerializedAssetUniqueKey, s source_alias_names=event_aliase_names, extra=asset_event_extra, partition_key=dag_run_partition_key, + partition_date=dag_run_partition_date, session=session, ) diff --git a/airflow-core/src/airflow/partition_mappers/temporal.py b/airflow-core/src/airflow/partition_mappers/temporal.py index 49f4162a12e2c..7290cdc54f239 100644 --- a/airflow-core/src/airflow/partition_mappers/temporal.py +++ b/airflow-core/src/airflow/partition_mappers/temporal.py @@ -45,14 +45,17 @@ def __init__( timezone = parse_timezone(timezone) self._timezone = timezone - def to_downstream(self, key: str) -> str: + def to_downstream_normalized(self, key: str) -> datetime: + """Parse ``key`` with ``input_format`` and return the normalized datetime.""" dt = datetime.strptime(key, self.input_format) if dt.tzinfo is None: dt = make_aware(dt, self._timezone) else: dt = dt.astimezone(self._timezone) - normalized = self.normalize(dt) - return self.format(normalized) + return self.normalize(dt) + + def to_downstream(self, key: str) -> str: + return 
self.format(self.to_downstream_normalized(key)) @abstractmethod def normalize(self, dt: datetime) -> datetime: diff --git a/airflow-core/src/airflow/ui/openapi-gen/requests/schemas.gen.ts b/airflow-core/src/airflow/ui/openapi-gen/requests/schemas.gen.ts index 4a4b95f183023..f76be4b4d0f9b 100644 --- a/airflow-core/src/airflow/ui/openapi-gen/requests/schemas.gen.ts +++ b/airflow-core/src/airflow/ui/openapi-gen/requests/schemas.gen.ts @@ -3068,10 +3068,22 @@ export const $DAGRunResponse = { } ], title: 'Partition Key' + }, + partition_date: { + anyOf: [ + { + type: 'string', + format: 'date-time' + }, + { + type: 'null' + } + ], + title: 'Partition Date' } }, type: 'object', - required: ['dag_run_id', 'dag_id', 'logical_date', 'queued_at', 'start_date', 'end_date', 'duration', 'data_interval_start', 'data_interval_end', 'run_after', 'last_scheduling_decision', 'run_type', 'state', 'triggered_by', 'triggering_user_name', 'conf', 'note', 'dag_versions', 'bundle_version', 'dag_display_name', 'partition_key'], + required: ['dag_run_id', 'dag_id', 'logical_date', 'queued_at', 'start_date', 'end_date', 'duration', 'data_interval_start', 'data_interval_end', 'run_after', 'last_scheduling_decision', 'run_type', 'state', 'triggered_by', 'triggering_user_name', 'conf', 'note', 'dag_versions', 'bundle_version', 'dag_display_name', 'partition_key', 'partition_date'], title: 'DAGRunResponse', description: 'Dag Run serializer for responses.' 
} as const; diff --git a/airflow-core/src/airflow/ui/openapi-gen/requests/types.gen.ts b/airflow-core/src/airflow/ui/openapi-gen/requests/types.gen.ts index 77380eec73833..2c739527009bb 100644 --- a/airflow-core/src/airflow/ui/openapi-gen/requests/types.gen.ts +++ b/airflow-core/src/airflow/ui/openapi-gen/requests/types.gen.ts @@ -814,6 +814,7 @@ export type DAGRunResponse = { bundle_version: string | null; dag_display_name: string; partition_key: string | null; + partition_date: string | null; }; /** diff --git a/airflow-core/src/airflow/utils/db.py b/airflow-core/src/airflow/utils/db.py index 00d512909dc5a..85f66863a279d 100644 --- a/airflow-core/src/airflow/utils/db.py +++ b/airflow-core/src/airflow/utils/db.py @@ -116,7 +116,7 @@ class MappedClassProtocol(Protocol): "3.1.0": "cc92b33c6709", "3.1.8": "509b94a1042d", "3.2.0": "1d6611b6ab7c", - "3.3.0": "acc215baed80", + "3.3.0": "d2f4e1b3c5a7", } # Prefix used to identify tables holding data moved during migration. diff --git a/airflow-core/tests/unit/api_fastapi/core_api/routes/public/test_assets.py b/airflow-core/tests/unit/api_fastapi/core_api/routes/public/test_assets.py index 298949fce1619..c26a6dcb4f7b2 100644 --- a/airflow-core/tests/unit/api_fastapi/core_api/routes/public/test_assets.py +++ b/airflow-core/tests/unit/api_fastapi/core_api/routes/public/test_assets.py @@ -1482,6 +1482,7 @@ def test_should_respond_200(self, test_client): "dag_versions": mock.ANY, "logical_date": None, "partition_key": None, + "partition_date": None, "queued_at": mock.ANY, "run_after": mock.ANY, "start_date": None, diff --git a/airflow-core/tests/unit/api_fastapi/core_api/routes/public/test_dag_run.py b/airflow-core/tests/unit/api_fastapi/core_api/routes/public/test_dag_run.py index bc59d204378fa..87e5931e9fa6e 100644 --- a/airflow-core/tests/unit/api_fastapi/core_api/routes/public/test_dag_run.py +++ b/airflow-core/tests/unit/api_fastapi/core_api/routes/public/test_dag_run.py @@ -295,6 +295,7 @@ def get_dag_run_dict(run: 
DagRun): "note": run.note, "dag_versions": get_dag_versions_dict(run.dag_versions), "partition_key": None, + "partition_date": None, } @@ -2070,6 +2071,7 @@ def test_should_respond_200( "triggered_by": "rest_api", "triggering_user_name": "test", "partition_key": None, + "partition_date": None, } assert response.json() == expected_response_json @@ -2302,6 +2304,7 @@ def test_should_response_409_for_duplicate_logical_date(self, test_client): "conf": {}, "note": note, "partition_key": None, + "partition_date": None, } assert response_2.status_code == 409 @@ -2391,6 +2394,7 @@ def test_should_respond_200_with_null_logical_date(self, test_client): "conf": {}, "note": None, "partition_key": None, + "partition_date": None, } @pytest.mark.usefixtures("configure_git_connection_for_dag_bundle") diff --git a/airflow-core/tests/unit/api_fastapi/execution_api/versions/head/test_asset_events.py b/airflow-core/tests/unit/api_fastapi/execution_api/versions/head/test_asset_events.py index e3839f19eafe8..71e3d0672adb9 100644 --- a/airflow-core/tests/unit/api_fastapi/execution_api/versions/head/test_asset_events.py +++ b/airflow-core/tests/unit/api_fastapi/execution_api/versions/head/test_asset_events.py @@ -42,6 +42,7 @@ def make_timestamp(day): "source_run_id": "custom", "source_map_index": -1, "partition_key": None, + "partition_date": None, } events = [AssetEvent(id=i, timestamp=make_timestamp(i), **common) for i in (1, 2, 3)] @@ -91,6 +92,36 @@ def test_asset_alias(session, test_asset_events, test_asset): class TestGetAssetEventByAsset: + def test_get_by_asset_surfaces_partition_date(self, session, client, test_asset): + """The route must populate partition_date from the AssetEvent row.""" + partition_date = timezone.parse("2026-05-20T01:00:00") + event = AssetEvent( + id=99, + asset_id=test_asset.id, + extra={}, + source_dag_id="foo", + source_task_id="bar", + source_run_id="custom", + source_map_index=-1, + timestamp=datetime(2021, 1, 1, tzinfo=timezone.utc), + 
partition_key="2026-05-20T01:00:00", + partition_date=partition_date, + ) + session.add(event) + session.commit() + try: + response = client.get( + "/execution/asset-events/by-asset", + params={"name": "test_get_asset_by_name", "uri": "s3://bucket/key"}, + ) + assert response.status_code == 200 + [returned] = response.json()["asset_events"] + assert returned["partition_key"] == "2026-05-20T01:00:00" + assert returned["partition_date"] == "2026-05-20T01:00:00Z" + finally: + session.delete(event) + session.commit() + @pytest.mark.parametrize( ("uri", "name"), [ @@ -124,6 +155,7 @@ def test_get_by_asset(self, uri, name, client): }, "timestamp": "2021-01-01T00:00:00Z", "partition_key": None, + "partition_date": None, }, { "id": 2, @@ -141,6 +173,7 @@ def test_get_by_asset(self, uri, name, client): "created_dagruns": [], "timestamp": "2021-01-02T00:00:00Z", "partition_key": None, + "partition_date": None, }, { "id": 3, @@ -158,6 +191,7 @@ def test_get_by_asset(self, uri, name, client): "created_dagruns": [], "timestamp": "2021-01-03T00:00:00Z", "partition_key": None, + "partition_date": None, }, ] } @@ -195,6 +229,7 @@ def test_get_by_asset_with_after_filter(self, uri, name, client): "created_dagruns": [], "timestamp": "2021-01-02T00:00:00Z", "partition_key": None, + "partition_date": None, }, { "id": 3, @@ -212,6 +247,7 @@ def test_get_by_asset_with_after_filter(self, uri, name, client): "created_dagruns": [], "timestamp": "2021-01-03T00:00:00Z", "partition_key": None, + "partition_date": None, }, ] } @@ -249,6 +285,7 @@ def test_get_by_asset_with_before_filter(self, uri, name, client): "created_dagruns": [], "timestamp": "2021-01-01T00:00:00Z", "partition_key": None, + "partition_date": None, }, { "id": 2, @@ -266,6 +303,7 @@ def test_get_by_asset_with_before_filter(self, uri, name, client): "created_dagruns": [], "timestamp": "2021-01-02T00:00:00Z", "partition_key": None, + "partition_date": None, }, ] } @@ -308,6 +346,7 @@ def 
test_get_by_asset_with_before_and_after_filters(self, uri, name, client): "created_dagruns": [], "timestamp": "2021-01-02T00:00:00Z", "partition_key": None, + "partition_date": None, }, ] } @@ -345,6 +384,7 @@ def test_get_by_asset_with_descending_order(self, uri, name, client): "created_dagruns": [], "timestamp": "2021-01-03T00:00:00Z", "partition_key": None, + "partition_date": None, }, { "id": 2, @@ -362,6 +402,7 @@ def test_get_by_asset_with_descending_order(self, uri, name, client): "created_dagruns": [], "timestamp": "2021-01-02T00:00:00Z", "partition_key": None, + "partition_date": None, }, { "id": 1, @@ -379,6 +420,7 @@ def test_get_by_asset_with_descending_order(self, uri, name, client): "created_dagruns": [], "timestamp": "2021-01-01T00:00:00Z", "partition_key": None, + "partition_date": None, }, ] } @@ -416,6 +458,7 @@ def test_get_by_asset_get_first(self, uri, name, client): "created_dagruns": [], "timestamp": "2021-01-01T00:00:00Z", "partition_key": None, + "partition_date": None, }, ] } @@ -453,6 +496,7 @@ def test_get_by_asset_get_last(self, uri, name, client): "created_dagruns": [], "timestamp": "2021-01-03T00:00:00Z", "partition_key": None, + "partition_date": None, }, ] } @@ -484,6 +528,7 @@ def test_get_by_asset(self, client): "created_dagruns": [], "timestamp": "2021-01-01T00:00:00Z", "partition_key": None, + "partition_date": None, }, { "id": 2, @@ -501,6 +546,7 @@ def test_get_by_asset(self, client): "created_dagruns": [], "timestamp": "2021-01-02T00:00:00Z", "partition_key": None, + "partition_date": None, }, { "id": 3, @@ -518,6 +564,7 @@ def test_get_by_asset(self, client): "created_dagruns": [], "timestamp": "2021-01-03T00:00:00Z", "partition_key": None, + "partition_date": None, }, ] } diff --git a/airflow-core/tests/unit/api_fastapi/execution_api/versions/head/test_dag_runs.py b/airflow-core/tests/unit/api_fastapi/execution_api/versions/head/test_dag_runs.py index 337bc1c90b8d5..6e9aacf2cc4e6 100644 --- 
a/airflow-core/tests/unit/api_fastapi/execution_api/versions/head/test_dag_runs.py +++ b/airflow-core/tests/unit/api_fastapi/execution_api/versions/head/test_dag_runs.py @@ -362,6 +362,7 @@ def test_get_state(self, client, session, dag_maker): "end_date": "2025-12-13T00:00:00Z", "logical_date": None, "partition_key": None, + "partition_date": None, "run_after": "2025-12-13T00:00:00Z", "run_id": "previous", "run_type": "manual", diff --git a/airflow-core/tests/unit/api_fastapi/execution_api/versions/head/test_task_instances.py b/airflow-core/tests/unit/api_fastapi/execution_api/versions/head/test_task_instances.py index 2d15e548a13a4..b267e94b0ea8c 100644 --- a/airflow-core/tests/unit/api_fastapi/execution_api/versions/head/test_task_instances.py +++ b/airflow-core/tests/unit/api_fastapi/execution_api/versions/head/test_task_instances.py @@ -236,6 +236,7 @@ def test_ti_run_state_to_running( "triggering_user_name": None, "consumed_asset_events": [], "partition_key": None, + "partition_date": None, "note": None, "team_name": None, }, diff --git a/airflow-core/tests/unit/api_fastapi/execution_api/versions/v2026_06_30/test_asset_events.py b/airflow-core/tests/unit/api_fastapi/execution_api/versions/v2026_06_30/test_asset_events.py new file mode 100644 index 0000000000000..baa83d1348466 --- /dev/null +++ b/airflow-core/tests/unit/api_fastapi/execution_api/versions/v2026_06_30/test_asset_events.py @@ -0,0 +1,138 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from __future__ import annotations + +from datetime import datetime + +import pytest + +from airflow._shared.timezones import timezone +from airflow.models.asset import AssetActive, AssetEvent, AssetModel + +from tests_common.test_utils.db import clear_db_assets, clear_db_dags, clear_db_runs + +DEFAULT_DATE = timezone.parse("2021-01-01T00:00:00") +PARTITION_DATE = timezone.parse("2026-05-20T01:00:00") + +pytestmark = pytest.mark.db_test + + +@pytest.fixture(autouse=True) +def _clear_db(): + clear_db_assets() + clear_db_runs() + clear_db_dags() + yield + clear_db_assets() + clear_db_runs() + clear_db_dags() + + +@pytest.fixture +def old_ver_client(client): + """Last released execution API before AddPartitionDateField was applied.""" + client.headers["Airflow-API-Version"] = "2026-06-16" + return client + + +@pytest.fixture +def test_asset(session): + asset = AssetModel( + id=1, + name="test_asset", + uri="s3://bucket/key", + group="asset", + extra={}, + created_at=DEFAULT_DATE, + updated_at=DEFAULT_DATE, + ) + session.add_all([asset, AssetActive.for_asset(asset)]) + session.commit() + return asset + + +@pytest.fixture +def test_asset_event(session, test_asset): + event = AssetEvent( + id=1, + asset_id=test_asset.id, + extra={}, + source_dag_id="producer", + source_task_id="emit", + source_run_id="scheduled__1", + source_map_index=-1, + timestamp=datetime(2021, 1, 1, tzinfo=timezone.utc), + partition_key="2026-05-20T01:00:00", + partition_date=PARTITION_DATE, + ) + session.add(event) + session.commit() + return event + + 
+@pytest.mark.usefixtures("test_asset_event") +def test_partition_date_stripped_at_top_level_for_older_clients(old_ver_client): + """``partition_date`` should not appear in the AssetEventResponse for ``2026-06-16``.""" + response = old_ver_client.get( + "/execution/asset-events/by-asset", + params={"name": "test_asset", "uri": "s3://bucket/key"}, + ) + assert response.status_code == 200 + [event] = response.json()["asset_events"] + assert "partition_key" in event + assert "partition_date" not in event + + +def test_partition_date_stripped_from_created_dagruns_for_older_clients( + old_ver_client, dag_maker, session, test_asset +): + """``partition_date`` should also be stripped from nested ``created_dagruns`` entries.""" + with dag_maker(dag_id="consumer_dag", schedule=None, session=session): + from airflow.sdk import BaseOperator + + BaseOperator(task_id="task") + dag_run = dag_maker.create_dagrun( + partition_key="2026-05-20", + partition_date=PARTITION_DATE, + ) + event = AssetEvent( + id=1, + asset_id=test_asset.id, + extra={}, + source_dag_id="producer", + source_task_id="emit", + source_run_id="scheduled__1", + source_map_index=-1, + timestamp=datetime(2021, 1, 1, tzinfo=timezone.utc), + partition_key="2026-05-20T01:00:00", + partition_date=PARTITION_DATE, + ) + event.created_dagruns.append(dag_run) + session.add(event) + session.commit() + + response = old_ver_client.get( + "/execution/asset-events/by-asset", + params={"name": "test_asset", "uri": "s3://bucket/key"}, + ) + assert response.status_code == 200 + [response_event] = response.json()["asset_events"] + assert response_event["created_dagruns"], "fixture must produce at least one consumer DagRun" + for dag_ref in response_event["created_dagruns"]: + assert "partition_key" in dag_ref + assert "partition_date" not in dag_ref diff --git a/airflow-core/tests/unit/api_fastapi/execution_api/versions/v2026_06_30/test_task_instances.py 
b/airflow-core/tests/unit/api_fastapi/execution_api/versions/v2026_06_30/test_task_instances.py new file mode 100644 index 0000000000000..b2cc016be84d7 --- /dev/null +++ b/airflow-core/tests/unit/api_fastapi/execution_api/versions/v2026_06_30/test_task_instances.py @@ -0,0 +1,96 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +from __future__ import annotations + +import pytest + +from airflow._shared.timezones import timezone +from airflow.utils.state import DagRunState, State + +from tests_common.test_utils.db import clear_db_runs + +pytestmark = pytest.mark.db_test + +TIMESTAMP_STR = "2024-09-30T12:00:00Z" +TIMESTAMP = timezone.parse(TIMESTAMP_STR) +PARTITION_DATE = timezone.parse("2026-05-20T01:00:00") + +RUN_PATCH_BODY = { + "state": "running", + "hostname": "h", + "unixname": "u", + "pid": 1, + "start_date": TIMESTAMP_STR, +} + + +@pytest.fixture +def old_ver_client(client): + """Execution API version immediately before ``partition_date`` was added.""" + client.headers["Airflow-API-Version"] = "2026-06-16" + return client + + +class TestPartitionDateFieldBackwardCompat: + @pytest.fixture(autouse=True) + def _freeze_time(self, time_machine): + time_machine.move_to(TIMESTAMP_STR, tick=False) + + def setup_method(self): + clear_db_runs() + + def teardown_method(self): + clear_db_runs() + + def test_old_version_strips_partition_date_from_dag_run( + self, old_ver_client, session, create_task_instance + ): + ti = create_task_instance( + task_id="test_partition_date_downgrade", + state=State.QUEUED, + dagrun_state=DagRunState.RUNNING, + session=session, + start_date=TIMESTAMP, + ) + ti.dag_run.partition_key = "2026-05-20" + ti.dag_run.partition_date = PARTITION_DATE + session.commit() + + response = old_ver_client.patch(f"/execution/task-instances/{ti.id}/run", json=RUN_PATCH_BODY) + assert response.status_code == 200 + dag_run = response.json()["dag_run"] + assert dag_run["partition_key"] == "2026-05-20" + assert "partition_date" not in dag_run + + def test_head_version_includes_partition_date_field(self, client, session, create_task_instance): + ti = create_task_instance( + task_id="test_partition_date_head", + state=State.QUEUED, + dagrun_state=DagRunState.RUNNING, + session=session, + start_date=TIMESTAMP, + ) + ti.dag_run.partition_key = "2026-05-20" + ti.dag_run.partition_date = 
PARTITION_DATE + session.commit() + + response = client.patch(f"/execution/task-instances/{ti.id}/run", json=RUN_PATCH_BODY) + assert response.status_code == 200 + dag_run = response.json()["dag_run"] + assert dag_run["partition_key"] == "2026-05-20" + assert dag_run["partition_date"] == PARTITION_DATE.isoformat().replace("+00:00", "Z") diff --git a/airflow-core/tests/unit/assets/test_manager.py b/airflow-core/tests/unit/assets/test_manager.py index 0585fac803a8f..f08dbaccc8365 100644 --- a/airflow-core/tests/unit/assets/test_manager.py +++ b/airflow-core/tests/unit/assets/test_manager.py @@ -239,6 +239,7 @@ def _get_or_create_apdr(): try: return AssetManager._get_or_create_apdr( target_key="test_partition_key", + target_partition_date=None, target_dag=testing_dag, asset_id=asm.id, session=_session, diff --git a/airflow-core/tests/unit/cli/commands/test_asset_command.py b/airflow-core/tests/unit/cli/commands/test_asset_command.py index 7b17f2a5cea52..f45a1d9ce752d 100644 --- a/airflow-core/tests/unit/cli/commands/test_asset_command.py +++ b/airflow-core/tests/unit/cli/commands/test_asset_command.py @@ -158,6 +158,7 @@ def test_cli_assets_materialize(mock_hasattr, parser: ArgumentParser, stdout_cap "last_scheduling_decision": None, "note": None, "partition_key": None, + "partition_date": None, "run_type": "asset_materialization", "start_date": None, "state": "queued", @@ -197,6 +198,7 @@ def test_cli_assets_materialize_with_view_url_template(parser: ArgumentParser, s "last_scheduling_decision": None, "note": None, "partition_key": None, + "partition_date": None, "run_type": "asset_materialization", "start_date": None, "state": "queued", diff --git a/airflow-core/tests/unit/jobs/test_scheduler_job.py b/airflow-core/tests/unit/jobs/test_scheduler_job.py index 05803f5fd5be0..50d048fe64735 100644 --- a/airflow-core/tests/unit/jobs/test_scheduler_job.py +++ b/airflow-core/tests/unit/jobs/test_scheduler_job.py @@ -84,6 +84,7 @@ from airflow.models.team import Team from 
airflow.models.trigger import Trigger from airflow.partition_mappers.base import PartitionMapper as CorePartitionMapper +from airflow.partition_mappers.temporal import StartOfDayMapper as _CoreStartOfDayMapper from airflow.providers.standard.operators.bash import BashOperator from airflow.providers.standard.operators.empty import EmptyOperator from airflow.providers.standard.triggers.file import FileDeleteTrigger @@ -94,6 +95,7 @@ AssetWatcher, CronPartitionTimetable, IdentityMapper, + StartOfDayMapper, StartOfHourMapper, task, ) @@ -9751,6 +9753,7 @@ def _produce_and_register_asset_event( session: Session, dag_maker: DagMaker, expected_partition_key: str | None = None, + partition_date: datetime.datetime | None = None, ) -> AssetPartitionDagRun: if expected_partition_key is None: expected_partition_key = partition_key @@ -9758,7 +9761,11 @@ def _produce_and_register_asset_event( with dag_maker(dag_id=dag_id, schedule=None, session=session) as dag: EmptyOperator(task_id="hi", outlets=[asset]) - dr = dag_maker.create_dagrun(partition_key=partition_key, session=session) + dr = dag_maker.create_dagrun( + partition_key=partition_key, + partition_date=partition_date, + session=session, + ) [ti] = dr.get_task_instances(session=session) session.commit() @@ -9780,6 +9787,7 @@ def _produce_and_register_asset_event( ) assert event is not None assert event.partition_key == partition_key + assert event.partition_date == partition_date apdr = session.scalar( select(AssetPartitionDagRun) @@ -9971,6 +9979,192 @@ def test_partitioned_dag_run_with_customized_mapper( assert asset_event.source_run_id == "test" +@pytest.mark.need_serialized_dag +@pytest.mark.usefixtures("clear_asset_partition_rows") +@pytest.mark.parametrize( + ( + "mapper_factory", + "source_partition_key", + "source_partition_date", + "expected_partition_key", + "expected_partition_date", + ), + [ + pytest.param( + IdentityMapper, + "2026-05-20T01:00:00", + pendulum.datetime(2026, 5, 20, 1, 0, 0, tz="UTC"), + 
"2026-05-20T01:00:00", + pendulum.datetime(2026, 5, 20, 1, 0, 0, tz="UTC"), + id="identity-passes-source-date-through", + ), + pytest.param( + StartOfHourMapper, + "2026-05-20T01:30:45", + pendulum.datetime(2026, 5, 20, 1, 30, 45, tz="UTC"), + "2026-05-20T01", + pendulum.datetime(2026, 5, 20, 1, 0, 0, tz="UTC"), + id="hour-mapper-normalizes-to-start-of-hour", + ), + pytest.param( + StartOfDayMapper, + "2026-05-20T15:30:00", + pendulum.datetime(2026, 5, 20, 15, 30, 0, tz="UTC"), + "2026-05-20", + pendulum.datetime(2026, 5, 20, 0, 0, 0, tz="UTC"), + id="day-mapper-normalizes-to-start-of-day", + ), + pytest.param( + # _BaseTemporalMapper.to_downstream_normalized interprets a naive + # source key as already in the mapper's configured timezone, so + # 2026-05-20T03:00:00 in America/New_York normalizes to the start + # of 2026-05-20 NYC. A regression that mis-applied UTC would yield + # 2026-05-19 instead. Uses the core mapper directly because the + # task-SDK shim does not expose the timezone constructor argument. 
+ lambda: _CoreStartOfDayMapper(timezone="America/New_York"), + "2026-05-20T03:00:00", + pendulum.datetime(2026, 5, 20, 3, 0, 0, tz="UTC"), + "2026-05-20", + pendulum.datetime(2026, 5, 20, 0, 0, 0, tz="America/New_York"), + id="day-mapper-honours-timezone", + ), + ], +) +def test_consumer_dag_run_partition_date_for_temporal_mappers( + dag_maker: DagMaker, + session: Session, + mapper_factory, + source_partition_key: str, + source_partition_date, + expected_partition_key: str, + expected_partition_date, +): + """Consumer DagRun's partition_date is populated by the partition mapper.""" + asset_1 = Asset(name="asset-1") + + with dag_maker( + dag_id="asset-event-consumer", + schedule=PartitionedAssetTimetable( + assets=asset_1, + default_partition_mapper=mapper_factory(), + ), + session=session, + ): + EmptyOperator(task_id="hi") + session.commit() + + runner = SchedulerJobRunner( + job=Job(job_type=SchedulerJobRunner.job_type), executors=[MockExecutor(do_update=False)] + ) + + apdr = _produce_and_register_asset_event( + dag_id="asset-event-producer", + asset=asset_1, + partition_key=source_partition_key, + partition_date=source_partition_date, + session=session, + dag_maker=dag_maker, + expected_partition_key=expected_partition_key, + ) + partition_dags = runner._create_dagruns_for_partitioned_asset_dags(session=session) + + session.refresh(apdr) + assert apdr.created_dag_run_id is not None + assert partition_dags == {"asset-event-consumer"} + + dag_run = session.scalar(select(DagRun).where(DagRun.id == apdr.created_dag_run_id)) + assert dag_run is not None + assert dag_run.partition_key == expected_partition_key + assert dag_run.partition_date == expected_partition_date + + +@pytest.mark.need_serialized_dag +@pytest.mark.usefixtures("clear_asset_partition_rows") +def test_consumer_dag_run_partition_date_none_for_non_temporal_mapper( + dag_maker: DagMaker, + session: Session, + custom_partition_mapper_patch: Callable[[], ExitStack], +): + """For mappers that aren't 
temporal/identity, the consumer DagRun's partition_date stays None.""" + asset_1 = Asset(name="asset-1") + + with custom_partition_mapper_patch(): + with dag_maker( + dag_id="asset-event-consumer", + schedule=PartitionedAssetTimetable( + assets=asset_1, + default_partition_mapper=Key1Mapper(), # type: ignore[arg-type] + ), + session=session, + ): + EmptyOperator(task_id="hi") + session.commit() + + runner = SchedulerJobRunner( + job=Job(job_type=SchedulerJobRunner.job_type), executors=[MockExecutor(do_update=False)] + ) + with custom_partition_mapper_patch(): + apdr = _produce_and_register_asset_event( + dag_id="asset-event-producer", + asset=asset_1, + partition_key="this-is-not-key-1-before-mapped", + partition_date=pendulum.datetime(2026, 5, 20, 1, 0, 0, tz="UTC"), + session=session, + dag_maker=dag_maker, + expected_partition_key="key-1", + ) + runner._create_dagruns_for_partitioned_asset_dags(session=session) + + session.refresh(apdr) + assert apdr.created_dag_run_id is not None + dag_run = session.scalar(select(DagRun).where(DagRun.id == apdr.created_dag_run_id)) + assert dag_run is not None + assert dag_run.partition_key == "key-1" + assert dag_run.partition_date is None + + +@pytest.mark.need_serialized_dag +@pytest.mark.usefixtures("clear_asset_partition_rows") +def test_consumer_dag_run_partition_date_is_none_when_source_event_predates_migration( + dag_maker: DagMaker, session: Session +): + """Pre-migration AssetEvent rows have partition_date=None; IdentityMapper passes None through.""" + asset_1 = Asset(name="asset-1") + + with dag_maker( + dag_id="asset-event-consumer", + schedule=PartitionedAssetTimetable( + assets=asset_1, + default_partition_mapper=IdentityMapper(), + ), + session=session, + ): + EmptyOperator(task_id="hi") + session.commit() + + runner = SchedulerJobRunner( + job=Job(job_type=SchedulerJobRunner.job_type), executors=[MockExecutor(do_update=False)] + ) + + apdr = _produce_and_register_asset_event( + dag_id="asset-event-producer", + 
asset=asset_1, + partition_key="2026-05-20T01:00:00", + partition_date=None, + session=session, + dag_maker=dag_maker, + expected_partition_key="2026-05-20T01:00:00", + ) + runner._create_dagruns_for_partitioned_asset_dags(session=session) + + session.refresh(apdr) + assert apdr.created_dag_run_id is not None + dag_run = session.scalar(select(DagRun).where(DagRun.id == apdr.created_dag_run_id)) + assert dag_run is not None + assert dag_run.partition_key == "2026-05-20T01:00:00" + assert dag_run.partition_date is None + + @pytest.mark.need_serialized_dag @pytest.mark.usefixtures("clear_asset_partition_rows") def test_consumer_dag_listen_to_two_partitioned_asset( diff --git a/airflow-core/tests/unit/models/test_taskinstance.py b/airflow-core/tests/unit/models/test_taskinstance.py index 3b4dceb628d03..9313c5910d6e7 100644 --- a/airflow-core/tests/unit/models/test_taskinstance.py +++ b/airflow-core/tests/unit/models/test_taskinstance.py @@ -3536,6 +3536,30 @@ def test_runtime_partition_key_backfills_dag_run_when_none(dag_maker, session): assert dr.partition_key == "us" +def test_dag_run_partition_date_stamped_onto_asset_event(dag_maker, session): + """When DagRun has both partition_key and partition_date, both flow to the AssetEvent.""" + asset = Asset(name="hello") + with dag_maker(dag_id="rt_pdate_stamped", schedule=None) as dag: + EmptyOperator(task_id="hi", outlets=[asset]) + partition_date = pendulum.datetime(2026, 5, 20, 1, 0, 0, tz="UTC") + dr = dag_maker.create_dagrun( + partition_key="2026-05-20T01:00:00", + partition_date=partition_date, + session=session, + ) + [ti] = dr.get_task_instances(session=session) + + TaskInstance.register_asset_changes_in_db( + ti=ti, + task_outlets=[ensure_serialized_asset(asset).asprofile()], + outlet_events=[], + session=session, + ) + event = session.scalar(select(AssetEvent).where(AssetEvent.source_dag_id == dag.dag_id)) + assert event.partition_key == "2026-05-20T01:00:00" + assert event.partition_date == partition_date + + 
def test_runtime_partition_key_does_not_overwrite_scheduler_partition(dag_maker, session): """Task-emitted key lands on the AssetEvent but does NOT overwrite a scheduler-set DagRun.partition_key.""" asset = Asset(name="hello") @@ -3558,6 +3582,31 @@ def test_runtime_partition_key_does_not_overwrite_scheduler_partition(dag_maker, assert dr.partition_key == "scheduler-key" +def test_runtime_partition_key_drops_partition_date_when_key_differs(dag_maker, session): + """When a task emits a partition_key different from the DagRun's, the event's partition_date is None.""" + asset = Asset(name="hello") + with dag_maker(dag_id="rt_pdate_drop", schedule=None) as dag: + EmptyOperator(task_id="hi", outlets=[asset]) + dr = dag_maker.create_dagrun( + partition_key="scheduler-key", + partition_date=pendulum.datetime(2026, 5, 20, 1, 0, 0, tz="UTC"), + session=session, + ) + [ti] = dr.get_task_instances(session=session) + + TaskInstance.register_asset_changes_in_db( + ti=ti, + task_outlets=[ensure_serialized_asset(asset).asprofile()], + outlet_events=[ + {"dest_asset_key": {"name": "hello", "uri": "hello"}, "extra": {}, "partition_key": "task-key"}, + ], + session=session, + ) + event = session.scalar(select(AssetEvent).where(AssetEvent.source_dag_id == dag.dag_id)) + assert event.partition_key == "task-key" + assert event.partition_date is None + + def test_runtime_partition_keys_fan_out_to_one_event_per_key(dag_maker, session): """Multiple distinct runtime keys produce one AssetEvent each; DagRun.partition_key stays None.""" asset = Asset(name="hello") diff --git a/airflow-core/tests/unit/partition_mappers/test_temporal.py b/airflow-core/tests/unit/partition_mappers/test_temporal.py index 0eb10991f3137..dfd5e4a8ee12b 100644 --- a/airflow-core/tests/unit/partition_mappers/test_temporal.py +++ b/airflow-core/tests/unit/partition_mappers/test_temporal.py @@ -162,6 +162,41 @@ def test_to_downstream_input_timezone_differs_from_mapper_timezone(self): # → start-of-day in New York is 
2026-02-11 assert pm.to_downstream("2026-02-11T06:00:00+0000") == "2026-02-11" + @pytest.mark.parametrize( + ("mapper_cls", "expected_normalized"), + [ + (StartOfHourMapper, pendulum.datetime(2026, 2, 10, 14, 0, 0, tz="UTC")), + (StartOfDayMapper, pendulum.datetime(2026, 2, 10, 0, 0, 0, tz="UTC")), + (StartOfWeekMapper, pendulum.datetime(2026, 2, 9, 0, 0, 0, tz="UTC")), + (StartOfMonthMapper, pendulum.datetime(2026, 2, 1, 0, 0, 0, tz="UTC")), + (StartOfQuarterMapper, pendulum.datetime(2026, 1, 1, 0, 0, 0, tz="UTC")), + (StartOfYearMapper, pendulum.datetime(2026, 1, 1, 0, 0, 0, tz="UTC")), + ], + ) + def test_to_downstream_normalized( + self, + mapper_cls: type[_BaseTemporalMapper], + expected_normalized, + ): + """Each temporal mapper exposes the normalized datetime alongside the string form.""" + pm = mapper_cls() + normalized = pm.to_downstream_normalized("2026-02-10T14:30:45") + assert normalized == expected_normalized + # to_downstream returns the formatted string of the same normalized value. + assert pm.to_downstream("2026-02-10T14:30:45") == pm.format(normalized) + + def test_to_downstream_normalized_timezone_aware(self): + """Normalized datetime is in the configured timezone, mirroring to_downstream.""" + pm = StartOfDayMapper(timezone="America/New_York") + normalized = pm.to_downstream_normalized("2026-02-10T23:00:00") + # Normalized to start-of-day in New York timezone. 
+ assert normalized.tzinfo is not None + assert normalized.year == 2026 + assert normalized.month == 2 + assert normalized.day == 10 + assert normalized.hour == 0 + assert normalized.minute == 0 + class TestSdkTemporalMappersTimezoneSerialization: """ diff --git a/airflow-ctl/src/airflowctl/api/datamodels/generated.py b/airflow-ctl/src/airflowctl/api/datamodels/generated.py index f05fa65cf56f8..89b8cddb7aad7 100644 --- a/airflow-ctl/src/airflowctl/api/datamodels/generated.py +++ b/airflow-ctl/src/airflowctl/api/datamodels/generated.py @@ -1642,6 +1642,7 @@ class DAGRunResponse(BaseModel): bundle_version: Annotated[str | None, Field(title="Bundle Version")] = None dag_display_name: Annotated[str, Field(title="Dag Display Name")] partition_key: Annotated[str | None, Field(title="Partition Key")] = None + partition_date: Annotated[datetime | None, Field(title="Partition Date")] = None class DAGRunsBatchBody(BaseModel): diff --git a/providers/standard/src/airflow/providers/standard/operators/python.py b/providers/standard/src/airflow/providers/standard/operators/python.py index fb058b924b6ca..8698ddc403b60 100644 --- a/providers/standard/src/airflow/providers/standard/operators/python.py +++ b/providers/standard/src/airflow/providers/standard/operators/python.py @@ -470,6 +470,8 @@ class _BasePythonVirtualenvOperator(PythonOperator, metaclass=ABCMeta): "prev_execution_date", "prev_execution_date_success", } + if AIRFLOW_V_3_3_PLUS: + PENDULUM_SERIALIZABLE_CONTEXT_KEYS.add("partition_date") AIRFLOW_SERIALIZABLE_CONTEXT_KEYS = { "macros", diff --git a/task-sdk/src/airflow/sdk/api/datamodels/_generated.py b/task-sdk/src/airflow/sdk/api/datamodels/_generated.py index 2d999111eb01b..597197461a26c 100644 --- a/task-sdk/src/airflow/sdk/api/datamodels/_generated.py +++ b/task-sdk/src/airflow/sdk/api/datamodels/_generated.py @@ -110,6 +110,7 @@ class DagRunAssetReference(BaseModel): data_interval_start: Annotated[AwareDatetime | None, Field(title="Data Interval Start")] = None 
data_interval_end: Annotated[AwareDatetime | None, Field(title="Data Interval End")] = None partition_key: Annotated[str | None, Field(title="Partition Key")] = None + partition_date: Annotated[AwareDatetime | None, Field(title="Partition Date")] = None class DagRunState(str, Enum): @@ -681,6 +682,7 @@ class AssetEventResponse(BaseModel): source_run_id: Annotated[str | None, Field(title="Source Run Id")] = None source_map_index: Annotated[int | None, Field(title="Source Map Index")] = None partition_key: Annotated[str | None, Field(title="Partition Key")] = None + partition_date: Annotated[AwareDatetime | None, Field(title="Partition Date")] = None class AssetEventsResponse(BaseModel): @@ -714,6 +716,7 @@ class DagRun(BaseModel): triggering_user_name: Annotated[str | None, Field(title="Triggering User Name")] = None consumed_asset_events: Annotated[list[AssetEventDagRunReference], Field(title="Consumed Asset Events")] partition_key: Annotated[str | None, Field(title="Partition Key")] = None + partition_date: Annotated[AwareDatetime | None, Field(title="Partition Date")] = None note: Annotated[str | None, Field(title="Note")] = None team_name: Annotated[str | None, Field(title="Team Name")] = None diff --git a/task-sdk/src/airflow/sdk/definitions/context.py b/task-sdk/src/airflow/sdk/definitions/context.py index c422c3462982a..77796ceaaeb8c 100644 --- a/task-sdk/src/airflow/sdk/definitions/context.py +++ b/task-sdk/src/airflow/sdk/definitions/context.py @@ -64,6 +64,7 @@ class Context(TypedDict, total=False): outlets: list params: dict[str, Any] partition_key: NotRequired[str | None] + partition_date: NotRequired[DateTime | None] prev_data_interval_start_success: NotRequired[DateTime | None] prev_data_interval_end_success: NotRequired[DateTime | None] prev_start_date_success: NotRequired[DateTime | None] diff --git a/task-sdk/src/airflow/sdk/execution_time/schema/schema.json b/task-sdk/src/airflow/sdk/execution_time/schema/schema.json index 
ce6cbb6b73a3a..b2bbf769f398f 100644 --- a/task-sdk/src/airflow/sdk/execution_time/schema/schema.json +++ b/task-sdk/src/airflow/sdk/execution_time/schema/schema.json @@ -114,6 +114,19 @@ ], "default": null, "title": "Partition Key" + }, + "partition_date": { + "anyOf": [ + { + "format": "date-time", + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Partition Date" } }, "required": [ @@ -1020,6 +1033,19 @@ ], "default": null, "title": "Partition Key" + }, + "partition_date": { + "anyOf": [ + { + "format": "date-time", + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Partition Date" } }, "required": [ @@ -1203,6 +1229,19 @@ "default": null, "title": "Partition Key" }, + "partition_date": { + "anyOf": [ + { + "format": "date-time", + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Partition Date" + }, "note": { "anyOf": [ { @@ -4565,6 +4604,19 @@ ], "title": "Partition Key" }, + "partition_date": { + "anyOf": [ + { + "format": "date-time", + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Partition Date" + }, "note": { "anyOf": [ { diff --git a/task-sdk/src/airflow/sdk/execution_time/task_runner.py b/task-sdk/src/airflow/sdk/execution_time/task_runner.py index 313309fa40100..a8bb39947af2d 100644 --- a/task-sdk/src/airflow/sdk/execution_time/task_runner.py +++ b/task-sdk/src/airflow/sdk/execution_time/task_runner.py @@ -319,6 +319,7 @@ def get_template_context(self) -> Context: # TODO: Assess if we need to pass these through timezone.coerce_datetime "dag_run": dag_run, # type: ignore[typeddict-item] # Removable after #46522 "partition_key": dag_run.partition_key, + "partition_date": coerce_datetime(dag_run.partition_date), "triggering_asset_events": TriggeringAssetEventsAccessor.build( AssetEventDagRunReferenceResult.from_asset_event_dag_run_reference(event) for event in dag_run.consumed_asset_events diff --git 
a/task-sdk/src/airflow/sdk/types.py b/task-sdk/src/airflow/sdk/types.py index 711dbe71ca46c..bdaa030cdbe1a 100644 --- a/task-sdk/src/airflow/sdk/types.py +++ b/task-sdk/src/airflow/sdk/types.py @@ -123,6 +123,7 @@ class DagRunProtocol(Protocol): triggering_user_name: str | None consumed_asset_events: list[AssetEventDagRunReference] partition_key: str | None + partition_date: AwareDatetime | None note: str | None diff --git a/task-sdk/tests/task_sdk/execution_time/test_supervisor.py b/task-sdk/tests/task_sdk/execution_time/test_supervisor.py index 11d22865c0dd0..4a83ab1af94bb 100644 --- a/task-sdk/tests/task_sdk/execution_time/test_supervisor.py +++ b/task-sdk/tests/task_sdk/execution_time/test_supervisor.py @@ -2290,6 +2290,7 @@ class RequestTestCase: "run_id": "prev_run", "logical_date": timezone.parse("2024-01-14T12:00:00Z"), "partition_key": None, + "partition_date": None, "run_type": "scheduled", "start_date": timezone.parse("2024-01-15T12:00:00Z"), "run_after": timezone.parse("2024-01-15T12:00:00Z"), @@ -2343,6 +2344,7 @@ class RequestTestCase: "run_id": "prev_run", "logical_date": timezone.parse("2024-01-14T12:00:00Z"), "partition_key": None, + "partition_date": None, "run_type": "scheduled", "start_date": timezone.parse("2024-01-15T12:00:00Z"), "run_after": timezone.parse("2024-01-15T12:00:00Z"), diff --git a/task-sdk/tests/task_sdk/execution_time/test_task_runner.py b/task-sdk/tests/task_sdk/execution_time/test_task_runner.py index 4ba821e537e56..ad095a4525099 100644 --- a/task-sdk/tests/task_sdk/execution_time/test_task_runner.py +++ b/task-sdk/tests/task_sdk/execution_time/test_task_runner.py @@ -1971,6 +1971,7 @@ def test_get_context_with_ti_context_from_server(self, create_runtime_ti, mock_s "ts_nodash": "20241201T010000", "ts_nodash_with_tz": "20241201T010000+0000", "partition_key": dr.partition_key, + "partition_date": dr.partition_date, } def test_partition_key_in_context(self, create_runtime_ti, mock_supervisor_comms): @@ -1997,6 +1998,39 @@ def 
test_partition_key_in_context(self, create_runtime_ti, mock_supervisor_comms context = runtime_ti.get_template_context() assert context["partition_key"] == "some-partition" + def test_partition_date_in_context(self, create_runtime_ti, mock_supervisor_comms): + """Test that partition_date from dag_run is exposed in the template context.""" + task = BaseOperator(task_id="hello") + runtime_ti = create_runtime_ti(task=task, dag_id="basic_task") + + dr = runtime_ti._ti_context_from_server.dag_run + + mock_supervisor_comms.send.return_value = PrevSuccessfulDagRunResult( + data_interval_end=dr.logical_date - timedelta(hours=1), + data_interval_start=dr.logical_date - timedelta(hours=2), + start_date=dr.start_date - timedelta(hours=1), + end_date=dr.start_date, + ) + + context = runtime_ti.get_template_context() + + # Default: partition_date is None + assert context["partition_date"] is None + + # Set partition_date on dag_run and verify it surfaces in context + partition_date = timezone.datetime(2026, 5, 20, 1, 0, 0) + dr.partition_date = partition_date + context = runtime_ti.get_template_context() + assert context["partition_date"] == partition_date + + # Naive datetime is coerced to tz-aware so Jinja `| ds` / `| ts` filters + # operate on a real awareness boundary. + from datetime import datetime as _datetime + + dr.partition_date = _datetime(2026, 5, 20, 1, 0, 0) + context = runtime_ti.get_template_context() + assert context["partition_date"].tzinfo is not None + def test_lazy_loading_not_triggered_until_accessed(self, create_runtime_ti, mock_supervisor_comms): """Ensure lazy-loaded attributes are not resolved until accessed.""" task = BaseOperator(task_id="hello")