Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,15 @@ and this project adheres to semantic versioning once public releases begin.

### Added

- Atomic output writes and clean cancellation (Ticket 104). Every `--output`
write and every on-disk artifact writer (flight trace, phase segments,
validation report, calibration profile, SITL artifacts) now writes to a sibling
temp file, `fsync`s, and `os.replace`s it onto the target, so a killed or
interrupted run never leaves a truncated file — the destination is either the
prior content or absent. A new `CliExitCode.CANCELLED` (`14`) is returned when a
run receives `SIGTERM`/`SIGINT`, installed by the console-script entrypoint, in
place of the shell defaults (`143`/`130`). The cancellation contract is
documented in `docs/CLI_EXIT_CODES.md` and `docs/VERSIONING_POLICY.md`.
- Calibration profiles and parameter fitting (Ticket 083). A new `bvlos-sim
calibrate VEHICLE TRACE [TRACE ...]` command fits a narrow set of vehicle
performance parameters from observed flights and emits a versioned,
Expand Down
48 changes: 48 additions & 0 deletions adapters/atomic_write.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
"""Atomic file writes (Ticket 104).

A killed or interrupted process must never leave a truncated artifact at a path a
consuming backend then reads. ``atomic_write_text`` writes to a sibling temp file
in the destination directory, flushes it to disk, then ``os.replace``s it onto the
target. ``os.replace`` is atomic on POSIX within a single filesystem, so an
interrupted run leaves either the prior file or nothing — never a partial one.
"""

from __future__ import annotations

import os
import tempfile
from pathlib import Path

__all__ = ["atomic_write_text"]


def atomic_write_text(path: Path, text: str, *, encoding: str = "utf-8") -> None:
    """Write ``text`` to ``path`` atomically.

    The text is written to a temp file created in ``path``'s parent directory
    (so the final ``os.replace`` stays within one filesystem; a cross-device
    rename is not atomic), flushed and ``fsync``ed, then renamed onto ``path``.

    Permission bits are carried over: an existing destination keeps its mode,
    and a new file gets ``0o666`` masked by the process umask, matching what a
    plain ``Path.write_text`` would produce. Without this the destination would
    inherit the owner-only ``0o600`` mode that ``tempfile`` always uses.

    Args:
        path: Destination file. Its parent directory must already exist.
        text: Content to write.
        encoding: Text encoding used for the file (default UTF-8).

    Raises:
        OSError: If the temp file cannot be created, written, or renamed.
            Any other exception raised mid-write (including ``SystemExit`` or
            ``KeyboardInterrupt``) also propagates unchanged. In every case
            the temp file is removed first, and a failure before the final
            rename leaves the destination untouched.
    """
    directory = path.parent
    try:
        # Replacing an existing file must not change who can read it.
        mode = path.stat().st_mode & 0o777
    except OSError:
        # No usable destination yet: fall back to the default for new files.
        # os.umask can only be read by setting it, so restore it immediately.
        current_umask = os.umask(0)
        os.umask(current_umask)
        mode = 0o666 & ~current_umask
    handle = tempfile.NamedTemporaryFile(
        mode="w",
        encoding=encoding,
        dir=directory,
        prefix=f".{path.name}.",
        suffix=".tmp",
        delete=False,
    )
    temp_path = Path(handle.name)
    try:
        with handle:
            handle.write(text)
            handle.flush()
            # Fix the mode before the rename so the destination never appears
            # with the temp file's restrictive permissions.
            os.chmod(temp_path, mode)
            os.fsync(handle.fileno())
        os.replace(temp_path, path)
    except BaseException:
        # Best-effort cleanup; the temp file may already be gone if the
        # failure happened after the rename.
        try:
            temp_path.unlink()
        except OSError:
            pass
        raise
3 changes: 2 additions & 1 deletion adapters/calibration/io.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@

from pydantic import ValidationError

from adapters.atomic_write import atomic_write_text
from adapters.calibration.apply import CalibrationMismatchError, apply_calibration
from adapters.canonical_json import render_canonical_json
from adapters.io import (
Expand All @@ -23,7 +24,7 @@
def write_calibration_profile(profile: CalibrationProfile, path: Path) -> None:
    """Write a calibration profile to a JSON file with canonical formatting.

    The profile is dumped in JSON mode, rendered as canonical JSON, and written
    through ``atomic_write_text`` only. The earlier direct ``path.write_text``
    call is dropped because it could leave a truncated file if interrupted.
    """
    payload = profile.model_dump(mode="json")
    atomic_write_text(path, render_canonical_json(payload))


def load_calibration_profile(
Expand Down
26 changes: 26 additions & 0 deletions adapters/cli.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
"""Typer CLI adapter for estimator execution."""

import json
import signal
from enum import IntEnum, StrEnum
from types import FrameType
from typing import NoReturn

import typer
Expand All @@ -22,6 +24,7 @@ class CliExitCode(IntEnum):
INVALID_INPUT = 11
UNSUPPORTED = 12
INTERNAL_ERROR = 13
CANCELLED = 14


class ScenarioExitCode(IntEnum):
Expand Down Expand Up @@ -74,12 +77,35 @@ class SoraOutputFormat(StrEnum):
"ScenarioExitCode",
"SoraOutputFormat",
"SummaryOutputFormat",
"install_cancellation_handlers",
"run_monte_carlo",
"run_stochastic_propagation",
"try_estimate_mission_distance_time",
]


def _handle_cancellation_signal(signum: int, _frame: FrameType | None) -> NoReturn:
    """Signal handler that ends the process with the CANCELLED exit code.

    Neither the signal number nor the interrupted frame is inspected: every
    signal routed here maps to the same status. Raising ``SystemExit`` (rather
    than exiting abruptly) lets the interpreter unwind the stack, so pending
    ``finally`` blocks and context managers still run. This replaces the
    shell's default statuses (``143`` for SIGTERM, ``130`` for SIGINT) with a
    single documented code a caller can branch on.
    """
    cancelled_status = int(CliExitCode.CANCELLED)
    raise SystemExit(cancelled_status)


def install_cancellation_handlers() -> None:
    """Register the cancellation handler for SIGTERM and SIGINT.

    Intended to be called explicitly by the console-script entrypoint rather
    than at import time, so code that merely imports this module (such as the
    in-process Typer test runner) keeps Python's default ``KeyboardInterrupt``
    behaviour.
    """
    for cancel_signal in (signal.SIGTERM, signal.SIGINT):
        signal.signal(cancel_signal, _handle_cancellation_signal)


def _version_callback(value: bool) -> None:
if value:
typer.echo(f"bvlos-sim {tool_version()}")
Expand Down
17 changes: 14 additions & 3 deletions adapters/cli_support.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
OutputFormat,
render_envelope_json,
)
from adapters.atomic_write import atomic_write_text
from adapters.assets.geofence_geojson import GeofenceLoadError, load_geofences
from adapters.assets.obstacle_geojson import ObstacleLoadError, load_obstacles
from adapters.io import (
Expand All @@ -23,7 +24,10 @@
InputLoadStage,
validation_error_summary,
)
from adapters.assets.landing_zone_geojson import LandingZoneLoadError, load_landing_zones
from adapters.assets.landing_zone_geojson import (
LandingZoneLoadError,
load_landing_zones,
)
from adapters.assets.population_grid import load_population_grid
from adapters.checklist_markdown import (
render_checklist_markdown,
Expand All @@ -46,7 +50,12 @@
render_stochastic_envelope_json,
)
from adapters.stochastic_markdown import render_stochastic_markdown
from adapters.summary import format_estimate_summary, format_scenario_summary, format_stochastic_summary, format_uncertainty_summary
from adapters.summary import (
format_estimate_summary,
format_scenario_summary,
format_stochastic_summary,
format_uncertainty_summary,
)
from adapters.assets.terrain_grid import load_terrain_grid
from adapters.uncertainty_envelope import (
UncertaintyResultEnvelope,
Expand Down Expand Up @@ -204,6 +213,7 @@ def _render_scenario_summary(envelope: ScenarioResultEnvelope) -> str:
OutputFormat.GROUND_RISK: render_ground_risk_markdown_from_scenario,
}


def _render_uncertainty_summary(envelope: UncertaintyResultEnvelope) -> str:
    """Render the summary text for the result carried by ``envelope``."""
    result = envelope.result
    return format_uncertainty_summary(result)

Expand All @@ -214,6 +224,7 @@ def _render_uncertainty_summary(envelope: UncertaintyResultEnvelope) -> str:
OutputFormat.SUMMARY: _render_uncertainty_summary,
}


def _render_stochastic_summary(envelope: StochasticResultEnvelope) -> str:
    """Render the summary text for the result carried by ``envelope``."""
    result = envelope.result
    return format_stochastic_summary(result)

Expand Down Expand Up @@ -506,7 +517,7 @@ def _write_output(rendered: str, output: Path | None) -> None:
if output is None:
typer.echo(rendered, nl=False)
return
output.write_text(rendered, encoding="utf-8")
atomic_write_text(output, rendered)
except OSError as exc:
raise OutputWriteError("Failed to write output.") from exc

Expand Down
3 changes: 2 additions & 1 deletion adapters/flight_log/io.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@

from pydantic import ValidationError

from adapters.atomic_write import atomic_write_text
from adapters.canonical_json import render_canonical_json
from adapters.io import (
InputDocument,
Expand All @@ -21,7 +22,7 @@
def write_flight_trace(trace: NormalizedFlightTrace, path: Path) -> None:
    """Write a normalized flight trace to a JSON file with canonical formatting.

    The trace is dumped in JSON mode, rendered as canonical JSON, and written
    through ``atomic_write_text`` only. The earlier direct ``path.write_text``
    call is dropped because it could leave a truncated file if interrupted.
    """
    payload = trace.model_dump(mode="json")
    atomic_write_text(path, render_canonical_json(payload))


def read_flight_trace(path: Path) -> NormalizedFlightTrace:
Expand Down
3 changes: 2 additions & 1 deletion adapters/phase_segmentation/io.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@

from pydantic import ValidationError

from adapters.atomic_write import atomic_write_text
from adapters.canonical_json import render_canonical_json
from adapters.io import (
InputDocument,
Expand All @@ -21,7 +22,7 @@
def write_phase_segments(result: PhaseSegmentResult, path: Path) -> None:
    """Write a phase segment result to a JSON file with canonical formatting.

    The result is dumped in JSON mode, rendered as canonical JSON, and written
    through ``atomic_write_text`` only. The earlier direct ``path.write_text``
    call is dropped because it could leave a truncated file if interrupted.
    """
    payload = result.model_dump(mode="json")
    atomic_write_text(path, render_canonical_json(payload))


def load_phase_segments(path: Path) -> tuple[PhaseSegmentResult, InputDocument]:
Expand Down
6 changes: 2 additions & 4 deletions adapters/sitl/artifacts.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
from dataclasses import dataclass, field
from pathlib import Path

from adapters.atomic_write import atomic_write_text
from schemas.sitl import (
SitlArtifactReference,
SitlArtifactRole,
Expand Down Expand Up @@ -268,10 +269,7 @@ def _write_artifact(
"schema_version": schema_version,
payload_key: payload,
}
path.write_text(
json.dumps(content, indent=2, sort_keys=True) + "\n",
encoding="utf-8",
)
atomic_write_text(path, json.dumps(content, indent=2, sort_keys=True) + "\n")


def _write_artifact_reference(
Expand Down
3 changes: 2 additions & 1 deletion adapters/validation/io.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@

from pydantic import ValidationError

from adapters.atomic_write import atomic_write_text
from adapters.canonical_json import render_canonical_json
from adapters.io import (
InputDocument,
Expand All @@ -21,7 +22,7 @@
def write_validation_report(report: ValidationReport, path: Path) -> None:
    """Write a validation report to a JSON file with canonical formatting.

    The report is dumped in JSON mode, rendered as canonical JSON, and written
    through ``atomic_write_text`` only. The earlier direct ``path.write_text``
    call is dropped because it could leave a truncated file if interrupted.
    """
    payload = report.model_dump(mode="json")
    atomic_write_text(path, render_canonical_json(payload))


def load_validation_report(path: Path) -> tuple[ValidationReport, InputDocument]:
Expand Down
17 changes: 17 additions & 0 deletions docs/CLI_EXIT_CODES.md
Original file line number Diff line number Diff line change
Expand Up @@ -18,12 +18,29 @@ divergences below are intentional and are called out explicitly.
| `11` | `INVALID_INPUT` | Input files, arguments, or referenced assets failed to load or validate. |
| `12` | `UNSUPPORTED` | The requested computation is not supported for these inputs. |
| `13` | `INTERNAL_ERROR` | An output could not be written, or an unexpected error occurred. |
| `14` | `CANCELLED` | The run received `SIGTERM`/`SIGINT` and aborted; any output file is either complete or untouched, never partial. |

Every command returns `13` rather than a bare traceback (shell status `1`) when
an unexpected exception escapes. A shell status `2` comes from the argument
parser (Typer/Click) for malformed invocations (unknown option, missing
argument); it is not one of the codes above.

## Cancellation contract

Any command may receive `SIGTERM` or `SIGINT` (e.g. a worker cancelling a job or
enforcing a timeout). When it does:

- The process exits `14` (`CANCELLED`) instead of the shell defaults (`143` for
`SIGTERM`, `130` for `SIGINT`), so a caller can branch on a defined code.
- No `--output` file is left in a partial state. All on-disk writes go through an
atomic temp-file-then-`os.replace`, so an interrupted run leaves the
destination either at its prior content or absent — never truncated. A consumer
can therefore trust that any file that exists is complete.

The signal handlers that produce the `CANCELLED` code are installed only by the
console-script entrypoint (`main:main`); importing the Typer app in-process (as
the test runner does) keeps Python's default `KeyboardInterrupt` behaviour.

## Per-command exit codes

| Command | `0` | `10` | `11` | `12` | `13` | Notes |
Expand Down
5 changes: 5 additions & 0 deletions docs/USAGE.md
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,11 @@ the body, never `10`), `scenario` has no `12` (every non-passed outcome collapse
to `10`), and `estimate` returns `11` for a computed invalid-input failure even
when the input files are valid.

A run interrupted by `SIGTERM`/`SIGINT` exits `14` (`CANCELLED`). All `--output`
writes are atomic (temp file then `os.replace`), so an interrupted run never
leaves a truncated file — the destination is either fully written, still at its
prior content, or absent.

Mission-scoped functionality is exposed through `estimate` by mission and
vehicle YAML: fidelity settings, terrain, wind grids, geofences, landing zones,
obstacles, resource systems, communication links, energy feasibility, and route
Expand Down
3 changes: 2 additions & 1 deletion docs/VERSIONING_POLICY.md
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,8 @@ Current public contracts:
- SITL evidence bundle: `sitl-evidence.v1`
- SITL comparison report: `sitl-comparison.v1`
- CLI exit-code semantics (enumerated per command in
[`CLI_EXIT_CODES.md`](CLI_EXIT_CODES.md))
[`CLI_EXIT_CODES.md`](CLI_EXIT_CODES.md)), including the `14` (`CANCELLED`)
signal-abort code and the atomic-output-write guarantee
- supported Markdown report shape covered by golden fixtures

Internal module layout is not a public contract. Refactors are allowed when the
Expand Down
20 changes: 19 additions & 1 deletion docs/tickets/104-atomic-output-writes-and-cancellation.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

## Status

Planned.
Implemented.

## Goal

Expand Down Expand Up @@ -59,3 +59,21 @@ raises `KeyboardInterrupt` (a `BaseException`) that bypasses every
the signal-handling half; either can ship first.
- Adding `CliExitCode.CANCELLED` is an additive contract change — record it in
`docs/CLI_EXIT_CODES.md` and `docs/VERSIONING_POLICY.md` in the same commit.

## Implementation

| File | Change |
| --- | --- |
| `adapters/atomic_write.py` | New `atomic_write_text(path, text)`: temp file in the destination directory → `flush` + `os.fsync` → `os.replace`; cleans up the temp file and re-raises on any failure. |
| `adapters/cli_support.py` | `_write_output` routes file writes through `atomic_write_text` (stdout path unchanged). |
| `adapters/flight_log/io.py`, `validation/io.py`, `phase_segmentation/io.py`, `calibration/io.py`, `sitl/artifacts.py` | The five other on-disk writers now use `atomic_write_text`. |
| `adapters/cli.py` | Added `CliExitCode.CANCELLED = 14`, `_handle_cancellation_signal` (raises `SystemExit(14)`), and `install_cancellation_handlers()` routing `SIGTERM`/`SIGINT` to it. |
| `main.py` | The console-script entrypoint installs the cancellation handlers before running the app. |
| `docs/CLI_EXIT_CODES.md`, `docs/VERSIONING_POLICY.md`, `docs/USAGE.md` | Documented the `14`/`CANCELLED` code and the atomic-write guarantee. |
| `tests/test_atomic_write.py` | Covers the temp-then-replace path, no-leftover-temp-files, original-preserved-on-failure, the missing-parent error, and the signal exit code / handler registration. `tests/test_cli.py`'s output-write-failure test was retargeted from `Path.write_text` to `os.replace`. |

The signal handlers are installed only by the console-script entrypoint, not at
import, so the in-process Typer test runner keeps Python's default
`KeyboardInterrupt` behaviour and existing tests are unaffected. The atomic-write
change does not alter output bytes for a normal run, so the golden fixtures are
unchanged.
4 changes: 2 additions & 2 deletions docs/tickets/README.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# Ticket Backlog

**59 implemented · 19 planned · 1230 tests passing**
**60 implemented · 18 planned · 1237 tests passing**

This directory tracks every capability from idea to implementation. Completed
tickets are kept as historical records. Open tickets describe what to build
Expand Down Expand Up @@ -53,7 +53,6 @@ worker depends on.

| # | Ticket | What it adds |
|---|---|---|
| 104 | [Atomic output writes and clean cancellation](./104-atomic-output-writes-and-cancellation.md) | Temp-then-`os.replace` output so a killed run never leaves a partial file; `SIGTERM` exit code |
| 105 | [Contract-version discovery command](./105-contract-version-discovery-command.md) | `schema-versions` command printing supported input/output contract versions without running a job |
| 106 | [Machine-readable run progress](./106-machine-readable-run-progress.md) | JSONL progress for `propagate`/`sample`/`batch` so a non-TTY worker can show live progress (extends 067) |
| 107 | [Machine-readable preflight report](./107-machine-readable-preflight-report.md) | JSON `--validate-only` envelope plus GeoJSON asset preflight across run types (composes with 089) |
Expand Down Expand Up @@ -203,3 +202,4 @@ New capabilities should work *with* existing pieces, not alongside them in isola
56. [082](./082-predicted-vs-observed-validation-metrics.md) Predicted vs. observed validation metrics
57. [098](./098-version-bump-and-release-tooling.md) Version bump and release tooling
58. [083](./083-calibration-profile-data-and-fitting.md) Calibration profile data and fitting
59. [104](./104-atomic-output-writes-and-cancellation.md) Atomic output writes and clean cancellation
5 changes: 4 additions & 1 deletion main.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,10 @@
from adapters.cli import app
"""Entry point for the bvlos-sim CLI."""

from adapters.cli import app, install_cancellation_handlers


def main() -> None:
    """Console-script entrypoint: install signal handlers, then run the CLI app."""
    # Handlers go in first so a SIGTERM/SIGINT received while the app is
    # running is routed through the cancellation handler.
    install_cancellation_handlers()
    app()


Expand Down
Loading
Loading