diff --git a/dimos/core/module.py b/dimos/core/module.py index 26a2b6f893..fdf6d23061 100644 --- a/dimos/core/module.py +++ b/dimos/core/module.py @@ -477,14 +477,16 @@ def process_observable( self, observable: "Observable[Any]", async_cb: Callable[[Any], Any], + on_drop: Callable[[], None] | None = None, ) -> "DisposableBase": """Subscribe `async_cb` (an async function) to `observable`, dispatching each emitted value onto self._loop. Invocations are serialized through a - per-subscription dispatcher task with LATEST coalescing. The subscription + per-subscription dispatcher task with LATEST coalescing. `on_drop`, if + given, fires once per message dropped by that coalescing. The subscription is registered for cleanup on stop().""" if not inspect.iscoroutinefunction(async_cb): raise TypeError("process_observable requires an `async def` callback") - on_msg, dispatcher_disp = self._make_async_dispatch(async_cb) + on_msg, dispatcher_disp = self._make_async_dispatch(async_cb, on_drop) sub = observable.subscribe(on_msg) return self.register_disposable(CompositeDisposable(sub, dispatcher_disp)) @@ -635,7 +637,9 @@ def _auto_bind_handlers(self) -> None: self.process_observable(in_stream.pure_observable(), handler) def _make_async_dispatch( - self, async_handler: Callable[[Any], Any] + self, + async_handler: Callable[[Any], Any], + on_drop: Callable[[], None] | None = None, ) -> tuple[Callable[[Any], None], "DisposableBase"]: """Build a sync callback that delivers `msg` into a single-slot LATEST mailbox drained by a dedicated dispatcher task on `self._loop`. @@ -645,7 +649,9 @@ def _make_async_dispatch( awaits). - If messages arrive faster than the handler can process them, intermediate messages are dropped and only the most recent unprocessed - message is kept (LATEST policy). + message is kept (LATEST policy). `on_drop`, if given, is called once + per dropped message (on the loop thread) so callers that need every + message can surface the loss. - The returned Disposable cancels the dispatcher task. """ loop = self._loop @@ -685,6 +691,10 @@ def on_msg(msg: Any) -> None: return def _set() -> None: + # A slot that still holds an unconsumed value is about to be + # overwritten — that queued message is being dropped (LATEST). + if slot["has_value"] and on_drop is not None: + on_drop() slot["value"] = msg slot["has_value"] = True event.set() diff --git a/dimos/hardware/sensors/lidar/fastlio2/recorder.py b/dimos/hardware/sensors/lidar/fastlio2/recorder.py index 08784cfe89..5f081d13e9 100644 --- a/dimos/hardware/sensors/lidar/fastlio2/recorder.py +++ b/dimos/hardware/sensors/lidar/fastlio2/recorder.py @@ -45,13 +45,13 @@ class FastLio2Recorder(Recorder): _last_odom_pose: Pose | None = None @pose_setter_for("fastlio_odometry") - def _odom_pose(self, msg: Odometry) -> Pose | None: + async def _odom_pose(self, msg: Odometry) -> Pose | None: pose = getattr(msg, "pose", None) self._last_odom_pose = getattr(pose, "pose", None) if pose is not None else None return self._last_odom_pose @pose_setter_for("fastlio_lidar") - def _lidar_pose(self, msg: PointCloud2) -> Pose | None: + async def _lidar_pose(self, msg: PointCloud2) -> Pose | None: # Most-recent odometry pose, stamped directly (no tf). None before the # first odometry -> frame stored unposed, map-skipped. return self._last_odom_pose diff --git a/dimos/memory2/db_tf.py b/dimos/memory2/db_tf.py new file mode 100644 index 0000000000..7f52de854b --- /dev/null +++ b/dimos/memory2/db_tf.py @@ -0,0 +1,198 @@ +# Copyright 2026 Dimensional Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Transform lookups over the transforms recorded in a store. + +A store's ``tf`` member lazily reads every transform recorded under the ``tf`` +(and ``tf_static``) streams into a :class:`MultiTBuffer`, then answers +``store.tf.get(target_frame, source_frame, time)`` — composing multi-hop chains +(e.g. ``world -> map -> odom -> base_link -> mid360_link``) and interpolating to +the nearest recorded sample. This makes world-registration a real transform +lookup instead of assuming a single baked-in pose. + +``write_tf_tree`` populates those streams for a recording that lacks them. +""" + +from __future__ import annotations + +import re +import sqlite3 +import threading +from typing import TYPE_CHECKING + +import numpy as np + +from dimos.memory2.store.sqlite import SqliteStoreConfig +from dimos.msgs.geometry_msgs.Quaternion import Quaternion +from dimos.msgs.geometry_msgs.Transform import Transform +from dimos.msgs.geometry_msgs.Vector3 import Vector3 +from dimos.msgs.tf2_msgs.TFMessage import TFMessage +from dimos.protocol.tf.tf import MultiTBuffer + +if TYPE_CHECKING: + from dimos.memory2.store.base import Store + +TF_STREAMS = ("tf", "tf_static") +# Larger than any single recording's span so the buffer never prunes loaded +# transforms (MultiTBuffer drops samples older than ts - buffer_size). +_NO_PRUNE = 1.0e15 +# SQLite can't parameterize table names, so caller-supplied stream names are +# interpolated; allow only safe identifiers to keep that injection-free. +_SAFE_TABLE = re.compile(r"^[A-Za-z_][A-Za-z0-9_]*$") + + +def _safe_table(name: str) -> str: + if not _SAFE_TABLE.match(name): + raise ValueError(f"unsafe stream/table name: {name!r}") + return name + + +class DbTf: + """Transform lookups backed by the ``tf``/``tf_static`` streams of a store.""" + + def __init__(self, store: Store, stream_names: tuple[str, ...] = TF_STREAMS) -> None: + self._store = store + self._stream_names = stream_names + self._buffer: MultiTBuffer | None = None + self._load_lock = threading.Lock() + + def _ensure_loaded(self) -> MultiTBuffer: + if self._buffer is not None: + return self._buffer + with self._load_lock: + if self._buffer is not None: # another thread loaded while we waited + return self._buffer + buffer = MultiTBuffer(buffer_size=_NO_PRUNE) + available = set(self._store.list_streams()) + for name in self._stream_names: + if name not in available: + continue + for observation in self._store.stream(name, TFMessage): + message = observation.data + transforms = getattr(message, "transforms", None) + if transforms is None: + transforms = [message] + buffer.receive_transform(*transforms) + self._buffer = buffer + return buffer + + def has_transforms(self) -> bool: + return bool(self._ensure_loaded().buffers) + + def get( + self, + target_frame: str, + source_frame: str, + time_point: float | None = None, + time_tolerance: float | None = None, + ) -> Transform | None: + """Transform that maps a point in ``source_frame`` into ``target_frame``. + + Returns ``None`` if no chain connects the two frames. Uses the buffer's + non-warning lookup so per-scan misses don't spam the log. + """ + buffer = self._ensure_loaded() + return buffer.lookup(target_frame, source_frame, time_point, time_tolerance) + + +def transform_matrix(transform: Transform) -> tuple[np.ndarray, np.ndarray]: + """Return ``(R, t)`` (3x3, 3) for ``transform`` so ``p_target = p_source @ R.T + t``.""" + rotation = transform.rotation + rotation_matrix = np.asarray(rotation.to_rotation_matrix(), float).reshape(3, 3) + translation = np.array( + [transform.translation.x, transform.translation.y, transform.translation.z], float + ) + return rotation_matrix, translation + + +def write_tf_tree( + store: Store, + *, + odom_stream: str, + odom_parent: str = "odom", + odom_child: str = "base_link", + root_links: tuple[tuple[str, str], ...] = (("world", "map"), ("map", "odom")), + sensor_child: str = "mid360_link", + sensor_translation: tuple[float, float, float] = (0.0, 0.0, 0.0), + sensor_rotation: tuple[float, float, float, float] = (0.0, 0.0, 0.0, 1.0), + static_period: float = 0.45, + stream_name: str = "tf", +) -> int: + """Populate ``store``'s tf stream from an odometry stream. + + - ``root_links`` and ``odom_child -> sensor_child`` are emitted as identity / + fixed transforms every ``static_period`` seconds across the recording span. + - ``odom_parent -> odom_child`` is emitted once per odometry sample, taken + from each observation's pose. + + Returns the number of tf observations written. + """ + config = store.config + if not isinstance(config, SqliteStoreConfig): + raise TypeError("write_tf_tree reads the db directly and needs a SqliteStore") + db_path = config.path + connection = sqlite3.connect(f"file:{db_path}?mode=ro&immutable=1", uri=True) + odom = np.array( + list( + connection.execute( + "select ts,pose_x,pose_y,pose_z,pose_qx,pose_qy,pose_qz,pose_qw " + f"from {_safe_table(odom_stream)} order by ts" + ) + ), + float, + ) + connection.close() + if not len(odom): + raise ValueError(f"odom stream {odom_stream!r} is empty; cannot build tf tree") + + tf_stream = store.stream(stream_name, TFMessage) + written = 0 + + # dynamic: odom_parent -> odom_child, one per odometry sample + for row in odom: + ts = float(row[0]) + transform = Transform( + translation=Vector3(row[1], row[2], row[3]), + rotation=Quaternion(row[4], row[5], row[6], row[7]), + frame_id=odom_parent, + child_frame_id=odom_child, + ts=ts, + ) + tf_stream.append(TFMessage(transform), ts=ts) + written += 1 + + # static: root links + sensor mount, resampled every static_period + t0 = float(odom[0, 0]) + t1 = float(odom[-1, 0]) + + def statics_at(ts: float) -> list[Transform]: + links = [ + Transform(frame_id=parent, child_frame_id=child, ts=ts) for parent, child in root_links + ] + links.append( + Transform( + translation=Vector3(*sensor_translation), + rotation=Quaternion(*sensor_rotation), + frame_id=odom_child, + child_frame_id=sensor_child, + ts=ts, + ) + ) + return links + + for static_ts in np.arange(t0, t1 + static_period, static_period): + tf_stream.append(TFMessage(*statics_at(float(static_ts))), ts=float(static_ts)) + written += 1 + + return written diff --git a/dimos/memory2/module.py b/dimos/memory2/module.py index 00cd46a1d0..cc3aebc67d 100644 --- a/dimos/memory2/module.py +++ b/dimos/memory2/module.py @@ -14,7 +14,7 @@ from __future__ import annotations -from collections.abc import Callable +from collections.abc import Awaitable, Callable import enum import inspect import os @@ -274,15 +274,20 @@ class RecorderConfig(MemoryModuleConfig): stream_remapping: dict[str, str] = Field(default_factory=dict) -PoseSetter = Callable[[Any], "Pose | None"] +PoseSetter = Callable[[Any], "Awaitable[Pose | None]"] def pose_setter_for(*stream_names: str) -> Callable[[Any], Any]: - """Mark a method ``(self, msg) -> Pose | None`` as the pose setter for the - given recorded stream(s). Streams without a setter fall back to the tf-based - ``world <- frame_id`` lookup.""" + """Mark an ``async def`` method ``(self, msg) -> Pose | None`` as the pose + setter for the given recorded stream(s). Streams without a setter fall back + to the tf-based ``world <- frame_id`` lookup.""" def decorate(fn: Any) -> Any: + if not inspect.iscoroutinefunction(fn): + raise TypeError( + f"@pose_setter_for must decorate an `async def` method; " + f"{getattr(fn, '__qualname__', fn)} is not async" + ) fn._pose_setter_for = tuple(stream_names) return fn @@ -302,16 +307,20 @@ class MyRecorder(Recorder): Each stream's pose defaults to a ``world <- frame_id`` tf lookup; decorate a method with ``@pose_setter_for("stream")`` to source it elsewhere (e.g. from - an odometry stream):: + an odometry stream). Setters run on the module's event loop and may be + ``async def``:: @pose_setter_for("lidar") - def _lidar_pose(self, msg): + async def _lidar_pose(self, msg): return self._last_odom_pose """ config: RecorderConfig _pose_setters: dict[str, Any] = {} + # Per-stream count of frames lost to the dispatcher's LATEST coalescing + # (sink slower than input). Populated lazily as drops happen. + _dropped_frames: dict[str, int] = {} @rpc def start(self) -> None: @@ -324,6 +333,7 @@ def start(self) -> None: return self._pose_setters = self._collect_pose_setters() + self._dropped_frames = {} # TODO: store reset API/logic is not implemented yet. This module # shouldn't need to know about files (SqliteStore specific), and @@ -368,12 +378,14 @@ def _port_to_stream(self, name: str, input_topic: In[Any], stream: Stream[Any]) already in world coords) fall back to ``config.default_frame_id`` — so every observation gets a robot-pose anchor when tf is publishing. - Registers the subscription as a disposable on this module. + Each port is recorded by an async callback dispatched on the module's + event loop via :meth:`process_observable`, which serialises invocations + and registers the subscription for cleanup on stop(). """ - def on_msg(msg: Any) -> None: + async def on_msg(msg: Any) -> None: ts = self._resolve_ts(name, msg) - pose = self._resolve_pose(name, msg, ts) + pose = await self._resolve_pose(name, msg, ts) if not pose: logger.warning( "[%s] No pose for time %s (msg ts: %s), storing without pose", @@ -383,7 +395,23 @@ def on_msg(msg: Any) -> None: ) stream.append(msg, ts=ts, pose=pose) - self.register_disposable(Disposable(input_topic.subscribe(on_msg))) + self.process_observable( + input_topic.pure_observable(), on_msg, on_drop=lambda: self._on_frame_dropped(name) + ) + + def _on_frame_dropped(self, name: str) -> None: + """A frame for *name* was dropped because the sink couldn't keep up with + the input rate (dispatcher LATEST coalescing). Count it and warn — once, + then on each power-of-ten — so silent data loss is visible without + flooding the log.""" + count = self._dropped_frames.get(name, 0) + 1 + self._dropped_frames[name] = count + if count == 1 or count % 1000 == 0: + logger.warning( + "[%s] Recorder dropped %d frame(s) — sink slower than input; recording is lossy", + name, + count, + ) def _prepare_streams(self) -> None: """On APPEND, drop the streams this recorder is about to (re)write — the @@ -401,13 +429,13 @@ def _resolve_ts(self, name: str, msg: Any) -> float: """Timestamp to record *msg* at. Override to re-base onto another clock.""" return getattr(msg, "ts", None) or time.time() - def _resolve_pose(self, name: str, msg: Any, ts: float) -> Pose | None: - """Pose to anchor *msg* with. Dispatches to the stream's + async def _resolve_pose(self, name: str, msg: Any, ts: float) -> Pose | None: + """Pose to anchor *msg* with. Dispatches to the stream's (async) ``@pose_setter_for`` if one is defined, else falls back to a ``world <- frame_id`` tf lookup.""" setter = self._pose_setters.get(name) if setter is not None: - return cast("Pose | None", setter(msg)) + return cast("Pose | None", await setter(msg)) frame_id = getattr(msg, "frame_id", None) or self.config.default_frame_id transform = self.tf.get( self.config.root_frame, frame_id, time_point=ts, time_tolerance=self.config.tf_tolerance diff --git a/dimos/memory2/store/base.py b/dimos/memory2/store/base.py index 7a7162a6d1..46be8b8cb5 100644 --- a/dimos/memory2/store/base.py +++ b/dimos/memory2/store/base.py @@ -29,6 +29,7 @@ from dimos.protocol.service.spec import BaseConfig, Configurable if TYPE_CHECKING: + from dimos.memory2.db_tf import DbTf from dimos.memory2.replay import Replay T = TypeVar("T") @@ -105,12 +106,26 @@ def __init__(self, **kwargs: Any) -> None: Configurable.__init__(self, **kwargs) CompositeResource.__init__(self) self._streams: dict[str, Stream[Any]] = {} + self._tf: DbTf | None = None @property def streams(self) -> StreamAccessor[Stream[Any]]: """Attribute-style access to streams: ``store.streams.name``.""" return StreamAccessor(self) + @property + def tf(self) -> DbTf: + """Transform lookups over the recording's ``tf``/``tf_static`` streams. + + ``store.tf.get(target_frame, source_frame, ts)`` composes multi-hop chains + (e.g. ``world -> ... -> mid360_link``) from the recorded transforms. + """ + if self._tf is None: + from dimos.memory2.db_tf import DbTf + + self._tf = DbTf(self) + return self._tf + def replay( self, *, diff --git a/dimos/navigation/jnav/.gitignore b/dimos/navigation/jnav/.gitignore new file mode 100644 index 0000000000..750baebf41 --- /dev/null +++ b/dimos/navigation/jnav/.gitignore @@ -0,0 +1,2 @@ +result +result-* diff --git a/dimos/navigation/jnav/components/loop_closure/benchmark_table.py b/dimos/navigation/jnav/components/loop_closure/benchmark_table.py new file mode 100644 index 0000000000..102ebfdc25 --- /dev/null +++ b/dimos/navigation/jnav/components/loop_closure/benchmark_table.py @@ -0,0 +1,343 @@ +# Copyright 2026 Dimensional Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Resumable PGO benchmark over (environment x implementation/config). + +Fills one big table: every environment (go2 recordings, hk_village; kitti once +converted) scored against every PGO column (gsc_pgo in several configs, the +basic unrefined_pgo, ivan_pgo, ivan_pgo_transformer). Each cell is one eval.py +subprocess (sequential — they share the isolated LCM bus). + +CHECKPOINTED: a cell is skipped when its summary.json already exists and its +fingerprint still matches the db (size+mtime) and EVAL_VERSION. Kill it any +time and rerun — only missing/stale cells recompute. `--force` recomputes all. + +The universal score is **voxel agreement** (re-anchoring scans onto the +corrected trajectory should collapse double walls — ground-truth-free and +needs no camera), so tagless environments (kitti, hk_village) still get a +real number. April-tag agreement is reported additionally wherever a camera + +intrinsics sidecar exists. + +Usage: + uv run python dimos/navigation/jnav/components/loop_closure/benchmark_table.py + ... [--go2-root ~/datasets/go2_recordings] [--with-hk-village] [--force] + ... [--only-env NAME] [--only-col NAME] [--table-only] +""" + +from __future__ import annotations + +import argparse +from dataclasses import dataclass, field +import json +from pathlib import Path +import subprocess +import sys +import time +from typing import Any + +LOOP_CLOSURE_DIR = Path(__file__).resolve().parent +EVAL_PY = LOOP_CLOSURE_DIR / "eval.py" +RESULTS_DIR = LOOP_CLOSURE_DIR / "eval_results" +TABLE_PATH = RESULTS_DIR / "benchmark_table.md" + +DEFAULT_GO2_ROOT = Path("~/datasets/go2_recordings").expanduser() +# loop_closure/modules/jnav/navigation/dimos/ -> parents[4] is repo root. +LFS_DATA_DIR = LOOP_CLOSURE_DIR.parents[4] / "data" # repo_root/data (hk_village*.db) + +# Shared loop-closure thresholds for the gsc_pgo configs (mirrors recordings_eval). +_CMU_BASE: dict[str, Any] = { + "loop_search_radius": 3.0, + "loop_time_thresh": 5.0, + "min_loop_detect_duration": 2.0, + "key_pose_delta_trans": 0.5, +} + + +@dataclass(frozen=True) +class Column: + """One implementation+config = one table column.""" + + name: str # column label + results-suffix (disambiguates same-class modules) + module_dir: str # subdir under loop_closure/ holding module.py (class PGO) + overrides: dict[str, Any] = field(default_factory=dict) + + +COLUMNS: list[Column] = [ + Column("cmu_stock", "gsc_pgo", {}), + Column("cmu_scan_context", "gsc_pgo", {**_CMU_BASE, "use_scan_context": True}), + Column("cmu_radius", "gsc_pgo", {**_CMU_BASE, "use_scan_context": False}), + Column( + "cmu_scan_context_far", + "gsc_pgo", + { + **_CMU_BASE, + "use_scan_context": True, + "loop_candidate_max_distance_m": 0.0, + "loop_score_thresh": 10000.0, + }, + ), + Column("unrefined", "unrefined_pgo", {}), + Column("ivan", "ivan_pgo", {"publish_global_map": False}), + Column("ivan_transformer", "ivan_pgo_transformer", {}), +] + + +@dataclass(frozen=True) +class Environment: + """One dataset = one table row.""" + + name: str # results-dir recording key + db_path: Path + odom_stream: str + lidar_stream: str + camera_stream: str | None = None + intrinsics_json: Path | None = None + + +def discover_go2(root: Path) -> list[Environment]: + environments = [] + for db_path in sorted(root.glob("*/mem2.db")): + recording = db_path.parent + sidecar = recording / "camera_intrinsics.json" + environments.append( + Environment( + name=recording.name, + db_path=db_path, + odom_stream="fastlio_odometry", + lidar_stream="fastlio_lidar", + camera_stream="color_image", + intrinsics_json=sidecar if sidecar.exists() else None, + ) + ) + return environments + + +def discover_hk_village(data_dir: Path) -> list[Environment]: + # hk_village LFS dbs publish world-frame lidar + PoseStamped odom; no camera + # intrinsics sidecar, so they score on voxel agreement alone. + environments = [] + for db_path in sorted(data_dir.glob("hk_village*.db")): + environments.append( + Environment( + name=db_path.stem, + db_path=db_path, + odom_stream="odom", + lidar_stream="lidar", + camera_stream=None, + intrinsics_json=None, + ) + ) + return environments + + +def cell_dir(environment: Environment, column: Column) -> Path: + # Mirrors eval.py's out_dir formula: __.PGO[.]. + return RESULTS_DIR / f"{environment.name}__{column.module_dir}.PGO.{column.name}" + + +def cell_is_fresh(environment: Environment, column: Column) -> bool: + summary_path = cell_dir(environment, column) / "summary.json" + if not summary_path.exists(): + return False + try: + summary = json.loads(summary_path.read_text()) + except (json.JSONDecodeError, OSError): + return False + fingerprint = summary.get("fingerprint", {}) + stat = environment.db_path.stat() + return ( + fingerprint.get("db_bytes") == stat.st_size + and fingerprint.get("db_mtime") == int(stat.st_mtime) + and fingerprint.get("version") is not None + ) + + +def _kill_zombies() -> None: + """Clear leftover native processes / workers that can wedge the next cell.""" + subprocess.run( + "lsof -ti tcp:7766 2>/dev/null | xargs kill -9 2>/dev/null;" + ' pkill -9 -f "bin/pgo|scene_lidar" 2>/dev/null', + shell=True, + check=False, + ) + + +def run_cell(environment: Environment, column: Column) -> bool: + command = [ + sys.executable, + "-u", + str(EVAL_PY), + "--db-path", + str(environment.db_path), + "--odom-stream", + environment.odom_stream, + "--lidar-stream", + environment.lidar_stream, + "--module-path", + str(LOOP_CLOSURE_DIR / column.module_dir / "module.py"), + "--module-name", + "PGO", + "--recording-name", + environment.name, + "--results-suffix", + column.name, + "--with-rrd", + "false", + "--lockstep", + "true", + ] + if environment.camera_stream is not None: + command += ["--camera-stream", environment.camera_stream] + if environment.intrinsics_json is not None: + command += ["--camera-intrinsics-json-path", str(environment.intrinsics_json)] + if column.overrides: + command += ["--pgo-config-json", json.dumps(column.overrides)] + print(f"\n=== {environment.name} x {column.name} ===", flush=True) + result = subprocess.run(command, check=False) + print(f"=== {environment.name} x {column.name} exit: {result.returncode} ===", flush=True) + return result.returncode == 0 + + +def _fmt(value: float | None, places: int = 3, signed: bool = True) -> str: + if value is None: + return "—" + return f"{value:+.{places}f}" if signed else f"{value:.{places}f}" + + +def render_table(environments: list[Environment]) -> Path: + cells: dict[tuple[str, str], dict[str, Any]] = {} + for summary_path in RESULTS_DIR.glob("*/summary.json"): + recording, _, module_key = summary_path.parent.name.rpartition("__") + try: + summary = json.loads(summary_path.read_text()) + except (json.JSONDecodeError, OSError): + continue + cells[(recording, module_key)] = summary.get("scores", {}) + + column_keys = [f"{column.module_dir}.PGO.{column.name}" for column in COLUMNS] + header = "| environment | " + " | ".join(column.name for column in COLUMNS) + " |" + sep = "|" + "---|" * (len(COLUMNS) + 1) + lines = [ + "# PGO benchmark — environments x implementations", + "", + "Each cell: **voxel improvement** (fractional drop in occupied 0.2 m voxels " + "after re-anchoring scans onto the corrected trajectory; the universal, " + "ground-truth-free score) — then `tag:` where a camera " + "exists, and `cl`. Higher is better; `—` = not yet run / N/A.", + "", + header, + sep, + ] + for environment in environments: + row_cells = [] + for column_key in column_keys: + scores = cells.get((environment.name, column_key)) + if scores is None: + row_cells.append("—") + continue + voxel = _fmt(scores.get("voxel_improvement")) + tag = scores.get("tag_improvement") + closures = scores.get("closures") + text = voxel + if tag is not None: + text += f" tag:{tag:+.2f}" + if closures is not None: + text += f" cl{closures}" + row_cells.append(text) + lines.append(f"| {environment.name} | " + " | ".join(row_cells) + " |") + + # Per-column mean voxel improvement (over environments that have a number). + lines += ["", "## Mean voxel improvement per column", ""] + lines.append("| " + " | ".join(column.name for column in COLUMNS) + " |") + lines.append("|" + "---|" * len(COLUMNS)) + means = [] + for column_key in column_keys: + values = [ + cells[(environment.name, column_key)]["voxel_improvement"] + for environment in environments + if (environment.name, column_key) in cells + and cells[(environment.name, column_key)].get("voxel_improvement") is not None + ] + means.append(f"{sum(values) / len(values):+.3f}" if values else "—") + lines.append("| " + " | ".join(means) + " |") + lines.append("") + + RESULTS_DIR.mkdir(exist_ok=True) + TABLE_PATH.write_text("\n".join(lines) + "\n") + return TABLE_PATH + + +def main() -> None: + parser = argparse.ArgumentParser(description=__doc__) + parser.add_argument("--go2-root", type=Path, default=DEFAULT_GO2_ROOT) + parser.add_argument("--with-hk-village", action="store_true") + parser.add_argument("--data-dir", type=Path, default=LFS_DATA_DIR) + parser.add_argument("--only-env", help="comma-separated environment names") + parser.add_argument("--only-col", help="comma-separated column names") + parser.add_argument("--force", action="store_true", help="recompute fresh cells too") + parser.add_argument( + "--attempts", type=int, default=2, help="retries per cell on transient RPC timeouts" + ) + parser.add_argument("--table-only", action="store_true", help="render from cache, run nothing") + args = parser.parse_args() + + environments = discover_go2(args.go2_root.expanduser()) + if args.with_hk_village: + environments += discover_hk_village(args.data_dir.expanduser()) + if args.only_env: + wanted = {name.strip() for name in args.only_env.split(",")} + environments = [environment for environment in environments if environment.name in wanted] + + columns = COLUMNS + if args.only_col: + wanted = {name.strip() for name in args.only_col.split(",")} + columns = [column for column in COLUMNS if column.name in wanted] + + if args.table_only: + print(f"table -> {render_table(environments)}") + return + + total = len(environments) * len(columns) + print(f"benchmark: {len(environments)} environments x {len(columns)} columns = {total} cells") + done = skipped = failed = 0 + for environment in environments: + for column in columns: + if not args.force and cell_is_fresh(environment, column): + skipped += 1 + print(f"skip (fresh): {environment.name} x {column.name}", flush=True) + continue + # Retry transient macOS LCM startup-RPC timeouts; a fresh process + # almost always gets past them. Kill zombies between attempts. + ok = False + for attempt in range(1, args.attempts + 1): + ok = run_cell(environment, column) + if ok: + break + _kill_zombies() + if attempt < args.attempts: + print( + f"retry {attempt + 1}/{args.attempts}: {environment.name} x {column.name}" + ) + time.sleep(5) + done += 1 if ok else 0 + failed += 0 if ok else 1 + render_table(environments) # refresh after every cell — live + crash-safe + + table = render_table(environments) + print(f"\ncells: {done} ran, {skipped} cached, {failed} failed") + print(f"table -> {table}") + + +if __name__ == "__main__": + main() diff --git a/dimos/navigation/jnav/components/loop_closure/eval.py b/dimos/navigation/jnav/components/loop_closure/eval.py new file mode 100644 index 0000000000..27902331b0 --- /dev/null +++ b/dimos/navigation/jnav/components/loop_closure/eval.py @@ -0,0 +1,924 @@ +# Copyright 2026 Dimensional Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Evaluate a loop-closure module against a recording. + +Two ground-truth-free scores, before vs after correction: + * April-tag agreement — a fixed tag re-seen along the run should map to one + world position; the spread of its per-visit robot positions measures drift. + Tags are taken as relative to the chosen odom stream (sighting time -> + nearest odom pose), so no static transforms or stored db poses are needed. + * Lidar-voxel agreement — re-anchoring the registered scans onto the + corrected trajectory should collapse double walls, so the corrected map + should occupy FEWER voxels than the raw one. + +Pipeline: + 1. April tags: read the db's `april_tags` stream (ts + marker_id only), or + detect them with sane defaults (medoid, blur/reproj/size/distance gates). + 2. Raw agreement over the raw odometry. + 3. Replay lidar + odom through the module (loaded dynamically from + --module-path/--module-name), capture its optimized pose graph. + 4. Corrected agreement + voxel agreement, written to + eval_results/__/summary.json (and an eval.rrd with the + raw + corrected trajectories when --with-rrd true). + +Usage: + uv run python dimos/navigation/jnav/components/loop_closure/eval.py \\ + --db-path ~/datasets/go2_recordings/2026-06-04_12-56pm-PST/mem2.db \\ + --odom-stream fastlio_odometry \\ + --camera-stream color_image \\ + --camera-intrinsics-json-path \\ + ~/datasets/go2_recordings/2026-06-04_12-56pm-PST/camera_intrinsics.json \\ + --module-path dimos/navigation/jnav/components/loop_closure/gsc_pgo/module.py \\ + --module-name PGO \\ + --pgo-config-json '{"use_scan_context": true}' \\ + --with-rrd true +""" + +from __future__ import annotations + +import argparse +import asyncio +from collections.abc import AsyncGenerator, Iterable +import importlib +import json +from pathlib import Path +import tempfile +import time +from typing import Any + +import numpy as np + +from dimos.core.coordination.blueprints import autoconnect +from dimos.core.coordination.module_coordinator import ModuleCoordinator +from dimos.core.module import Module, ModuleConfig +from dimos.core.stream import In, Out +from dimos.msgs.geometry_msgs.Pose import Pose +from dimos.msgs.nav_msgs.Odometry import Odometry +from dimos.msgs.sensor_msgs.PointCloud2 import PointCloud2 +from dimos.navigation.jnav.msgs.Graph3D import Graph3D +from dimos.navigation.jnav.msgs.GraphDelta3D import GraphDelta3D +from dimos.navigation.jnav.utils.apriltag_agreement import ( + VISIT_GAP_S, + AgreementReport, + agreement_improvement, + agreement_report, + paired_tag_visit_positions, +) +from dimos.navigation.jnav.utils.apriltags import ( + detect_apriltags, + load_intrinsics_json, + load_or_detect_sightings, +) +from dimos.navigation.jnav.utils.module_loading import ( + filter_config_for_module, + load_module_class, +) +from dimos.navigation.jnav.utils.recording_db import ( + MAX_REPLAY_ODOM, + MAX_REPLAY_SCANS, + ODOM_MATCH_TOLERANCE_S, + REPLAY_DRAIN_MARGIN_S, + REPLAY_PUBLISH_HZ, + iterate_stream, + list_streams, + odometry_lookup, + store, + stream_count, +) +from dimos.navigation.jnav.utils.trajectory_metrics import ( + GraphPose, + PoseLookup7, + drifted_lookup, + graph_lookup, + has_drift, + lidar_voxel_agreement, + pose7_lookup, + trajectory_recovery_error, +) + +RESULTS_DIR = Path(__file__).resolve().parent / "eval_results" +APRIL_TAGS_STREAM = "april_tags" +_RRD_MAX_PATH_POINTS = 5000 + +# Cap replayed scans fed to voxel agreement so the map fits in memory. +VOXEL_MAX_SCANS = 300 + +# Bump to invalidate every cached cell (scoring/replay semantics changed). +EVAL_VERSION = 1 + + +def cell_fingerprint( + db_path: Path, + pgo_config: dict[str, Any], + lidar_stream: str, + odom_stream: str, + drift_per_sec: list[float] | None = None, +) -> dict[str, Any]: + """Identity of a completed cell — the driver re-runs only when this changes + (db edited, config changed, streams changed, drift changed, or version).""" + stat = db_path.stat() + return { + "db_bytes": stat.st_size, + "db_mtime": int(stat.st_mtime), + "pgo_config": pgo_config, + "lidar_stream": lidar_stream, + "odom_stream": odom_stream, + "drift_per_sec": list(drift_per_sec or [0.0, 0.0, 0.0]), + "version": EVAL_VERSION, + } + + +# A known-good PGO config for replayed recordings: revisit gates loose enough +# for walks where the same spot is re-seen tens of seconds later. These now +# match gsc_pgo's own defaults (so it's a no-op for gsc_pgo); kept here to apply +# the same gates to the other PGO modules, which have different defaults. +DEFAULT_PGO_CONFIG: dict[str, Any] = { + "loop_search_radius": 3.0, + "loop_time_thresh": 5.0, + "min_loop_detect_duration": 2.0, + "key_pose_delta_trans": 0.5, + "use_scan_context": True, +} + + +class GraphCaptureConfig(ModuleConfig): + output_path: str = "" + + +class GraphCapture(Module): + """Captures the module's optimized pose graph WITH orientations + closures. + + Results are handed back via a JSON file written on teardown (modules run in + separate worker processes).""" + + config: GraphCaptureConfig + + pose_graph: In[Graph3D] + loop_closure_event: In[GraphDelta3D] + + def __init__(self, **kwargs: Any) -> None: + super().__init__(**kwargs) + self._graph: list[GraphPose] = [] + self._closures = 0 + + async def handle_pose_graph(self, msg: Graph3D) -> None: + self._graph = [ + ( + node.pose.ts, + node.pose.position.x, + node.pose.position.y, + node.pose.position.z, + node.pose.orientation.x, + node.pose.orientation.y, + node.pose.orientation.z, + node.pose.orientation.w, + ) + for node in msg.nodes + ] + + async def handle_loop_closure_event(self, msg: GraphDelta3D) -> None: + self._closures += 1 + + async def main(self) -> AsyncGenerator[None, None]: + yield + Path(self.config.output_path).write_text( + json.dumps({"graph": self._graph, "closures": self._closures}) + ) + + +class LockstepReplayConfig(ModuleConfig): + db: str = "" + lidar_stream: str = "lidar" + odometry_stream: str = "odom" + lidar_stride: int = 1 + odometry_stride: int = 2 + odom_publish_hz: float = 500.0 + ack_timeout_s: float = 30.0 + done_path: str = "" + # Artificial odometry drift: a constant-velocity world offset added to both + # odom poses and lidar clouds at time t (offset = drift_per_sec * (t - t0)). + # Consistent per-instant, so the trajectory warps over time — exactly the + # accumulating error loop closure is supposed to fix. [0,0,0] = no drift. + drift_per_sec: list[float] = [0.0, 0.0, 0.0] + drift_t0: float = 0.0 + + +class LockstepReplay(Module): + """Closed-loop replay: after each scan, wait for the module's + corrected_odometry ack before sending the next. + + Every module under test sees 100% of the (strided) scans regardless of + machine speed — wall clock varies, the data the module processes doesn't. + Odometry messages are cheap latest-state updates and stay fire-and-forget + (lightly paced). Writes a done-marker JSON (ack timeout count) at the end + so the host knows when to tear down. + + odom and lidar are merged into one time-sorted stream, so playback runs in + bursts: all odoms whose timestamps fall before the next scan are emitted + fire-and-forget (paced by odom_publish_hz), then one scan is sent and the + loop blocks on its ack. The only guarantee is one ack-wait per scan; the + odom burst size per gap is data-dependent (~ odom_rate / lidar_rate).""" + + config: LockstepReplayConfig + + lidar: Out[PointCloud2] + odometry: Out[Odometry] + corrected_odometry: In[Odometry] + + def __init__(self, **kwargs: Any) -> None: + super().__init__(**kwargs) + self._ack_count = 0 + self._ack_event: asyncio.Event | None = None + + async def handle_corrected_odometry(self, msg: Odometry) -> None: + self._ack_count += 1 + if self._ack_event is not None: + self._ack_event.set() + + def _load(self) -> list[tuple[float, str, Any]]: + db_path = Path(self.config.db) + merged: list[tuple[float, str, Any]] = [] + for timestamp, pose in iterate_stream( + db_path, self.config.odometry_stream, stride=self.config.odometry_stride + ): + merged.append((timestamp, "odom", pose)) + for timestamp, cloud in iterate_stream( + db_path, self.config.lidar_stream, stride=self.config.lidar_stride + ): + merged.append((timestamp, "lidar", cloud)) + merged.sort(key=lambda item: item[0]) + return merged + + async def main(self) -> AsyncGenerator[None, None]: + messages = await asyncio.to_thread(self._load) + self._task = asyncio.create_task(self._replay(messages)) + yield + self._task.cancel() + + async def _replay(self, messages: list[tuple[float, str, Any]]) -> None: + odom_period = 1.0 / self.config.odom_publish_hz + timeouts = 0 + scans_sent = 0 + drift = np.asarray(self.config.drift_per_sec, dtype=np.float64) + t0 = self.config.drift_t0 + apply_drift = has_drift(drift) + # Timestamps of scans the module never acked — the frames it (likely) + # skipped. Recorded for reproducibility of partial runs. + skipped_scan_ts: list[float] = [] + for timestamp, kind, payload in messages: + if kind == "odom": + pose = RateReplay._payload_pose(payload) + if apply_drift: + offset = drift * (timestamp - t0) + pose = Pose( + position=[ + pose.position.x + offset[0], + pose.position.y + offset[1], + pose.position.z + offset[2], + ], + orientation=[ + pose.orientation.x, + pose.orientation.y, + pose.orientation.z, + pose.orientation.w, + ], + ) + self.odometry.publish( + Odometry( + ts=timestamp, + frame_id="map", + child_frame_id="base_link", + pose=pose, + ) + ) + await asyncio.sleep(odom_period) + continue + + acks_before = self._ack_count + self._ack_event = asyncio.Event() + points = payload.points_f32() + if apply_drift: + points = points + (drift * (timestamp - t0)).astype(np.float32) + self.lidar.publish(PointCloud2.from_numpy(points, frame_id="map", timestamp=timestamp)) + scans_sent += 1 + deadline = time.monotonic() + self.config.ack_timeout_s + while self._ack_count == acks_before: + remaining = deadline - time.monotonic() + if remaining <= 0: + timeouts += 1 + skipped_scan_ts.append(timestamp) + break + try: + await asyncio.wait_for(self._ack_event.wait(), timeout=remaining) + except TimeoutError: + continue + self._ack_event.clear() + if scans_sent % _PROGRESS_EVERY_N_SCANS == 0: + # Periodic progress so a capped run still reports coverage. + Path(self.config.done_path + ".progress").write_text( + json.dumps(self._stats(timeouts, scans_sent, skipped_scan_ts)) + ) + + Path(self.config.done_path).write_text( + json.dumps(self._stats(timeouts, scans_sent, skipped_scan_ts)) + ) + + @staticmethod + def _stats(timeouts: int, scans_sent: int, skipped_scan_ts: list[float]) -> dict[str, Any]: + return { + "timeouts": timeouts, + "scans_sent": scans_sent, + "skipped_scan_ts": skipped_scan_ts, + } + + +class RateReplayConfig(ModuleConfig): + db: str = "" + lidar_stream: str = "lidar" + odometry_stream: str = "odom" + lidar_stride: int = 1 + odometry_stride: int = 2 + publish_hz: float = 40.0 + + +class RateReplay(Module): + """Legacy fixed-rate replay: publishes world-frame lidar + odometry at a set + Hz with timestamps preserved (no ack pacing — wall-clock dependent). + + Works for both odometry payload shapes found in recordings: ``Odometry`` + (go2 ``fastlio_odometry``) and ``PoseStamped`` (hk_village ``odom``). + """ + + config: RateReplayConfig + + lidar: Out[PointCloud2] + odometry: Out[Odometry] + + def __init__(self, **kwargs: Any) -> None: + super().__init__(**kwargs) + self.done = False + + def _load(self) -> list[tuple[float, str, Any]]: + db_path = Path(self.config.db) + merged: list[tuple[float, str, Any]] = [] + for timestamp, pose in iterate_stream( + db_path, self.config.odometry_stream, stride=self.config.odometry_stride + ): + merged.append((timestamp, "odom", pose)) + for timestamp, cloud in iterate_stream( + db_path, self.config.lidar_stream, stride=self.config.lidar_stride + ): + merged.append((timestamp, "lidar", cloud)) + merged.sort(key=lambda item: item[0]) + return merged + + @staticmethod + def _payload_pose(payload: Any) -> Pose: + if hasattr(payload, "pose"): # Odometry + return payload.pose # type: ignore[no-any-return] + return Pose( # PoseStamped + payload.x, + payload.y, + payload.z, + payload.orientation.x, + payload.orientation.y, + payload.orientation.z, + payload.orientation.w, + ) + + async def main(self) -> AsyncGenerator[None, None]: + messages = await asyncio.to_thread(self._load) + self._task = asyncio.create_task(self._replay(messages)) + yield + self._task.cancel() + + async def _replay(self, messages: list[tuple[float, str, Any]]) -> None: + period = 1.0 / self.config.publish_hz + for timestamp, kind, payload in messages: + if kind == "odom": + self.odometry.publish( + Odometry( + ts=timestamp, + frame_id="map", + child_frame_id="base_link", + pose=self._payload_pose(payload), + ) + ) + else: + self.lidar.publish( + PointCloud2.from_numpy( + payload.points_f32(), frame_id="map", timestamp=timestamp + ) + ) + await asyncio.sleep(period) + self.done = True + + +# Run cap scales with the workload: a per-scan budget (well above any sane +# processing time, below the 30s ack timeout) plus fixed startup overhead. +LOCKSTEP_PER_SCAN_BUDGET_S = 2.0 +LOCKSTEP_BASE_OVERHEAD_S = 120.0 +LOCKSTEP_POLL_S = 5.0 +LOCKSTEP_DRAIN_S = 10.0 +_PROGRESS_EVERY_N_SCANS = 200 + + +def run_module_graph( + db_path: Path, + module_class: type, + config_overrides: dict[str, Any], + *, + lidar_stream: str, + odom_stream: str, + lockstep: bool = True, + drift_per_sec: list[float] | None = None, + drift_t0: float = 0.0, +) -> tuple[list[GraphPose], int, dict[str, Any]]: + """Replay the recording through the module; return its optimized pose graph + (with orientations), loop-closure count, and replay stats. + + lockstep=True (default) paces scans on the module's corrected_odometry + acks — machine-speed independent. lockstep=False is the legacy fixed-rate + wall-clock replay. drift_per_sec injects a constant-velocity world offset + into the replayed odom+lidar (see LockstepReplayConfig).""" + drift_per_sec = drift_per_sec or [0.0, 0.0, 0.0] + output_path = Path(tempfile.gettempdir()) / f"jnav_lc_eval_{db_path.parent.name}.json" + output_path.unlink(missing_ok=True) + done_path = Path(tempfile.gettempdir()) / f"jnav_lc_eval_done_{db_path.parent.name}.json" + done_path.unlink(missing_ok=True) + Path(str(done_path) + ".progress").unlink(missing_ok=True) + lidar_stride = max(1, -(-stream_count(db_path, lidar_stream) // MAX_REPLAY_SCANS)) + odometry_stride = max(1, -(-stream_count(db_path, odom_stream) // MAX_REPLAY_ODOM)) + n_messages = stream_count(db_path, odom_stream) // odometry_stride + n_messages += stream_count(db_path, lidar_stream) // lidar_stride + + if lockstep: + replay_blueprint = LockstepReplay.blueprint( + db=str(db_path), + lidar_stream=lidar_stream, + odometry_stream=odom_stream, + lidar_stride=lidar_stride, + odometry_stride=odometry_stride, + done_path=str(done_path), + drift_per_sec=drift_per_sec, + drift_t0=drift_t0, + ) + else: + replay_blueprint = RateReplay.blueprint( + db=str(db_path), + lidar_stream=lidar_stream, + odometry_stream=odom_stream, + lidar_stride=lidar_stride, + odometry_stride=odometry_stride, + publish_hz=REPLAY_PUBLISH_HZ, + ) + + blueprint = autoconnect( + replay_blueprint, + module_class.blueprint(**config_overrides), # type: ignore[attr-defined] + GraphCapture.blueprint(output_path=str(output_path)), + ) + coordinator = ModuleCoordinator.build(blueprint) + mode = "lockstep" if lockstep else f"fixed-rate {REPLAY_PUBLISH_HZ}Hz" + print( + f"replaying {n_messages} messages through {module_class.__name__}" + f" ({mode}, lidar stride {lidar_stride}, odom stride {odometry_stride})" + ) + replay_stats: dict[str, Any] = {"mode": mode} + try: + if lockstep: + # Per-frame budget: the cap scales with how many scans are fed. + n_scans = stream_count(db_path, lidar_stream) // lidar_stride + max_run_s = n_scans * LOCKSTEP_PER_SCAN_BUDGET_S + LOCKSTEP_BASE_OVERHEAD_S + started = time.monotonic() + while not done_path.exists(): + elapsed = time.monotonic() - started + if elapsed > max_run_s: + replay_stats["hit_max_run_s"] = max_run_s + print( + f"lockstep replay hit the per-frame cap" + f" ({n_scans} scans x {LOCKSTEP_PER_SCAN_BUDGET_S}s" + f" + {LOCKSTEP_BASE_OVERHEAD_S}s = {round(max_run_s)}s) — stopping early" + ) + break + if int(elapsed) % 60 < LOCKSTEP_POLL_S and elapsed > LOCKSTEP_POLL_S: + print(f" ... lockstep replay running ({round(elapsed)}s)") + time.sleep(LOCKSTEP_POLL_S) + progress_path = Path(str(done_path) + ".progress") + if done_path.exists(): + replay_stats.update(json.loads(done_path.read_text())) + elif progress_path.exists(): + # Capped run: last periodic progress still tells us coverage + # and which frames the module never acked. + replay_stats.update(json.loads(progress_path.read_text())) + replay_stats["partial"] = True + progress_path.unlink(missing_ok=True) + time.sleep(LOCKSTEP_DRAIN_S) + else: + time.sleep(n_messages / REPLAY_PUBLISH_HZ + REPLAY_DRAIN_MARGIN_S) + finally: + coordinator.stop() + + if not output_path.exists(): + raise SystemExit(f"{module_class.__name__} produced no pose graph output") + data = json.loads(output_path.read_text()) + graph = [tuple(row) for row in data["graph"]] + return graph, int(data["closures"]), replay_stats # type: ignore[return-value] + + +def odometry_pose7_lookup(db_path: Path, odom_stream: str) -> PoseLookup7: + times: list[float] = [] + poses: list[list[float]] = [] + for timestamp, payload in iterate_stream(db_path, odom_stream): + pose = RateReplay._payload_pose(payload) + times.append(timestamp) + poses.append( + [ + pose.position.x, + pose.position.y, + pose.position.z, + pose.orientation.x, + pose.orientation.y, + pose.orientation.z, + pose.orientation.w, + ] + ) + return pose7_lookup( + np.asarray(times, dtype=np.float64), + np.asarray(poses, dtype=np.float64), + ODOM_MATCH_TOLERANCE_S, + ) + + +def _subsampled_path(positions: np.ndarray) -> np.ndarray: + stride = max(1, len(positions) // _RRD_MAX_PATH_POINTS) + return positions[::stride] + + +def write_trajectory_rrd(rrd_path: Path, raw_positions: np.ndarray, graph: list[GraphPose]) -> None: + """Raw + corrected trajectories (no tag rendering — tags are scored as + odom-relative, not placed in 3D).""" + import rerun as rr + + rr.init("jnav_loop_closure_eval", spawn=False) + rr.save(str(rrd_path)) + rr.log( + "trajectory/raw_odom", + rr.LineStrips3D([_subsampled_path(raw_positions)], colors=[[200, 90, 90]]), + static=True, + ) + corrected = np.asarray([[node[1], node[2], node[3]] for node in graph], dtype=np.float64) + rr.log( + "trajectory/corrected", + rr.LineStrips3D([_subsampled_path(corrected)], colors=[[90, 200, 120]]), + static=True, + ) + + +def _report_dict(report: AgreementReport) -> dict[str, Any]: + return { + "mean_spread_m": report.mean_spread, + "total_observations": report.total_observations, + "per_tag": [ + {"tag_id": tag.tag_id, "observations": tag.observations, "spread_m": tag.spread} + for tag in report.per_tag + ], + } + + +def evaluate( + db_path: Path, + *, + odom_stream: str, + camera_stream: str | None, + intrinsics_json: Path | None, + module_path: Path, + module_name: str, + pgo_config: dict[str, Any], + with_rrd: bool, + lidar_stream: str, + lockstep: bool = True, + results_suffix: str = "", + recording_name: str | None = None, + drift_per_sec: list[float] | None = None, + ignore_tags: set[int] | None = None, +) -> dict[str, Any]: + streams = list_streams(db_path) + for required in (odom_stream, lidar_stream): + if required not in streams: + raise SystemExit(f"no stream {required!r} in {db_path} (have: {streams})") + + module_class = load_module_class(module_path, module_name) + pgo_config = filter_config_for_module(module_class, pgo_config) + + # Artificial drift: the module is fed odom+lidar with a constant-velocity + # world offset added at each time; the raw-baseline scoring must apply the + # SAME offset so it compares against what the module actually saw. + drift_per_sec = drift_per_sec or [0.0, 0.0, 0.0] + drift_t0 = next(iterate_stream(db_path, odom_stream))[0] if has_drift(drift_per_sec) else 0.0 + + # April-tag agreement needs a camera + intrinsics; voxel agreement does not. + # Datasets without either (kitti-360, bare lidar recordings) still score on + # voxel agreement alone, so the same harness fills every table cell. + sightings: dict[int, list[float]] = {} + tag_source = "none" + have_camera = camera_stream is not None and camera_stream in streams + if have_camera and intrinsics_json is not None and intrinsics_json.exists(): + assert camera_stream is not None # narrowed by have_camera + camera = camera_stream + intrinsics_config = load_intrinsics_json(intrinsics_json) + db_store = store(db_path) + stored_stream: Any = ( + db_store.stream(APRIL_TAGS_STREAM) + if APRIL_TAGS_STREAM in db_store.list_streams() + else [] + ) + stored = ((int(obs.tags["marker_id"]), float(obs.ts)) for obs in stored_stream) + + def detect() -> Iterable[tuple[int, float]]: + detections = detect_apriltags( + db_store, + intrinsics_config["intrinsics"], + intrinsics_config["distortion"], + image_stream=camera, + stream_name=APRIL_TAGS_STREAM, + marker_length=intrinsics_config.get("marker_length", 0.10), + dictionary=intrinsics_config.get("dictionary", "DICT_APRILTAG_36h11"), + ) + return ((int(d["marker_id"]), float(d["ts"])) for d in detections) + + sightings, tag_source = load_or_detect_sightings(stored, detect) + # Drop dynamic/unreliable tags (e.g. a tag on a moving object) so their + # motion isn't mistaken for trajectory drift. huge_loop_realsense tag #17 is + # dynamic; all others are static. + if ignore_tags: + dropped = sorted(tag_id for tag_id in sightings if tag_id in ignore_tags) + for tag_id in dropped: + del sightings[tag_id] + if dropped: + print(f"ignoring tags {dropped} (declared dynamic/unreliable)") + n_sightings = sum(len(times) for times in sightings.values()) + if sightings: + print(f"april tags ({tag_source}): {n_sightings} sightings across ids {sorted(sightings)}") + else: + print("no April tags (camera/intrinsics absent or none detected) — voxel agreement only") + + started = time.monotonic() + graph, closures, replay_stats = run_module_graph( + db_path, + module_class, + pgo_config, + lidar_stream=lidar_stream, + odom_stream=odom_stream, + lockstep=lockstep, + drift_per_sec=drift_per_sec, + drift_t0=drift_t0, + ) + runtime_s = time.monotonic() - started + if not graph: + raise SystemExit(f"{module_name} produced an empty pose graph") + + # The module solved on drifted input, so its graph lives in the drifted + # world; the raw baselines must be drifted to match (see drift_per_sec). + raw_xyz_lookup = drifted_lookup(odometry_lookup(db_path, odom_stream), drift_per_sec, drift_t0) + raw_pose7_lookup = drifted_lookup( + odometry_pose7_lookup(db_path, odom_stream), drift_per_sec, drift_t0 + ) + + xyz_graph = [(node[0], node[1], node[2], node[3]) for node in graph] + if sightings: + raw_tag_positions, corrected_tag_positions = paired_tag_visit_positions( + sightings, + raw_xyz_lookup, + graph_lookup(xyz_graph), + gap_s=VISIT_GAP_S, + ) + raw_report = agreement_report(raw_tag_positions) + corrected_report = agreement_report(corrected_tag_positions) + improvement: float | None = agreement_improvement(raw_report, corrected_report) + else: + raw_report = agreement_report({}) + corrected_report = agreement_report({}) + improvement = None # no tags — tag agreement is N/A for this cell + + voxel_stride = max(1, -(-stream_count(db_path, lidar_stream) // VOXEL_MAX_SCANS)) + voxel = lidar_voxel_agreement( + ( + (timestamp, cloud.points_f32()) + for timestamp, cloud in iterate_stream(db_path, lidar_stream, stride=voxel_stride) + ), + raw_pose7_lookup, + graph, + drift_per_sec=drift_per_sec, + drift_t0=drift_t0, + ) + + # Drift-recovery ATE: corrected trajectory vs the UN-drifted ground truth + # (the odom before drift was injected). Only meaningful with --drift-per-sec; + # the right metric where tag/voxel agreement is weak (e.g. KITTI's long loop). + trajectory = trajectory_recovery_error( + graph, odometry_lookup(db_path, odom_stream), drift_per_sec, drift_t0 + ) + if trajectory is not None: + print( + f" drift recovery: {trajectory['drifted_ate_m']:.2f}" + f" -> {trajectory['corrected_ate_m']:.2f} m ATE" + f" ({trajectory['trajectory_improvement']:+.3f})" + ) + + # Key by package + class — several loop-closure modules are all named PGO. + # results_suffix (dot-joined, NOT "__" which delimits the recording name) + # separates runs that differ in inputs, e.g. fastlio vs pointlio odometry. + module_package = module_class.__module__.rsplit(".", 2)[-2] + module_key = f"{module_package}.{module_name}" + ( + f".{results_suffix}" if results_suffix else "" + ) + # db.parent.name is the recording dir for go2; LFS dbs (hk_village) sit + # directly in data/, so an explicit recording_name avoids cell collisions. + out_dir = RESULTS_DIR / f"{recording_name or db_path.parent.name}__{module_key}" + out_dir.mkdir(parents=True, exist_ok=True) + rrd_path = out_dir / "eval.rrd" + if with_rrd: + raw_positions = np.asarray( + [ + [pose.position.x, pose.position.y, pose.position.z] + for _, payload in iterate_stream(db_path, odom_stream) + for pose in [RateReplay._payload_pose(payload)] + ], + dtype=np.float64, + ) + write_trajectory_rrd(rrd_path, raw_positions, graph) + + summary = { + "db": str(db_path), + "odom_stream": odom_stream, + "camera_stream": camera_stream, + "lidar_stream": lidar_stream, + "module": {"path": str(module_path), "name": module_name}, + "pgo_config": pgo_config, + "drift_per_sec": list(drift_per_sec), + "fingerprint": cell_fingerprint( + db_path, pgo_config, lidar_stream, odom_stream, drift_per_sec + ), + "replay": replay_stats, + "april_tags": { + "source": tag_source, + "sightings": n_sightings, + "ids": sorted(sightings), + }, + "scores": { + "raw_spread_m": raw_report.mean_spread if sightings else None, + "corrected_spread_m": corrected_report.mean_spread if sightings else None, + "tag_improvement": improvement, + "voxel_improvement": voxel.get("improvement"), + "trajectory_improvement": trajectory["trajectory_improvement"] if trajectory else None, + "drifted_ate_m": trajectory["drifted_ate_m"] if trajectory else None, + "corrected_ate_m": trajectory["corrected_ate_m"] if trajectory else None, + "closures": closures, + "keyframes": len(graph), + "runtime_s": round(runtime_s, 1), + }, + "raw_agreement": _report_dict(raw_report), + "corrected_agreement": _report_dict(corrected_report), + "voxel_agreement": voxel, + "rrd": str(rrd_path) if with_rrd else None, + "evaluated_at": time.strftime("%Y-%m-%d %H:%M:%S"), + } + (out_dir / "summary.json").write_text(json.dumps(summary, indent=2) + "\n") + + print(f"\nresults -> {out_dir / 'summary.json'}") + if sightings: + print( + f" tag spread: {raw_report.mean_spread:.3f}" + f" -> {corrected_report.mean_spread:.3f} m" + ) + print(f" tag improvement: {improvement:+.3f} (1.0 = perfect)") + else: + print(" tag improvement: n/a (no tags)") + if voxel.get("status") == "ok": + print( + f" voxel agreement: {voxel['raw_voxels']} -> {voxel['corrected_voxels']} voxels" + f" ({voxel['improvement']:+.3f}, {voxel['scans_used']} scans @ {voxel['voxel_size_m']}m)" + ) + else: + print(f" voxel agreement: {voxel.get('status')}") + print(f" closures: {closures}, keyframes: {len(graph)}") + if with_rrd: + print(f" rrd: {rrd_path}") + return summary + + +def main() -> None: + parser = argparse.ArgumentParser(description=__doc__) + parser.add_argument("--db-path", type=Path, required=True) + parser.add_argument("--odom-stream", required=True) + parser.add_argument( + "--camera-stream", default=None, help="omit for tagless datasets (voxel agreement only)" + ) + parser.add_argument( + "--camera-intrinsics-json-path", + type=Path, + default=None, + help="omit for tagless datasets (voxel agreement only)", + ) + parser.add_argument("--module-path", type=Path, required=True) + parser.add_argument("--module-name", required=True) + parser.add_argument( + "--pgo-config-json", + help="inline JSON of module config overrides (default: scan_context variant)", + ) + parser.add_argument("--with-rrd", default="false", choices=["true", "false"]) + parser.add_argument( + "--lidar-stream", + default="fastlio_lidar", + help="lidar stream replayed into the module alongside the odometry", + ) + parser.add_argument( + "--lockstep", + default="true", + choices=["true", "false"], + help="pace scans on corrected_odometry acks (machine-independent); false = fixed-rate", + ) + parser.add_argument( + "--results-suffix", + default="", + help="extra results-dir key for runs with different inputs (e.g. pointlio)", + ) + parser.add_argument( + "--recording-name", + default=None, + help="results-dir recording key (default: db parent dir name)", + ) + parser.add_argument( + "--drift-per-sec", + default=None, + help="inject odom drift as a constant world velocity 'x,y,z' in m/s " + "(offset = this * (t - t0), added to odom+lidar). e.g. '0.01,0,0'", + ) + parser.add_argument( + "--ignore-tags", + default=None, + help="comma-separated April-tag ids to drop from scoring (dynamic/unreliable " + "tags whose motion would look like drift). e.g. '17'", + ) + args = parser.parse_args() + + drift_per_sec = ( + [float(v) for v in args.drift_per_sec.split(",")] if args.drift_per_sec else None + ) + if drift_per_sec is not None and len(drift_per_sec) != 3: + raise SystemExit(f"--drift-per-sec must be 'x,y,z', got {args.drift_per_sec!r}") + + ignore_tags = ( + {int(tag_id) for tag_id in args.ignore_tags.split(",")} if args.ignore_tags else None + ) + + db_path = args.db_path.expanduser() + if not db_path.exists(): + raise SystemExit(f"no such db: {db_path}") + intrinsics_json = ( + args.camera_intrinsics_json_path.expanduser() + if args.camera_intrinsics_json_path is not None + else None + ) + if intrinsics_json is not None and not intrinsics_json.exists(): + raise SystemExit(f"no such intrinsics json: {intrinsics_json}") + + pgo_config = dict(DEFAULT_PGO_CONFIG) + if args.pgo_config_json: + pgo_config.update(json.loads(args.pgo_config_json)) + + evaluate( + db_path, + odom_stream=args.odom_stream, + camera_stream=args.camera_stream, + intrinsics_json=intrinsics_json, + module_path=args.module_path, + module_name=args.module_name, + pgo_config=pgo_config, + with_rrd=args.with_rrd == "true", + lidar_stream=args.lidar_stream, + lockstep=args.lockstep == "true", + results_suffix=args.results_suffix, + recording_name=args.recording_name, + drift_per_sec=drift_per_sec, + ignore_tags=ignore_tags, + ) + + +if __name__ == "__main__": + # Re-import under the canonical dotted name so module classes defined here + # (GraphCapture) deploy into workers with a picklable __module__. + importlib.import_module("dimos.navigation.jnav.components.loop_closure.eval").main() diff --git a/dimos/navigation/jnav/components/loop_closure/eval_all.py b/dimos/navigation/jnav/components/loop_closure/eval_all.py new file mode 100644 index 0000000000..47df33d081 --- /dev/null +++ b/dimos/navigation/jnav/components/loop_closure/eval_all.py @@ -0,0 +1,366 @@ +# Copyright 2026 Dimensional Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Run the loop-closure eval over every PGO module and render a comparison table. + +Each module is evaluated in its own subprocess (sequentially — they share LCM) +via eval.py with lockstep replay, then all eval_results summaries are rendered +to eval_results/comparison.md. A failed module doesn't stop the rest. + +Usage: + uv run python dimos/navigation/jnav/components/loop_closure/eval_all.py \\ + --db-path ~/datasets/go2_recordings//mem2.db \\ + [--camera-intrinsics-json-path ] # default: sidecar next to db + [--only gsc_pgo,ivan_pgo] # subset by directory name + [--with-rrd true] [--lockstep true] +""" + +from __future__ import annotations + +import argparse +from collections import defaultdict +import json +from pathlib import Path +import subprocess +import sys +from typing import Any + +LOOP_CLOSURE_DIR = Path(__file__).resolve().parent +RESULTS_DIR = LOOP_CLOSURE_DIR / "eval_results" +TABLE_PATH = RESULTS_DIR / "comparison.md" +SIDECAR_NAME = "camera_intrinsics.json" + +# (directory, per-module config overrides). All classes are named PGO. +# global_map is disabled everywhere — nothing in the eval consumes it, and the +# native modules' big global_map clouds overflow LCM and congest the +# corrected_odometry ack channel the lockstep replay waits on (rate 0 = off +# for the native binaries; publish_global_map=False for the python ivan). +MODULES: list[tuple[str, dict[str, Any]]] = [ + ("gsc_pgo", {"use_scan_context": True, "global_map_publish_rate": 0.0}), + ("ivan_pgo", {"publish_global_map": False}), + ("ivan_pgo_transformer", {}), + ("unrefined_pgo", {"global_map_publish_rate": 0.0}), +] + + +def _fmt_spread(s: dict[str, Any]) -> str: + raw, corrected = s.get("raw_spread_m"), s.get("corrected_spread_m") + if raw is None or corrected is None: # tagless dataset (KITTI, bare lidar) + return "—" + return f"{raw:.2f} -> {corrected:.2f}" + + +TABLE_COLUMNS = [ + ("tag spread (m)", _fmt_spread), + ( + "tag improvement", + lambda s: f"{s['tag_improvement']:+.3f}" if s.get("tag_improvement") is not None else "—", + ), + ( + "voxel improvement", + lambda s: f"{s['voxel_improvement']:+.3f}" + if s.get("voxel_improvement") is not None + else "—", + ), + ( + "drift recovery", # ATE improvement vs un-drifted GT (only with --drift-per-sec) + lambda s: f"{s['trajectory_improvement']:+.3f}" + if s.get("trajectory_improvement") is not None + else "—", + ), + ("closures", lambda s: str(s["closures"])), + ("keyframes", lambda s: str(s["keyframes"])), + ("runtime (s)", lambda s: str(s["runtime_s"])), +] + + +def run_one( + module_dir: str, + overrides: dict[str, Any], + *, + db_path: Path, + odom_stream: str, + camera_stream: str, + lidar_stream: str, + intrinsics_json: Path | None, + with_rrd: str, + lockstep: str, + results_suffix: str = "", + drift_per_sec: str = "", + ignore_tags: str = "", +) -> bool: + command = [ + sys.executable, + "-u", + str(LOOP_CLOSURE_DIR / "eval.py"), + "--db-path", + str(db_path), + "--odom-stream", + odom_stream, + "--camera-stream", + camera_stream, + "--lidar-stream", + lidar_stream, + "--module-path", + str(LOOP_CLOSURE_DIR / module_dir / "module.py"), + "--module-name", + "PGO", + "--with-rrd", + with_rrd, + "--lockstep", + lockstep, + ] + if intrinsics_json is not None: + command += ["--camera-intrinsics-json-path", str(intrinsics_json)] + if drift_per_sec: + command += ["--drift-per-sec", drift_per_sec] + if ignore_tags: + command += ["--ignore-tags", ignore_tags] + if results_suffix: + command += ["--results-suffix", results_suffix] + if overrides: + command += ["--pgo-config-json", json.dumps(overrides)] + print(f"\n=== {module_dir} ===", flush=True) + result = subprocess.run(command, check=False) + print(f"=== {module_dir} exit: {result.returncode} ===", flush=True) + return result.returncode == 0 + + +def _row_label(module_key: str, replay: dict[str, Any]) -> str: + label = module_key + scans = replay.get("scans_sent") + timeouts = replay.get("timeouts") + if scans is not None: + label += f" ({scans} scans, {timeouts} ack-timeouts)" if timeouts else f" ({scans} scans)" + if replay.get("hit_max_run_s"): + label += " [hit run cap]" + return label + + +def _section_for(recording: str, module_key: str) -> str: + """Which breakdown section a result belongs to.""" + if recording.startswith("hk_village"): + return "hk_village" + if "drift" in module_key: + return "drift" + return "real" + + +def _tag_improvement_key(labelled_row: tuple[str, dict[str, Any]]) -> float: + # Best first; fall back to drift-recovery for tagless rows, then sink to the + # bottom if neither is present. + scores = labelled_row[1] + value = scores.get("tag_improvement") + if value is None: + value = scores.get("trajectory_improvement") + return float(value) if value is not None else float("-inf") + + +def _render_section( + by_recording: dict[str, list[tuple[str, dict[str, Any]]]], sort_reverse: bool +) -> list[str]: + lines: list[str] = [] + for recording in sorted(by_recording): + rows = sorted(by_recording[recording], key=_tag_improvement_key, reverse=sort_reverse) + lines += [f"### {recording}", ""] + lines.append("| module | " + " | ".join(name for name, _ in TABLE_COLUMNS) + " | replay |") + lines.append("|" + "---|" * (len(TABLE_COLUMNS) + 2)) + for module_label, scores in rows: + cells = [render(scores) for _, render in TABLE_COLUMNS] + lines.append(f"| {module_label} | " + " | ".join(cells) + f" | {scores['_mode']} |") + lines.append("") + return lines + + +def _base_module(module_key: str) -> str: + """`gsc_pgo.PGO.drift0p05_0_0` -> `gsc_pgo`.""" + return module_key.split(".")[0] + + +def _conclusion(real: dict[str, list[tuple[str, dict[str, Any]]]]) -> list[str]: + """Data-driven summary: count per-recording tag-improvement wins per module.""" + wins: dict[str, int] = defaultdict(int) + tagged_recordings = 0 + for rows in real.values(): + tagged = [(label, s) for label, s in rows if s.get("tag_improvement") is not None] + if not tagged: + continue + tagged_recordings += 1 + best_label = max(tagged, key=lambda item: item[1]["tag_improvement"])[0] + wins[_base_module(best_label)] += 1 + if not tagged_recordings: + return [] + ranking = sorted(wins.items(), key=lambda item: item[1], reverse=True) + leader = ranking[0][0] if ranking else "n/a" + lines = ["## Conclusion", ""] + lines.append( + f"Across {tagged_recordings} tagged real recordings, the best per-recording " + f"April-tag improvement was won by:" + ) + lines.append("") + for module, count in ranking: + lines.append(f"- **{module}** — best on {count}/{tagged_recordings}") + lines += [ + "", + f"`{leader}` is the strongest loop-closure PGO overall on real recordings; " + "`unrefined_pgo` (the pass-through baseline) is the floor. On tagless data " + "(hk_village) modules are ranked by voxel improvement, and under artificial " + "drift by drift-recovery (fraction of injected ATE removed).", + "", + ] + return lines + + +def render_table() -> Path: + buckets: dict[str, dict[str, list[tuple[str, dict[str, Any]]]]] = { + "real": defaultdict(list), + "hk_village": defaultdict(list), + "drift": defaultdict(list), + } + for summary_path in sorted(RESULTS_DIR.glob("*/summary.json")): + summary = json.loads(summary_path.read_text()) + # Skip non-PGO summaries (e.g. the cross-recording ground-truth tag eval, + # which has a different shape and its own write-up). + if "scores" not in summary: + continue + recording, _, module_key = summary_path.parent.name.rpartition("__") + replay = summary.get("replay", {}) + row = {**summary["scores"], "_mode": replay.get("mode", "fixed-rate (pre-lockstep)")} + section = _section_for(recording, module_key) + buckets[section][recording].append((_row_label(module_key, replay), row)) + + lines = [ + "# Loop-closure PGO comparison", + "", + "Higher improvement = better. **Tag improvement**: fractional drop in per-visit", + "April-tag position spread (1.0 = perfect). **Voxel improvement**: fractional drop", + "in occupied 0.2 m voxels after re-anchoring scans onto the corrected trajectory.", + "**Drift recovery**: fraction of injected ATE removed vs the un-drifted ground truth.", + "", + ] + if buckets["real"]: + lines += ["## Real recordings (clean input)", ""] + lines += _render_section(buckets["real"], sort_reverse=True) + if buckets["hk_village"]: + lines += ["## hk_village (tagless — voxel agreement only)", ""] + lines += _render_section(buckets["hk_village"], sort_reverse=True) + if buckets["drift"]: + lines += ["## Artificial drift robustness", ""] + lines += _render_section(buckets["drift"], sort_reverse=True) + + kitti_table = RESULTS_DIR / "kitti_comparison.md" + if kitti_table.exists(): + lines += [ + "## KITTI (official odometry error)", + "", + "Scored with the official KITTI translational (%) / rotational (deg/m) error " + "(lower = better); see [kitti_comparison.md](kitti_comparison.md).", + "", + ] + + lines += _conclusion(buckets["real"]) + RESULTS_DIR.mkdir(exist_ok=True) + TABLE_PATH.write_text("\n".join(lines)) + return TABLE_PATH + + +def main() -> None: + parser = argparse.ArgumentParser(description=__doc__) + parser.add_argument("--db-path", type=Path, required=True) + parser.add_argument("--odom-stream", default="fastlio_odometry") + parser.add_argument("--camera-stream", default="color_image") + parser.add_argument("--lidar-stream", default="fastlio_lidar") + parser.add_argument( + "--camera-intrinsics-json-path", + type=Path, + help=f"default: {SIDECAR_NAME} next to the db", + ) + parser.add_argument("--only", help="comma-separated module directory names") + parser.add_argument("--with-rrd", default="true", choices=["true", "false"]) + parser.add_argument("--lockstep", default="true", choices=["true", "false"]) + parser.add_argument( + "--results-suffix", + default="", + help="extra results-dir key for runs with different inputs (e.g. pointlio)", + ) + parser.add_argument( + "--drift-per-sec", + default="", + help="inject constant-velocity world drift 'x,y,z' (m/s) into odom+lidar; " + "stress-tests how each PGO corrects known drift. Auto-tags the results dir.", + ) + parser.add_argument( + "--ignore-tags", + default="", + help="comma-separated April-tag ids to drop from scoring (e.g. '17' for the " + "dynamic tag in huge_loop_realsense)", + ) + args = parser.parse_args() + + # Keep drifted runs in their own results dirs so they don't clobber the + # clean-input comparison (e.g. drift 0.1,0,0 -> suffix '...drift0p1x'). + results_suffix = args.results_suffix + if args.drift_per_sec: + drift_tag = "drift" + args.drift_per_sec.replace(".", "p").replace(",", "_") + results_suffix = f"{results_suffix}.{drift_tag}" if results_suffix else drift_tag + + db_path = args.db_path.expanduser() + if not db_path.exists(): + raise SystemExit(f"no such db: {db_path}") + # Tagless datasets (KITTI, bare lidar): empty --camera-stream -> no intrinsics + # needed, voxel-agreement scoring only. + tagless = not args.camera_stream + intrinsics_json: Path | None = None + if not tagless: + sidecar: Path = args.camera_intrinsics_json_path or db_path.parent / SIDECAR_NAME + intrinsics_json = sidecar.expanduser() + if not intrinsics_json.exists(): + raise SystemExit(f"no intrinsics json: {intrinsics_json}") + + selected = MODULES + if args.only: + wanted = {name.strip() for name in args.only.split(",")} + selected = [(name, overrides) for name, overrides in MODULES if name in wanted] + missing = wanted - {name for name, _ in selected} + if missing: + raise SystemExit( + f"unknown modules: {sorted(missing)} (have: {[m for m, _ in MODULES]})" + ) + + outcomes = {} + for module_dir, overrides in selected: + outcomes[module_dir] = run_one( + module_dir, + overrides, + db_path=db_path, + odom_stream=args.odom_stream, + camera_stream=args.camera_stream, + lidar_stream=args.lidar_stream, + intrinsics_json=intrinsics_json, + with_rrd=args.with_rrd, + lockstep=args.lockstep, + results_suffix=results_suffix, + drift_per_sec=args.drift_per_sec, + ignore_tags=args.ignore_tags, + ) + + table = render_table() + print(f"\ntable -> {table}") + failed = [name for name, ok in outcomes.items() if not ok] + if failed: + raise SystemExit(f"failed modules: {failed}") + + +if __name__ == "__main__": + main() diff --git a/dimos/navigation/jnav/components/loop_closure/eval_ground_truth_tag.py b/dimos/navigation/jnav/components/loop_closure/eval_ground_truth_tag.py new file mode 100644 index 0000000000..3cb34b9018 --- /dev/null +++ b/dimos/navigation/jnav/components/loop_closure/eval_ground_truth_tag.py @@ -0,0 +1,310 @@ +# Copyright 2026 Dimensional Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Cross-recording ground-truth tag eval (todo item 4). + +Run the best PGO on huge_loop_realsense to get corrected April-tag world +positions (treated as ground truth, since cmu reaches ~0.14 m tag spread there), +then measure how close each PGO moves huge_loop_go2's tags toward those GT +locations — a CROSS-recording metric, unlike the within-recording agreement. + +Both recordings see the same physical tags. The pipeline per recording: + 1. run a PGO -> corrected keyframe trajectory (graph). + 2. read each tag sighting's pose-in-optical from the recorded april_tags + stream (PoseStamped: t_optical_tag). + 3. place each sighting in the world: + tag_world = T_world_base(corrected, t) · T_base_optical · t_optical_tag + where T_base_optical = the recording's `optical_in_base` extrinsic. + 4. average each tag's world positions -> one centroid per tag (the estimate). +Then Umeyama-align go2's tag constellation onto the realsense GT constellation +over shared tag ids; residual RMSE = how well the PGO recovered the true tag +geometry (lower = better; an uncorrected / wrong PGO leaves higher residual). + +Tag 17 is dynamic on huge_loop_realsense; it is dropped via --ignore-tags. + +Results are written to eval_results/__ground_truth_tag/summary.json +(per-module residual + closures, plus the GT tag ids), and the frame-composition +core is covered by --self-test. +""" + +from __future__ import annotations + +import argparse +from collections import defaultdict +from collections.abc import Callable +import json +from pathlib import Path +from typing import Any + +import numpy as np +from scipy.spatial.transform import Rotation + +from dimos.navigation.jnav.components.loop_closure.eval import ( + odometry_pose7_lookup, + run_module_graph, +) +from dimos.navigation.jnav.utils.module_loading import ( + filter_config_for_module, + load_module_class, +) +from dimos.navigation.jnav.utils.recording_db import store +from dimos.navigation.jnav.utils.trajectory_metrics import ( + drift_delta_lookup, + rigid_align_rmse, +) + + +def pose7_to_matrix(pose7: np.ndarray | list[float]) -> np.ndarray: + """[x,y,z, qx,qy,qz,qw] -> 4x4 homogeneous transform.""" + pose = np.asarray(pose7, dtype=np.float64) + transform = np.eye(4) + transform[:3, :3] = Rotation.from_quat(pose[3:7]).as_matrix() + transform[:3, 3] = pose[:3] + return transform + + +def tag_world_position( + robot_pose7: np.ndarray | list[float], + optical_in_base7: np.ndarray | list[float], + tag_in_optical7: np.ndarray | list[float], +) -> np.ndarray: + """World xyz of a tag from the corrected robot pose, the camera->base + extrinsic, and the tag-in-optical detection. Composes + T_world_base · T_base_optical · T_optical_tag and returns the translation.""" + transform = ( + pose7_to_matrix(robot_pose7) + @ pose7_to_matrix(optical_in_base7) + @ pose7_to_matrix(tag_in_optical7) + ) + return np.asarray(transform[:3, 3]) + + +def read_optical_in_base(intrinsics_json: Path) -> list[float]: + raw = json.loads(Path(intrinsics_json).read_text()) + extrinsic = raw.get("optical_in_base") + if extrinsic is None: + raise SystemExit(f"no optical_in_base (camera->base extrinsic) in {intrinsics_json}") + return list(extrinsic) + + +def tag_in_camera_sightings(db_path: Path) -> list[tuple[float, int, list[float]]]: + """(ts, marker_id, tag-in-optical pose7) per April-tag observation, read + straight from the recorded april_tags PoseStamped stream (no re-detection).""" + sightings: list[tuple[float, int, list[float]]] = [] + tag_stream: Any = store(db_path).stream("april_tags") + for observation in tag_stream: + pose_tuple = observation.pose_tuple + if pose_tuple is None: + continue + sightings.append( + (float(observation.ts), int(observation.tags["marker_id"]), list(pose_tuple)) + ) + return sightings + + +def corrected_pose7_at( + timestamp: float, + raw_pose7_lookup: Callable[[float], np.ndarray | None], + delta_lookup: Callable[[float], tuple[np.ndarray, np.ndarray] | None], +) -> np.ndarray | None: + """Corrected robot pose7 at a time: apply the nearest keyframe's drift + correction (R_delta, t_delta) to the raw odom pose.""" + raw = raw_pose7_lookup(timestamp) + delta = delta_lookup(timestamp) + if raw is None or delta is None: + return None + rotation_delta, translation_delta = delta + raw = np.asarray(raw, dtype=np.float64) + rotation_corrected = rotation_delta @ Rotation.from_quat(raw[3:7]).as_matrix() + translation_corrected = rotation_delta @ raw[:3] + translation_delta + return np.array([*translation_corrected, *Rotation.from_matrix(rotation_corrected).as_quat()]) + + +def tag_constellation( + db_path: Path, + module_path: Path, + module_name: str, + config: dict[str, Any], + *, + lidar_stream: str, + odom_stream: str, + optical_in_base: list[float], + ignore_tags: set[int], +) -> tuple[dict[int, np.ndarray], int]: + """Run the PGO, then place each tag sighting in the world via the corrected + trajectory + extrinsic, averaging per tag. Returns {marker_id: world centroid} + and the closure count.""" + module_class = load_module_class(module_path, module_name) + config = filter_config_for_module(module_class, config) + graph, closures, _ = run_module_graph( + db_path, + module_class, + config, + lidar_stream=lidar_stream, + odom_stream=odom_stream, + lockstep=True, + ) + raw_pose7_lookup = odometry_pose7_lookup(db_path, odom_stream) + delta_lookup = drift_delta_lookup(graph, raw_pose7_lookup) + by_tag: dict[int, list[np.ndarray]] = defaultdict(list) + for timestamp, marker_id, tag_pose7 in tag_in_camera_sightings(db_path): + if marker_id in ignore_tags: + continue + robot_pose7 = corrected_pose7_at(timestamp, raw_pose7_lookup, delta_lookup) + if robot_pose7 is None: + continue + by_tag[marker_id].append(tag_world_position(robot_pose7, optical_in_base, tag_pose7)) + constellation = {tag: np.mean(positions, axis=0) for tag, positions in by_tag.items()} + return constellation, closures + + +def constellation_residual( + gt: dict[int, np.ndarray], test: dict[int, np.ndarray] +) -> tuple[float, list[int]]: + """Umeyama-align the test tag constellation onto the GT; residual RMSE over + shared tags (lower = the PGO recovered the true tag geometry better).""" + shared = sorted(set(gt) & set(test)) + if len(shared) < 3: + return float("nan"), shared + gt_points = np.asarray([gt[tag] for tag in shared]) + test_points = np.asarray([test[tag] for tag in shared]) + return rigid_align_rmse(test_points, gt_points), shared + + +def _self_test() -> None: + identity = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0] + # 1) everything identity, tag 2m ahead in optical -> world = 2m ahead. + world = tag_world_position(identity, identity, [0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 1.0]) + assert np.allclose(world, [0.0, 0.0, 2.0]), world + + # 2) robot translated by (10,5,0), no rotation: tag world shifts by that. + world = tag_world_position( + [10.0, 5.0, 0.0, 0.0, 0.0, 0.0, 1.0], identity, [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0] + ) + assert np.allclose(world, [11.0, 5.0, 0.0]), world + + # 3) robot yawed 90deg (+z): a tag 1m ahead in base (x) lands 1m to +y of robot. + yaw90 = [0.0, 0.0, 0.0, *Rotation.from_euler("z", 90, degrees=True).as_quat()] + world = tag_world_position(yaw90, identity, [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0]) + assert np.allclose(world, [0.0, 1.0, 0.0], atol=1e-9), world + + # 4) optical->base rotation (-0.5,0.5,-0.5,0.5) maps optical +z (forward) to + # base +x (forward). Tag 3m ahead in optical -> 3m ahead in base/world. + optical_in_base = [0.0, 0.0, 0.0, -0.5, 0.5, -0.5, 0.5] + world = tag_world_position(identity, optical_in_base, [0.0, 0.0, 3.0, 0.0, 0.0, 0.0, 1.0]) + assert np.allclose(world, [3.0, 0.0, 0.0], atol=1e-9), world + + print("frame-composition self-test PASSED (4 cases)") + + +LOOP_CLOSURE_DIR = Path(__file__).resolve().parent +DEFAULT_GT_CONFIG = {"use_scan_context": True, "global_map_publish_rate": 0} + + +def main() -> None: + parser = argparse.ArgumentParser(description=__doc__) + parser.add_argument( + "--gt-db", type=Path, required=True, help="recording for ground truth (realsense)" + ) + parser.add_argument("--gt-intrinsics", type=Path, required=True) + parser.add_argument("--gt-odom-stream", default="fastlio_odometry") + parser.add_argument("--gt-lidar-stream", default="fastlio_lidar") + parser.add_argument("--test-db", type=Path, required=True, help="recording to score (go2)") + parser.add_argument("--test-intrinsics", type=Path, required=True) + parser.add_argument("--test-odom-stream", default="fastlio_odometry") + parser.add_argument("--test-lidar-stream", default="fastlio_lidar") + parser.add_argument("--gt-module", default="gsc_pgo", help="PGO dir used to build GT") + parser.add_argument("--modules", default="gsc_pgo,ivan_pgo,ivan_pgo_transformer,unrefined_pgo") + parser.add_argument("--ignore-tags", default="17", help="dynamic tags to drop") + args = parser.parse_args() + + ignore_tags = {int(t) for t in args.ignore_tags.split(",")} if args.ignore_tags else set() + gt_extrinsic = read_optical_in_base(args.gt_intrinsics) + test_extrinsic = read_optical_in_base(args.test_intrinsics) + + def module_py(module_dir: str) -> Path: + return LOOP_CLOSURE_DIR / module_dir / "module.py" + + print(f"building GT constellation: {args.gt_module} on {args.gt_db.parent.name}") + gt_constellation, _ = tag_constellation( + args.gt_db, + module_py(args.gt_module), + "PGO", + dict(DEFAULT_GT_CONFIG), + lidar_stream=args.gt_lidar_stream, + odom_stream=args.gt_odom_stream, + optical_in_base=gt_extrinsic, + ignore_tags=ignore_tags, + ) + print(f" GT tags: {sorted(gt_constellation)}") + + print(f"\n{'module':24s} {'tag-to-GT residual (m)':>22s} {'shared tags':>12s} {'closures':>9s}") + print("-" * 70) + rows: list[dict[str, Any]] = [] + for module_dir in [m.strip() for m in args.modules.split(",")]: + try: + test_constellation, closures = tag_constellation( + args.test_db, + module_py(module_dir), + "PGO", + dict(DEFAULT_GT_CONFIG), + lidar_stream=args.test_lidar_stream, + odom_stream=args.test_odom_stream, + optical_in_base=test_extrinsic, + ignore_tags=ignore_tags, + ) + residual, shared = constellation_residual(gt_constellation, test_constellation) + rows.append( + { + "module": module_dir, + "residual_m": residual, + "shared_tags": len(shared), + "closures": closures, + } + ) + print(f"{module_dir:24s} {residual:>22.3f} {len(shared):>12d} {closures:>9d}") + except Exception as error: + print(f"{module_dir:24s} FAILED: {type(error).__name__}: {error}") + rows.append({"module": module_dir, "residual_m": None, "error": str(error)}) + + scored = [row for row in rows if row.get("residual_m") is not None] + best_module = min(scored, key=lambda row: row["residual_m"])["module"] if scored else None + if best_module is not None: + best = next(row for row in scored if row["module"] == best_module) + print( + f"\nbest (lowest residual to GT tag geometry): {best_module} ({best['residual_m']:.3f} m)" + ) + + out_dir = LOOP_CLOSURE_DIR / "eval_results" / f"{args.test_db.parent.name}__ground_truth_tag" + out_dir.mkdir(parents=True, exist_ok=True) + summary = { + "gt_db": str(args.gt_db), + "gt_module": args.gt_module, + "test_db": str(args.test_db), + "ignore_tags": sorted(ignore_tags), + "gt_tags": sorted(int(tag) for tag in gt_constellation), + "best_module": best_module, + "rows": rows, + } + (out_dir / "summary.json").write_text(json.dumps(summary, indent=2) + "\n") + print(f"\nresults -> {out_dir / 'summary.json'}") + + +if __name__ == "__main__": + import sys + + if "--self-test" in sys.argv: + _self_test() + else: + main() diff --git a/dimos/navigation/jnav/components/loop_closure/eval_kitti.py b/dimos/navigation/jnav/components/loop_closure/eval_kitti.py new file mode 100644 index 0000000000..b7dfac55a1 --- /dev/null +++ b/dimos/navigation/jnav/components/loop_closure/eval_kitti.py @@ -0,0 +1,192 @@ +# Copyright 2026 Dimensional Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Evaluate one PGO on a KITTI db with the official KITTI odometry error. + +Parallels eval.py, but for the kitti_to_db.py recordings. Runs the module over +the db's drifty ICP odometry (fastlio_odometry) + registered scans (fastlio_lidar), +reads the ground truth from the db's gt_odometry stream, and reports translational +(%) and rotational (deg/m) error — the leaderboard metric — for the corrected +trajectory and the raw-odometry baseline. + +Usage: + uv run python dimos/navigation/jnav/components/loop_closure/eval_kitti.py \ + --db-path ~/datasets/kitti/kitti_seq07/mem2.db \ + --module-path dimos/navigation/jnav/components/loop_closure/gsc_pgo/module.py \ + [--module-name PGO] [--pgo-config-json '{...}'] +""" + +from __future__ import annotations + +import argparse +import json +from pathlib import Path +from typing import Any + +import numpy as np +from scipy.spatial.transform import Rotation + +from dimos.navigation.jnav.components.loop_closure.eval import run_module_graph +from dimos.navigation.jnav.utils.kitti import kitti_odometry_error +from dimos.navigation.jnav.utils.module_loading import ( + filter_config_for_module, + load_module_class, +) +from dimos.navigation.jnav.utils.recording_db import iterate_stream + +DEFAULT_CONFIG = {"use_scan_context": True, "global_map_publish_rate": 0} + +# Stream names produced by kitti_to_db (overridable on the CLI). +DEFAULT_LIDAR_STREAM = "fastlio_lidar" +DEFAULT_ODOM_STREAM = "fastlio_odometry" +DEFAULT_GT_STREAM = "gt_odometry" + + +def poses_from_stream(db_path: Path, stream: str) -> tuple[list[np.ndarray], list[float]]: + """Read an Odometry stream as (4x4 poses, timestamps).""" + poses: list[np.ndarray] = [] + times: list[float] = [] + for timestamp, message in iterate_stream(db_path, stream): + orientation = message.pose.orientation + position = message.pose.position + transform = np.eye(4) + transform[:3, :3] = Rotation.from_quat( + [orientation.x, orientation.y, orientation.z, orientation.w] + ).as_matrix() + transform[:3, 3] = [position.x, position.y, position.z] + poses.append(transform) + times.append(timestamp) + return poses, times + + +def _gt_at( + gt_poses: list[np.ndarray], gt_times: list[float], query: list[float] +) -> list[np.ndarray]: + times = np.asarray(gt_times) + return [gt_poses[int(np.argmin(np.abs(times - t)))] for t in query] + + +def corrected_trajectory( + db_path: Path, + module_path: Path, + module_name: str, + config: dict[str, Any], + *, + lidar_stream: str, + odom_stream: str, +) -> tuple[list[Any], list[Any], Any]: + module_class = load_module_class(module_path, module_name) + config = filter_config_for_module(module_class, config) + graph, closures, _ = run_module_graph( + db_path, + module_class, + config, + lidar_stream=lidar_stream, + odom_stream=odom_stream, + lockstep=True, + ) + poses, times = [], [] + for node in graph: + transform = np.eye(4) + transform[:3, :3] = Rotation.from_quat(node[4:8]).as_matrix() + transform[:3, 3] = node[1:4] + poses.append(transform) + times.append(node[0]) + return poses, times, closures + + +def evaluate( + db_path: Path, + module_path: Path, + module_name: str, + config: dict[str, Any], + results_suffix: str = "", + *, + lidar_stream: str = DEFAULT_LIDAR_STREAM, + odom_stream: str = DEFAULT_ODOM_STREAM, + gt_stream: str = DEFAULT_GT_STREAM, +) -> dict[str, Any]: + gt_poses, gt_times = poses_from_stream(db_path, gt_stream) + odom_poses, _ = poses_from_stream(db_path, odom_stream) + if not gt_poses: + raise SystemExit(f"no {gt_stream!r} stream in {db_path}") + baseline = kitti_odometry_error(odom_poses, gt_poses) + + corrected, corrected_times, closures = corrected_trajectory( + db_path, + module_path, + module_name, + config, + lidar_stream=lidar_stream, + odom_stream=odom_stream, + ) + error = kitti_odometry_error(corrected, _gt_at(gt_poses, gt_times, corrected_times)) + + print( + f"raw odometry: {baseline['translational_percent']:.2f}% transl / " + f"{baseline['rotational_deg_per_m']:.4f} deg/m" + ) + print( + f"after {module_path.parent.name}: {error['translational_percent']:.2f}% transl / " + f"{error['rotational_deg_per_m']:.4f} deg/m ({closures} closures)" + ) + + module_key = module_path.parent.name + (f".{results_suffix}" if results_suffix else "") + summary = { + "db": str(db_path), + "module": module_key, + "scores": { + "translational_percent": error["translational_percent"], + "rotational_deg_per_m": error["rotational_deg_per_m"], + "baseline_translational_percent": baseline["translational_percent"], + "baseline_rotational_deg_per_m": baseline["rotational_deg_per_m"], + "closures": closures, + "keyframes": len(corrected), + }, + } + recording = db_path.parent.name + out_dir = Path(__file__).resolve().parent / "eval_results" / f"{recording}__{module_key}" + out_dir.mkdir(parents=True, exist_ok=True) + (out_dir / "kitti_summary.json").write_text(json.dumps(summary, indent=2)) + return summary + + +def main() -> None: + parser = argparse.ArgumentParser(description=__doc__) + parser.add_argument("--db-path", type=Path, required=True) + parser.add_argument("--module-path", type=Path, required=True) + parser.add_argument("--module-name", default="PGO") + parser.add_argument("--pgo-config-json", default="") + parser.add_argument("--results-suffix", default="") + parser.add_argument("--lidar-stream", default=DEFAULT_LIDAR_STREAM) + parser.add_argument("--odom-stream", default=DEFAULT_ODOM_STREAM) + parser.add_argument("--gt-stream", default=DEFAULT_GT_STREAM) + args = parser.parse_args() + config = dict(DEFAULT_CONFIG) + if args.pgo_config_json: + config.update(json.loads(args.pgo_config_json)) + evaluate( + args.db_path.expanduser(), + args.module_path, + args.module_name, + config, + args.results_suffix, + lidar_stream=args.lidar_stream, + odom_stream=args.odom_stream, + gt_stream=args.gt_stream, + ) + + +if __name__ == "__main__": + main() diff --git a/dimos/navigation/jnav/components/loop_closure/eval_kitti_all.py b/dimos/navigation/jnav/components/loop_closure/eval_kitti_all.py new file mode 100644 index 0000000000..3a16b799d1 --- /dev/null +++ b/dimos/navigation/jnav/components/loop_closure/eval_kitti_all.py @@ -0,0 +1,122 @@ +# Copyright 2026 Dimensional Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Run every PGO on KITTI db(s) and render an official-error comparison table. + +Parallels eval_all.py. For each KITTI db (one sequence) it evaluates every PGO +module in its own subprocess (they share LCM) via eval_kitti.py, then renders +translational (%) / rotational (deg/m) error per module to kitti_comparison.md, +sorted best-first (lower error = better), alongside the raw-odometry baseline. + +Usage: + uv run python dimos/navigation/jnav/components/loop_closure/eval_kitti_all.py \ + --recordings-dir ~/datasets/kitti/sequences # all kitti_seq*/mem2.db + [--db-path ~/datasets/kitti/sequences/kitti_seq07/mem2.db] # or one + [--only gsc_pgo,ivan_pgo] +""" + +from __future__ import annotations + +import argparse +import json +from pathlib import Path +import subprocess +import sys +from typing import Any + +LOOP_CLOSURE_DIR = Path(__file__).resolve().parent +RESULTS_DIR = LOOP_CLOSURE_DIR / "eval_results" +TABLE_PATH = RESULTS_DIR / "kitti_comparison.md" +MODULES = ("gsc_pgo", "ivan_pgo", "ivan_pgo_transformer", "unrefined_pgo") + + +def run_one(db_path: Path, module_dir: str) -> bool: + command = [ + sys.executable, + "-u", + str(LOOP_CLOSURE_DIR / "eval_kitti.py"), + "--db-path", + str(db_path), + "--module-path", + str(LOOP_CLOSURE_DIR / module_dir / "module.py"), + "--module-name", + "PGO", + ] + print(f"\n=== {db_path.parent.name} / {module_dir} ===", flush=True) + return subprocess.run(command, check=False).returncode == 0 + + +def render_table() -> Path: + by_recording: dict[str, list[dict[str, Any]]] = {} + for summary_path in sorted(RESULTS_DIR.glob("*/kitti_summary.json")): + summary = json.loads(summary_path.read_text()) + recording = Path(summary["db"]).parent.name + by_recording.setdefault(recording, []).append( + {"module": summary["module"], **summary["scores"]} + ) + + lines = ["# KITTI official-error PGO comparison", ""] + lines += [ + "Translational error (%) and rotational error (deg/m), the KITTI", + "leaderboard metric (avg relative pose error over 100..800m sub-sequences).", + "**Lower is better.** raw odometry = the scan-to-scan ICP input the PGO refines.", + "", + ] + for recording in sorted(by_recording): + rows = sorted(by_recording[recording], key=lambda r: r["translational_percent"]) + lines += [f"## {recording}", ""] + lines.append("| module | transl % | rot deg/m | closures | keyframes |") + lines.append("|---|---|---|---|---|") + baseline = rows[0] + lines.append( + f"| _raw odometry_ | {baseline['baseline_translational_percent']:.2f} | " + f"{baseline['baseline_rotational_deg_per_m']:.4f} | — | — |" + ) + for row in rows: + lines.append( + f"| {row['module']} | {row['translational_percent']:.2f} | " + f"{row['rotational_deg_per_m']:.4f} | {row['closures']} | {row['keyframes']} |" + ) + lines.append("") + RESULTS_DIR.mkdir(exist_ok=True) + TABLE_PATH.write_text("\n".join(lines)) + return TABLE_PATH + + +def main() -> None: + parser = argparse.ArgumentParser(description=__doc__) + parser.add_argument("--recordings-dir", type=Path, default=Path("~/datasets/kitti/sequences")) + parser.add_argument( + "--db-path", type=Path, default=None, help="a single kitti db (overrides dir)" + ) + parser.add_argument("--only", help="comma-separated module dirs") + args = parser.parse_args() + + if args.db_path: + dbs = [args.db_path.expanduser()] + else: + dbs = sorted(args.recordings_dir.expanduser().glob("kitti_seq*/mem2.db")) + if not dbs: + raise SystemExit("no KITTI dbs found (run kitti_to_db.py first)") + modules = [m.strip() for m in args.only.split(",")] if args.only else list(MODULES) + + for db_path in dbs: + for module_dir in modules: + run_one(db_path, module_dir) + table = render_table() + print(f"\ntable -> {table}") + + +if __name__ == "__main__": + main() diff --git a/dimos/navigation/jnav/components/loop_closure/gsc_pgo/add_april.py b/dimos/navigation/jnav/components/loop_closure/gsc_pgo/add_april.py new file mode 100644 index 0000000000..818a71e8f3 --- /dev/null +++ b/dimos/navigation/jnav/components/loop_closure/gsc_pgo/add_april.py @@ -0,0 +1,216 @@ +# Copyright 2026 Dimensional Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Build the raw_april_tags + april_tags streams into a recording's mem2.db and +record the outcome in summary.json. --summary recomputes only the result section. + +Usage: + python dimos/navigation/jnav/components/loop_closure/gsc_pgo/add_april.py --rec PATH + [--summary] [--output PATH] [--camera color_image] [--intrinsics PATH] + [--tag-size 0.10] [--dict DICT_APRILTAG_36h11] [--dynamic 17] +""" + +import argparse +import json +from pathlib import Path +from typing import Any + +from dimos.navigation.jnav.utils import recording_db as rdb +from dimos.navigation.jnav.utils.apriltag_agreement import ( + VISIT_GAP_S, + split_visits, +) +from dimos.navigation.jnav.utils.apriltags import ( + ensure_april_streams, + gate_params, + load_intrinsics_json, +) + +RAW_STREAM = "raw_april_tags" +FILTERED_STREAM = "april_tags" +SUMMARY_NAME = "summary.json" +MIN_REVISITS = 2 # a tag seen on fewer visits than this carries no agreement signal +DEFAULT_MARKER_LENGTH_METERS = 0.10 +DEFAULT_DICTIONARY = "DICT_APRILTAG_36h11" + + +def _parse_dynamic(raw: str | None) -> list[int]: + if not raw: + return [] + return sorted({int(token) for token in raw.replace(",", " ").split()}) + + +def _times_by_tag(store: Any, stream_name: str) -> dict[int, list[float]]: + by_tag: dict[int, list[float]] = {} + for observation in store.stream(stream_name): + by_tag.setdefault(int(observation.tags["marker_id"]), []).append(float(observation.ts)) + return by_tag + + +def summarize(store: Any) -> dict[str, Any]: + """Per-tag raw detections + filtered visits from the existing streams, flagging + never-revisited tags. Prints the table and returns the result for summary.json.""" + streams = set(store.list_streams()) + raw_available = RAW_STREAM in streams + raw = _times_by_tag(store, RAW_STREAM) if raw_available else {} + filtered = _times_by_tag(store, FILTERED_STREAM) if FILTERED_STREAM in streams else {} + tag_ids = sorted(set(raw) | set(filtered)) + + print( + f" {'tag':>5} {'raw':>6} {'filtered':>9} {'revisits':>9} (visit gap {VISIT_GAP_S:.0f}s)" + ) + tags: list[dict[str, Any]] = [] + not_revisited: list[int] = [] + for tag_id in tag_ids: + raw_count = len(raw.get(tag_id, [])) + filtered_times = sorted(filtered.get(tag_id, [])) + visits = len(split_visits(filtered_times, gap_s=VISIT_GAP_S)) if filtered_times else 0 + revisited = visits >= MIN_REVISITS + if not revisited: + not_revisited.append(tag_id) + tags.append( + { + "tag_id": tag_id, + "raw": raw_count if raw_available else None, + "filtered": len(filtered_times), + "revisits": visits, + "revisited": revisited, + } + ) + raw_display = raw_count if raw_available else "-" + flag = " <-- NOT revisited" if not revisited else "" + print(f" {tag_id:>5} {raw_display!s:>6} {len(filtered_times):>9} {visits:>9}{flag}") + + print( + f" totals: {len(tag_ids)} tags | {len(tag_ids) - len(not_revisited)} revisited" + f" (>={MIN_REVISITS} visits) | {len(not_revisited)} not revisited" + ) + if not_revisited: + print(f" NOT revisited: {not_revisited}") + if not raw_available: + print(f" (no '{RAW_STREAM}' yet — raw counts N/A; run without --summary to build it)") + + return { + "visit_gap_s": VISIT_GAP_S, + "min_revisits": MIN_REVISITS, + "all_unfiltered_tag_ids": sorted(raw) if raw_available else sorted(filtered), + "total_tags": len(tag_ids), + "revisited": len(tag_ids) - len(not_revisited), + "not_revisited": not_revisited, + "raw_available": raw_available, + "tags": tags, + } + + +def _update_summary_json( + path: Path, + *, + filter_parameters: dict[str, Any] | None = None, + result: dict[str, Any] | None = None, +) -> Path: + """Merge the april_tags section into summary.json, preserving every other key.""" + data: dict[str, Any] = json.loads(path.read_text()) if path.exists() else {} + section: dict[str, Any] = data.get("april_tags", {}) + if filter_parameters is not None: + section["filter_parameters"] = filter_parameters + if result is not None: + section["result"] = result + data["april_tags"] = section + path.write_text(json.dumps(data, indent=2)) + return path + + +def main() -> None: + parser = argparse.ArgumentParser( + description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter + ) + parser.add_argument("--rec", type=Path, required=True, help="recording dir or mem2.db path") + parser.add_argument("--camera", default="color_image") + parser.add_argument("--intrinsics", type=Path, default=None) + parser.add_argument("--tag-size", type=float, default=None, help="marker length (m)") + parser.add_argument("--dict", dest="dictionary", default=None) + parser.add_argument( + "--summary", + action="store_true", + help="read-only on streams: recompute and write ONLY april_tags.result (a subset of" + " the full run, which also rebuilds streams + filter_parameters)", + ) + parser.add_argument( + "--output", + type=Path, + default=None, + help="summary.json path to write (default: /summary.json) — pick another" + " location to avoid overwriting an existing one", + ) + parser.add_argument( + "--dynamic", + default=None, + help="comma/space-separated tag ids on moving objects — kept in raw, dropped from filtered", + ) + args = parser.parse_args() + + recording = args.rec.expanduser() + db_path = recording if recording.name == "mem2.db" else recording / "mem2.db" + if not db_path.exists(): + parser.error(f"no mem2.db at {db_path}") + store = rdb.store(db_path) + summary_path = args.output.expanduser() if args.output else (db_path.parent / SUMMARY_NAME) + print(f"=== {db_path.parent.name} ===") + + if args.summary: + result = summarize(store) + _update_summary_json(summary_path, result=result) + print(f" updated {summary_path} april_tags.result") + return + + # Rebuild both streams and overwrite filter_parameters + result. + dynamic_tags = _parse_dynamic(args.dynamic) + intrinsics_path = (args.intrinsics or (db_path.parent / "camera_intrinsics.json")).expanduser() + config = load_intrinsics_json(intrinsics_path) + marker_length = ( + args.tag_size + if args.tag_size is not None + else config.get("marker_length", DEFAULT_MARKER_LENGTH_METERS) + ) + dictionary = args.dictionary or config.get("dictionary", DEFAULT_DICTIONARY) + ensure_april_streams( + store, + config["intrinsics"], + config["distortion"], + image_stream=args.camera, + marker_length=marker_length, + dictionary=dictionary, + raw_stream=RAW_STREAM, + filtered_stream=FILTERED_STREAM, + exclude_tags=dynamic_tags, + force=True, + ) + filter_parameters = { + "gates": gate_params(), + "marker_length_m": marker_length, + "dictionary": dictionary, + "camera_stream": args.camera, + "raw_stream": RAW_STREAM, + "filtered_stream": FILTERED_STREAM, + "dynamic_tags_excluded": dynamic_tags, + } + result = summarize(store) + _update_summary_json(summary_path, filter_parameters=filter_parameters, result=result) + print( + f" updated {summary_path} april_tags (filter_parameters + result); dynamic={dynamic_tags}" + ) + + +if __name__ == "__main__": + main() diff --git a/dimos/navigation/jnav/components/loop_closure/gsc_pgo/detect_tags.py b/dimos/navigation/jnav/components/loop_closure/gsc_pgo/detect_tags.py new file mode 100644 index 0000000000..71b11201cc --- /dev/null +++ b/dimos/navigation/jnav/components/loop_closure/gsc_pgo/detect_tags.py @@ -0,0 +1,175 @@ +# Copyright 2026 Dimensional Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Untyped analysis script: gtsam/open3d/cv2 lack type stubs. +# mypy: ignore-errors +"""Build the raw AprilTag stream: EVERY detection over the camera image, NO filtering whatsoever +(no blur/reproj/distance/angle/motion/size gate, no time-clustering). One row per per-frame +detection that yields a valid PnP pose. Each row carries its gate diagnostics in tags +(sharpness, reproj_px, tag_px, distance_m, view_angle_deg, lin_speed, ang_speed) so downstream +gate tuning in post_process.py needs no re-detection. + +Prints the raw per-marker histogram + visit structure (visit = sightings >30s apart). + +Usage: python dimos/navigation/jnav/components/loop_closure/gsc_pgo/detect_tags.py --rec=PATH + [--camera=color_image] [--tag-size=0.10] + [--dict=DICT_APRILTAG_36h11] [--intrinsics=PATH] [--out=raw_april_tags] +""" + +import json +from pathlib import Path +import sys + +import cv2 +import numpy as np +from scipy.spatial.transform import Rotation + +from dimos.msgs.geometry_msgs.PoseStamped import PoseStamped +from dimos.msgs.sensor_msgs.Image import Image +from dimos.navigation.jnav.utils import recording_db as rdb +from dimos.navigation.jnav.utils.apriltags import ( + _camera_speeds, + estimate_marker_pose, + make_detector, + reprojection_error_px, + tag_pixel_size, + tag_sharpness, + view_quality, +) + + +def arg(flag, default=None): + return next((a.split("=", 1)[1] for a in sys.argv if a.startswith(flag + "=")), default) + + +REC_ARG = arg("--rec") +if not REC_ARG: + sys.exit( + "usage: python dimos/navigation/jnav/components/loop_closure/gsc_pgo/detect_tags.py --rec=PATH [--camera=...] [--tag-size=...] " + "[--dict=...] [--intrinsics=PATH] [--out=...] (--rec is required)" + ) +REC = Path(REC_ARG).expanduser() +CAMERA = arg("--camera", "color_image") # camera image stream the tags are detected on +MARKER_LENGTH_M = float(arg("--tag-size", "0.10")) +DICTIONARY = arg("--dict", "DICT_APRILTAG_36h11") +STREAM = arg("--out", "raw_april_tags") +VISIT_GAP_S = 30.0 + +DB = REC / "mem2.db" +intr_path = Path(arg("--intrinsics", str(REC / "camera_intrinsics.json"))).expanduser() +intr = json.loads(intr_path.read_text()) +K = np.array(intr["intrinsics"], float).reshape(3, 3) +dist = np.array(intr.get("distortion", []), float) +st = rdb.store(DB) +if CAMERA not in st.list_streams(): + sys.exit(f"!! camera stream '{CAMERA}' not in db — available: {st.list_streams()}") + +detector = make_detector(DICTIONARY) +print(f"loading '{CAMERA}' frames...") +images = st.stream(CAMERA, Image).to_list() +speed_by_ts, speed_available = _camera_speeds(images) +print( + f"detecting over {len(images)} frames (unfiltered), tag_size={MARKER_LENGTH_M} m, dict={DICTIONARY}..." +) + +rows = [] +for image_obs in images: + image = image_obs.data + bgr = image.numpy() if hasattr(image, "numpy") else np.asarray(image.data) + gray = cv2.cvtColor(bgr, cv2.COLOR_BGR2GRAY) if bgr.ndim == 3 else bgr + all_corners, marker_ids, _ = detector.detectMarkers(bgr) + if marker_ids is None: + continue + for corners, marker_id in zip(all_corners, marker_ids.flatten(), strict=False): + pose = estimate_marker_pose(corners, MARKER_LENGTH_M, K, dist) + if pose is None: + continue + rvec, tvec = pose + quat = Rotation.from_rotvec(rvec.reshape(3)).as_quat() # x,y,z,w + t = tvec.reshape(3) + tcm = [ + float(t[0]), + float(t[1]), + float(t[2]), + float(quat[0]), + float(quat[1]), + float(quat[2]), + float(quat[3]), + ] + distance, view_angle = view_quality(tcm) + speed = speed_by_ts.get(float(image_obs.ts)) + rows.append( + { + "ts": float(image_obs.ts), + "marker_id": int(marker_id), + "tcm": tcm, + "sharpness": float(tag_sharpness(gray, corners)), + "reproj_px": float( + reprojection_error_px(corners, rvec, tvec, MARKER_LENGTH_M, K, dist) + ), + "tag_px": float(tag_pixel_size(corners)), + "distance_m": float(distance), + "view_angle_deg": float(view_angle), + "lin_speed": float(speed[0]) if speed else -1.0, + "ang_speed": float(speed[1]) if speed else -1.0, + } + ) +rows.sort(key=lambda r: r["ts"]) + +if STREAM in st.list_streams(): + st.delete_stream(STREAM) +out = st.stream(STREAM, PoseStamped) +for r in rows: + tcm = r["tcm"] + out.append( + PoseStamped(ts=r["ts"], position=tcm[:3], orientation=tcm[3:]), + ts=r["ts"], + pose=tuple(tcm), + tags={ + k: r[k] + for k in ( + "marker_id", + "sharpness", + "reproj_px", + "tag_px", + "distance_m", + "view_angle_deg", + "lin_speed", + "ang_speed", + ) + }, + ) +print(f"\nwrote {STREAM}: {len(rows)} unfiltered detections") + +by = {} +for r in rows: + by.setdefault(r["marker_id"], []).append(r) +print(f"\n=== RAW per-marker (visit = >{VISIT_GAP_S:.0f}s apart) ===") +print( + f"{'tag':>4} {'det':>4} {'visits':>6} {'dist_m':>12} {'sharp>=60%':>10} {'reproj<=2%':>10} {'span_s':>7}" +) +for mid in sorted(by): + rs = sorted(by[mid], key=lambda r: r["ts"]) + times = [r["ts"] for r in rs] + visits = [[times[0]]] + for tt in times[1:]: + (visits[-1].append(tt) if tt - visits[-1][-1] <= VISIT_GAP_S else visits.append([tt])) + d = [r["distance_m"] for r in rs] + sharp_ok = 100 * np.mean([r["sharpness"] >= 60 for r in rs]) + reproj_ok = 100 * np.mean([r["reproj_px"] <= 2 for r in rs]) + print( + f"{mid:>4} {len(rs):>4} {len(visits):>6} {min(d):5.2f}-{max(d):5.2f} " + f"{sharp_ok:9.0f}% {reproj_ok:9.0f}% {times[-1] - times[0]:7.0f}" + ) +print("\nmarkers present:", sorted(by)) diff --git a/dimos/navigation/jnav/components/loop_closure/gsc_pgo/hyperparam_analysis.md b/dimos/navigation/jnav/components/loop_closure/gsc_pgo/hyperparam_analysis.md new file mode 100644 index 0000000000..8e7f608201 --- /dev/null +++ b/dimos/navigation/jnav/components/loop_closure/gsc_pgo/hyperparam_analysis.md @@ -0,0 +1,134 @@ +# gsc_pgo hyperparameter analysis + online tuning — sketch + +## The problem (from the eval campaign) + +cmu wins on big real-drift loops but **over-fires** elsewhere: +- `grassy_field` (05-32): **-0.369** tag with 110 closures — should be a no-op (+0.00x). + Open grass = feature-poor; ICP matches are garbage but get accepted. +- `gir_park1_2`: +0.578 with 23 closures, but unrefined got **+0.749 with 2 closures**. + cmu's extra closures are net noise — fewer, better closures would win. + +Goal: (a) **detect feature-poor / high-disagreement scans online and back off** +(ideally make PGO a no-op in grassy_field), and (b) accept **fewer, more +consistent** closures. Both reduce to: *trust loops less when the environment +can't support them, and only commit mutually-consistent ones.* + +## Signals cmu already computes (cheap to expose) + +Every candidate already produces these inside `searchForLoopPairs` / +`scan_context.cpp` — instrument the binary to log them per candidate (accepted +and rejected), keyed by ts: + +| signal | where | meaning | +|---|---|---| +| scan-context min cosine distance | `best_distance()` | place-match quality; high = no good match (new place OR feature-poor) | +| ICP fitness (`getFitnessScore`) | `searchForLoopPairs` | mean-sq inlier dist; high = bad geometric alignment | +| ICP converged + inlier ratio | PCL ICP | low inlier ratio = lots of disagreement (crowd/dynamic/degenerate) | +| candidate global distance | `loop_candidate_max_distance_m` gate | drift magnitude at the candidate | +| ring-key occupancy/entropy | scan-context descriptor | **feature richness** — near-uniform/empty descriptor = feature-poor | + +Two signals are *not* computed yet but are the highest-value adds: +- **registration Hessian eigenvalues** (J^T J of the ICP cost) — the canonical + degeneracy detector (below). +- **per-point residual distribution** of the scan-to-submap match — heavy tails = + dynamic objects (crowd). + +## Literature: detecting feature-poor / degenerate environments + +1. **Degeneracy factor (Zhang, Kaess, Singh — ICRA 2016, "On Degeneracy of + Optimization-based State Estimation").** THE standard. Eigen-decompose the + scan-matching information matrix `H = J^T J`; its smallest eigenvalue (or + `λ_min / λ_max` condition number) measures how well-constrained the solve is. + A featureless corridor/field has a small eigenvalue along the unconstrained + axis. → online "is this scan match trustworthy" scalar. Their **solution + remapping** projects the update out of degenerate directions; LOAM/LeGO-LOAM + use it. +2. **X-ICP (Tuna et al., T-RO 2023) — "Localizability-Aware ICP."** Per-axis + localizability from the optimization constraints; selectively adds soft + constraints only in observable directions. More principled than a scalar gate. +3. **Eigenvalue geometric features (Demantké 2011 / Weinmann 2015):** local + neighborhood covariance → linearity/planarity/scattering. Open grass scores + high "scattering" (3D-spread, no structure) — a direct feature-poverty flag. +4. **Scan-context descriptor entropy/occupancy** (cheap, already have the + descriptor): a low-entropy / sparsely-occupied ring-key = feature-poor scene + where cosine matching is unreliable. Raise the match bar or refuse loops. + +## Literature: scan disagreement / dynamic crowds + +5. **Robust kernels (Huber / Cauchy / Geman-McClure)** on the ICP residual — + down-weight the moving-people points. cmu currently uses raw fitness; a robust + cost + reporting the *inlier ratio* exposes "crowd present." +6. **Removert (Kim & Kim, IROS 2020):** range-image visibility differencing + removes dynamic points before matching. Strong for crowds. +7. **Stationarity / scan-to-scan consistency:** points that move between + consecutive frames are dynamic; a high dynamic-fraction → distrust this scan + for loop closure. (Cheap online proxy: residual after rigid alignment of + consecutive scans.) +8. **Learned moving-object segmentation (4DMOS, LMNet, MotionSeg3D)** — heavier; + masks people directly. Overkill for now but the ceiling. + +## Literature: loop-closure outlier rejection (the over-firing fix) + +The single highest-leverage change — cmu accepts each loop independently: + +9. **PCM — Pairwise Consistency Maximization (Mangelson et al., ICRA 2018).** + Build a consistency graph over candidate loops; keep only the **maximal + mutually-consistent clique**, reject the rest. Directly attacks "23 closures + where 2 good ones win." Cheap, backend-agnostic, no tuning. +10. **GNC — Graduated Non-Convexity (Yang et al., RA-L 2020) / Cauchy-IRLS.** + Backend robustly down-weights outlier loops during optimization. iSAM2-friendly. +11. **Switchable Constraints (Sünderhauf & Protzel 2012) / Dynamic Covariance + Scaling (Agarwal 2013):** the optimizer learns a switch/weight per loop and + can turn bad ones off. Lighter than PCM, online-friendly. + +## Online tuning design (signal → knob) + +Frame cmu's static thresholds as **initial values adapted by a measured +"environment trust" `τ ∈ [0,1]`** computed per keyframe from the signals above: + +``` +τ = f( degeneracy_factor, ringkey_entropy, icp_inlier_ratio, dynamic_fraction ) + # low when feature-poor / crowded / degenerate +``` + +Then drive the gates from `τ` (running statistics, not magic numbers): + +| knob (currently static) | online rule | +|---|---| +| `loop_score_thresh` (ICP gate) | tighten as `τ` drops; track running median/MAD of accepted-loop fitness and gate at `median − k·MAD` | +| `scan_context_match_threshold` | raise when ring-key entropy is low (cosine less discriminative in feature-poor scenes) | +| **accept/suppress** the loop | hard-gate: if `degeneracy_factor < ε` (Zhang) → **suppress** → PGO no-op in directions it can't observe (the grassy_field fix) | +| loop **batch** acceptance | replace independent accept with **PCM** over the recent candidate set → only commit the mutually-consistent subset (the gir_park1_2 fix) | +| loop noise model | already `Σ = fitness·I`; inflate further by `1/τ` so low-trust loops barely move the graph | + +Online-tuning techniques that fit (cheapest first): +- **Adaptive thresholding on running stats** (median/MAD, EWMA) — robust, no + training, the right default. +- **Switchable constraints / DCS** — lets the *optimizer* do the tuning; minimal code. +- **PCM** — not "tuning" but the structural fix for over-firing; do this first. +- (Avoid online Bayesian-opt/bandits for the fast gates — too slow/unstable to + converge within one run; reserve for offline meta-params.) + +## Proposed analysis (to validate before coding the online controller) + +1. **Instrument the binary** to emit per-candidate diagnostics (the table above) + to a sidecar jsonl. One pass per recording → the dataset that grounds `f(·)`. +2. **Static sweep** (via the new eval cell-caching + `--drift-per-sec`): vary + `loop_score_thresh`, `scan_context_match_threshold`, + `loop_candidate_max_distance_m`, `min_loop_detect_duration` per recording; + plot tag/voxel improvement & closure count. Expect grassy_field to be + monotone-better as the gate tightens (→ confirms "no-op is optimal there"). +3. **Correlate** accepted-vs-rejected closures with the signals: does + degeneracy_factor / ring-key entropy separate grassy_field's bad closures + from gir_park1_2's good ones? If yes, `τ` is learnable as a simple threshold. +4. **Prototype** the cheapest controller (PCM + degeneracy-gate + adaptive + `loop_score_thresh`) and re-run the full eval; target: grassy_field → ~0, + gir_park1_2 → ≥ unrefined's +0.749, no regression on the pointlio wins. + +## First concrete step + +Add the per-candidate diagnostic logging to `simple_pgo.cpp` (scan-context +distance, ICP fitness, inlier ratio, candidate distance) + compute the Zhang +degeneracy factor from the ICP Hessian. That single instrumentation pass +produces the data to decide which signals actually separate good/bad closures +here — everything else (the `f(·)` and the gates) follows from it. diff --git a/dimos/navigation/jnav/components/loop_closure/gsc_pgo/make_rrd.py b/dimos/navigation/jnav/components/loop_closure/gsc_pgo/make_rrd.py new file mode 100644 index 0000000000..bfd2db540a --- /dev/null +++ b/dimos/navigation/jnav/components/loop_closure/gsc_pgo/make_rrd.py @@ -0,0 +1,184 @@ +# Copyright 2026 Dimensional Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Untyped analysis script: gtsam/open3d/cv2 lack type stubs. +# mypy: ignore-errors +"""Combined comparison rrd: raw lidar cloud + EVERY gt_*_lidar version present in the db, each as +its own colored entity, plus AprilTag landmarks + trajectories. Re-run after adding a new GT method +and it picks the new stream up automatically. + +Importable: `build(...)` writes the rrd and returns its path (used by post_process.py). +Standalone: python dimos/navigation/jnav/components/loop_closure/gsc_pgo/make_rrd.py --rec=PATH [--lidar=...] [--odom=...] [--tags=...] [--out=...] +""" + +import json +from pathlib import Path +import sys + +from gtsam import Point3, Pose3, Rot3 +import numpy as np +import rerun as rr + +from dimos.navigation.jnav.utils import recording_db as rdb + +SCAN_STRIDE, VOXEL = 8, 0.10 +COLORS = {"raw": [220, 60, 60]} +PALETTE = [ + [60, 120, 230], + [60, 210, 90], + [230, 180, 50], + [200, 80, 220], + [80, 220, 220], + [240, 130, 60], +] +# same relaxed gates post_process uses, for placing landmark markers +GATE = dict(s=25.0, r=3.5, px=12.0, d=1.5, a=65.0, lv=1.5, av=150.0) + + +def build( + rec, + lidar_stream="pointlio_lidar", + odom_stream="pointlio_odometry", + tag_stream="raw_april_tags", + out_name="gt_compare.rrd", +): + rec = Path(rec).expanduser() + db = rec / "mem2.db" + out = rec / out_name + st = rdb.store(db) + intr = json.loads((rec / "camera_intrinsics.json").read_text()) + ext = np.array(intr["optical_in_base"], float) + Tbo = Pose3(Rot3.Quaternion(ext[6], ext[3], ext[4], ext[5]), Point3(ext[0], ext[1], ext[2])) + + def accumulate(name): + pts = [] + for i, o in enumerate(st.stream(name)): + if i % SCAN_STRIDE: + continue + xyz = np.asarray(o.data.points_f32()) + if len(xyz): + pts.append(xyz[::3]) + a = np.concatenate(pts, 0) + _, idx = np.unique(np.floor(a / VOXEL).astype(np.int64), axis=0, return_index=True) + return a[idx] + + def traj(name): + return np.array( + [ + [o.data.pose.position.x, o.data.pose.position.y, o.data.pose.position.z] + for o in st.stream(name) + ], + np.float32, + ) + + def landmarks(gt_odom): + gt = [ + ( + o.ts, + Pose3( + Rot3.Quaternion( + o.data.pose.orientation.w, + o.data.pose.orientation.x, + o.data.pose.orientation.y, + o.data.pose.orientation.z, + ), + Point3(o.data.pose.position.x, o.data.pose.position.y, o.data.pose.position.z), + ), + ) + for o in st.stream(gt_odom) + ] + gts = np.array([t for t, _ in gt]) + pos = {} + for obs in st.stream(tag_stream): + t = obs.tags + if not ( + float(t["sharpness"]) >= GATE["s"] + and float(t["reproj_px"]) <= GATE["r"] + and float(t["tag_px"]) >= GATE["px"] + and float(t["distance_m"]) <= GATE["d"] + and float(t["view_angle_deg"]) <= GATE["a"] + and (float(t["lin_speed"]) < 0 or float(t["lin_speed"]) <= GATE["lv"]) + and (float(t["ang_speed"]) < 0 or float(t["ang_speed"]) <= GATE["av"]) + ): + continue + ps = obs.data + cb = gt[int(np.argmin(np.abs(gts - float(obs.ts))))][1] + Tw = cb.compose(Tbo).compose( + Pose3( + Rot3.Quaternion( + ps.orientation.w, ps.orientation.x, ps.orientation.y, ps.orientation.z + ), + Point3(ps.x, ps.y, ps.z), + ) + ) + pos.setdefault(int(t["marker_id"]), []).append(np.asarray(Tw.translation())) + means = [np.mean(value, 0) for key, value in sorted(pos.items())] + lbl = [f"tag{key}" for key in sorted(pos)] + return np.array(means), lbl + + streams = st.list_streams() + gt_lidars = sorted(s for s in streams if s.startswith("gt_") and "_lidar" in s) + print("raw + GT lidar streams:", gt_lidars) + + rr.init("gt_compare") + rr.save(str(out)) + rr.log( + "raw/cloud", + rr.Points3D(accumulate(lidar_stream), colors=COLORS["raw"], radii=0.02), + static=True, + ) + rr.log( + "raw/trajectory", rr.LineStrips3D([traj(odom_stream)], colors=[255, 120, 120]), static=True + ) + for k, name in enumerate(gt_lidars): + color = PALETTE[k % len(PALETTE)] + cloud = accumulate(name) + rr.log(f"{name}/cloud", rr.Points3D(cloud, colors=color, radii=0.02), static=True) + print(f" logged {name}: {len(cloud):,} pts") + odom = name.replace("_lidar", "_odometry") + if odom in streams: + rr.log(f"{name}/trajectory", rr.LineStrips3D([traj(odom)], colors=color), static=True) + # landmarks placed against the first available gt odometry + gt_odoms = sorted(s for s in streams if s.startswith("gt_") and "_odometry" in s) + if gt_odoms: + lm, lbl = landmarks(gt_odoms[0]) + if len(lm): + rr.log( + "landmarks", + rr.Points3D(lm, colors=[255, 230, 0], radii=0.25, labels=lbl), + static=True, + ) + print(f" logged {len(lbl)} landmarks") + print("wrote", out) + return out + + +def _arg(flag, default=None): + return next((a.split("=", 1)[1] for a in sys.argv if a.startswith(flag + "=")), default) + + +if __name__ == "__main__": + rec_arg = _arg("--rec") + if not rec_arg: + sys.exit( + "usage: python dimos/navigation/jnav/components/loop_closure/gsc_pgo/make_rrd.py --rec=PATH [--lidar=...] [--odom=...] " + "[--tags=...] [--out=...] (--rec is required)" + ) + build( + rec_arg, + lidar_stream=_arg("--lidar", "pointlio_lidar"), + odom_stream=_arg("--odom", "pointlio_odometry"), + tag_stream=_arg("--tags", "raw_april_tags"), + out_name=_arg("--out", "gt_compare.rrd"), + ) diff --git a/dimos/navigation/jnav/components/loop_closure/gsc_pgo/module.py b/dimos/navigation/jnav/components/loop_closure/gsc_pgo/module.py new file mode 100644 index 0000000000..f2d412ecb3 --- /dev/null +++ b/dimos/navigation/jnav/components/loop_closure/gsc_pgo/module.py @@ -0,0 +1,231 @@ +# Copyright 2026 Dimensional Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Native C++ PGO module — faithful reimplementation of the original nav stack PGO. + +Uses GTSAM iSAM2 for pose graph optimization and PCL ICP for loop closure. +""" + +from __future__ import annotations + +from pathlib import Path + +from reactivex.disposable import Disposable + +from dimos.core.core import rpc +from dimos.core.native_module import NativeModule, NativeModuleConfig +from dimos.core.stream import In, Out +from dimos.msgs.geometry_msgs.Transform import Transform +from dimos.msgs.nav_msgs.Odometry import Odometry +from dimos.msgs.sensor_msgs.PointCloud2 import PointCloud2 +from dimos.msgs.vision_msgs.Detection3DArray import Detection3DArray +from dimos.navigation.jnav.msgs.Graph3D import Graph3D +from dimos.navigation.jnav.msgs.GraphDelta3D import GraphDelta3D +from dimos.navigation.jnav.msgs.Landmark import Landmark +from dimos.utils.logging_config import setup_logger + +logger = setup_logger() + + +class PGOConfig(NativeModuleConfig): + # C++ + nix flake live in the standalone repo github.com/jeff-hykin/gsc_pgo. + # Pinned to a rev for reproducibility; bump when the C++ changes. The build + # runs in this module dir and drops a `result` symlink here (gitignored). + cwd: str | None = str(Path(__file__).resolve().parent) + executable: str = "result/bin/pgo" + build_command: str | None = ( + 'nix build "github:jeff-hykin/gsc_pgo/494e7a1d657c3702ec805c9e3d251a2fe8bc9529#default"' + " --no-write-lock-file" + ) + + frame_id: str = "map" + child_frame_id: str = "odom" + body_frame: str = "base_link" + + # Keyframe detection + key_pose_delta_deg: float = 10.0 + key_pose_delta_trans: float = 0.5 + + # Loop closure + loop_search_radius: float = 3.0 + loop_time_thresh: float = 5.0 + loop_score_thresh: float = 0.15 + loop_submap_half_range: int = 5 + submap_resolution: float = 0.1 + min_loop_detect_duration: float = 2.0 + # Feature-poverty gate: skip loop search when the scan's descriptor + # vertical-structure std is below this (open grass can't place itself -> + # PGO no-op). 0 = off. Superseded by loop_min_occupancy/loop_min_degeneracy + # (structure overlaps too much between scenes to threshold cleanly). + min_descriptor_std: float = 0.0 + + # Structure-spread gate: require >= this many occupied Scan-Context cells. + # Open grass clusters returns near the sensor (few rings filled); built + # scenes spread out to range. Calibrated on go2 fastlio (1200-cell 20x60 + # descriptor): grassy ~70 vs gir_park ~88 vs downtown ~120 at equal point + # count -> measures spread, not density. 0 disables. + loop_min_occupancy: int = 80 + # Observability gate (Zhang 2016 / X-ICP degeneracy): reject a candidate + # whose source scan's smallest normalized normal-scatter eigenvalue is below + # this. Planar/degenerate (grass) -> ~0; ICP slides in-plane and reports low + # fitness for a bogus closure. Real scenes (incl. sparse gir_park) sit >0.15. + # 0 disables. + loop_min_degeneracy: float = 0.05 + + # Input mode: transform world-frame scans to body-frame using odom + unregister_input: bool = True + + # Debug global-map publishing — OFF by default. Emitted on the internal + # `_global_map` port (leading underscore) so it never autoconnects to a + # consumer's `global_map` In: the terrain_mapper is the planner's single + # authoritative global_map. Two producers on `global_map` made the costmap + # flicker. Set a rate > 0 only for viz/debug of the PGO's corrected cloud. + global_map_voxel_size: float = 0.1 + global_map_publish_rate: float = 0.0 + + # Scan Context place recognition (used by loop closure search) + use_scan_context: bool = True + scan_context_num_rings: int = 20 + scan_context_num_sectors: int = 60 + scan_context_max_range_m: float = 80.0 + scan_context_top_k: int = 10 + scan_context_match_threshold: float = 0.4 + scan_context_lidar_height_m: float = 2.0 + + # Skip ICP on candidates farther than this (m). 0 disables. + loop_candidate_max_distance_m: float = 30.0 + + # Tag (AprilTag/ArUco) loop closure + use_tag_loop_closure: bool = False + # LCM channel of the static TF tree (dimos "pattern#msg_name" convention). + tf_static_channel: str = "/tf_static#tf2_msgs.TFMessage" + tag_loop_time_thresh: float = 5.0 + tag_assoc_max_dt: float = 0.1 + tag_buffer_window: float = 2.0 + # Anisotropic tag noise (variances, tag frame; normal = +z): in-plane/yaw + # tight, range/out-of-plane loose so tag range error can't distort z. + tag_var_inplane_trans_m2: float = 0.0025 + tag_var_range_trans_m2: float = 0.25 + tag_var_yaw_rot_rad2: float = 0.0025 + tag_var_outplane_rot_rad2: float = 0.04 + # 6-DOF Mahalanobis gate vs current estimate (chi^2 95% = 12.59). 0 = off. + tag_consistency_chi2: float = 0.0 + # Robust (Huber) kernel on all loop factors (lidar + tag). Off = original. + loop_robust_kernel: bool = False + loop_robust_huber_k: float = 1.345 + + # Landmark events (decoupled perceiver -> PGO factor-graph manager) + # When set, the PGO ingests Landmark events on the `landmarks` In and + # attaches each as a graph landmark variable + a BetweenFactor(keyframe, + # landmark). Two sightings of the same landmark id share the variable, so a + # revisit closes the loop and GTSAM optimizes it jointly. A separate + # perceiver (utils/apriltag_perceiver.py) does the detection + noise/ + # confidence filtering and emits the Landmark events. Off by default. + use_landmarks: bool = False + # Anisotropic landmark observation noise (variances, landmark frame; normal = + # +z): in-plane/yaw tight, range/out-of-plane loose (mirrors the tag model). + landmark_var_inplane_trans_m2: float = 0.0025 + landmark_var_range_trans_m2: float = 0.25 + landmark_var_yaw_rot_rad2: float = 0.0025 + landmark_var_outplane_rot_rad2: float = 0.04 + landmark_assoc_max_dt: float = 0.2 + landmark_buffer_window: float = 3.0 + + # Gravity anchor + # Pin keyframe 0 (whose orientation is gravity-aligned by the LIO front end) + # so landmark/loop closures cannot rotate the initial roll/pitch off gravity. + # The full pose is pinned (also the gauge reference); roll/pitch stiffness is + # the gravity component. Variances (smaller = stiffer). + gravity_anchor: bool = True + gravity_anchor_rp_var: float = 1e-12 + gravity_anchor_yaw_var: float = 1e-12 + gravity_anchor_trans_var: float = 1e-12 + # Per-keyframe gravity anchor (roll/pitch-only prior on EVERY keyframe). Anchoring + # only kf0 lets a big loop closure tilt inner keyframes' roll/pitch, which converts + # horizontal travel into vertical and corrupts z by tens of metres. Pinning every + # keyframe's roll/pitch to its gravity-aligned LIO orientation (yaw + translation + # left free) keeps the closure in-plane and preserves the z structure. + # Default OFF: the anisotropic odometry between-factor is the primary gravity- + # preservation mechanism (and still lets landmarks correct slow tilt drift). + # This absolute prior is a harder lock for when the front end's absolute tilt + # is trustworthy (e.g. ZUPT in the LIO estimator). + gravity_anchor_per_keyframe: bool = False + gravity_anchor_kf_rp_var: float = 1e-4 + + # Anisotropic odometry between-factor: the LIO relative roll/pitch is accurate + # (IMU sees gravity each step) but yaw drifts, so roll/pitch are stiff and yaw + # looser. This keeps a loop closure from sloshing its (mostly-yaw) correction + # into roll/pitch — a tilt that converts horizontal travel into vertical and + # corrupts z. Roll/pitch variance is small but nonzero, so landmarks can still + # correct slow tilt drift across the graph ("accurate but not perfect"). + odom_rot_rp_var: float = 1e-8 + odom_rot_yaw_var: float = 1e-5 + odom_trans_xy_var: float = 1e-4 + odom_trans_z_var: float = 1e-6 + + # Bounded FIFO depth: keep at most this many pending scans, dropping the + # oldest when full (<=0 = unbounded). Generous enough that an ack-gated eval + # replay never drops a scan, bounded enough to cap live latency/memory. + max_scan_queue: int = 100 + + debug: bool = False + + +class PGO(NativeModule): + """Pose graph optimization with loop closure using GTSAM iSAM2 + PCL ICP.""" + + config: PGOConfig + + # named "lidar" to match the LoopClosure spec; the binary pairs it with the + # latest odometry pose internally, so a raw sensor-frame scan is expected. + lidar: In[PointCloud2] + odometry: In[Odometry] + # Optional: tag detections (tag-in-optical pose + numeric id) for tag-based + # loop closure. Only consumed when config.use_tag_loop_closure is set. + tag_detections: In[Detection3DArray] + # Optional: decoupled Landmark events from a perceiver (e.g. the AprilTag + # perceiver). Only consumed when config.use_landmarks is set; each becomes a + # graph landmark variable + observation factor that GTSAM optimizes jointly. + landmarks: In[Landmark] + corrected_odometry: Out[Odometry] + correction: Out[Transform] + # Internal/debug only (off by default) — see global_map_publish_rate. Named + # with a leading underscore so autoconnect won't wire it to `global_map` Ins. + _global_map: Out[PointCloud2] + pose_graph: Out[Graph3D] + loop_closure_event: Out[GraphDelta3D] + + @rpc + def start(self) -> None: + super().start() + self.tf.publish( + Transform( + frame_id=self.config.frame_id, + child_frame_id=self.config.child_frame_id, + ) + ) + self.register_disposable( + Disposable( + self.correction.transport.subscribe(self._on_correction_for_tf, self.correction) + ) + ) + if self.config.debug: + logger.info("PGO native module started (C++ iSAM2 + PCL ICP)") + + def _on_correction_for_tf(self, msg: Transform) -> None: + self.tf.publish(msg) + + @rpc + def stop(self) -> None: + super().stop() diff --git a/dimos/navigation/jnav/components/loop_closure/gsc_pgo/post_process.py b/dimos/navigation/jnav/components/loop_closure/gsc_pgo/post_process.py new file mode 100644 index 0000000000..9f52ae8dfe --- /dev/null +++ b/dimos/navigation/jnav/components/loop_closure/gsc_pgo/post_process.py @@ -0,0 +1,609 @@ +# Copyright 2026 Dimensional Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Untyped analysis script: gtsam/open3d/cv2 lack type stubs. +# mypy: ignore-errors +"""AprilTag-loop-closed + ICP-refined ground-truth post-processing for a go2 recording. + +Source of tags = the UNFILTERED tag stream (build it first with detect_tags.py). Each raw +detection carries its gate diagnostics, so gates are applied here post-hoc (no re-detection) +and are easy to relax. Factors: one robust (best-reproj) observation per keyframe x marker -> +denser, balanced loop closure than one-medoid-per-visit. + +Two-stage solve: (1) GTSAM tag PGO -- anisotropic odometry between-factors (stiff roll/pitch + z +anchor gravity, loose yaw) + quality-weighted AprilTag landmark factors fix macro drift; +(2) ICP loop closures between spatially-close / temporally-distant lidar submaps anchor local +geometry. Writes _odometry / _lidar back into the recording db, optionally a .pc2.lcm +log of the corrected cloud, and opens a comparison rrd. + +Usage: python dimos/navigation/jnav/components/loop_closure/gsc_pgo/post_process.py [odom|lidar|both] --rec=PATH + [--lidar=pointlio_lidar] [--odom=pointlio_odometry] [--tags=raw_april_tags] + [--out=gt_pointlio] [--suffix=...] [--ignore-tags=17] [--no-icp] [--no-lcm] [--no-rrd] +""" + +import json +from pathlib import Path +import re +import sqlite3 +import sys +import time + +from gtsam import ( + BetweenFactorPose3, + LevenbergMarquardtOptimizer, + LevenbergMarquardtParams, + NonlinearFactorGraph, + Point3, + Pose3, + PriorFactorPose3, + Rot3, + Symbol, + Values, + noiseModel, +) +import numpy as np + +from dimos.msgs.geometry_msgs.Pose import Pose +from dimos.msgs.nav_msgs.Odometry import Odometry +from dimos.msgs.sensor_msgs.PointCloud2 import PointCloud2 +from dimos.navigation.jnav.utils import recording_db as rdb +from dimos.navigation.jnav.utils.apriltags import ( + DEFAULT_MAX_ANGULAR_SPEED_DPS, + DEFAULT_MAX_DISTANCE_M, + DEFAULT_MAX_LINEAR_SPEED_MPS, + DEFAULT_MAX_REPROJ_PX, + DEFAULT_MAX_VIEW_ANGLE_DEG, + DEFAULT_MIN_SHARPNESS, + DEFAULT_MIN_TAG_PX, +) + +VISIT_GAP_S = 30.0 +WHAT = sys.argv[1] if len(sys.argv) > 1 and not sys.argv[1].startswith("-") else "both" + + +def arg(flag, default=""): + return next((a.split("=", 1)[1] for a in sys.argv if a.startswith(flag + "=")), default) + + +REC_ARG = arg("--rec") +SUFFIX = arg("--suffix") +LIDAR_STREAM = arg("--lidar", "pointlio_lidar") # input lidar stream (world-registered scans) +ODOM_STREAM = arg("--odom", "pointlio_odometry") # input odometry stream (keyframe source) +# ODOM_STREAM is interpolated as a table name (SQLite can't parameterize those); +# reject anything that isn't a plain identifier to keep that injection-free. +if not re.match(r"^[A-Za-z_][A-Za-z0-9_]*$", ODOM_STREAM): + raise ValueError(f"unsafe --odom stream name: {ODOM_STREAM!r}") +RAW_STREAM = arg("--tags", "raw_april_tags") # input unfiltered AprilTag stream +IGNORE_TAGS = { + int(x) for x in arg("--ignore-tags").replace(",", " ").split() +} # dynamic/moving tags +OUT_PREFIX = arg("--out", "gt_pointlio") # output prefix -> _odometry / _lidar +WRITE_LCM = "--no-lcm" not in sys.argv # also emit _lidar.pc2.lcm of the corrected cloud +OPEN_RRD = "--no-rrd" not in sys.argv # build + open a comparison rrd at the end +LCM_VOXEL = float(arg("--lcm-voxel", "0.05")) # voxel size for the aggregated .pc2.lcm cloud +LCM_OUTLIER_NN = 20 # statistical outlier removal: neighbor count +LCM_OUTLIER_STD = 2.0 # ...and std-ratio threshold (lower = more aggressive) +LIDAR_FRAME = arg("--lidar-frame", "mid360_link") # frame the raw lidar scans live in +WORLD_FRAME = arg("--world-frame", "world") # frame to register scans into +USE_TF = "--no-tf" not in sys.argv # world-register via recording tf (fallback: obs.pose) +TF_TOL = float(arg("--tf-tol", "0.5")) # max seconds to the nearest recorded transform + +# Per-glimpse gates (speed == -1 means "unknown" and always passes). +GATE = dict( + min_sharpness=DEFAULT_MIN_SHARPNESS, + max_reproj_px=DEFAULT_MAX_REPROJ_PX, + min_tag_px=DEFAULT_MIN_TAG_PX, + max_distance_m=DEFAULT_MAX_DISTANCE_M, + max_view_angle_deg=DEFAULT_MAX_VIEW_ANGLE_DEG, + max_lin_speed=DEFAULT_MAX_LINEAR_SPEED_MPS, + max_ang_speed=DEFAULT_MAX_ANGULAR_SPEED_DPS, +) + +if not REC_ARG: + sys.exit( + "usage: python dimos/navigation/jnav/components/loop_closure/gsc_pgo/post_process.py [odom|lidar|both] --rec=PATH " + "[--lidar=...] [--odom=...] [--tags=...] [--out=...] [--suffix=...] " + "[--no-icp] [--no-lcm] [--no-rrd] (--rec is required: path to the recording dir)" + ) +REC = Path(REC_ARG).expanduser() +DB = REC / "mem2.db" +intr = json.loads((REC / "camera_intrinsics.json").read_text()) +ext = np.array(intr["optical_in_base"], float) +Tbo = Pose3(Rot3.Quaternion(ext[6], ext[3], ext[4], ext[5]), Point3(ext[0], ext[1], ext[2])) +st = rdb.store(DB) +if RAW_STREAM not in st.list_streams(): + sys.exit( + f"!! {RAW_STREAM} missing -- run detect_tags.py first to build the unfiltered tag stream." + ) +from dimos.memory2.db_tf import transform_matrix + +_TF_AVAILABLE = USE_TF and st.tf.has_transforms() + + +def world_points(obs): + """Nx3 world-registered points for a lidar observation. + + Primary: the recording's tf (``world <- LIDAR_FRAME``) at the scan time. + Fallback: the observation's stored pose as the transform. Otherwise the + scan is assumed already world-registered (legacy recordings). + """ + pts = np.asarray(obs.data.points_f32()) + if not len(pts): + return pts + if _TF_AVAILABLE: + tf = st.tf.get(WORLD_FRAME, LIDAR_FRAME, float(obs.ts), TF_TOL) + if tf is not None: + rot, trans = transform_matrix(tf) + return pts @ rot.T + trans + pose = getattr(obs, "pose", None) + if isinstance(pose, (tuple, list)) and len(pose) >= 7: + rot = Rot3.Quaternion(pose[6], pose[3], pose[4], pose[5]).matrix() + return pts @ rot.T + np.array(pose[:3], float) + return pts # already world-registered + + +print(f"recording: {REC}", flush=True) +if _TF_AVAILABLE: + print(f"world-registering {LIDAR_STREAM} via tf ({WORLD_FRAME} <- {LIDAR_FRAME})", flush=True) +print( + f"streams: tags={RAW_STREAM} odom={ODOM_STREAM} lidar={LIDAR_STREAM} -> out={OUT_PREFIX}{SUFFIX}", + flush=True, +) + + +def passes(t): + return ( + t["sharpness"] >= GATE["min_sharpness"] + and t["reproj_px"] <= GATE["max_reproj_px"] + and t["tag_px"] >= GATE["min_tag_px"] + and t["distance_m"] <= GATE["max_distance_m"] + and t["view_angle_deg"] <= GATE["max_view_angle_deg"] + and (t["lin_speed"] < 0 or t["lin_speed"] <= GATE["max_lin_speed"]) + and (t["ang_speed"] < 0 or t["ang_speed"] <= GATE["max_ang_speed"]) + ) + + +# read raw detections (pose + diagnostics) +print("reading tag detections...", flush=True) +raw = [] +for obs in st.stream(RAW_STREAM): + ps = obs.data + tg = obs.tags + raw.append( + dict( + ts=float(obs.ts), + marker_id=int(tg["marker_id"]), + T_cam_tag=Pose3( + Rot3.Quaternion( + ps.orientation.w, ps.orientation.x, ps.orientation.y, ps.orientation.z + ), + Point3(ps.x, ps.y, ps.z), + ), + reproj_px=float(tg["reproj_px"]), + **{ + k: float(tg[k]) + for k in ( + "sharpness", + "tag_px", + "distance_m", + "view_angle_deg", + "lin_speed", + "ang_speed", + ) + }, + ) + ) +gated = [t for t in raw if passes(t) and t["marker_id"] not in IGNORE_TAGS] + +# keyframes from raw odometry +con = sqlite3.connect(f"file:{DB}?mode=ro", uri=True) +fo = np.array( + list( + con.execute( + "select ts,pose_x,pose_y,pose_z,pose_qx,pose_qy,pose_qz,pose_qw " + f"from {ODOM_STREAM} order by ts" + ) + ), + float, +) +con.close() + + +def pr(r): + return Rot3.Quaternion(r[7], r[4], r[5], r[6]), np.array(r[1:4]) + + +ki = [0] +prev_rot, prev_pos = pr(fo[0]) +for i in range(1, len(fo)): + rot, pos = pr(fo[i]) + if ( + np.linalg.norm(pos - prev_pos) > 0.5 + or np.degrees(np.linalg.norm(Rot3.Logmap(prev_rot.inverse() * rot))) > 10 + ): + ki.append(i) + prev_rot, prev_pos = rot, pos +kfs = [pr(fo[i]) for i in ki] +kts = fo[ki, 0] +N = len(kfs) + +# one factor per keyframe x marker: keep the best-reproj detection in each bucket +bucket = {} +for t in gated: + kf = int(np.argmin(np.abs(kts - t["ts"]))) + key = (kf, t["marker_id"]) + if key not in bucket or t["reproj_px"] < bucket[key]["reproj_px"]: + bucket[key] = t + +# revisit report +raw_by, vis_by = {}, {} +for t in raw: + raw_by.setdefault(t["marker_id"], 0) + raw_by[t["marker_id"]] += 1 +for (_kf, mid), t in bucket.items(): + vis_by.setdefault(mid, []).append(t["ts"]) + + +def n_visits(times): + times = sorted(times) + visits = [[times[0]]] + for tt in times[1:]: + (visits[-1].append(tt) if tt - visits[-1][-1] <= VISIT_GAP_S else visits.append([tt])) + return len(visits) + + +print(f"gates: {GATE}") +print( + f"raw detections {len(raw)} -> {len(gated)} pass gates -> {len(bucket)} keyframe-tag factors\n" +) +print(f"{'tag':>4} | {'raw viewings':>12} | {'filtered revisits':>17}") +not_revisited = [] +for mid in sorted(raw_by): + nv = n_visits(vis_by[mid]) if mid in vis_by else 0 + flag = "" if nv >= 2 else " <-- NOT REVISITED" + print(f"{mid:>4} | {raw_by[mid]:>12} | {nv:>10} visit(s){flag}") + if nv < 2: + not_revisited.append(mid) +print( + f"\ntags NOT revisited (no loop-closure constraint): {not_revisited if not_revisited else 'none'}\n" +) + +# factor graph + solve +odom = noiseModel.Diagonal.Variances(np.array([1e-8, 1e-8, 1e-5, 1e-4, 1e-4, 1e-6])) +grav0 = noiseModel.Diagonal.Variances(np.array([1e-8, 1e-8, 1e-6, 1e-8, 1e-8, 1e-8])) + + +# quality weighting: planar-PnP pose error grows ~quadratically with range, and reproj_px is a +# direct misfit proxy. Inflate a glimpse's covariance by (dist/REF_D)^2 * (reproj/REF_R)^2 so a +# far/oblique/blurry tag pose contributes almost nothing while close, sharp ones dominate. +REF_D, REF_R = 0.4, 1.0 + + +def tn(Rbt, distance_m=REF_D, reproj_px=REF_R): + s = max((max(distance_m, 0.2) / REF_D) ** 2 * (max(reproj_px, 0.5) / REF_R) ** 2, 0.25) + Rm = Rbt.matrix() + c = np.zeros((6, 6)) + c[:3, :3] = Rm @ np.diag([0.04, 0.04, 0.0025]) @ Rm.T + c[3:, 3:] = Rm @ np.diag([0.0025, 0.0025, 0.25]) @ Rm.T + return noiseModel.Gaussian.Covariance(c * s) + + +print(f"building factor graph over {N} keyframes...", flush=True) +g = NonlinearFactorGraph() +v = Values() +for i in range(N): + rot, pos = kfs[i] + v.insert(i, Pose3(rot, Point3(pos))) + if i == 0: + g.add(PriorFactorPose3(0, Pose3(rot, Point3(pos)), grav0)) + else: + rot_prev, pos_prev = kfs[i - 1] + g.add( + BetweenFactorPose3( + i - 1, + i, + Pose3( + rot_prev.inverse() * rot, + Point3(rot_prev.inverse().rotate(Point3(pos - pos_prev))), + ), + odom, + ) + ) +seen = set() +for (kf, mid), t in sorted(bucket.items()): + rot, pos = kfs[kf] + kp = Pose3(rot, Point3(pos)) + T = Tbo.compose(t["T_cam_tag"]) + if mid not in seen: + seen.add(mid) + v.insert(Symbol("l", mid).key(), kp.compose(T)) + g.add( + BetweenFactorPose3( + kf, Symbol("l", mid).key(), T, tn(T.rotation(), t["distance_m"], t["reproj_px"]) + ) + ) + +print("solving stage 1 (tag PGO)...", flush=True) +lm_params = LevenbergMarquardtParams() +lm_params.setMaxIterations(200) +est = LevenbergMarquardtOptimizer(g, v, lm_params).optimize() +raw_kf = [Pose3(kfs[i][0], Point3(kfs[i][1])) for i in range(N)] + +# STAGE 2: ICP loop-closure refinement (tags=macro, lidar ICP=local anchor) +# Tag PGO has pulled revisits roughly together; now ICP the lidar submaps of spatially-close, +# temporally-distant keyframe pairs to add precise 6-DOF relative constraints, then re-solve. +ICP = "--no-icp" not in sys.argv +if ICP: + import open3d as o3d + from scipy.spatial import cKDTree + + ICP_RADIUS_M = 4.0 # tag-corrected positions must be within this to be a revisit candidate + ICP_MIN_DT_S = 25.0 # ...and at least this far apart in time (a real revisit, not adjacency) + ICP_MAX_CORR_M = 0.6 # ICP correspondence distance + ICP_VOXEL = 0.15 + ICP_FIT_MIN, ICP_RMSE_MAX = 0.45, 0.25 + SUBMAP_HALF_S = 1.0 # accumulate scans within +/- this of a keyframe time into its submap + + corr = [est.atPose3(i) for i in range(N)] + cpos = np.array([np.asarray(p.translation()) for p in corr]) + + # revisit candidate pairs + tree = cKDTree(cpos) + pairs = set() + for i, j in tree.query_pairs(ICP_RADIUS_M): + if abs(kts[i] - kts[j]) >= ICP_MIN_DT_S: + pairs.add((min(i, j), max(i, j))) + pairs = sorted(pairs, key=lambda p: np.linalg.norm(cpos[p[0]] - cpos[p[1]])) + involved = {k for p in pairs for k in p} + print( + f"ICP stage: {len(pairs)} revisit candidate pairs over {len(involved)} keyframes", + flush=True, + ) + + # build per-(involved)-keyframe body-frame submaps from the input lidar (odom-registered) + print("ICP stage: reading lidar submaps...", flush=True) + submap = {k: [] for k in involved} + scan_n = 0 + t_sub = time.time() + for obs in st.stream(LIDAR_STREAM): + scan_n += 1 + if scan_n % 20000 == 0: + print(f" read {scan_n} scans, {time.time() - t_sub:.0f}s", flush=True) + ts = float(obs.ts) + k = int(np.argmin(np.abs(kts - ts))) + if k not in submap or abs(kts[k] - ts) > SUBMAP_HALF_S: + continue + rot_k, pos_k = kfs[k] + world = world_points(obs) + submap[k].append((world - pos_k) @ rot_k.matrix()) # world -> kf-k body frame + pcd = {} + for k, chunks in submap.items(): + if not chunks: + continue + p = o3d.geometry.PointCloud() + p.points = o3d.utility.Vector3dVector(np.concatenate(chunks, 0).astype(np.float64)) + p = p.voxel_down_sample(ICP_VOXEL) + p.estimate_normals(o3d.geometry.KDTreeSearchParamHybrid(radius=0.5, max_nn=30)) + pcd[k] = p + print(f"ICP stage: built {len(pcd)} submaps, registering {len(pairs)} pairs...", flush=True) + + icp_noise = noiseModel.Robust.Create( + noiseModel.mEstimator.Huber.Create(1.345), + noiseModel.Diagonal.Variances(np.array([4e-4, 4e-4, 4e-4, 2.5e-3, 2.5e-3, 2.5e-3])), + ) + added = 0 + t_icp = time.time() + for pair_n, (i, j) in enumerate(pairs): + if pair_n and pair_n % 5000 == 0: + print( + f" registered {pair_n}/{len(pairs)} pairs, {added} accepted, {time.time() - t_icp:.0f}s", + flush=True, + ) + if i not in pcd or j not in pcd: + continue + init = (corr[i].inverse() * corr[j]).matrix() # i<-j initial guess from tag correction + res = o3d.pipelines.registration.registration_icp( + pcd[j], + pcd[i], + ICP_MAX_CORR_M, + init, + o3d.pipelines.registration.TransformationEstimationPointToPlane(), + ) + if res.fitness >= ICP_FIT_MIN and res.inlier_rmse <= ICP_RMSE_MAX: + T = res.transformation + g.add(BetweenFactorPose3(i, j, Pose3(Rot3(T[:3, :3]), Point3(T[:3, 3])), icp_noise)) + added += 1 + print( + f"ICP stage: accepted {added}/{len(pairs)} loop closures " + f"(fit>={ICP_FIT_MIN}, rmse<={ICP_RMSE_MAX}m)", + flush=True, + ) + if added: + # est already holds every key (keyframe + landmark poses); warm-start from it + print("solving stage 2 (tag PGO + ICP closures)...", flush=True) + est = LevenbergMarquardtOptimizer(g, est, lm_params).optimize() + +C = [est.atPose3(i).compose(raw_kf[i].inverse()) for i in range(N)] +shift = max(float(np.linalg.norm(np.asarray(C[i].translation()))) for i in range(N)) +print( + f"PGO: N={N} keyframes, {len(bucket)} tag factors over {len(seen)} markers, " + f"max correction shift {shift:.1f} m", + flush=True, +) + + +def C_at(ts): + if ts <= kts[0]: + return C[0] + if ts >= kts[-1]: + return C[-1] + j = int(np.searchsorted(kts, ts)) + a, b = j - 1, j + al = (ts - kts[a]) / (kts[b] - kts[a]) + return C[a].compose(Pose3.Expmap(al * Pose3.Logmap(C[a].between(C[b])))) + + +def pose_tuple(P): + t = P.translation() + q = P.rotation().toQuaternion() + return (t[0], t[1], t[2], q.x(), q.y(), q.z(), q.w()) + + +if WHAT in ("odom", "both"): + name = f"{OUT_PREFIX}_odometry{SUFFIX}" + if name in st.list_streams(): + st.delete_stream(name) + out = st.stream(name, Odometry) + print(f"writing {name} ({len(fo)} poses)...", flush=True) + n = 0 + t0 = time.time() + for r in fo: + ts = float(r[0]) + P = C_at(ts).compose( + Pose3(Rot3.Quaternion(r[7], r[4], r[5], r[6]), Point3(r[1], r[2], r[3])) + ) + x, y, zz, qx, qy, qz, qw = pose_tuple(P) + out.append( + Odometry( + ts=ts, + frame_id="odom", + child_frame_id="base_link", + pose=Pose(x, y, zz, qx, qy, qz, qw), + ), + ts=ts, + pose=(x, y, zz, qx, qy, qz, qw), + ) + n += 1 + if n % 20000 == 0: + print(f" {n}/{len(fo)} poses, {time.time() - t0:.0f}s", flush=True) + print(f"wrote {name}: {len(fo)} poses in {time.time() - t0:.0f}s", flush=True) + +if WHAT in ("lidar", "both"): + name = f"{OUT_PREFIX}_lidar{SUFFIX}" + fo_ts = fo[:, 0] + + def base_pose(ts): + j = int(np.searchsorted(fo_ts, ts)) + j = min(max(j, 0), len(fo) - 1) + if j > 0 and abs(fo_ts[j - 1] - ts) < abs(fo_ts[j] - ts): + j -= 1 + r = fo[j] + return Pose3(Rot3.Quaternion(r[7], r[4], r[5], r[6]), Point3(r[1], r[2], r[3])) + + if name in st.list_streams(): + st.delete_stream(name) + out = st.stream(name, PointCloud2) + + # The db stream stays per-scan, but the .pc2.lcm is ONE aggregated cloud (voxel-downsampled + + # statistical-outlier-removed), not 5184 per-scan events. Intensity rides through open3d's + # voxel averaging via the color channel. Chunks are collapsed every CHUNK scans to bound memory. + if WRITE_LCM: + import open3d as o3d + + CHUNK = 1000 + agg_xyz, agg_i = [], [] # incrementally voxel-downsampled chunks + buf_xyz, buf_i = [], [] + have_inten = False + + def collapse(xyz_list, i_list, voxel): + pc = o3d.geometry.PointCloud() + pc.points = o3d.utility.Vector3dVector(np.concatenate(xyz_list).astype(np.float64)) + carry = bool(i_list) + if carry: + inten_col = np.concatenate(i_list).astype(np.float64)[:, None] + pc.colors = o3d.utility.Vector3dVector(np.repeat(inten_col, 3, axis=1)) + pc = pc.voxel_down_sample(voxel) + dx = np.asarray(pc.points, np.float32) + di = np.asarray(pc.colors, np.float32)[:, 0] if carry else None + return dx, di + + print(f"writing {name} (corrected lidar)...", flush=True) + n = 0 + t0 = time.time() + for obs in st.stream(LIDAR_STREAM): + ts = float(obs.ts) + Cts = C_at(ts) + Rm = Cts.rotation().matrix() + tv = np.asarray(Cts.translation()) + xyz = world_points(obs) + inten = obs.data.intensities_f32() + xyz2 = (xyz @ Rm.T + tv).astype(np.float32) + m = PointCloud2.from_numpy( + xyz2, frame_id="odom", intensities=(np.asarray(inten) if inten is not None else None) + ) + m.ts = ts # stamp the cloud (lcm_encode needs a non-None ts) + out.append(m, ts=ts, pose=pose_tuple(Cts.compose(base_pose(ts)))) + if WRITE_LCM: + buf_xyz.append(xyz2) + if inten is not None: + have_inten = True + buf_i.append(np.asarray(inten, np.float32)) + if len(buf_xyz) >= CHUNK: + dx, di = collapse(buf_xyz, buf_i if have_inten else [], LCM_VOXEL) + agg_xyz.append(dx) + if di is not None: + agg_i.append(di) + buf_xyz, buf_i = [], [] + n += 1 + if n % 2000 == 0: + print(f" {n} scans, {time.time() - t0:.0f}s", flush=True) + print(f"wrote {name}: {n} scans in {time.time() - t0:.0f}s", flush=True) + + if WRITE_LCM: + if buf_xyz: # flush remainder + dx, di = collapse(buf_xyz, buf_i if have_inten else [], LCM_VOXEL) + agg_xyz.append(dx) + if di is not None: + agg_i.append(di) + # final unified voxel pass over the per-chunk results, then statistical outlier removal + merged_i = agg_i if have_inten else [] + dx, di = collapse(agg_xyz, merged_i, LCM_VOXEL) + print( + f"aggregating .pc2.lcm: {len(dx):,} pts after voxel, removing outliers...", flush=True + ) + pc = o3d.geometry.PointCloud() + pc.points = o3d.utility.Vector3dVector(dx.astype(np.float64)) + if di is not None: + pc.colors = o3d.utility.Vector3dVector( + np.repeat(di.astype(np.float64)[:, None], 3, axis=1) + ) + pc, _keep = pc.remove_statistical_outlier(LCM_OUTLIER_NN, LCM_OUTLIER_STD) + merged_xyz = np.asarray(pc.points, np.float32) + merged_inten = np.asarray(pc.colors, np.float32)[:, 0] if di is not None else None + merged = PointCloud2.from_numpy(merged_xyz, frame_id="odom", intensities=merged_inten) + merged.ts = float(fo_ts[0]) + lcm_path = REC / f"{name}.pc2.lcm" + lcm_path.write_bytes(merged.lcm_encode()) + print( + f"wrote {lcm_path}: 1 aggregated cloud, {len(merged_xyz):,} pts " + f"(voxel {LCM_VOXEL} m, outlier nn={LCM_OUTLIER_NN}/std={LCM_OUTLIER_STD})", + flush=True, + ) + +# build + open the comparison rrd +if OPEN_RRD and WHAT in ("lidar", "both"): + import subprocess + + from dimos.navigation.jnav.components.loop_closure.gsc_pgo import make_rrd + + print("building comparison rrd...", flush=True) + rrd_path = make_rrd.build( + REC, lidar_stream=LIDAR_STREAM, odom_stream=ODOM_STREAM, tag_stream=RAW_STREAM + ) + rerun_bin = Path(sys.executable).parent / "rerun" + if rerun_bin.exists(): + subprocess.Popen([str(rerun_bin), str(rrd_path)]) + print(f"opened {rrd_path}", flush=True) + else: + print(f"rerun binary not found at {rerun_bin}; open manually: rerun {rrd_path}", flush=True) diff --git a/dimos/navigation/jnav/components/loop_closure/spec.py b/dimos/navigation/jnav/components/loop_closure/spec.py new file mode 100644 index 0000000000..d571192266 --- /dev/null +++ b/dimos/navigation/jnav/components/loop_closure/spec.py @@ -0,0 +1,33 @@ +# Copyright 2026 Dimensional Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from typing import Protocol + +from dimos.core.stream import In, Out +from dimos.msgs.nav_msgs.Odometry import Odometry +from dimos.msgs.sensor_msgs.PointCloud2 import PointCloud2 +from dimos.navigation.jnav.msgs.Graph3D import Graph3D +from dimos.navigation.jnav.msgs.GraphDelta3D import GraphDelta3D + + +class LoopClosure(Protocol): + # frame:sensor_link + lidar: In[PointCloud2] + odometry: In[Odometry] + + corrected_odometry: Out[Odometry] + # frame:map + pose_graph: Out[Graph3D] + # frame:map + loop_closure_event: Out[GraphDelta3D] diff --git a/dimos/navigation/jnav/msgs/Graph3D.hpp b/dimos/navigation/jnav/msgs/Graph3D.hpp new file mode 100644 index 0000000000..a440127ae2 --- /dev/null +++ b/dimos/navigation/jnav/msgs/Graph3D.hpp @@ -0,0 +1,175 @@ +// Copyright 2026 Dimensional Inc. +// SPDX-License-Identifier: Apache-2.0 +// +// Typed C++ helper mirroring the Python `dimos.msgs.nav_msgs.Graph3D`. +// Canonical schema lives in `dimos/msgs/nav_msgs/Graph3D.ksy` — keep +// encode() in sync with that file (and with Graph3D.py.lcm_decode). +// +// Wire format (big-endian): +// +// uint64 edge_count +// uint64 node_count +// double timestamp // seconds since epoch +// per node (node_count): +// pose_stamped: +// double ts +// uint32 frame_id_len +// bytes frame_id (utf-8, no terminator) +// 7×double pos_x, pos_y, pos_z, quat_x, quat_y, quat_z, quat_w +// uint64 id +// uint64 metadata_id +// per edge (edge_count): +// uint64 start_id +// uint64 end_id +// double timestamp +// uint64 metadata_id +// +// Edges reference nodes by `id`, not by index. + +#pragma once + +#include +#include +#include +#include +#include + +#include + +namespace dimos { + +namespace graph3d_detail { + +// Host-order → big-endian byte writers. Avoid for portability +// (macOS uses different names) — write byte-by-byte from the top. + +inline void write_u32_be(std::vector& out, uint32_t v) { + out.push_back(static_cast((v >> 24) & 0xFF)); + out.push_back(static_cast((v >> 16) & 0xFF)); + out.push_back(static_cast((v >> 8) & 0xFF)); + out.push_back(static_cast( v & 0xFF)); +} + +inline void write_u64_be(std::vector& out, uint64_t v) { + for (int shift = 56; shift >= 0; shift -= 8) { + out.push_back(static_cast((v >> shift) & 0xFF)); + } +} + +inline void write_double_be(std::vector& out, double v) { + uint64_t bits; + std::memcpy(&bits, &v, sizeof(bits)); + write_u64_be(out, bits); +} + +inline void write_bytes(std::vector& out, const std::string& s) { + out.insert(out.end(), s.begin(), s.end()); +} + +} // namespace graph3d_detail + +class Graph3D { +public: + struct PoseStamped { + double ts = 0.0; + std::string frame_id; + double pos_x = 0.0, pos_y = 0.0, pos_z = 0.0; + double quat_x = 0.0, quat_y = 0.0, quat_z = 0.0, quat_w = 1.0; + }; + + struct Node3D { + PoseStamped pose; + uint64_t id = 0; + uint64_t metadata_id = 0; + }; + + struct Edge { + uint64_t start_id = 0; + uint64_t end_id = 0; + double timestamp = 0.0; + uint64_t metadata_id = 0; + }; + + Graph3D(std::string frame_id, double timestamp) + : frame_id_(std::move(frame_id)), timestamp_(timestamp) {} + + void reserve_nodes(size_t capacity) { nodes_.reserve(capacity); } + void reserve_edges(size_t capacity) { edges_.reserve(capacity); } + + // Add a node. The pose's frame_id defaults to the graph's frame_id — + // override per-node only if a node lives in a different frame. + void add_node(uint64_t id, uint64_t metadata_id, double pose_ts, + double pos_x, double pos_y, double pos_z, + double quat_x, double quat_y, double quat_z, double quat_w, + std::string node_frame_id = "") { + PoseStamped pose; + pose.ts = pose_ts; + pose.frame_id = node_frame_id.empty() ? frame_id_ : std::move(node_frame_id); + pose.pos_x = pos_x; pose.pos_y = pos_y; pose.pos_z = pos_z; + pose.quat_x = quat_x; pose.quat_y = quat_y; pose.quat_z = quat_z; pose.quat_w = quat_w; + nodes_.push_back({pose, id, metadata_id}); + } + + // Position-only convenience (orientation defaults to identity). + void add_node_xyz(uint64_t id, uint64_t metadata_id, double pose_ts, + double pos_x, double pos_y, double pos_z) { + add_node(id, metadata_id, pose_ts, pos_x, pos_y, pos_z, 0.0, 0.0, 0.0, 1.0); + } + + void add_edge(uint64_t start_id, uint64_t end_id, double edge_ts, + uint64_t metadata_id = 0) { + edges_.push_back({start_id, end_id, edge_ts, metadata_id}); + } + + size_t node_count() const { return nodes_.size(); } + size_t edge_count() const { return edges_.size(); } + const std::string& frame_id() const { return frame_id_; } + + std::vector encode() const { + using namespace graph3d_detail; + std::vector out; + // Conservative reservation: header + per-node fixed bytes + per-edge. + // frame_id strings add variable length on top — that just causes a + // realloc, not correctness issues. + out.reserve(24 + nodes_.size() * 84 + edges_.size() * 32); + write_u64_be(out, static_cast(edges_.size())); + write_u64_be(out, static_cast(nodes_.size())); + write_double_be(out, timestamp_); + for (const auto& n : nodes_) { + // pose_stamped first (per Graph3D.ksy) + write_double_be(out, n.pose.ts); + write_u32_be(out, static_cast(n.pose.frame_id.size())); + write_bytes(out, n.pose.frame_id); + write_double_be(out, n.pose.pos_x); + write_double_be(out, n.pose.pos_y); + write_double_be(out, n.pose.pos_z); + write_double_be(out, n.pose.quat_x); + write_double_be(out, n.pose.quat_y); + write_double_be(out, n.pose.quat_z); + write_double_be(out, n.pose.quat_w); + // then id, metadata_id + write_u64_be(out, n.id); + write_u64_be(out, n.metadata_id); + } + for (const auto& e : edges_) { + write_u64_be(out, e.start_id); + write_u64_be(out, e.end_id); + write_double_be(out, e.timestamp); + write_u64_be(out, e.metadata_id); + } + return out; + } + + int publish(lcm::LCM& lcm, const std::string& channel) const { + std::vector bytes = encode(); + return lcm.publish(channel, bytes.data(), static_cast(bytes.size())); + } + +private: + std::string frame_id_; + double timestamp_; + std::vector nodes_; + std::vector edges_; +}; + +} // namespace dimos diff --git a/dimos/navigation/jnav/msgs/Graph3D.py b/dimos/navigation/jnav/msgs/Graph3D.py new file mode 100644 index 0000000000..89d3b51e72 --- /dev/null +++ b/dimos/navigation/jnav/msgs/Graph3D.py @@ -0,0 +1,233 @@ +# Copyright 2026 Dimensional Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Graph3D: pose-graph / visibility-graph message with typed nodes and edges. + +Edges reference nodes by ``id`` (not list index), so producers are free +to reorder or re-emit nodes between snapshots. ``metadata_id`` is a +caller-defined enum — ex: for far_planner: 0=normal, 1=odom, 2=goal +""" + +from __future__ import annotations + +from dataclasses import dataclass +import struct +import time +from typing import TYPE_CHECKING, BinaryIO + +from dimos.msgs.geometry_msgs.PoseStamped import PoseStamped +from dimos.msgs.geometry_msgs.Quaternion import Quaternion +from dimos.msgs.geometry_msgs.Vector3 import Vector3 +from dimos.types.timestamped import Timestamped + +if TYPE_CHECKING: + from rerun._baseclasses import Archetype + + +_DEFAULT_NODE_COLORS: dict[int, tuple[int, int, int, int]] = { + 0: (180, 180, 180, 200), + 1: (0, 255, 0, 255), + 2: (255, 0, 0, 255), + 3: (255, 165, 0, 200), + 4: (0, 200, 255, 200), +} +_DEFAULT_NODE_COLOR = (200, 200, 200, 180) + +_DEFAULT_EDGE_COLORS: dict[int, tuple[int, int, int, int]] = { + 0: (0, 220, 100, 200), # odom / traversable — green + 1: (255, 180, 0, 220), # loop_closure / partial — yellow + 2: (255, 50, 50, 150), # blocked — red +} +_DEFAULT_EDGE_COLOR = (180, 180, 180, 180) + + +class Graph3D(Timestamped): + msg_name = "nav_msgs.Graph3D" + + @dataclass + class Node3D: + pose: PoseStamped + id: int = 0 + metadata_id: int = 0 + + @dataclass + class Edge: + start_id: int + end_id: int + timestamp: float = 0.0 + metadata_id: int = 0 + + ts: float + nodes: list[Node3D] + edges: list[Edge] + + def __init__( + self, + ts: float = 0.0, + nodes: list[Graph3D.Node3D] | None = None, + edges: list[Graph3D.Edge] | None = None, + ) -> None: + self.ts = ts if ts != 0 else time.time() + self.nodes = nodes if nodes is not None else [] + self.edges = edges if edges is not None else [] + + def lcm_encode(self) -> bytes: + # Field order matches Graph3D.ksy: edge_count, node_count, ts, + # nodes[] (pose, id, metadata_id), edges[]. + parts: list[bytes] = [] + parts.append(struct.pack(">QQd", len(self.edges), len(self.nodes), self.ts)) + for node in self.nodes: + frame_id_bytes = node.pose.frame_id.encode("utf-8") + parts.append(struct.pack(">d", node.pose.ts)) + parts.append(struct.pack(">I", len(frame_id_bytes))) + parts.append(frame_id_bytes) + parts.append( + struct.pack( + ">7d", + node.pose.position.x, + node.pose.position.y, + node.pose.position.z, + node.pose.orientation.x, + node.pose.orientation.y, + node.pose.orientation.z, + node.pose.orientation.w, + ) + ) + parts.append(struct.pack(">QQ", node.id, node.metadata_id)) + for edge in self.edges: + parts.append( + struct.pack(">QQdQ", edge.start_id, edge.end_id, edge.timestamp, edge.metadata_id) + ) + return b"".join(parts) + + @classmethod + def lcm_decode(cls, data: bytes | BinaryIO) -> Graph3D: + buf = data if isinstance(data, (bytes, bytearray)) else data.read() + offset = 0 + edge_count, node_count, graph_ts = struct.unpack_from(">QQd", buf, offset) + offset += 24 + + nodes: list[Graph3D.Node3D] = [] + for _ in range(node_count): + (pose_ts,) = struct.unpack_from(">d", buf, offset) + offset += 8 + (frame_id_len,) = struct.unpack_from(">I", buf, offset) + offset += 4 + frame_id = buf[offset : offset + frame_id_len].decode("utf-8") + offset += frame_id_len + px, py, pz, qx, qy, qz, qw = struct.unpack_from(">7d", buf, offset) + offset += 56 + node_id, metadata_id = struct.unpack_from(">QQ", buf, offset) + offset += 16 + pose = PoseStamped( + ts=pose_ts, + frame_id=frame_id, + position=Vector3(px, py, pz), + orientation=Quaternion(qx, qy, qz, qw), + ) + nodes.append(cls.Node3D(pose=pose, id=node_id, metadata_id=metadata_id)) + + edges: list[Graph3D.Edge] = [] + for _ in range(edge_count): + start_id, end_id, edge_ts, edge_metadata_id = struct.unpack_from(">QQdQ", buf, offset) + offset += 32 + edges.append( + cls.Edge( + start_id=start_id, + end_id=end_id, + timestamp=edge_ts, + metadata_id=edge_metadata_id, + ) + ) + + return cls(ts=graph_ts, nodes=nodes, edges=edges) + + def to_rerun( + self, + z_offset: float = 0.0, + radii: float = 0.12, + node_colors: dict[int, tuple[int, int, int, int]] | None = None, + ) -> Archetype: + """Default visualization: ``rr.Points3D`` of just the nodes. + + For nodes + edges in separate entity sub-paths, use + ``to_rerun_multi`` from a ``visual_override`` callback. + """ + import rerun as rr + + nc = node_colors if node_colors is not None else _DEFAULT_NODE_COLORS + positions = [ + [n.pose.position.x, n.pose.position.y, n.pose.position.z + z_offset] for n in self.nodes + ] + colors = [nc.get(n.metadata_id, _DEFAULT_NODE_COLOR) for n in self.nodes] + node_radii = [radii * 2.0 if n.metadata_id in (1, 2) else radii for n in self.nodes] + return rr.Points3D(positions, colors=colors, radii=node_radii) + + def to_rerun_multi( + self, + base_path: str, + z_offset: float = 0.0, + node_radius: float = 0.12, + edge_radius: float = 0.04, + node_colors: dict[int, tuple[int, int, int, int]] | None = None, + edge_colors: dict[int, tuple[int, int, int, int]] | None = None, + ) -> list[tuple[str, Archetype]]: + """Return ``[(base_path/nodes, Points3D), (base_path/edges, LineStrips3D)]``. + + Intended for use from ``visual_override`` callbacks where the + bridge supports the ``RerunMulti`` list-of-tuples form. + """ + import rerun as rr + + nc = node_colors if node_colors is not None else _DEFAULT_NODE_COLORS + ec = edge_colors if edge_colors is not None else _DEFAULT_EDGE_COLORS + + node_positions = [ + [n.pose.position.x, n.pose.position.y, n.pose.position.z + z_offset] for n in self.nodes + ] + node_colors_list = [nc.get(n.metadata_id, _DEFAULT_NODE_COLOR) for n in self.nodes] + node_radii = [ + node_radius * 2.0 if n.metadata_id in (1, 2) else node_radius for n in self.nodes + ] + nodes_archetype = rr.Points3D(node_positions, colors=node_colors_list, radii=node_radii) + + id_to_pose: dict[int, PoseStamped] = {n.id: n.pose for n in self.nodes} + strips: list[list[list[float]]] = [] + edge_colors_list: list[tuple[int, int, int, int]] = [] + for edge in self.edges: + start = id_to_pose.get(edge.start_id) + end = id_to_pose.get(edge.end_id) + if start is None or end is None: + continue + strips.append( + [ + [start.position.x, start.position.y, start.position.z + z_offset], + [end.position.x, end.position.y, end.position.z + z_offset], + ] + ) + edge_colors_list.append(ec.get(edge.metadata_id, _DEFAULT_EDGE_COLOR)) + edges_archetype = rr.LineStrips3D( + strips, colors=edge_colors_list, radii=[edge_radius] * len(strips) + ) + + return [ + (f"{base_path}/nodes", nodes_archetype), + (f"{base_path}/edges", edges_archetype), + ] + + def __len__(self) -> int: + return len(self.nodes) + + def __str__(self) -> str: + return f"Graph3D(nodes={len(self.nodes)}, edges={len(self.edges)})" diff --git a/dimos/navigation/jnav/msgs/GraphDelta3D.hpp b/dimos/navigation/jnav/msgs/GraphDelta3D.hpp new file mode 100644 index 0000000000..4db42eb71c --- /dev/null +++ b/dimos/navigation/jnav/msgs/GraphDelta3D.hpp @@ -0,0 +1,168 @@ +// Copyright 2026 Dimensional Inc. +// SPDX-License-Identifier: Apache-2.0 +// +// Typed C++ helper mirroring the Python `dimos.msgs.nav_msgs.GraphDelta3D`. +// +// Wire format (big-endian): +// +// uint64 node_count +// double timestamp // seconds since epoch +// per node (node_count): +// pose_stamped: // (same as Graph3D's node3d pose) +// double ts +// uint32 frame_id_len +// bytes frame_id (utf-8, no terminator) +// 7×double pos_x, pos_y, pos_z, quat_x, quat_y, quat_z, quat_w +// uint64 id +// uint64 metadata_id +// per transform (node_count): +// 7×double translation_x, translation_y, translation_z, +// rotation_x, rotation_y, rotation_z, rotation_w +// +// Two aligned arrays: ``transforms[i]`` is the SE(3) delta about to +// be applied to ``nodes[i]``. ``post_pose = transforms[i] * nodes[i].pose`` +// is the convention (left-multiply). +// +// `GraphDelta3D.py.lcm_decode` reads exactly this layout — keep in sync. + +#pragma once + +#include +#include +#include +#include +#include + +#include + +namespace dimos { + +namespace graph_delta3d_detail { + +inline void write_u32_be(std::vector& out, uint32_t v) { + out.push_back(static_cast((v >> 24) & 0xFF)); + out.push_back(static_cast((v >> 16) & 0xFF)); + out.push_back(static_cast((v >> 8) & 0xFF)); + out.push_back(static_cast( v & 0xFF)); +} + +inline void write_u64_be(std::vector& out, uint64_t v) { + for (int shift = 56; shift >= 0; shift -= 8) { + out.push_back(static_cast((v >> shift) & 0xFF)); + } +} + +inline void write_double_be(std::vector& out, double v) { + uint64_t bits; + std::memcpy(&bits, &v, sizeof(bits)); + write_u64_be(out, bits); +} + +inline void write_bytes(std::vector& out, const std::string& s) { + out.insert(out.end(), s.begin(), s.end()); +} + +} // namespace graph_delta3d_detail + +class GraphDelta3D { +public: + struct PoseStamped { + double ts = 0.0; + std::string frame_id; + double pos_x = 0.0, pos_y = 0.0, pos_z = 0.0; + double quat_x = 0.0, quat_y = 0.0, quat_z = 0.0, quat_w = 1.0; + }; + + struct Node3D { + PoseStamped pose; + uint64_t id = 0; + uint64_t metadata_id = 0; + }; + + struct Transform { + double translation_x = 0.0, translation_y = 0.0, translation_z = 0.0; + double rotation_x = 0.0, rotation_y = 0.0, rotation_z = 0.0, rotation_w = 1.0; + }; + + GraphDelta3D(std::string frame_id, double timestamp) + : frame_id_(std::move(frame_id)), timestamp_(timestamp) {} + + void reserve(size_t capacity) { + nodes_.reserve(capacity); + transforms_.reserve(capacity); + } + + // Add a node + its SE(3) delta. Pass empty `node_frame_id` to inherit + // the graph's frame_id. + void add(uint64_t id, uint64_t metadata_id, double pose_ts, + double pos_x, double pos_y, double pos_z, + double quat_x, double quat_y, double quat_z, double quat_w, + double translation_x, double translation_y, double translation_z, + double rotation_x, double rotation_y, double rotation_z, double rotation_w, + std::string node_frame_id = "") { + Node3D node; + node.id = id; + node.metadata_id = metadata_id; + node.pose.ts = pose_ts; + node.pose.frame_id = node_frame_id.empty() ? frame_id_ : std::move(node_frame_id); + node.pose.pos_x = pos_x; node.pose.pos_y = pos_y; node.pose.pos_z = pos_z; + node.pose.quat_x = quat_x; node.pose.quat_y = quat_y; + node.pose.quat_z = quat_z; node.pose.quat_w = quat_w; + nodes_.push_back(node); + + Transform tf; + tf.translation_x = translation_x; tf.translation_y = translation_y; tf.translation_z = translation_z; + tf.rotation_x = rotation_x; tf.rotation_y = rotation_y; + tf.rotation_z = rotation_z; tf.rotation_w = rotation_w; + transforms_.push_back(tf); + } + + size_t size() const { return nodes_.size(); } + bool empty() const { return nodes_.empty(); } + const std::string& frame_id() const { return frame_id_; } + + std::vector encode() const { + using namespace graph_delta3d_detail; + std::vector out; + out.reserve(16 + nodes_.size() * (84 + 56)); + write_u64_be(out, static_cast(nodes_.size())); + write_double_be(out, timestamp_); + for (const auto& n : nodes_) { + write_double_be(out, n.pose.ts); + write_u32_be(out, static_cast(n.pose.frame_id.size())); + write_bytes(out, n.pose.frame_id); + write_double_be(out, n.pose.pos_x); + write_double_be(out, n.pose.pos_y); + write_double_be(out, n.pose.pos_z); + write_double_be(out, n.pose.quat_x); + write_double_be(out, n.pose.quat_y); + write_double_be(out, n.pose.quat_z); + write_double_be(out, n.pose.quat_w); + write_u64_be(out, n.id); + write_u64_be(out, n.metadata_id); + } + for (const auto& t : transforms_) { + write_double_be(out, t.translation_x); + write_double_be(out, t.translation_y); + write_double_be(out, t.translation_z); + write_double_be(out, t.rotation_x); + write_double_be(out, t.rotation_y); + write_double_be(out, t.rotation_z); + write_double_be(out, t.rotation_w); + } + return out; + } + + int publish(lcm::LCM& lcm, const std::string& channel) const { + std::vector bytes = encode(); + return lcm.publish(channel, bytes.data(), static_cast(bytes.size())); + } + +private: + std::string frame_id_; + double timestamp_; + std::vector nodes_; + std::vector transforms_; +}; + +} // namespace dimos diff --git a/dimos/navigation/jnav/msgs/GraphDelta3D.py b/dimos/navigation/jnav/msgs/GraphDelta3D.py new file mode 100644 index 0000000000..36fccca459 --- /dev/null +++ b/dimos/navigation/jnav/msgs/GraphDelta3D.py @@ -0,0 +1,198 @@ +# Copyright 2026 Dimensional Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""GraphDelta3D: per-node SE(3) transforms about to be applied to a list of nodes. + +Two aligned arrays: ``nodes[i]`` is the node, ``transforms[i]`` is the +SE(3) delta about to be applied to it. ``post_pose = transforms[i] * +nodes[i].pose`` is the convention (left-multiply). + +Use case: PGO publishes this on ``loop_closure_event`` when iSAM2 +smooths the pose graph — ``nodes[i]`` is the keyframe pre-smooth, +``transforms[i]`` is the delta iSAM2 just applied to it. Consumers can +re-derive post-poses or filter to large deltas. + +Wire format mirrors ``Graph3D`` conventions: big-endian, ``Node3D`` +serialization shared, ``Transform`` is just 7 f8s (translation + +quaternion). Custom binary, dispatched by the ``#nav_msgs.GraphDelta3D`` +channel-name suffix. +""" + +from __future__ import annotations + +from dataclasses import dataclass +import struct +import time +from typing import TYPE_CHECKING, BinaryIO + +from dimos.msgs.geometry_msgs.PoseStamped import PoseStamped +from dimos.msgs.geometry_msgs.Quaternion import Quaternion +from dimos.msgs.geometry_msgs.Vector3 import Vector3 +from dimos.navigation.jnav.msgs.Graph3D import Graph3D +from dimos.types.timestamped import Timestamped + +if TYPE_CHECKING: + from rerun._baseclasses import Archetype + + +class GraphDelta3D(Timestamped): + msg_name = "nav_msgs.GraphDelta3D" + + # Reuse Graph3D's nested Node3D for wire-format consistency. A + # GraphDelta3D[i].node is byte-identical to a Graph3D.nodes[i]. + Node3D = Graph3D.Node3D + + @dataclass + class Transform: + """SE(3) transform — translation + rotation quaternion (xyzw).""" + + translation: Vector3 + rotation: Quaternion + + ts: float + nodes: list[Graph3D.Node3D] + transforms: list[Transform] + + def __init__( + self, + ts: float = 0.0, + nodes: list[Graph3D.Node3D] | None = None, + transforms: list[Transform] | None = None, + ) -> None: + self.ts = ts if ts != 0 else time.time() + self.nodes = nodes if nodes is not None else [] + self.transforms = transforms if transforms is not None else [] + if len(self.nodes) != len(self.transforms): + raise ValueError( + f"nodes ({len(self.nodes)}) and transforms ({len(self.transforms)}) " + "must be the same length — they're aligned arrays" + ) + + def lcm_encode(self) -> bytes: + parts: list[bytes] = [] + parts.append(struct.pack(">Qd", len(self.nodes), self.ts)) + for node in self.nodes: + frame_id_bytes = node.pose.frame_id.encode("utf-8") + parts.append(struct.pack(">d", node.pose.ts)) + parts.append(struct.pack(">I", len(frame_id_bytes))) + parts.append(frame_id_bytes) + parts.append( + struct.pack( + ">7d", + node.pose.position.x, + node.pose.position.y, + node.pose.position.z, + node.pose.orientation.x, + node.pose.orientation.y, + node.pose.orientation.z, + node.pose.orientation.w, + ) + ) + parts.append(struct.pack(">QQ", node.id, node.metadata_id)) + for transform in self.transforms: + parts.append( + struct.pack( + ">7d", + transform.translation.x, + transform.translation.y, + transform.translation.z, + transform.rotation.x, + transform.rotation.y, + transform.rotation.z, + transform.rotation.w, + ) + ) + return b"".join(parts) + + @classmethod + def lcm_decode(cls, data: bytes | BinaryIO) -> GraphDelta3D: + buf = data if isinstance(data, (bytes, bytearray)) else data.read() + offset = 0 + node_count, graph_ts = struct.unpack_from(">Qd", buf, offset) + offset += 16 + + nodes: list[Graph3D.Node3D] = [] + for _ in range(node_count): + (pose_ts,) = struct.unpack_from(">d", buf, offset) + offset += 8 + (frame_id_len,) = struct.unpack_from(">I", buf, offset) + offset += 4 + frame_id = buf[offset : offset + frame_id_len].decode("utf-8") + offset += frame_id_len + px, py, pz, qx, qy, qz, qw = struct.unpack_from(">7d", buf, offset) + offset += 56 + node_id, metadata_id = struct.unpack_from(">QQ", buf, offset) + offset += 16 + pose = PoseStamped( + ts=pose_ts, + frame_id=frame_id, + position=Vector3(px, py, pz), + orientation=Quaternion(qx, qy, qz, qw), + ) + nodes.append(Graph3D.Node3D(pose=pose, id=node_id, metadata_id=metadata_id)) + + transforms: list[GraphDelta3D.Transform] = [] + for _ in range(node_count): + tx, ty, tz, qx, qy, qz, qw = struct.unpack_from(">7d", buf, offset) + offset += 56 + transforms.append( + cls.Transform( + translation=Vector3(tx, ty, tz), + rotation=Quaternion(qx, qy, qz, qw), + ) + ) + + return cls(ts=graph_ts, nodes=nodes, transforms=transforms) + + def to_rerun( + self, + z_offset: float = 0.0, + arrow_scale: float = 1.0, + ) -> Archetype: + """Render each (node, transform) pair as an arrow from node.pose to post_pose. + + The arrow origin is the node's current position; the vector is + the translation component of the transform (scaled by + ``arrow_scale``). Rotation deltas aren't visualized by default — + callers wanting to see those can subclass. + """ + import rerun as rr + + if not self.nodes: + return rr.Arrows3D(origins=[], vectors=[]) + + origins = [] + vectors = [] + for node, transform in zip(self.nodes, self.transforms, strict=True): + origins.append( + [ + node.pose.position.x, + node.pose.position.y, + node.pose.position.z + z_offset, + ] + ) + vectors.append( + [ + transform.translation.x * arrow_scale, + transform.translation.y * arrow_scale, + transform.translation.z * arrow_scale, + ] + ) + return rr.Arrows3D(origins=origins, vectors=vectors) + + def __len__(self) -> int: + return len(self.nodes) + + def __str__(self) -> str: + return f"GraphDelta3D(nodes={len(self.nodes)})" diff --git a/dimos/navigation/jnav/msgs/Landmark.py b/dimos/navigation/jnav/msgs/Landmark.py new file mode 100644 index 0000000000..d299fa3404 --- /dev/null +++ b/dimos/navigation/jnav/msgs/Landmark.py @@ -0,0 +1,200 @@ +# Copyright 2026 Dimensional Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Landmark: a cross-map reference shared between maps by ``id``. + +Two maps that both observe the same ``id`` are bridgeable: MultiMap composes the +relative transform between their frames through the landmark. The pose is stored +relative to the frame it was observed in (a camera frame for an apriltag, an odom +frame for a relocalization event, the nearest cloud's frame for a UI click), NOT +the map root — so the uncertainty stays honest for a future gtsam graph. MultiMap +resolves it to the map root via the map's recorded tf at ``ts``. + +``id`` is URL-like and encodes the exact source so identical tag numbers from +different families/sizes don't false-bridge unrelated maps, e.g. +``apriltag://36h11/40cm/5`` or ``reloc://map0/dim_city``. + +``replacement`` (milliseconds) lets a perceiver publish a *corrective* landmark: +a consumer (e.g. the PGO factor-graph manager) wipes out any earlier landmark of +the same ``id`` observed within ``replacement`` ms before this one's ``ts`` and +keeps this one instead. ``0`` (the default) means "additive" — keep all prior +landmarks, which is what loop closure needs (the first and the revisit sighting +must both survive). A small positive window only supersedes the immediately-prior +noisy estimate of the *same* sighting, never the historical closure observation. + +Confidence is **per-axis**, not a single scalar: ``confidence_x/_y/_z`` (the +observed translation axes) and ``confidence_roll/_pitch/_yaw`` (the rotation +axes), each in ``[0, 1]``. A consumer maps each per-axis confidence to that +axis's measurement weight (variance ~ 1 / confidence), so a landmark can be well +observed on some DOF and weak on others — e.g. a face-on AprilTag pins in-plane +position + yaw tightly but its range and out-of-plane tilt are loose. ``confidence`` +(the scalar) is retained as a coarse overall value; the per-axis fields default +to it when unspecified. +""" + +from __future__ import annotations + +import struct +import time +from typing import BinaryIO + +from dimos.msgs.geometry_msgs.Pose import Pose +from dimos.msgs.geometry_msgs.Quaternion import Quaternion +from dimos.msgs.geometry_msgs.Vector3 import Vector3 +from dimos.types.timestamped import Timestamped + + +class Landmark(Timestamped): + msg_name = "jnav.Landmark" + + ts: float + id: str # URL-like, shared across maps — the bridge key + map_id: str # which map's frame system this is in + frame_id: str # the observation frame the pose is relative to + kind: str # "apriltag" | "aruco" | "reloc" | "ui_click" | "shortcut" + confidence: float # coarse overall confidence; per-axis fields default to it + pose: Pose + replacement: float # ms: wipe same-id landmarks newer than (ts - replacement); 0 = additive + # Per-axis confidence in [0, 1]: how much this landmark constrains each DOF. + confidence_x: float + confidence_y: float + confidence_z: float + confidence_roll: float + confidence_pitch: float + confidence_yaw: float + + def __init__( + self, + id: str = "", + map_id: str = "", + pose: Pose | None = None, + frame_id: str = "", + confidence: float = 0.0, + kind: str = "", + ts: float = 0.0, + replacement: float = 0.0, + confidence_x: float | None = None, + confidence_y: float | None = None, + confidence_z: float | None = None, + confidence_roll: float | None = None, + confidence_pitch: float | None = None, + confidence_yaw: float | None = None, + ) -> None: + self.ts = ts if ts != 0 else time.time() + self.id = id + self.map_id = map_id + self.frame_id = frame_id + self.kind = kind + self.confidence = confidence + self.pose = pose if pose is not None else Pose() + self.replacement = replacement + # Per-axis confidence defaults to the coarse overall confidence. + self.confidence_x = confidence if confidence_x is None else confidence_x + self.confidence_y = confidence if confidence_y is None else confidence_y + self.confidence_z = confidence if confidence_z is None else confidence_z + self.confidence_roll = confidence if confidence_roll is None else confidence_roll + self.confidence_pitch = confidence if confidence_pitch is None else confidence_pitch + self.confidence_yaw = confidence if confidence_yaw is None else confidence_yaw + + def axis_confidence(self) -> tuple[float, float, float, float, float, float]: + """Per-axis confidence as ``(x, y, z, roll, pitch, yaw)``.""" + return ( + self.confidence_x, + self.confidence_y, + self.confidence_z, + self.confidence_roll, + self.confidence_pitch, + self.confidence_yaw, + ) + + def lcm_encode(self) -> bytes: + parts: list[bytes] = [struct.pack(">d", self.ts)] + for text in (self.id, self.map_id, self.frame_id, self.kind): + encoded = text.encode("utf-8") + parts.append(struct.pack(">I", len(encoded))) + parts.append(encoded) + parts.append(struct.pack(">d", self.confidence)) + p = self.pose + parts.append( + struct.pack( + ">7d", + p.position.x, + p.position.y, + p.position.z, + p.orientation.x, + p.orientation.y, + p.orientation.z, + p.orientation.w, + ) + ) + parts.append(struct.pack(">d", self.replacement)) + parts.append( + struct.pack( + ">6d", + self.confidence_x, + self.confidence_y, + self.confidence_z, + self.confidence_roll, + self.confidence_pitch, + self.confidence_yaw, + ) + ) + return b"".join(parts) + + @classmethod + def lcm_decode(cls, data: bytes | BinaryIO) -> Landmark: + buf = data if isinstance(data, (bytes, bytearray)) else data.read() + offset = 0 + (ts,) = struct.unpack_from(">d", buf, offset) + offset += 8 + texts: list[str] = [] + for _ in range(4): + (length,) = struct.unpack_from(">I", buf, offset) + offset += 4 + texts.append(buf[offset : offset + length].decode("utf-8")) + offset += length + id_, map_id, frame_id, kind = texts + (confidence,) = struct.unpack_from(">d", buf, offset) + offset += 8 + px, py, pz, qx, qy, qz, qw = struct.unpack_from(">7d", buf, offset) + offset += 56 + pose = Pose() + pose.position = Vector3(px, py, pz) + pose.orientation = Quaternion(qx, qy, qz, qw) + # Backward-compatible: older encodings omit the trailing replacement field. + replacement = 0.0 + if offset + 8 <= len(buf): + (replacement,) = struct.unpack_from(">d", buf, offset) + offset += 8 + # Backward-compatible: older encodings omit the per-axis confidences; + # they then default to the coarse overall confidence (None -> confidence). + cx = cy = cz = croll = cpitch = cyaw = None + if offset + 48 <= len(buf): + cx, cy, cz, croll, cpitch, cyaw = struct.unpack_from(">6d", buf, offset) + return cls( + id=id_, + map_id=map_id, + pose=pose, + frame_id=frame_id, + confidence=confidence, + kind=kind, + ts=ts, + replacement=replacement, + confidence_x=cx, + confidence_y=cy, + confidence_z=cz, + confidence_roll=croll, + confidence_pitch=cpitch, + confidence_yaw=cyaw, + ) diff --git a/dimos/navigation/jnav/msgs/Marker.py b/dimos/navigation/jnav/msgs/Marker.py new file mode 100644 index 0000000000..a7ed3fef98 --- /dev/null +++ b/dimos/navigation/jnav/msgs/Marker.py @@ -0,0 +1,97 @@ +# Copyright 2026 Dimensional Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Marker: a named navigation marker — a pose in a frame, a marker name, and an +optional map name. + +Drives the objective handler's ``goal_marker`` (navigate to a named marker) and +``save_marker`` (persist the current/given pose under a name) streams, so callers +can reference waypoints by name (e.g. dim_city's ``test_waypoint_*``) and scope +them to a map, instead of passing a bare Point/Pose with no identity. +""" + +from __future__ import annotations + +import struct +import time +from typing import BinaryIO + +from dimos.msgs.geometry_msgs.Pose import Pose +from dimos.msgs.geometry_msgs.Quaternion import Quaternion +from dimos.msgs.geometry_msgs.Vector3 import Vector3 +from dimos.types.timestamped import Timestamped + + +class Marker(Timestamped): + msg_name = "jnav.Marker" + + ts: float + frame_id: str + pose: Pose + marker: str # marker name (identity) + map: str # optional map name this marker belongs to ("" = unspecified) + + def __init__( + self, + pose: Pose | None = None, + marker: str = "", + map: str = "", + ts: float = 0.0, + frame_id: str = "map", + ) -> None: + self.ts = ts if ts != 0 else time.time() + self.frame_id = frame_id + self.pose = pose if pose is not None else Pose() + self.marker = marker + self.map = map + + def lcm_encode(self) -> bytes: + parts: list[bytes] = [struct.pack(">d", self.ts)] + for text in (self.frame_id, self.marker, self.map): + encoded = text.encode("utf-8") + parts.append(struct.pack(">I", len(encoded))) + parts.append(encoded) + p = self.pose + parts.append( + struct.pack( + ">7d", + p.position.x, + p.position.y, + p.position.z, + p.orientation.x, + p.orientation.y, + p.orientation.z, + p.orientation.w, + ) + ) + return b"".join(parts) + + @classmethod + def lcm_decode(cls, data: bytes | BinaryIO) -> Marker: + buf = data if isinstance(data, (bytes, bytearray)) else data.read() + offset = 0 + (ts,) = struct.unpack_from(">d", buf, offset) + offset += 8 + texts: list[str] = [] + for _ in range(3): + (length,) = struct.unpack_from(">I", buf, offset) + offset += 4 + texts.append(buf[offset : offset + length].decode("utf-8")) + offset += length + frame_id, marker, map_name = texts + px, py, pz, qx, qy, qz, qw = struct.unpack_from(">7d", buf, offset) + pose = Pose() + pose.position = Vector3(px, py, pz) + pose.orientation = Quaternion(qx, qy, qz, qw) + return cls(pose=pose, marker=marker, map=map_name, ts=ts, frame_id=frame_id) diff --git a/dimos/navigation/jnav/msgs/test_Landmark.py b/dimos/navigation/jnav/msgs/test_Landmark.py new file mode 100644 index 0000000000..1df849151a --- /dev/null +++ b/dimos/navigation/jnav/msgs/test_Landmark.py @@ -0,0 +1,129 @@ +# Copyright 2026 Dimensional Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Roundtrip + field tests for the jnav Landmark message.""" + +from __future__ import annotations + +from dimos.memory2.codecs.base import codec_for +from dimos.memory2.codecs.lcm import LcmCodec +from dimos.msgs.geometry_msgs.Pose import Pose +from dimos.navigation.jnav.msgs.Landmark import Landmark + + +def _pose(x: float, y: float, z: float) -> Pose: + pose = Pose() + pose.position.x, pose.position.y, pose.position.z = x, y, z + return pose + + +def test_landmark_roundtrip_preserves_all_fields() -> None: + landmark = Landmark( + id="apriltag://36h11/40cm/5", + map_id="dim_city", + pose=_pose(1.5, -2.0, 0.3), + frame_id="camera_optical", + confidence=0.82, + kind="apriltag", + ts=1781565207.5, + ) + decoded = Landmark.lcm_decode(landmark.lcm_encode()) + + assert decoded.id == "apriltag://36h11/40cm/5" + assert decoded.map_id == "dim_city" + assert decoded.frame_id == "camera_optical" + assert decoded.kind == "apriltag" + assert decoded.confidence == 0.82 + assert decoded.ts == landmark.ts + assert decoded.pose.position.x == 1.5 + assert decoded.pose.position.y == -2.0 + assert decoded.pose.position.z == 0.3 + + +def test_landmark_defaults() -> None: + landmark = Landmark() + assert landmark.id == "" + assert landmark.map_id == "" + assert landmark.frame_id == "" + assert landmark.kind == "" + assert landmark.confidence == 0.0 + assert landmark.replacement == 0.0 # additive by default + assert landmark.ts > 0 # auto-stamped + + decoded = Landmark.lcm_decode(landmark.lcm_encode()) + assert decoded.id == "" and decoded.map_id == "" and decoded.kind == "" + assert decoded.replacement == 0.0 + + +def test_landmark_replacement_roundtrips() -> None: + landmark = Landmark( + id="apriltag://36h11/40cm/13", + pose=_pose(0.1, 0.2, 0.5), + kind="apriltag", + ts=1781565207.5, + replacement=2500.0, # ms: corrective landmark supersedes recent same-id ones + ) + decoded = Landmark.lcm_decode(landmark.lcm_encode()) + assert decoded.replacement == 2500.0 + + +def test_landmark_decode_tolerates_legacy_bytes_without_replacement() -> None: + """Bytes written before the replacement + per-axis fields decode to defaults.""" + landmark = Landmark(id="x", pose=_pose(0.0, 0.0, 0.0), confidence=0.7, replacement=999.0) + # Strip the trailing replacement double + the 6 per-axis confidence doubles. + legacy = landmark.lcm_encode()[: -(8 + 48)] + decoded = Landmark.lcm_decode(legacy) + assert decoded.id == "x" + assert decoded.replacement == 0.0 + # Per-axis confidences fall back to the coarse overall confidence. + assert decoded.axis_confidence() == (0.7, 0.7, 0.7, 0.7, 0.7, 0.7) + + +def test_per_axis_confidence_defaults_to_overall() -> None: + lm = Landmark(id="t", confidence=0.6) + assert lm.axis_confidence() == (0.6, 0.6, 0.6, 0.6, 0.6, 0.6) + + +def test_per_axis_confidence_roundtrips() -> None: + lm = Landmark( + id="apriltag://36h11/40cm/13", + pose=_pose(0.1, 0.2, 0.5), + kind="apriltag", + confidence=0.5, + confidence_x=0.9, + confidence_y=0.8, + confidence_z=0.7, + confidence_roll=0.2, + confidence_pitch=0.1, + confidence_yaw=0.95, + ) + d = Landmark.lcm_decode(lm.lcm_encode()) + assert d.axis_confidence() == (0.9, 0.8, 0.7, 0.2, 0.1, 0.95) + assert d.confidence == 0.5 # coarse overall preserved independently + + +def test_per_axis_confidence_can_be_silent_on_some_dof() -> None: + """A landmark can be confident on some DOF and zero on others.""" + lm = Landmark(id="t", pose=_pose(0.0, 0.0, 0.0), confidence_z=0.9, confidence_x=0.0) + d = Landmark.lcm_decode(lm.lcm_encode()) + assert d.confidence_z == 0.9 and d.confidence_x == 0.0 + + +def test_landmark_uses_lcm_codec_in_memory2() -> None: + codec = codec_for(Landmark) + assert isinstance(codec, LcmCodec) + landmark = Landmark(id="reloc://map0/dim_city", map_id="map0", confidence=0.6, kind="reloc") + decoded = codec.decode(codec.encode(landmark)) + assert decoded.id == "reloc://map0/dim_city" + assert decoded.confidence == 0.6 diff --git a/dimos/navigation/jnav/msgs/test_Marker.py b/dimos/navigation/jnav/msgs/test_Marker.py new file mode 100644 index 0000000000..61394999d7 --- /dev/null +++ b/dimos/navigation/jnav/msgs/test_Marker.py @@ -0,0 +1,67 @@ +# Copyright 2026 Dimensional Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Copyright 2026 Dimensional Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. + +"""Roundtrip + field tests for the jnav Marker message.""" + +from __future__ import annotations + +from dimos.msgs.geometry_msgs.Pose import Pose +from dimos.navigation.jnav.msgs.Marker import Marker + + +def _pose(x: float, y: float, z: float) -> Pose: + pose = Pose() + pose.position.x, pose.position.y, pose.position.z = x, y, z + return pose + + +def test_marker_roundtrip_preserves_all_fields() -> None: + marker = Marker( + pose=_pose(-52.93, -55.95, 0.4), + marker="test_waypoint_2", + map="dim_city", + ts=1781565207.5, + frame_id="map", + ) + decoded = Marker.lcm_decode(marker.lcm_encode()) + + assert decoded.marker == "test_waypoint_2" + assert decoded.map == "dim_city" + assert decoded.frame_id == "map" + assert decoded.ts == marker.ts + assert decoded.pose.position.x == -52.93 + assert decoded.pose.position.y == -55.95 + assert decoded.pose.position.z == 0.4 + + +def test_marker_defaults() -> None: + marker = Marker() + assert marker.marker == "" + assert marker.map == "" + assert marker.frame_id == "map" + assert marker.ts > 0 # auto-stamped + # empty optional strings survive the roundtrip + decoded = Marker.lcm_decode(marker.lcm_encode()) + assert decoded.marker == "" and decoded.map == "" + + +def test_marker_unicode_name() -> None: + decoded = Marker.lcm_decode(Marker(marker="café_door", map="map_α").lcm_encode()) # noqa: RUF001 + assert decoded.marker == "café_door" + assert decoded.map == "map_α" # noqa: RUF001 diff --git a/dimos/navigation/jnav/utils/apriltag_agreement.py b/dimos/navigation/jnav/utils/apriltag_agreement.py new file mode 100644 index 0000000000..bab26e2305 --- /dev/null +++ b/dimos/navigation/jnav/utils/apriltag_agreement.py @@ -0,0 +1,156 @@ +# Copyright 2026 Dimensional Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""April-tag agreement: a drift-quality metric for a corrected trajectory. + +A fixed April tag observed many times along a trajectory should map to ONE world +position. Odometry drift scatters those estimates (the same tag lands in a +different spot each lap); a good loop-closure / PGO correction pulls them back +together. So the spread of a tag's repeated world-position estimates is a +ground-truth-free measure of trajectory consistency — and the *drop* in spread +from raw odometry to PGO-corrected odometry is how much PGO helped. + +Pure functions over plain arrays; no PGO, sim, or I/O here (the benchmark harness +in gsc_pgo/pgo_apriltag_benchmark.py feeds these from real hk_village data). +""" + +from __future__ import annotations + +from dataclasses import dataclass + +import numpy as np + +from dimos.navigation.jnav.utils.trajectory_metrics import PoseLookup + +# sightings further apart than this are treated as separate visits +VISIT_GAP_S = 20.0 + + +@dataclass(frozen=True) +class TagAgreement: + """Per-tag spread of repeated world-position estimates (metres).""" + + tag_id: int + observations: int + spread: float # RMS distance of estimates to their centroid + + +@dataclass(frozen=True) +class AgreementReport: + """Agreement across all multiply-observed tags.""" + + per_tag: tuple[TagAgreement, ...] + mean_spread: float # mean per-tag spread (metres); lower = better agreement + total_observations: int + + @property + def tag_count(self) -> int: + return len(self.per_tag) + + +def tag_spread(positions: np.ndarray) -> float: + """RMS distance of a tag's world-position estimates to their centroid.""" + if len(positions) < 2: + return 0.0 + centroid = positions.mean(axis=0) + return float(np.sqrt(np.mean(np.sum((positions - centroid) ** 2, axis=1)))) + + +def agreement_report( + tag_positions: dict[int, np.ndarray], *, min_observations: int = 2 +) -> AgreementReport: + """Score per-tag agreement from ``tag_id -> (N, 3) world positions``. + + Tags seen fewer than ``min_observations`` times carry no agreement signal and + are excluded from ``mean_spread``. + """ + per_tag: list[TagAgreement] = [] + total = 0 + for tag_id in sorted(tag_positions): + positions = np.asarray(tag_positions[tag_id], dtype=np.float64).reshape(-1, 3) + total += len(positions) + if len(positions) < min_observations: + continue + per_tag.append(TagAgreement(tag_id, len(positions), tag_spread(positions))) + mean_spread = float(np.mean([t.spread for t in per_tag])) if per_tag else 0.0 + return AgreementReport(tuple(per_tag), mean_spread, total) + + +def agreement_improvement(raw: AgreementReport, corrected: AgreementReport) -> float: + """Fractional drop in mean spread from ``raw`` to ``corrected`` (1.0 = perfect). + + Positive means the correction tightened tag agreement; negative means it made + it worse. Returns 0.0 if there's no raw spread to improve on. + """ + if raw.mean_spread <= 0.0: + return 0.0 + return (raw.mean_spread - corrected.mean_spread) / raw.mean_spread + + +def tag_world_positions( + sightings: dict[int, list[float]], pose_lookup: PoseLookup +) -> dict[int, np.ndarray]: + """Map each tag's sighting times to robot world positions (the proxy estimate).""" + positions: dict[int, np.ndarray] = {} + for tag_id, times in sightings.items(): + located = [p for p in (pose_lookup(t) for t in times) if p is not None] + if located: + positions[tag_id] = np.vstack(located) + return positions + + +def split_visits(times: list[float], *, gap_s: float) -> list[list[float]]: + """Cluster sighting timestamps into visits separated by gaps > ``gap_s``.""" + visits: list[list[float]] = [] + for timestamp in sorted(times): + if visits and timestamp - visits[-1][-1] <= gap_s: + visits[-1].append(timestamp) + else: + visits.append([timestamp]) + return visits + + +def paired_tag_visit_positions( + sightings: dict[int, list[float]], + raw_lookup: PoseLookup, + corrected_lookup: PoseLookup, + *, + gap_s: float, +) -> tuple[dict[int, np.ndarray], dict[int, np.ndarray]]: + """One robot position per tag VISIT under both pose sources, visits paired. + + Outdoors a tag stays visible while the robot walks tens of metres, so + per-sighting spread is dominated by viewing distance, not drift. Visits are + clustered on timestamps only, and a visit is kept only when BOTH pose + sources can place it — so raw and corrected reports always score the exact + same visit set, and a visit outside the pose graph's coverage drops out + instead of skewing one side. + """ + raw_positions: dict[int, np.ndarray] = {} + corrected_positions: dict[int, np.ndarray] = {} + for tag_id, times in sightings.items(): + raw_medians: list[np.ndarray] = [] + corrected_medians: list[np.ndarray] = [] + for visit_times in split_visits(times, gap_s=gap_s): + raw_located = [p for p in (raw_lookup(t) for t in visit_times) if p is not None] + corrected_located = [ + p for p in (corrected_lookup(t) for t in visit_times) if p is not None + ] + if raw_located and corrected_located: + raw_medians.append(np.median(np.vstack(raw_located), axis=0)) + corrected_medians.append(np.median(np.vstack(corrected_located), axis=0)) + if raw_medians: + raw_positions[tag_id] = np.vstack(raw_medians) + corrected_positions[tag_id] = np.vstack(corrected_medians) + return raw_positions, corrected_positions diff --git a/dimos/navigation/jnav/utils/apriltags.py b/dimos/navigation/jnav/utils/apriltags.py new file mode 100644 index 0000000000..347065af38 --- /dev/null +++ b/dimos/navigation/jnav/utils/apriltags.py @@ -0,0 +1,914 @@ +# Copyright 2026 Dimensional Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""AprilTag detection over a mem2.db color stream. + +Detects tags in the `color_image` stream and rejects bad glimpses through several +independent gates — motion blur (per-tag sharpness), PnP misfit (reprojection +error), too-small / too-far / too-oblique views, and fast camera motion — then +clusters same-id detections in time and reduces each cluster to one robust pose +via a Huber-weighted refinement seeded at the cluster medoid. (Re)writes the +`april_tags` PoseStamped stream of tag-in-camera relative poses (solvePnP, +marker_id in each observation's tags) and returns those representatives for the +GTSAM solve. +""" + +from __future__ import annotations + +from collections import defaultdict +from collections.abc import Callable, Iterable +from copy import copy +from dataclasses import dataclass, field +from itertools import pairwise +import json +import math +from pathlib import Path +import time +from typing import Any + +import cv2 +import numpy as np +from scipy.spatial.transform import Rotation + +from dimos.msgs.geometry_msgs.PoseStamped import PoseStamped +from dimos.msgs.sensor_msgs.Image import Image + +DEFAULT_MAX_DISTANCE_M = 1.0 +DEFAULT_MAX_VIEW_ANGLE_DEG = 45.0 +DEFAULT_CLUSTER_GAP_SEC = 5.0 +DEFAULT_ROTATION_WEIGHT_M_PER_RAD = 0.5 +# A blurry tag still solves a pose; these reject the bad glimpses up front. +DEFAULT_MIN_SHARPNESS = 60.0 # Laplacian variance over the tag ROI +DEFAULT_MAX_REPROJ_PX = 2.0 # RMS solvePnP corner reprojection error +DEFAULT_MIN_TAG_PX = 24.0 # tag side length in pixels (sqrt of quad area) +DEFAULT_MAX_LINEAR_SPEED_MPS = 0.5 +DEFAULT_MAX_ANGULAR_SPEED_DPS = 50.0 +DEFAULT_MIN_OBSERVATIONS = 3 # clusters thinner than this are unreliable +DEFAULT_HUBER_DELTA_M = 0.05 # residual past which a sample is down-weighted +_HUBER_ITERATIONS = 5 + +# One tag observation: ts, marker_id, t_cam_marker (xyz + xyzw quat), and the +# per-glimpse quality fields (sharpness, reproj_px, tag_px, speed). +Detection = dict[str, Any] + + +def make_detector(dictionary_name: str) -> cv2.aruco.ArucoDetector: + d = cv2.aruco.getPredefinedDictionary(getattr(cv2.aruco, dictionary_name)) + return cv2.aruco.ArucoDetector(d, cv2.aruco.DetectorParameters()) + + +def _object_points(marker_length_m: float) -> np.ndarray: + h = marker_length_m / 2.0 + return np.array([[-h, h, 0.0], [h, h, 0.0], [h, -h, 0.0], [-h, -h, 0.0]], dtype=np.float32) + + +def estimate_marker_pose( + corners_pixels: np.ndarray, + marker_length_m: float, + intrinsics: np.ndarray, + distortion: np.ndarray, +) -> tuple[np.ndarray, np.ndarray] | None: + """solvePnP a single tag -> (rotation_vector, translation_vector) in the + camera_optical frame, or None if it failed.""" + image_corners = corners_pixels.reshape(4, 1, 2).astype(np.float32) + found, rotation_vector, translation_vector = cv2.solvePnP( + _object_points(marker_length_m), + image_corners, + intrinsics, + distortion, + flags=cv2.SOLVEPNP_IPPE_SQUARE, + ) + return (rotation_vector, translation_vector) if found else None + + +def view_quality(t_cam_marker: list[float]) -> tuple[float, float]: + """(distance_m, view_angle_deg) for a tag pose in the camera optical frame. + + distance is the camera->tag range; view_angle is the angle between the line + of sight and the tag's surface normal (0 = perfectly head-on).""" + translation = np.array(t_cam_marker[:3], dtype=np.float64) + distance = float(np.linalg.norm(translation)) + normal = Rotation.from_quat(t_cam_marker[3:7]).as_matrix()[:, 2] + line_of_sight = translation / (distance + 1e-9) + cos_angle = abs(float(np.dot(line_of_sight, normal))) + view_angle = math.degrees(math.acos(min(1.0, cos_angle))) + return distance, view_angle + + +def cluster_by_time(detections: list[Detection], gap_sec: float) -> list[list[Detection]]: + """Group same-marker detections into clusters. A new cluster begins whenever + the time gap to the previous same-marker detection exceeds gap_sec.""" + clusters: list[list[Detection]] = [] + by_marker: dict[int, list[Detection]] = defaultdict(list) + for detection in detections: + by_marker[detection["marker_id"]].append(detection) + for marker_detections in by_marker.values(): + marker_detections.sort(key=lambda detection: detection["ts"]) + current = [marker_detections[0]] + for detection in marker_detections[1:]: + if detection["ts"] - current[-1]["ts"] > gap_sec: + clusters.append(current) + current = [detection] + else: + current.append(detection) + clusters.append(current) + return clusters + + +def _pose_distance(a: list[float], b: list[float], rotation_weight_m_per_rad: float) -> float: + translation = float(np.linalg.norm(np.array(a[:3]) - np.array(b[:3]))) + rotation = 2.0 * math.acos(min(1.0, abs(float(np.dot(a[3:7], b[3:7]))))) + return translation + rotation_weight_m_per_rad * rotation + + +def cluster_medoid(cluster: list[Detection], rotation_weight_m_per_rad: float) -> Detection: + """The detection whose pose is most central (min total spatial+rotational + distance to the rest) — a robust representative of the cluster.""" + poses = [detection["t_cam_marker"] for detection in cluster] + best_index, best_cost = 0, float("inf") + for i in range(len(poses)): + cost = sum( + _pose_distance(poses[i], poses[j], rotation_weight_m_per_rad) + for j in range(len(poses)) + if j != i + ) + if cost < best_cost: + best_cost, best_index = cost, i + return cluster[best_index] + + +def reprojection_error_px( + corners_pixels: np.ndarray, + rotation_vector: np.ndarray, + translation_vector: np.ndarray, + marker_length_m: float, + intrinsics: np.ndarray, + distortion: np.ndarray, +) -> float: + """RMS pixel distance between detected corners and the solvePnP pose reprojected + back onto the image — a direct measure of how well the pose explains the tag.""" + projected, _ = cv2.projectPoints( + _object_points(marker_length_m), rotation_vector, translation_vector, intrinsics, distortion + ) + measured = corners_pixels.reshape(4, 2).astype(np.float64) + diff = projected.reshape(4, 2) - measured + return float(np.sqrt(np.mean(np.sum(diff * diff, axis=1)))) + + +def tag_pixel_size(corners_pixels: np.ndarray) -> float: + """Tag side length in pixels (sqrt of the quad's image area); small = unreliable.""" + quad = corners_pixels.reshape(4, 2).astype(np.float32) + return float(math.sqrt(abs(cv2.contourArea(quad)))) + + +def tag_sharpness(gray: np.ndarray, corners_pixels: np.ndarray) -> float: + """Laplacian variance over the tag's bounding box — low under motion blur.""" + quad = corners_pixels.reshape(4, 2) + x_min, y_min = np.floor(quad.min(0)).astype(int) + x_max, y_max = np.ceil(quad.max(0)).astype(int) + height, width = gray.shape[:2] + x_min, y_min = max(int(x_min), 0), max(int(y_min), 0) + x_max, y_max = min(int(x_max), width), min(int(y_max), height) + if x_max - x_min < 2 or y_max - y_min < 2: + return 0.0 + return float(cv2.Laplacian(gray[y_min:y_max, x_min:x_max], cv2.CV_64F).var()) + + +def _pose_xyz_quat(pose: Any) -> np.ndarray | None: + """Best-effort (x,y,z,qx,qy,qz,qw) from an observation pose (tuple/list or a msg + with .position/.orientation); None if it isn't a usable pose.""" + if pose is None: + return None + if hasattr(pose, "position") and hasattr(pose, "orientation"): + position, orientation = pose.position, pose.orientation + return np.array( + [ + position.x, + position.y, + position.z, + orientation.x, + orientation.y, + orientation.z, + orientation.w, + ] + ) + try: + values = [float(component) for component in pose] + except TypeError: + return None + return np.array(values[:7]) if len(values) >= 7 else None + + +def _camera_speeds(images: list[Any]) -> tuple[dict[float, tuple[float, float]], bool]: + """Per-image (linear m/s, angular deg/s) from consecutive posed frames. The second + return is False when too few frames carry poses to estimate motion (gate disabled).""" + posed = [ + (float(obs.ts), pose) + for obs in images + if (pose := _pose_xyz_quat(getattr(obs, "pose", None))) is not None + ] + posed.sort(key=lambda item: item[0]) + speeds: dict[float, tuple[float, float]] = {} + for (timestamp_a, pose_a), (timestamp_b, pose_b) in pairwise(posed): + dt = timestamp_b - timestamp_a + if dt <= 0: + continue + linear = float(np.linalg.norm(pose_b[:3] - pose_a[:3])) / dt + cos_half = min(1.0, abs(float(np.dot(pose_a[3:7], pose_b[3:7])))) + angular = math.degrees(2.0 * math.acos(cos_half)) / dt + speeds[timestamp_b] = (linear, angular) + return speeds, len(posed) >= 2 + + +def _huber_weights(residuals: np.ndarray, delta: float) -> np.ndarray: + """IRLS Huber weights: 1 inside `delta`, decaying as delta/r past it.""" + weights = np.ones_like(residuals) + outside = residuals > delta + weights[outside] = delta / residuals[outside] + return weights + + +def robust_cluster_pose( + cluster: list[Detection], rotation_weight_m_per_rad: float, huber_delta_m: float +) -> Detection: + """Cluster representative: the medoid, then refined by Huber-weighted IRLS — a + weighted-mean translation and weighted quaternion mean (Markley eigen method), + re-weighting each iteration so a lingering bad glimpse keeps losing influence.""" + medoid = cluster_medoid(cluster, rotation_weight_m_per_rad) + if len(cluster) < 2: + return medoid + poses = np.array([detection["t_cam_marker"] for detection in cluster], dtype=np.float64) + translations, quaternions = poses[:, :3], poses[:, 3:7] + reference = np.array(medoid["t_cam_marker"][3:7]) + signs = np.sign(quaternions @ reference) + signs[signs == 0] = 1.0 + quaternions = quaternions * signs[:, None] + estimate_translation = np.array(medoid["t_cam_marker"][:3]) + estimate_quaternion = reference.copy() + delta_rad = huber_delta_m / max(rotation_weight_m_per_rad, 1e-9) + for _ in range(_HUBER_ITERATIONS): + weights_t = _huber_weights( + np.linalg.norm(translations - estimate_translation, axis=1), huber_delta_m + ) + estimate_translation = (weights_t[:, None] * translations).sum(0) / weights_t.sum() + angular_residual = 2.0 * np.arccos( + np.clip(np.abs(quaternions @ estimate_quaternion), 0.0, 1.0) + ) + weights_r = _huber_weights(angular_residual, delta_rad) + scatter = ( + weights_r[:, None, None] * np.einsum("ni,nj->nij", quaternions, quaternions) + ).sum(0) + estimate_quaternion = np.linalg.eigh(scatter)[1][:, -1] + if estimate_quaternion @ reference < 0: + estimate_quaternion = -estimate_quaternion + return { + **medoid, + "t_cam_marker": [*estimate_translation.tolist(), *estimate_quaternion.tolist()], + } + + +def detect_apriltags( + store: Any, + intrinsics: np.ndarray, + distortion: np.ndarray, + image_stream: str = "color_image", + stream_name: str = "april_tags", + marker_length: float = 0.10, + dictionary: str = "DICT_APRILTAG_36h11", + *, + max_distance_m: float = DEFAULT_MAX_DISTANCE_M, + max_view_angle_deg: float = DEFAULT_MAX_VIEW_ANGLE_DEG, + cluster_gap_sec: float = DEFAULT_CLUSTER_GAP_SEC, + rotation_weight_m_per_rad: float = DEFAULT_ROTATION_WEIGHT_M_PER_RAD, + min_sharpness: float = DEFAULT_MIN_SHARPNESS, + max_reproj_px: float = DEFAULT_MAX_REPROJ_PX, + min_tag_px: float = DEFAULT_MIN_TAG_PX, + max_linear_speed_mps: float = DEFAULT_MAX_LINEAR_SPEED_MPS, + max_angular_speed_dps: float = DEFAULT_MAX_ANGULAR_SPEED_DPS, + min_observations: int = DEFAULT_MIN_OBSERVATIONS, + huber_delta_m: float = DEFAULT_HUBER_DELTA_M, +) -> list[Detection]: + """Detect tags in `image_stream`, reject bad glimpses (blur, PnP misfit, small/ + far/oblique views, fast motion), cluster same-id detections by time, drop thin + clusters, and (re)write the `april_tags` stream from one Huber-refined medoid + representative per cluster. Returns that list of representatives.""" + detector = make_detector(dictionary) + raw_detections: list[Detection] = [] + images = store.stream(image_stream, Image).to_list() + speed_by_ts, speed_available = _camera_speeds(images) + for image_obs in images: + image = image_obs.data + bgr = image.numpy() if hasattr(image, "numpy") else np.asarray(image.data) + gray = cv2.cvtColor(bgr, cv2.COLOR_BGR2GRAY) if bgr.ndim == 3 else bgr + all_corners, marker_ids, _ = detector.detectMarkers(bgr) + if marker_ids is None: + continue + for corners, marker_id in zip(all_corners, marker_ids.flatten(), strict=False): + pose = estimate_marker_pose(corners, marker_length, intrinsics, distortion) + if pose is None: + continue + rotation_vector, translation_vector = pose + quaternion = Rotation.from_rotvec(rotation_vector.reshape(3)).as_quat() # x,y,z,w + translation = translation_vector.reshape(3) + tag_in_camera = [ + float(translation[0]), + float(translation[1]), + float(translation[2]), + float(quaternion[0]), + float(quaternion[1]), + float(quaternion[2]), + float(quaternion[3]), + ] + raw_detections.append( + { + "ts": float(image_obs.ts), + "marker_id": int(marker_id), + "t_cam_marker": tag_in_camera, + "sharpness": tag_sharpness(gray, corners), + "reproj_px": reprojection_error_px( + corners, + rotation_vector, + translation_vector, + marker_length, + intrinsics, + distortion, + ), + "tag_px": tag_pixel_size(corners), + "speed": speed_by_ts.get(float(image_obs.ts)), + } + ) + + # Per-glimpse gates; count rejections per reason so thresholds are tunable. + rejected: dict[str, int] = defaultdict(int) + kept: list[Detection] = [] + for detection in raw_detections: + if detection["sharpness"] < min_sharpness: + rejected["blur"] += 1 + continue + if detection["reproj_px"] > max_reproj_px: + rejected["reproj"] += 1 + continue + if detection["tag_px"] < min_tag_px: + rejected["small"] += 1 + continue + distance, view_angle = view_quality(detection["t_cam_marker"]) + if distance > max_distance_m: + rejected["far"] += 1 + continue + if view_angle > max_view_angle_deg: + rejected["oblique"] += 1 + continue + speed = detection["speed"] + if speed is not None and ( + speed[0] > max_linear_speed_mps or speed[1] > max_angular_speed_dps + ): + rejected["motion"] += 1 + continue + kept.append(detection) + + # One Huber-refined representative per time-clustered group; drop thin clusters. + detections: list[Detection] = [] + thin_clusters = 0 + for cluster in cluster_by_time(kept, cluster_gap_sec): + if len(cluster) < min_observations: + thin_clusters += 1 + continue + detections.append( + { + **robust_cluster_pose(cluster, rotation_weight_m_per_rad, huber_delta_m), + "n_observations": len(cluster), + } + ) + detections.sort(key=lambda detection: detection["ts"]) + + if stream_name in store.list_streams(): + store.delete_stream(stream_name) + april_tag_stream = store.stream(stream_name, PoseStamped) + for detection in detections: + tag_in_camera = detection["t_cam_marker"] + pose_stamped = PoseStamped( + ts=detection["ts"], position=tag_in_camera[:3], orientation=tag_in_camera[3:] + ) + april_tag_stream.append( + pose_stamped, + ts=detection["ts"], + pose=tuple(tag_in_camera), + tags={"marker_id": detection["marker_id"]}, + ) + + found_ids = sorted({detection["marker_id"] for detection in detections}) + gate_summary = ", ".join(f"{reason}={count}" for reason, count in sorted(rejected.items())) + if not speed_available: + gate_summary += (", " if gate_summary else "") + "motion-gate-off(no poses)" + print( + f" april_tags: {len(raw_detections)} raw -> {len(kept)} in-spec " + f"-> {len(detections)} clusters (dropped {thin_clusters} thin), " + f"markers {found_ids} (over {len(images)} images)" + ) + if gate_summary: + print(f" april_tags rejected: {gate_summary}") + return detections + + +# Streaming / incremental API + +_DEFAULT_MARKER_LENGTH = 0.10 +_DEFAULT_DICTIONARY = "DICT_APRILTAG_36h11" + + +@dataclass +class TagInfo: + tag_number: float + confidence: float + pose: list[float] # [x, y, z, qx, qy, qz, qw] relative to camera + ts: float + camera_frame_id: str = "" + + +@dataclass +class GroupingOptions: + tag_ids_to_ignore: list[float] = field(default_factory=list) + time_window: float = DEFAULT_CLUSTER_GAP_SEC + max_distance: float = DEFAULT_MAX_DISTANCE_M + min_confidence: float = 0.0 + + +class AprilTagger: + def __init__( + self, + camera_intrinsics: Any, + grouping_options: Any, + tags_ids_to_ignore: list[float], + ) -> None: + self.camera_intrinsics = camera_intrinsics + self.grouping_options = grouping_options + self.tags_ids_to_ignore = tags_ids_to_ignore + + def get_tag(self, img: Any) -> list[TagInfo]: + return pure_get_tags(img, self.camera_intrinsics) + + def iter_april_tag_groups( + self, + next_observation: Any, + options: Any = None, + state: dict[str, Any] | None = None, + ) -> tuple[list[TagInfo], dict[float, TagInfo], dict[float, TagInfo]]: + if state is None: + state = {} + if options is None: + options = self.grouping_options + recent_tags, tag_estimates, finished_tags = pure_iter_april_tag_groups( + next_observation, + self.camera_intrinsics, + options, + state.get("recent_tags", []), + state.get("tag_estimates", {}), + ) + state["recent_tags"] = recent_tags + state["tag_estimates"] = tag_estimates + return recent_tags, tag_estimates, finished_tags + + +def pure_get_tags(img: Any, camera_intrinsics: Any) -> list[TagInfo]: + """Detect AprilTags in an image and return relative poses via solvePnP. + + img: ndarray or an Image msg (frame_id/ts are carried onto each TagInfo). + camera_intrinsics: dict with "intrinsics" (3x3 ndarray), and optionally + "distortion", "marker_length", "dictionary". + """ + intrinsics = camera_intrinsics["intrinsics"] + distortion = camera_intrinsics.get("distortion", np.zeros(5)) + marker_length = camera_intrinsics.get("marker_length", _DEFAULT_MARKER_LENGTH) + dictionary = camera_intrinsics.get("dictionary", _DEFAULT_DICTIONARY) + + camera_frame_id = getattr(img, "frame_id", "") + ts = getattr(img, "ts", None) + if ts is None: + ts = time.time() + + if isinstance(img, np.ndarray): + bgr = img + elif hasattr(img, "numpy"): + bgr = img.numpy() + elif hasattr(img, "data"): + bgr = np.asarray(img.data) + else: + bgr = np.asarray(img) + if bgr.ndim == 2: + bgr = cv2.cvtColor(bgr, cv2.COLOR_GRAY2BGR) + + detector = make_detector(dictionary) + all_corners, marker_ids, _ = detector.detectMarkers(bgr) + if marker_ids is None: + return [] + + tags: list[TagInfo] = [] + for corners, marker_id in zip(all_corners, marker_ids.flatten(), strict=False): + pose_result = estimate_marker_pose(corners, marker_length, intrinsics, distortion) + if pose_result is None: + continue + rotation_vector, translation_vector = pose_result + quaternion = Rotation.from_rotvec(rotation_vector.reshape(3)).as_quat() + translation = translation_vector.reshape(3) + pose = [ + float(translation[0]), + float(translation[1]), + float(translation[2]), + float(quaternion[0]), + float(quaternion[1]), + float(quaternion[2]), + float(quaternion[3]), + ] + reproj = reprojection_error_px( + corners, + rotation_vector, + translation_vector, + marker_length, + intrinsics, + distortion, + ) + confidence = max(0.0, 1.0 - reproj / DEFAULT_MAX_REPROJ_PX) + tags.append( + TagInfo( + tag_number=float(marker_id), + confidence=confidence, + pose=pose, + ts=float(ts), + camera_frame_id=camera_frame_id, + ) + ) + return tags + + +def pure_iter_april_tag_groups( + next_observation: Any, + camera_intrinsics: Any, + options: Any, + recent_tags: list[TagInfo], + tag_estimates: dict[float, TagInfo], +) -> tuple[list[TagInfo], dict[float, TagInfo], dict[float, TagInfo]]: + """Returns (recent_tags, tag_estimates, finished_tags). + + finished_tags holds estimates whose tag id aged out of the time window this + iteration — no fresher observation can revise them, so they are final.""" + tag_estimates = copy(tag_estimates) + recent_tags = list(recent_tags) + + tags = pure_get_tags(next_observation.color_image, camera_intrinsics) + recent_tags.extend(tags) + + tag_ids_to_ignore = set(getattr(options, "tag_ids_to_ignore", [])) + time_window = getattr(options, "time_window", DEFAULT_CLUSTER_GAP_SEC) + max_distance = getattr(options, "max_distance", DEFAULT_MAX_DISTANCE_M) + min_confidence = getattr(options, "min_confidence", 0.0) + + # Purge tags outside the time window + if recent_tags: + latest_ts = max(tag.ts for tag in recent_tags) + recent_tags = [tag for tag in recent_tags if latest_ts - tag.ts <= time_window] + + all_recent_tag_ids = set( + tag.tag_number for tag in recent_tags if tag.tag_number not in tag_ids_to_ignore + ) + + # Tag ids with an estimate but no surviving observations are finalized + finished_tags = { + tag_id: estimate + for tag_id, estimate in tag_estimates.items() + if tag_id not in all_recent_tag_ids + } + for tag_id in finished_tags: + del tag_estimates[tag_id] + + for each_tag_id in all_recent_tag_ids: + tags_for_id = [tag for tag in recent_tags if tag.tag_number == each_tag_id] + + # Filter by distance and confidence + filtered = [ + tag + for tag in tags_for_id + if tag.confidence >= min_confidence + and float(np.linalg.norm(tag.pose[:3])) <= max_distance + ] + if not filtered: + continue + + # Pick the medoid (most central observation by pose distance) + if len(filtered) == 1: + tag_estimates[each_tag_id] = filtered[0] + else: + best_index = 0 + best_cost = float("inf") + for i, tag_i in enumerate(filtered): + cost = sum( + _pose_distance(tag_i.pose, tag_j.pose, DEFAULT_ROTATION_WEIGHT_M_PER_RAD) + for j, tag_j in enumerate(filtered) + if j != i + ) + if cost < best_cost: + best_cost = cost + best_index = i + tag_estimates[each_tag_id] = filtered[best_index] + + return recent_tags, tag_estimates, finished_tags + + +def load_intrinsics_json(path: Path) -> dict[str, Any]: + raw = json.loads(path.read_text()) + config: dict[str, Any] = { + "intrinsics": np.array(raw["intrinsics"], dtype=np.float64).reshape(3, 3), + "distortion": np.array(raw.get("distortion", []), dtype=np.float64), + } + for key in ("marker_length", "dictionary"): + if key in raw: + config[key] = raw[key] + return config + + +def sightings_from_observations( + observations: Iterable[tuple[int, float]], +) -> dict[int, list[float]]: + """Group ``(marker_id, timestamp)`` tag observations into ``tag_id -> [timestamps]``.""" + sightings: dict[int, list[float]] = {} + for marker_id, timestamp in observations: + sightings.setdefault(int(marker_id), []).append(float(timestamp)) + return sightings + + +def load_or_detect_sightings( + stored: Iterable[tuple[int, float]], + detect: Callable[[], Iterable[tuple[int, float]]], +) -> tuple[dict[int, list[float]], str]: + """``tag_id -> [sighting timestamps]`` from already-stored tag observations, + falling back to ``detect()`` when none are stored. + + Only marker_id + ts are used — tag positions are taken relative to the odom + stream, so stored poses are never read. The caller supplies the stored + iterable and the detect thunk, so this stays db-free. Returns the sightings + plus a source label (``"stream"`` or ``"detected"``).""" + sightings = sightings_from_observations(stored) + if sightings: + return sightings, "stream" + return sightings_from_observations(detect()), "detected" + + +def detect_raw_detections( + store: Any, + intrinsics: np.ndarray, + distortion: np.ndarray, + *, + image_stream: str = "color_image", + marker_length: float = 0.10, + dictionary: str = "DICT_APRILTAG_36h11", +) -> tuple[list[Detection], bool, int]: + """Every valid-PnP tag detection over `image_stream`, unfiltered, with gate diagnostics.""" + detector = make_detector(dictionary) + raw_detections: list[Detection] = [] + images = store.stream(image_stream, Image).to_list() + speed_by_ts, speed_available = _camera_speeds(images) + for image_obs in images: + image = image_obs.data + bgr = image.numpy() if hasattr(image, "numpy") else np.asarray(image.data) + gray = cv2.cvtColor(bgr, cv2.COLOR_BGR2GRAY) if bgr.ndim == 3 else bgr + all_corners, marker_ids, _ = detector.detectMarkers(bgr) + if marker_ids is None: + continue + for corners, marker_id in zip(all_corners, marker_ids.flatten(), strict=False): + pose = estimate_marker_pose(corners, marker_length, intrinsics, distortion) + if pose is None: + continue + rotation_vector, translation_vector = pose + quaternion = Rotation.from_rotvec(rotation_vector.reshape(3)).as_quat() # x,y,z,w + translation = translation_vector.reshape(3) + tag_in_camera = [ + float(translation[0]), + float(translation[1]), + float(translation[2]), + float(quaternion[0]), + float(quaternion[1]), + float(quaternion[2]), + float(quaternion[3]), + ] + raw_detections.append( + { + "ts": float(image_obs.ts), + "marker_id": int(marker_id), + "t_cam_marker": tag_in_camera, + "sharpness": tag_sharpness(gray, corners), + "reproj_px": reprojection_error_px( + corners, + rotation_vector, + translation_vector, + marker_length, + intrinsics, + distortion, + ), + "tag_px": tag_pixel_size(corners), + "speed": speed_by_ts.get(float(image_obs.ts)), + } + ) + return raw_detections, speed_available, len(images) + + +def gate_detections( + raw_detections: list[Detection], + *, + max_distance_m: float = DEFAULT_MAX_DISTANCE_M, + max_view_angle_deg: float = DEFAULT_MAX_VIEW_ANGLE_DEG, + cluster_gap_sec: float = DEFAULT_CLUSTER_GAP_SEC, + rotation_weight_m_per_rad: float = DEFAULT_ROTATION_WEIGHT_M_PER_RAD, + min_sharpness: float = DEFAULT_MIN_SHARPNESS, + max_reproj_px: float = DEFAULT_MAX_REPROJ_PX, + min_tag_px: float = DEFAULT_MIN_TAG_PX, + max_linear_speed_mps: float = DEFAULT_MAX_LINEAR_SPEED_MPS, + max_angular_speed_dps: float = DEFAULT_MAX_ANGULAR_SPEED_DPS, + min_observations: int = DEFAULT_MIN_OBSERVATIONS, + huber_delta_m: float = DEFAULT_HUBER_DELTA_M, +) -> tuple[list[Detection], dict[str, int], int]: + """Gate + time-cluster raw detections into one Huber-refined medoid per cluster.""" + rejected: dict[str, int] = defaultdict(int) + kept: list[Detection] = [] + for detection in raw_detections: + if detection["sharpness"] < min_sharpness: + rejected["blur"] += 1 + continue + if detection["reproj_px"] > max_reproj_px: + rejected["reproj"] += 1 + continue + if detection["tag_px"] < min_tag_px: + rejected["small"] += 1 + continue + distance, view_angle = view_quality(detection["t_cam_marker"]) + if distance > max_distance_m: + rejected["far"] += 1 + continue + if view_angle > max_view_angle_deg: + rejected["oblique"] += 1 + continue + speed = detection["speed"] + if speed is not None and ( + speed[0] > max_linear_speed_mps or speed[1] > max_angular_speed_dps + ): + rejected["motion"] += 1 + continue + kept.append(detection) + + detections: list[Detection] = [] + thin_clusters = 0 + for cluster in cluster_by_time(kept, cluster_gap_sec): + if len(cluster) < min_observations: + thin_clusters += 1 + continue + detections.append( + { + **robust_cluster_pose(cluster, rotation_weight_m_per_rad, huber_delta_m), + "n_observations": len(cluster), + } + ) + detections.sort(key=lambda detection: detection["ts"]) + return detections, dict(rejected), thin_clusters + + +def _write_tag_stream( + store: Any, stream_name: str, detections: list[Detection], *, diagnostics: bool +) -> None: + """(Re)write a PoseStamped tag stream, with gate diagnostics when `diagnostics`.""" + if stream_name in store.list_streams(): + store.delete_stream(stream_name) + stream = store.stream(stream_name, PoseStamped) + for detection in detections: + pose = detection["t_cam_marker"] + tags: dict[str, Any] = {"marker_id": detection["marker_id"]} + if diagnostics: + speed = detection.get("speed") + distance, view_angle = view_quality(pose) + tags.update( + { + "sharpness": float(detection["sharpness"]), + "reproj_px": float(detection["reproj_px"]), + "tag_px": float(detection["tag_px"]), + "distance_m": float(distance), + "view_angle_deg": float(view_angle), + "lin_speed": float(speed[0]) if speed else -1.0, + "ang_speed": float(speed[1]) if speed else -1.0, + } + ) + stream.append( + PoseStamped(ts=detection["ts"], position=pose[:3], orientation=pose[3:]), + ts=detection["ts"], + pose=tuple(pose), + tags=tags, + ) + + +def write_april_streams( + store: Any, + raw_detections: list[Detection], + filtered_detections: list[Detection], + *, + raw_stream: str = "raw_april_tags", + filtered_stream: str = "april_tags", +) -> None: + """Persist the unfiltered `raw_april_tags` and the gated `april_tags` streams.""" + _write_tag_stream(store, raw_stream, raw_detections, diagnostics=True) + _write_tag_stream(store, filtered_stream, filtered_detections, diagnostics=False) + + +def _print_tag_summary( + stream_name: str, + n_raw: int, + detections: list[Detection], + rejected: dict[str, int], + thin_clusters: int, + speed_available: bool, + n_images: int, +) -> None: + in_spec = n_raw - sum(rejected.values()) + found_ids = sorted({detection["marker_id"] for detection in detections}) + gate_summary = ", ".join(f"{reason}={count}" for reason, count in sorted(rejected.items())) + if not speed_available: + gate_summary += (", " if gate_summary else "") + "motion-gate-off(no poses)" + print( + f" {stream_name}: {n_raw} raw -> {in_spec} in-spec " + f"-> {len(detections)} clusters (dropped {thin_clusters} thin), " + f"markers {found_ids} (over {n_images} images)" + ) + if gate_summary: + print(f" {stream_name} rejected: {gate_summary}") + + +def gate_params() -> dict[str, Any]: + """The current AprilTag gate thresholds, for recording in a sidecar.""" + return { + "max_distance_m": DEFAULT_MAX_DISTANCE_M, + "max_view_angle_deg": DEFAULT_MAX_VIEW_ANGLE_DEG, + "min_sharpness": DEFAULT_MIN_SHARPNESS, + "max_reproj_px": DEFAULT_MAX_REPROJ_PX, + "min_tag_px": DEFAULT_MIN_TAG_PX, + "max_linear_speed_mps": DEFAULT_MAX_LINEAR_SPEED_MPS, + "max_angular_speed_dps": DEFAULT_MAX_ANGULAR_SPEED_DPS, + "min_observations": DEFAULT_MIN_OBSERVATIONS, + "cluster_gap_sec": DEFAULT_CLUSTER_GAP_SEC, + "huber_delta_m": DEFAULT_HUBER_DELTA_M, + "rotation_weight_m_per_rad": DEFAULT_ROTATION_WEIGHT_M_PER_RAD, + } + + +def ensure_april_streams( + store: Any, + intrinsics: np.ndarray, + distortion: np.ndarray, + *, + image_stream: str = "color_image", + marker_length: float = 0.10, + dictionary: str = "DICT_APRILTAG_36h11", + raw_stream: str = "raw_april_tags", + filtered_stream: str = "april_tags", + exclude_tags: Iterable[int] = (), + force: bool = False, +) -> list[Detection]: + """Ensure both tag streams exist (`raw_april_tags`, `april_tags`); force=True rewrites. + + `exclude_tags` are kept in raw but dropped from filtered. Returns the filtered reps.""" + existing = set(store.list_streams()) + if not force and raw_stream in existing and filtered_stream in existing: + return [] + raw_detections, speed_available, n_images = detect_raw_detections( + store, + intrinsics, + distortion, + image_stream=image_stream, + marker_length=marker_length, + dictionary=dictionary, + ) + detections, rejected, thin_clusters = gate_detections(raw_detections) + excluded = set(exclude_tags) + if excluded: + detections = [d for d in detections if d["marker_id"] not in excluded] + write_april_streams( + store, raw_detections, detections, raw_stream=raw_stream, filtered_stream=filtered_stream + ) + print(f" {raw_stream}: {len(raw_detections)} detections (unfiltered)") + if excluded: + print(f" excluded dynamic tags from {filtered_stream}: {sorted(excluded)}") + _print_tag_summary( + filtered_stream, + len(raw_detections), + detections, + rejected, + thin_clusters, + speed_available, + n_images, + ) + return detections diff --git a/dimos/navigation/jnav/utils/kitti.py b/dimos/navigation/jnav/utils/kitti.py new file mode 100644 index 0000000000..d7358a976b --- /dev/null +++ b/dimos/navigation/jnav/utils/kitti.py @@ -0,0 +1,94 @@ +# Copyright 2026 Dimensional Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Official KITTI odometry error metric (translational % + rotational deg/m). + +The KITTI leaderboard metric: for every sub-sequence length in {100,200,...,800}m, +take every start frame, find the end frame ~that path-length away, and measure the +relative pose error between estimated and ground-truth. Average translational +error (as a fraction of length) and rotational error (rad per metre) over all +(start, length) pairs. Reported as translational % and rotational deg/m. This is +the devkit's `evaluate_odometry` algorithm. + +Poses are 4x4 body->world transforms (frame 0 = identity), one per frame. +""" + +from __future__ import annotations + +import numpy as np + +LENGTHS = (100.0, 200.0, 300.0, 400.0, 500.0, 600.0, 700.0, 800.0) +STEP = 10 # evaluate every 10th frame as a start (devkit convention) + + +def trajectory_distances(poses: list[np.ndarray]) -> list[float]: + """Cumulative path length at each frame.""" + distances = [0.0] + for index in range(1, len(poses)): + delta = poses[index][:3, 3] - poses[index - 1][:3, 3] + distances.append(distances[-1] + float(np.linalg.norm(delta))) + return distances + + +def _last_frame_for_length(distances: list[float], first: int, length: float) -> int: + target = distances[first] + length + for index in range(first, len(distances)): + if distances[index] >= target: + return index + return -1 + + +def _rotation_error(pose_error: np.ndarray) -> float: + trace = pose_error[0, 0] + pose_error[1, 1] + pose_error[2, 2] + return float(np.arccos(np.clip((trace - 1.0) / 2.0, -1.0, 1.0))) + + +def _translation_error(pose_error: np.ndarray) -> float: + return float(np.linalg.norm(pose_error[:3, 3])) + + +def kitti_odometry_error( + estimated: list[np.ndarray], ground_truth: list[np.ndarray] +) -> dict[str, float]: + """Average translational (%) and rotational (deg/m) error, devkit-style.""" + count = min(len(estimated), len(ground_truth)) + estimated, ground_truth = estimated[:count], ground_truth[:count] + distances = trajectory_distances(ground_truth) + + translational_errors: list[float] = [] + rotational_errors: list[float] = [] + for first in range(0, count, STEP): + for length in LENGTHS: + last = _last_frame_for_length(distances, first, length) + if last < 0: + continue + # Relative pose error: how far the estimated motion from first->last + # diverges from ground truth's. + gt_delta = np.linalg.inv(ground_truth[first]) @ ground_truth[last] + estimated_delta = np.linalg.inv(estimated[first]) @ estimated[last] + pose_error = np.linalg.inv(gt_delta) @ estimated_delta + translational_errors.append(_translation_error(pose_error) / length) + rotational_errors.append(_rotation_error(pose_error) / length) + + if not translational_errors: + return { + "translational_percent": float("nan"), + "rotational_deg_per_m": float("nan"), + "pairs": 0, + } + return { + "translational_percent": float(np.mean(translational_errors)) * 100.0, + "rotational_deg_per_m": float(np.degrees(np.mean(rotational_errors))), + "pairs": len(translational_errors), + } diff --git a/dimos/navigation/jnav/utils/module_loading.py b/dimos/navigation/jnav/utils/module_loading.py new file mode 100644 index 0000000000..3295abdea1 --- /dev/null +++ b/dimos/navigation/jnav/utils/module_loading.py @@ -0,0 +1,57 @@ +# Copyright 2026 Dimensional Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Dynamically load a Module class by file path and filter its config. + +Lets the loop-closure eval drivers swap in any module-under-test from a +``--module-path``/``--module-name`` pair without importing it statically. +""" + +from __future__ import annotations + +import importlib +from pathlib import Path +from typing import Any, get_type_hints + + +def load_module_class(module_path: Path, module_name: str) -> type: + """Import the class `module_name` from a python file. + + Resolved through the normal package system (path -> dotted name rooted at + the last `dimos` directory) so the class pickles/deploys into workers.""" + parts = module_path.resolve().with_suffix("").parts + if "dimos" not in parts: + raise SystemExit(f"--module-path must live inside the dimos package tree: {module_path}") + package_root = len(parts) - 1 - parts[::-1].index("dimos") + dotted = ".".join(parts[package_root:]) + module = importlib.import_module(dotted) + if not hasattr(module, module_name): + raise SystemExit(f"no class {module_name!r} in {dotted} ({module_path})") + return getattr(module, module_name) # type: ignore[no-any-return] + + +def filter_config_for_module(module_class: type, config: dict[str, Any]) -> dict[str, Any]: + """Drop config keys the module's Config class doesn't declare. + + DEFAULT_PGO_CONFIG carries cmu-specific knobs (use_scan_context, ...); + other loop-closure modules shouldn't crash on them.""" + try: + config_class = get_type_hints(module_class)["config"] + known = set(config_class.model_fields) + except Exception: + return config + dropped = sorted(set(config) - known) + if dropped: + print(f"config keys not in {config_class.__name__} (dropped): {dropped}") + return {key: value for key, value in config.items() if key in known} diff --git a/dimos/navigation/jnav/utils/recording_db.py b/dimos/navigation/jnav/utils/recording_db.py new file mode 100644 index 0000000000..88f1aab647 --- /dev/null +++ b/dimos/navigation/jnav/utils/recording_db.py @@ -0,0 +1,118 @@ +# Copyright 2026 Dimensional Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Read recording streams (mem2.db) for loop-closure evaluation. + +SqliteStore access keyed by path (one shared store per db), stream iteration, +and a nearest-pose lookup over an odometry stream. Stream names are always +passed in by the caller — nothing here is tied to a particular recording layout. +""" + +from __future__ import annotations + +import atexit +from collections.abc import Iterator +from pathlib import Path +from typing import Any + +import numpy as np + +from dimos.memory2.store.sqlite import SqliteStore +from dimos.msgs.geometry_msgs.Pose import Pose +from dimos.navigation.jnav.utils.trajectory_metrics import PoseLookup, trajectory_lookup + +# max |Δt| pairing a query time to an odom sample +ODOM_MATCH_TOLERANCE_S = 0.2 + +# Fixed-rate replay pacing + sizing caps (used by the eval replay drivers). +REPLAY_PUBLISH_HZ = 50.0 +REPLAY_DRAIN_MARGIN_S = 30.0 +MAX_REPLAY_SCANS = 4000 +MAX_REPLAY_ODOM = 16000 + +# One shared store per db path (SqliteStore can't take absolute paths through the +# replay adapter, so streams are read directly). +_stores: dict[str, SqliteStore] = {} + + +def store(db_path: Path) -> SqliteStore: + key = str(db_path) + cached = _stores.get(key) + if cached is None: + cached = SqliteStore(path=key, must_exist=True) + cached.start() + _stores[key] = cached + return cached + + +def close_all() -> None: + """Stop every cached store, releasing their file handles.""" + for cached in _stores.values(): + cached.stop() + _stores.clear() + + +atexit.register(close_all) + + +def list_streams(db_path: Path) -> list[str]: + return store(db_path).list_streams() + + +def stream_count(db_path: Path, stream_name: str) -> int: + return int(store(db_path).stream(stream_name).count()) + + +def iterate_stream( + db_path: Path, stream_name: str, *, stride: int = 1 +) -> Iterator[tuple[float, Any]]: + """Yield ``(timestamp, decoded message)``, decoding only kept rows.""" + stream: Any = store(db_path).stream(stream_name) + for index, observation in enumerate(stream): + if stride > 1 and index % stride: + continue + yield (float(observation.ts), observation.data) + + +def payload_pose(payload: Any) -> Pose: + """Normalize a recorded odometry payload to a ``Pose``. + + Handles both shapes found in recordings: ``Odometry`` (``.pose``) and + ``PoseStamped`` (flat position + ``.orientation``).""" + if hasattr(payload, "pose"): # Odometry + return payload.pose # type: ignore[no-any-return] + return Pose( # PoseStamped + payload.x, + payload.y, + payload.z, + payload.orientation.x, + payload.orientation.y, + payload.orientation.z, + payload.orientation.w, + ) + + +def odometry_lookup(db_path: Path, stream_name: str) -> PoseLookup: + """Nearest-position (``[x, y, z]``) lookup over a recording's odometry stream.""" + times: list[float] = [] + positions: list[list[float]] = [] + for timestamp, payload in iterate_stream(db_path, stream_name): + pose = payload_pose(payload) + times.append(timestamp) + positions.append([pose.position.x, pose.position.y, pose.position.z]) + return trajectory_lookup( + np.asarray(times, dtype=np.float64), + np.asarray(positions, dtype=np.float64), + ODOM_MATCH_TOLERANCE_S, + ) diff --git a/dimos/navigation/jnav/utils/test_apriltag_agreement.py b/dimos/navigation/jnav/utils/test_apriltag_agreement.py new file mode 100644 index 0000000000..fc177b3d3d --- /dev/null +++ b/dimos/navigation/jnav/utils/test_apriltag_agreement.py @@ -0,0 +1,74 @@ +# Copyright 2026 Dimensional Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Tests for the April-tag agreement metric.""" + +from __future__ import annotations + +import numpy as np + +from dimos.navigation.jnav.utils.apriltag_agreement import ( + agreement_improvement, + agreement_report, + tag_spread, +) + + +def test_tag_spread_zero_for_identical() -> None: + assert tag_spread(np.array([[1.0, 2.0, 3.0]] * 4)) == 0.0 + + +def test_tag_spread_grows_with_scatter() -> None: + tight = tag_spread(np.array([[0.0, 0, 0], [0.1, 0, 0]])) + loose = tag_spread(np.array([[0.0, 0, 0], [2.0, 0, 0]])) + assert loose > tight + + +def test_single_observation_excluded() -> None: + report = agreement_report({7: np.array([[0.0, 0, 0]])}) + assert report.tag_count == 0 # one sighting carries no agreement signal + assert report.total_observations == 1 + assert report.mean_spread == 0.0 + + +def test_report_mean_spread() -> None: + report = agreement_report( + { + 1: np.array([[0.0, 0, 0], [0.0, 0, 0]]), # spread 0 + 2: np.array([[0.0, 0, 0], [2.0, 0, 0]]), # spread 1.0 + } + ) + assert report.tag_count == 2 + assert report.total_observations == 4 + assert abs(report.mean_spread - 0.5) < 1e-9 + + +def test_improvement_positive_when_corrected_tighter() -> None: + # Drifted: same tag scattered 4 m apart across laps. Corrected: pulled together. + raw = agreement_report({1: np.array([[0.0, 0, 0], [4.0, 0, 0]])}) + corrected = agreement_report({1: np.array([[0.0, 0, 0], [0.4, 0, 0]])}) + improvement = agreement_improvement(raw, corrected) + assert 0.85 < improvement <= 1.0 + + +def test_improvement_negative_when_corrected_worse() -> None: + raw = agreement_report({1: np.array([[0.0, 0, 0], [0.4, 0, 0]])}) + corrected = agreement_report({1: np.array([[0.0, 0, 0], [4.0, 0, 0]])}) + assert agreement_improvement(raw, corrected) < 0.0 + + +def test_improvement_zero_when_no_raw_spread() -> None: + raw = agreement_report({1: np.array([[0.0, 0, 0], [0.0, 0, 0]])}) + corrected = agreement_report({1: np.array([[0.0, 0, 0], [1.0, 0, 0]])}) + assert agreement_improvement(raw, corrected) == 0.0 diff --git a/dimos/navigation/jnav/utils/test_kitti.py b/dimos/navigation/jnav/utils/test_kitti.py new file mode 100644 index 0000000000..756e403cb6 --- /dev/null +++ b/dimos/navigation/jnav/utils/test_kitti.py @@ -0,0 +1,60 @@ +# Copyright 2026 Dimensional Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Devkit-verified self-tests for the official KITTI odometry error metric.""" + +from __future__ import annotations + +import numpy as np +from scipy.spatial.transform import Rotation + +from dimos.navigation.jnav.utils.kitti import kitti_odometry_error + +ZERO_ERROR_TOLERANCE = 1e-6 + + +def _straight_trajectory(scale: float = 1.0) -> list[np.ndarray]: + """A 1000 m straight run along x, one frame per metre, scaled along-track.""" + poses = [] + for index in range(1001): + pose = np.eye(4) + pose[0, 3] = float(index) * scale + poses.append(pose) + return poses + + +def test_identical_trajectory_has_zero_error() -> None: + ground_truth = _straight_trajectory() + result = kitti_odometry_error([pose.copy() for pose in ground_truth], ground_truth) + assert result["translational_percent"] < ZERO_ERROR_TOLERANCE + assert result["rotational_deg_per_m"] < ZERO_ERROR_TOLERANCE + + +def test_one_percent_scale_drift_reads_one_percent() -> None: + ground_truth = _straight_trajectory() + scaled = _straight_trajectory(scale=1.01) + result = kitti_odometry_error(scaled, ground_truth) + assert 0.8 < result["translational_percent"] < 1.2 + + +def test_constant_yaw_rate_has_nonzero_rotational_error() -> None: + ground_truth = _straight_trajectory() + yawed = [] + for index in range(1001): + pose = np.eye(4) + pose[:3, :3] = Rotation.from_euler("z", index * 0.01, degrees=True).as_matrix() + pose[0, 3] = float(index) + yawed.append(pose) + result = kitti_odometry_error(yawed, ground_truth) + assert result["rotational_deg_per_m"] > 0.0 diff --git a/dimos/navigation/jnav/utils/trajectory_metrics.py b/dimos/navigation/jnav/utils/trajectory_metrics.py new file mode 100644 index 0000000000..8701eb4785 --- /dev/null +++ b/dimos/navigation/jnav/utils/trajectory_metrics.py @@ -0,0 +1,268 @@ +# Copyright 2026 Dimensional Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Pure pose/trajectory math for loop-closure evaluation. + +Drift injection + correction and ground-truth(-free) trajectory metrics, all +operating on plain arrays and lookup callables (no db reads) so they can be +shared across the eval drivers. +""" + +from __future__ import annotations + +from collections.abc import Callable, Iterable +from typing import Any + +import numpy as np +from scipy.spatial.transform import Rotation + +from dimos.navigation.jnav.utils.voxel_map import VoxelMap + +# (ts, x, y, z, qx, qy, qz, qw) per keyframe +GraphPose = tuple[float, float, float, float, float, float, float, float] +# time -> nearest pose: 3-vec [x,y,z] (PoseLookup) or 7-vec +quat (PoseLookup7) +PoseLookup = Callable[[float], "np.ndarray | None"] +PoseLookup7 = Callable[[float], "np.ndarray | None"] + + +def trajectory_lookup(times: np.ndarray, positions: np.ndarray, tolerance: float) -> PoseLookup: + """A ``time -> [x, y, z]`` nearest-sample lookup over a (ts, position) trajectory.""" + if len(times) == 0: + return lambda _timestamp: None + + def lookup(timestamp: float) -> np.ndarray | None: + index = int(np.argmin(np.abs(times - timestamp))) + if abs(float(times[index]) - timestamp) > tolerance: + return None + return np.asarray(positions[index], dtype=np.float64) + + return lookup + + +def graph_lookup(graph: list[tuple[float, float, float, float]]) -> PoseLookup: + """Nearest-keyframe lookup over an optimized pose graph (no tolerance). + + Keyframes only spawn on motion, so a parked robot legitimately maps to an + old keyframe — that keyframe IS its position (within the keyframe delta). + Unbounded nearest-in-time is sound when the graph is never truncated.""" + times = np.asarray([node[0] for node in graph], dtype=np.float64) + positions = np.asarray([[node[1], node[2], node[3]] for node in graph], dtype=np.float64) + return trajectory_lookup(times, positions, float("inf")) + + +# Voxel-agreement sampling: cap per-scan points so the map fits in memory +# without holding the whole recording at once. +VOXEL_SIZE_M = 0.2 +VOXEL_MAX_POINTS_PER_SCAN = 4000 + + +def drift_offset( + timestamp: float, t0: float, drift_per_sec: list[float] | np.ndarray +) -> np.ndarray: + """World translation injected at ``timestamp`` (grows linearly from ``t0``).""" + return np.asarray(drift_per_sec, dtype=np.float64) * (timestamp - t0) + + +def has_drift(drift_per_sec: list[float] | np.ndarray) -> bool: + return bool(np.any(np.asarray(drift_per_sec, dtype=np.float64))) + + +def drifted_lookup( + base_lookup: Callable[[float], np.ndarray | None], + drift_per_sec: list[float], + drift_t0: float, +) -> Callable[[float], np.ndarray | None]: + """Wrap a pose lookup so its xyz gets the same drift the module was fed. + + Pass-through when drift is zero. Works for both the 3-vec (xyz) and 7-vec + (xyz+quat) lookups — only the first three components are shifted.""" + if not has_drift(drift_per_sec): + return base_lookup + drift = np.asarray(drift_per_sec, dtype=np.float64) + + def lookup(timestamp: float) -> np.ndarray | None: + pose = base_lookup(timestamp) + if pose is None: + return None + shifted = np.array(pose, dtype=np.float64) + shifted[:3] += drift * (timestamp - drift_t0) + return shifted + + return lookup + + +def pose7_lookup(times: np.ndarray, poses: np.ndarray, tolerance: float) -> PoseLookup7: + """time -> nearest [x,y,z,qx,qy,qz,qw], or None past the tolerance.""" + + def lookup(timestamp: float) -> np.ndarray | None: + if len(times) == 0: + return None + index = int(np.argmin(np.abs(times - timestamp))) + if abs(float(times[index]) - timestamp) > tolerance: + return None + return np.asarray(poses[index], dtype=np.float64) + + return lookup + + +def drift_delta_lookup( + graph: list[GraphPose], raw_lookup: PoseLookup7 +) -> Callable[[float], tuple[np.ndarray, np.ndarray] | None]: + """time -> nearest keyframe's drift correction (R_delta, t_delta). + + The delta is computed at the KEYFRAME's own timestamp — + T_corrected(kf) * T_raw(kf_ts)^-1 — so raw and corrected poses describe the + same instant. (Comparing a keyframe pose against the raw pose at a nearby + scan's timestamp snaps scans onto keyframes and fakes a tighter map: an + identity correction must yield an identity delta.)""" + times: list[float] = [] + rotations: list[np.ndarray] = [] + translations: list[np.ndarray] = [] + for node in graph: + raw_pose = raw_lookup(node[0]) + if raw_pose is None: + continue + rotation_raw = Rotation.from_quat(raw_pose[3:7]).as_matrix() + rotation_corrected = Rotation.from_quat(node[4:8]).as_matrix() + rotation_delta = rotation_corrected @ rotation_raw.T + translation_delta = np.asarray(node[1:4]) - rotation_delta @ raw_pose[:3] + times.append(node[0]) + rotations.append(rotation_delta) + translations.append(translation_delta) + times_array = np.asarray(times, dtype=np.float64) + + def lookup(timestamp: float) -> tuple[np.ndarray, np.ndarray] | None: + if len(times_array) == 0: + return None + index = int(np.argmin(np.abs(times_array - timestamp))) + return rotations[index], translations[index] + + return lookup + + +def rigid_align_rmse(source: np.ndarray, target: np.ndarray) -> float: + """Absolute trajectory error: RMSE of ``source`` to ``target`` after a + best-fit rigid (rotation+translation, no scale) alignment (Kabsch/Umeyama). + Both are (N, 3). The alignment removes the gauge freedom — two trajectories + of the same shape in different world frames score 0.""" + if len(source) < 3: + return 0.0 + source_centroid = source.mean(axis=0) + target_centroid = target.mean(axis=0) + source_centered = source - source_centroid + target_centered = target - target_centroid + covariance = source_centered.T @ target_centered + u_matrix, _, vt_matrix = np.linalg.svd(covariance) + # Reflection fix so the result is a proper rotation (det = +1). + reflection = np.sign(np.linalg.det(vt_matrix.T @ u_matrix.T)) + correction = np.diag([1.0, 1.0, reflection]) + rotation = vt_matrix.T @ correction @ u_matrix.T + aligned = (rotation @ source_centered.T).T + target_centroid + return float(np.sqrt(np.mean(np.sum((aligned - target) ** 2, axis=1)))) + + +def trajectory_recovery_error( + graph: list[GraphPose], + gt_lookup: Callable[[float], np.ndarray | None], + drift_per_sec: list[float], + drift_t0: float, +) -> dict[str, float] | None: + """Drift-recovery ATE: how close the module's corrected keyframe trajectory + gets to the *un-drifted* ground-truth, vs the drifted input it was given. + + Only meaningful with injected drift (then GT = the un-drifted odom the drift + was added to). This is the right metric where tag/voxel agreement is weak — + e.g. KITTI's long single-loop trajectory. Returns None when drift is off or + too few keyframes resolve. ``trajectory_improvement`` = fraction of the drift + ATE removed (1.0 = perfect recovery, 0 = no help, negative = worse).""" + if not has_drift(drift_per_sec): + return None + drift = np.asarray(drift_per_sec, dtype=np.float64) + corrected_points: list[list[float]] = [] + gt_points: list[np.ndarray] = [] + drifted_points: list[np.ndarray] = [] + for node in graph: + timestamp = node[0] + gt_pose = gt_lookup(timestamp) + if gt_pose is None: + continue + gt_xyz = np.asarray(gt_pose, dtype=np.float64)[:3] + corrected_points.append([node[1], node[2], node[3]]) + gt_points.append(gt_xyz) + drifted_points.append(gt_xyz + drift * (timestamp - drift_t0)) + if len(gt_points) < 3: + return None + gt_array = np.asarray(gt_points) + drifted_ate = rigid_align_rmse(np.asarray(drifted_points), gt_array) + corrected_ate = rigid_align_rmse(np.asarray(corrected_points), gt_array) + improvement = (drifted_ate - corrected_ate) / drifted_ate if drifted_ate > 1e-9 else 0.0 + return { + "drifted_ate_m": drifted_ate, + "corrected_ate_m": corrected_ate, + "trajectory_improvement": improvement, + } + + +def lidar_voxel_agreement( + scans: Iterable[tuple[float, np.ndarray]], + raw_lookup: PoseLookup7, + graph: list[GraphPose], + *, + voxel_size: float = VOXEL_SIZE_M, + max_points_per_scan: int = VOXEL_MAX_POINTS_PER_SCAN, + drift_per_sec: list[float] | None = None, + drift_t0: float = 0.0, +) -> dict[str, Any]: + """Occupied-voxel counts of the lidar map, raw vs corrected. + + ``scans`` yields ``(timestamp, points)`` where ``points`` is an (N, 3+) array + registered in the raw odom world frame. Each scan is re-anchored by its + nearest keyframe's drift correction (see `drift_delta_lookup`), so a good + correction collapses doubled walls and the corrected map occupies fewer + voxels. ``improvement`` is the fractional voxel drop (positive = tighter). + When ``drift_per_sec`` is set the raw scans are first shifted into the same + drifted world the module solved in.""" + drift = np.asarray(drift_per_sec or [0.0, 0.0, 0.0], dtype=np.float64) + apply_drift = has_drift(drift) + delta_lookup = drift_delta_lookup(graph, raw_lookup) + raw_clouds: list[np.ndarray] = [] + corrected_clouds: list[np.ndarray] = [] + used = 0 + for timestamp, scan_points in scans: + delta = delta_lookup(timestamp) + if delta is None or raw_lookup(timestamp) is None: + continue + rotation_delta, translation_delta = delta + points = np.asarray(scan_points, dtype=np.float64)[:, :3] + if len(points) > max_points_per_scan: + points = points[:: -(-len(points) // max_points_per_scan)] + if apply_drift: + points = points + drift * (timestamp - drift_t0) + raw_clouds.append(points) + corrected_clouds.append(points @ rotation_delta.T + translation_delta) + used += 1 + + if not raw_clouds: + return {"status": "skipped: no scans matched both pose sources"} + raw_voxels = VoxelMap.from_points(np.vstack(raw_clouds), voxel_size).count + corrected_voxels = VoxelMap.from_points(np.vstack(corrected_clouds), voxel_size).count + improvement = (raw_voxels - corrected_voxels) / raw_voxels if raw_voxels else 0.0 + return { + "status": "ok", + "raw_voxels": raw_voxels, + "corrected_voxels": corrected_voxels, + "improvement": improvement, + "voxel_size_m": voxel_size, + "scans_used": used, + } diff --git a/dimos/navigation/jnav/utils/voxel_map.py b/dimos/navigation/jnav/utils/voxel_map.py new file mode 100644 index 0000000000..c3055d8c87 --- /dev/null +++ b/dimos/navigation/jnav/utils/voxel_map.py @@ -0,0 +1,66 @@ +# Copyright 2026 Dimensional Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Force a point cloud onto a discrete voxel grid for occupancy comparisons. + +A ``VoxelMap`` is just the set of occupied integer voxel keys at a fixed voxel +size. ``diff`` (symmetric-difference count, lower == better aligned) and the raw +occupied ``count`` are the building blocks for map-quality scores: drift smears a +map into more, mis-aligned voxels; a good loop closure collapses it back. +""" + +from __future__ import annotations + +from dataclasses import dataclass + +import numpy as np + +from dimos.msgs.sensor_msgs.PointCloud2 import PointCloud2 + + +@dataclass(frozen=True) +class VoxelMap: + voxel_size: float + keys: frozenset[tuple[int, int, int]] + + @classmethod + def from_points(cls, points: np.ndarray, voxel_size: float) -> VoxelMap: + if points.shape[0] == 0: + return cls(voxel_size=voxel_size, keys=frozenset()) + indices = np.floor(np.asarray(points)[:, :3] / voxel_size).astype(np.int64) + unique = np.unique(indices, axis=0) + return cls(voxel_size=voxel_size, keys=frozenset(map(tuple, unique))) + + @classmethod + def from_pointcloud(cls, cloud: PointCloud2, voxel_size: float) -> VoxelMap: + return cls.from_points(cloud.points_f32(), voxel_size) + + @property + def count(self) -> int: + """Number of occupied voxels.""" + return len(self.keys) + + def __len__(self) -> int: + return len(self.keys) + + def diff(self, other: VoxelMap) -> int: + """Symmetric-difference count: voxels occupied in exactly one of the two.""" + return len(self.keys ^ other.keys) + + def intersection(self, other: VoxelMap) -> int: + return len(self.keys & other.keys) + + def iou(self, other: VoxelMap) -> float: + union = len(self.keys | other.keys) + return len(self.keys & other.keys) / union if union else 0.0 diff --git a/dimos/protocol/tf/tf.py b/dimos/protocol/tf/tf.py index eb3d72b470..4dd345a530 100644 --- a/dimos/protocol/tf/tf.py +++ b/dimos/protocol/tf/tf.py @@ -214,6 +214,19 @@ def _wait_get( return None self._cv.wait(timeout=remaining) + def lookup( + self, + parent_frame: str, + child_frame: str, + time_point: float | None = None, + time_tolerance: float | None = None, + ) -> Transform | None: + """Composed transform lookup that does NOT log on a miss (unlike `get`). + + For high-frequency / best-effort lookups where misses are expected and a + per-call warning would spam the log (e.g. per-scan world-registration).""" + return self._get(parent_frame, child_frame, time_point, time_tolerance) + def get( self, parent_frame: str, diff --git a/dimos/robot/all_blueprints.py b/dimos/robot/all_blueprints.py index 22bf7d27e1..c79a3b75f4 100644 --- a/dimos/robot/all_blueprints.py +++ b/dimos/robot/all_blueprints.py @@ -172,6 +172,7 @@ "go2-teleop-module": "dimos.teleop.quest.quest_extensions.Go2TeleopModule", "google-maps-skill-container": "dimos.agents.skills.google_maps_skill_container.GoogleMapsSkillContainer", "gps-nav-skill-container": "dimos.agents.skills.gps_nav_skill.GpsNavSkillContainer", + "graph-capture": "dimos.navigation.jnav.components.loop_closure.eval.GraphCapture", "grasping-module": "dimos.manipulation.grasping.grasping.GraspingModule", "gstreamer-camera-module": "dimos.hardware.sensors.camera.gstreamer.gstreamer_camera.GstreamerCameraModule", "hosted-arm-teleop-module": "dimos.teleop.quest_hosted.hosted_extensions.HostedArmTeleopModule", @@ -183,6 +184,7 @@ "keyboard-teleop": "dimos.robot.unitree.keyboard_teleop.KeyboardTeleop", "keyboard-teleop-module": "dimos.teleop.keyboard.keyboard_teleop_module.KeyboardTeleopModule", "local-planner": "dimos.navigation.cmu_nav.modules.local_planner.local_planner.LocalPlanner", + "lockstep-replay": "dimos.navigation.jnav.components.loop_closure.eval.LockstepReplay", "manipulation-module": "dimos.manipulation.manipulation_module.ManipulationModule", "map": "dimos.robot.unitree.type.map.Map", "marker-detection-stream-module": "dimos.perception.fiducial.marker_detection_stream_module.MarkerDetectionStreamModule", @@ -210,12 +212,13 @@ "perceive-loop-skill": "dimos.perception.perceive_loop_skill.PerceiveLoopSkill", "person-follow-skill-container": "dimos.agents.skills.person_follow.PersonFollowSkillContainer", "person-tracker": "dimos.perception.detection.person_tracker.PersonTracker", - "pgo": "dimos.navigation.cmu_nav.modules.pgo.pgo.PGO", + "pgo": "dimos.navigation.jnav.components.loop_closure.gsc_pgo.module.PGO", "phone-teleop-module": "dimos.teleop.phone.phone_teleop_module.PhoneTeleopModule", "pick-and-place-module": "dimos.manipulation.pick_and_place_module.PickAndPlaceModule", "point-lio": "dimos.hardware.sensors.lidar.pointlio.module.PointLio", "pointlio-recorder": "dimos.hardware.sensors.lidar.pointlio.recorder.PointlioRecorder", "quest-teleop-module": "dimos.teleop.quest.quest_teleop_module.QuestTeleopModule", + "rate-replay": "dimos.navigation.jnav.components.loop_closure.eval.RateReplay", "ray-tracing-voxel-map": "dimos.mapping.ray_tracing.module.RayTracingVoxelMap", "real-sense-camera": "dimos.hardware.sensors.camera.realsense.camera.RealSenseCamera", "receiver-module": "dimos.utils.demo_image_encoding.ReceiverModule", diff --git a/dimos/robot/unitree/go2/blueprints/smart/unitree_go2.py b/dimos/robot/unitree/go2/blueprints/smart/unitree_go2.py index 9993e541c4..e50cf8b8e3 100644 --- a/dimos/robot/unitree/go2/blueprints/smart/unitree_go2.py +++ b/dimos/robot/unitree/go2/blueprints/smart/unitree_go2.py @@ -62,13 +62,17 @@ class Go2Memory(Recorder): _last_odom_pose: Pose | None = None @pose_setter_for("odom") - def _odom_pose(self, msg: PoseStamped) -> Pose | None: + async def _odom_pose(self, msg: PoseStamped) -> Pose | None: self._last_odom_pose = msg return self._last_odom_pose @pose_setter_for("lidar") - def _lidar_pose(self, msg: PointCloud2) -> Pose | None: - return self._last_odom_pose # should always exist (odom alwyas wins the race) + async def _lidar_pose(self, msg: PointCloud2) -> Pose | None: + # go2 lidar (currently) is in world-frame + # so it doesn't make sense to register lidar at the odom pose + # but we do it anyways because map.py (for now) requires it + # TODO: fix map.py to use a transform frame + return getattr(self, "_last_odom_pose", None) unitree_go2_markers = ( diff --git a/experimental/docs/jnav/map_postprocessing.md b/experimental/docs/jnav/map_postprocessing.md new file mode 100644 index 0000000000..9311066e6c --- /dev/null +++ b/experimental/docs/jnav/map_postprocessing.md @@ -0,0 +1,75 @@ +# Map Postprocessing + +You recorded a run. The lidar map drifted. You want a clean one to compare against — ground truth, basically, without a motion-capture rig. + +This is the offline fix. Point it at a recording, it bends the trajectory back into shape using AprilTags it saw along the way, then snaps the local geometry together with ICP. Out comes a corrected map written back into the same recording. + +It runs on a `.db` after the fact. It is not part of the live nav stack and never touches the robot. + +## What you need in the recording + +A recording dir with `mem2.db` plus `camera_intrinsics.json`, and these streams inside the db: + +- a camera stream (`color_image`) +- odometry (`pointlio_odometry`) +- world-registered lidar scans (`pointlio_lidar`) +- AprilTags physically in the scene, sized and known + +The tags are the whole trick. Drift accumulates, but a tag you saw at minute 1 and again at minute 9 is the *same tag* — so the two sightings have to land in the same spot. That constraint is what pulls the map straight. + +## The three steps + +The scripts live in `dimos/navigation/jnav/components/loop_closure/gsc_pgo/`. + +**1. Detect the tags.** Run the camera frames through detection and write both the raw and the gated tag streams in one step: + +``` +python dimos/navigation/jnav/components/loop_closure/gsc_pgo/add_april.py --rec=PATH +``` + +`add_april.py` writes `raw_april_tags` (every detection, unfiltered, with its quality numbers attached — this is what postprocessing reads), the gated/clustered `april_tags` stream, and an `april_tags` section in the recording's `summary.json` (see below). Leaving `raw_april_tags` unfiltered matters: you tune the quality gates later without re-running detection, which is the slow part. (`detect_tags.py` writes just the raw stream if that's all you want; `--dynamic 17` keeps a moving tag in raw but drops it from the gated stream.) + +Inspect what was found without rebuilding anything — per tag, raw count and revisit count, flagging any tag never revisited (your fast check for whether a recording even has loop constraints): + +``` +python dimos/navigation/jnav/components/loop_closure/gsc_pgo/add_april.py --rec=PATH --summary +``` + +**2. Solve.** Two stages, one command: + +``` +python dimos/navigation/jnav/components/loop_closure/gsc_pgo/post_process.py both --rec=PATH +``` + +- **Tag PGO (GTSAM).** Odometry between-poses are stiff on roll/pitch and z (gravity isn't drifting) and loose on yaw, where the real error lives. The tag sightings are landmark factors, weighted by how good each detection was. This fixes the big-picture drift. +- **ICP refinement.** Lidar submaps that are close in space but far apart in time get aligned to each other. This cleans up the local geometry the tags don't directly constrain. + +It writes `gt_pointlio_odometry` and `gt_pointlio_lidar` back into the db, optionally a `.pc2.lcm` of the corrected cloud, and opens a comparison view. Run `odom`, `lidar`, or `both`. + +**3. Look at it.** `post_process.py` opens the rrd for you, but you can rebuild it anytime: + +``` +python dimos/navigation/jnav/components/loop_closure/gsc_pgo/make_rrd.py --rec=PATH +``` + +Raw cloud in red, every `gt_*` version in its own color, tag landmarks marked. Add another correction method and it shows up automatically — good for comparing approaches side by side. + +## What `add_april.py` records in `summary.json` + +It merges an `april_tags` section into the recording's `summary.json` (other keys preserved; it never touches `camera_intrinsics.json`): + +- `filter_parameters` — the exact gate thresholds used, marker size, dictionary, and any dynamic tags excluded. +- `result` — `all_unfiltered_tag_ids`, and per tag the raw detection count, filtered visits, and revisit count, plus the list of tags never revisited. + +So the gates and the outcome are auditable after the fact. `--summary` recomputes just the `result` from the existing streams (read-only). + +## Knobs worth knowing + +- `--no-icp` — tag PGO only, skip the ICP stage. +- `--no-lcm` / `--no-rrd` — skip the cloud export / the viewer. +- `--out=NAME` — output prefix, if you want to keep several corrections in one db. +- Tag quality gates (sharpness, reprojection error, distance, view angle, motion blur) are single-sourced in `dimos/navigation/jnav/utils/apriltags.py` (the `DEFAULT_*` constants); `post_process.py` and the eval both import them. They're relaxed by default to keep more sightings. Tighten them there if a bad tag pose is yanking the map around. + +## When it won't help + +No tags in the scene, or tags seen only once, means no loop constraints — you get ICP cleanup and not much else. Same story if the camera never got a clean look at a tag. Garbage detections in, garbage map out; that's what the gates are for. diff --git a/experimental/docs/jnav/pgo_migration_plan.md b/experimental/docs/jnav/pgo_migration_plan.md new file mode 100644 index 0000000000..a35efdc31f --- /dev/null +++ b/experimental/docs/jnav/pgo_migration_plan.md @@ -0,0 +1,144 @@ +# PGO → jnav Loop-Closure Migration Plan + +Goal: land the pose-graph-optimization (PGO) / loop-closure work on +`jeff/feat/jnav_pgo` in the new `jnav` layout, extract the C++ + nix flake into a +standalone `github:jeff-hykin/gsc_pgo` repo, and merge in the offline AprilTag +map-postprocessing tooling. + +## Source material + +Two branches hold the pieces; neither alone is the target. + +| Source | Has | Layout | +|---|---|---| +| `jeff/feat/jnav` | `jnav/{msgs,utils}/`, `loop_closure/{gsc_pgo,ivan_pgo,ivan_pgo_transformer,unrefined_pgo}`, `eval.py`, `eval_all.py`, Scan-Context + Landmark C++ | `dimos/navigation/jnav/modules/...` (inline C++) | +| `jeff/feat/better_pgo` | postprocessing scripts (`add_april.py`, `detect_tags.py`, `post_process.py`, `make_rrd.py`), `map_postprocessing.md` doc | `dimos/navigation/nav_stack/modules/pgo/scripts/...` | +| `jeff/feat/jnav_pgo` (current) | nothing yet — only `nav_stack→cmu_nav` rename on top of `main` | — | + +`jeff/feat/jnav` is the more evolved navigation reorg; `better_pgo` is the source +for the AprilTag postprocessing scripts/doc. The migration is the union of both. + +## Target layout (on `jeff/feat/jnav_pgo`) + +``` +dimos/navigation/jnav/ + msgs/ # Graph3D/GraphDelta3D/Landmark/Marker (.py + .hpp + tests) + utils/ # ALL pgo utils (apriltags, trajectory_metrics, recording_db, ...) + components/ # renamed from jnav's "modules/" + loop_closure/ + eval.py + eval_all.py + spec.py # LoopClosure Protocol + gsc_pgo/ + module.py # NativeModule -> builds external gsc_pgo flake + post_process.py # ported from better_pgo scripts/post_process.py + (add_april.py, detect_tags.py, make_rrd.py) # the other postprocess scripts + ivan_pgo/ ivan_pgo_transformer/ unrefined_pgo/ # comparison baselines (eval_all needs them) +experimental/docs/jnav/ + map_postprocessing.md # adapted from docs/capabilities/navigation/map_postprocessing.md +``` + +External repo: +``` +github:jeff-hykin/gsc_pgo # all C++ + flake.nix/flake.lock + flake.nix, flake.lock, CMakeLists.txt + main.cpp, simple_pgo.{cpp,h}, scan_context.{cpp,h}, + commons.{cpp,h}, point_cloud_utils.hpp, dimos_native_module.hpp, + pgo_landmark_test.cpp + msgs/ (Graph3D.hpp, GraphDelta3D.hpp, Landmark.hpp) # C++ wire helpers +``` + +## Decisions (confirmed by Jeff, 2026-06-22) + +1. **Base branch.** ✅ Base `jnav_pgo` on `jeff/feat/jnav`. BUT `post_process.py` + must come from `better_pgo` (not jnav, which lacks it). +2. **`components/` scope.** ⏳ pending (whole `jnav/modules/`→`components/` vs only + `loop_closure`). +3. **`gsc_pgo` repo.** ✅ **public**, created at + `github.com/jeff-hykin/gsc_pgo`, initial rev `494e7a1d657c3702ec805c9e3d251a2fe8bc9529`. + Flake input will pin to that rev: `github:jeff-hykin/gsc_pgo/494e7a1...#default`. + +## Work breakdown + +### Phase 0 — branch setup +- Confirm `jnav_pgo` base (decision 1). If basing on jnav: merge/cherry-pick the + `dimos/navigation/jnav/` tree onto current `jnav_pgo` (which already has the + `cmu_nav` rename). Resolve any overlap with cmu_nav's own `pgo` module. + +### Phase 1 — external `gsc_pgo` repo +- `gh repo create jeff-hykin/gsc_pgo` (visibility per decision 3). *(outward-facing — confirm first)* +- Move `gsc_pgo/cpp/*` (C++, CMakeLists, flake.nix, flake.lock, `msgs/*.hpp`, + `pgo_landmark_test.cpp`) into the new repo. Keep the flake's + `lcm-extended` / `dimos-lcm` / `gtsam-extended` inputs as-is. +- `flake.nix`: `src = ./.;` already self-contained — switching to a tracked git + repo *fixes* the untracked-files gotcha that forced `path:$PWD#default` + (see memory: nix untracked-files gotcha). Verify `nix build .#default` from a + clean checkout. +- Push, tag/record the rev for the flake pin. + +### Phase 2 — `gsc_pgo` module.py +- Update `PGOConfig`: + - `build_command = 'nix build "github:jeff-hykin/gsc_pgo/#default" --no-write-lock-file'` + - drop the `path:$PWD` workaround comment; `cwd` no longer needs a local `cpp/`. + - `executable` resolves from the nix `result/bin/pgo` (confirm NativeModule + out-of-tree build path handling). +- Keep `In/Out` stream wiring and all loop-closure hyperparameters unchanged. +- Confirm C++ `msgs/*.hpp` wire format still matches `jnav/msgs/*.py` decoders + (they're hand-synced; the `.hpp` headers travel with the C++ repo). + +### Phase 3 — msgs/ + utils/ +- `jnav/msgs/`: already present on jnav (Graph3D, GraphDelta3D, Landmark, Marker + + `.hpp` + tests). Bring over as-is. The `.hpp` files are duplicated into the + external repo (C++ side); decide whether the canonical `.hpp` lives in + `gsc_pgo` and `jnav/msgs/*.hpp` is a mirror, or vice-versa. Recommend: canonical + in `gsc_pgo`, keep a note in `jnav/msgs` pointing at it. +- `jnav/utils/`: bring all utils. `better_pgo`'s `eval_utils/` (apriltags, + apriltag_agreement, trajectory_metrics, recording_db, voxel_map, module_loading) + are already absorbed into jnav `utils/` — verify no newer logic in + `better_pgo` was lost (diff the 6 overlapping files). + +### Phase 4 — eval.py / eval_all.py +- Copy from jnav `loop_closure/`. Fix imports to `dimos.navigation.jnav.utils.*` + and `...jnav.msgs.*` (already correct on jnav). +- `eval_all.py` enumerates the comparison modules (`gsc_pgo`, `ivan_pgo`, + `ivan_pgo_transformer`, `unrefined_pgo`) → port all four component dirs so eval + doesn't break. (If baselines are unwanted, prune eval_all's list instead.) + +### Phase 5 — postprocessing scripts (from better_pgo) +- Port `scripts/post_process.py` → `components/loop_closure/gsc_pgo/post_process.py`. +- Port `add_april.py`, `detect_tags.py`, `make_rrd.py` alongside it (the doc's + 3-step flow references all of them). +- Rewrite their internal imports: `eval_utils.apriltags` → `dimos.navigation.jnav.utils.apriltags`, etc. +- Tag quality gates are single-sourced in `utils/apriltags.py` (`DEFAULT_*`); keep + post_process importing from there (don't duplicate constants). + +### Phase 6 — docs +- Adapt `docs/capabilities/navigation/map_postprocessing.md` → + `experimental/docs/jnav/map_postprocessing.md`. +- Rewrite every script path in the doc: + `dimos/navigation/nav_stack/modules/pgo/scripts/X.py` + → `dimos/navigation/jnav/components/loop_closure/gsc_pgo/X.py`. +- Update the `eval_utils/apriltags.py` reference → `jnav/utils/apriltags.py`. +- Add a back-link from the navigation readme if appropriate. + +### Phase 7 — wiring + tests +- `spec.py` `LoopClosure` Protocol: bring over, update any module paths. +- Blueprints: if any blueprint references the old `nav_stack`/`pgo` module path, + repoint to `jnav.components.loop_closure.gsc_pgo`. Regenerate + `all_blueprints.py` via `pytest dimos/robot/test_all_blueprints_generation.py`. +- Port tests (`test_pgo_synthetic_drift.py`, msgs tests, utils tests). Run + `uv run pytest` for the loop_closure + msgs + utils dirs. +- `ruff check --fix && ruff format`. + +## Risks / cut-corners to flag +- **C++ wire-format drift**: `.hpp` (C++) and `.py` (decode) are hand-synced. Once + the `.hpp` lives in an external repo, a change there can silently desync the + Python decoder. Mitigate: keep a `test_Graph3D` round-trip test in `jnav/msgs` + that builds against the pinned flake rev, or at least a schema-comment check. +- **Private flake in CI**: if `gsc_pgo` is private, dimos CI nix builds will fail + without a token — do not add a token to CI without asking (secrets rule). +- **eval_results/**: jnav carries committed `eval_results/*/summary.json` snapshots. + Decide whether to bring those (history/benchmarks) or regenerate. +- **Baseline PGO impls**: `ivan_pgo*` / `unrefined_pgo` carry their own inline C++ + (`unrefined_pgo/cpp`). If only `gsc_pgo` goes external, the layout is asymmetric + — acceptable (they're experimental baselines) but worth noting.