From dba1e5a3827287d8660a887713cf7a2e96537887 Mon Sep 17 00:00:00 2001 From: Jeff Hykin Date: Tue, 23 Jun 2026 19:12:24 -0700 Subject: [PATCH 01/21] recorder: async callbacks + async pose_setter_for Convert the memory2 Recorder from thread/disposable rx subscriptions to manual async callbacks via process_observable, and let pose_setter_for methods be async (awaited in _resolve_pose). Update the fastlio and go2 recorders accordingly. --- .../sensors/lidar/fastlio2/recorder.py | 4 +-- dimos/memory2/module.py | 35 +++++++++++-------- .../go2/blueprints/smart/unitree_go2.py | 9 +++-- 3 files changed, 29 insertions(+), 19 deletions(-) diff --git a/dimos/hardware/sensors/lidar/fastlio2/recorder.py b/dimos/hardware/sensors/lidar/fastlio2/recorder.py index 08784cfe89..5f081d13e9 100644 --- a/dimos/hardware/sensors/lidar/fastlio2/recorder.py +++ b/dimos/hardware/sensors/lidar/fastlio2/recorder.py @@ -45,13 +45,13 @@ class FastLio2Recorder(Recorder): _last_odom_pose: Pose | None = None @pose_setter_for("fastlio_odometry") - def _odom_pose(self, msg: Odometry) -> Pose | None: + async def _odom_pose(self, msg: Odometry) -> Pose | None: pose = getattr(msg, "pose", None) self._last_odom_pose = getattr(pose, "pose", None) if pose is not None else None return self._last_odom_pose @pose_setter_for("fastlio_lidar") - def _lidar_pose(self, msg: PointCloud2) -> Pose | None: + async def _lidar_pose(self, msg: PointCloud2) -> Pose | None: # Most-recent odometry pose, stamped directly (no tf). None before the # first odometry -> frame stored unposed, map-skipped. return self._last_odom_pose diff --git a/dimos/memory2/module.py b/dimos/memory2/module.py index 00cd46a1d0..f494a2070d 100644 --- a/dimos/memory2/module.py +++ b/dimos/memory2/module.py @@ -14,7 +14,7 @@ from __future__ import annotations -from collections.abc import Callable +from collections.abc import Awaitable, Callable import enum import inspect import os @@ -274,13 +274,14 @@ class RecorderConfig(MemoryModuleConfig): stream_remapping: dict[str, str] = Field(default_factory=dict) -PoseSetter = Callable[[Any], "Pose | None"] +PoseSetter = Callable[[Any], "Pose | None | Awaitable[Pose | None]"] def pose_setter_for(*stream_names: str) -> Callable[[Any], Any]: """Mark a method ``(self, msg) -> Pose | None`` as the pose setter for the - given recorded stream(s). Streams without a setter fall back to the tf-based - ``world <- frame_id`` lookup.""" + given recorded stream(s). The method may be sync or ``async def`` — the + recorder awaits it if it returns an awaitable. Streams without a setter fall + back to the tf-based ``world <- frame_id`` lookup.""" def decorate(fn: Any) -> Any: fn._pose_setter_for = tuple(stream_names) @@ -302,10 +303,11 @@ class MyRecorder(Recorder): Each stream's pose defaults to a ``world <- frame_id`` tf lookup; decorate a method with ``@pose_setter_for("stream")`` to source it elsewhere (e.g. from - an odometry stream):: + an odometry stream). Setters run on the module's event loop and may be + ``async def``:: @pose_setter_for("lidar") - def _lidar_pose(self, msg): + async def _lidar_pose(self, msg): return self._last_odom_pose """ @@ -368,12 +370,14 @@ def _port_to_stream(self, name: str, input_topic: In[Any], stream: Stream[Any]) already in world coords) fall back to ``config.default_frame_id`` — so every observation gets a robot-pose anchor when tf is publishing. - Registers the subscription as a disposable on this module. + Each port is recorded by an async callback dispatched on the module's + event loop via :meth:`process_observable`, which serialises invocations + and registers the subscription for cleanup on stop(). """ - def on_msg(msg: Any) -> None: + async def on_msg(msg: Any) -> None: ts = self._resolve_ts(name, msg) - pose = self._resolve_pose(name, msg, ts) + pose = await self._resolve_pose(name, msg, ts) if not pose: logger.warning( "[%s] No pose for time %s (msg ts: %s), storing without pose", @@ -383,7 +387,7 @@ def on_msg(msg: Any) -> None: ) stream.append(msg, ts=ts, pose=pose) - self.register_disposable(Disposable(input_topic.subscribe(on_msg))) + self.process_observable(input_topic.pure_observable(), on_msg) def _prepare_streams(self) -> None: """On APPEND, drop the streams this recorder is about to (re)write — the @@ -401,13 +405,16 @@ def _resolve_ts(self, name: str, msg: Any) -> float: """Timestamp to record *msg* at. Override to re-base onto another clock.""" return getattr(msg, "ts", None) or time.time() - def _resolve_pose(self, name: str, msg: Any, ts: float) -> Pose | None: + async def _resolve_pose(self, name: str, msg: Any, ts: float) -> Pose | None: """Pose to anchor *msg* with. Dispatches to the stream's - ``@pose_setter_for`` if one is defined, else falls back to a - ``world <- frame_id`` tf lookup.""" + ``@pose_setter_for`` if one is defined (awaited when async), else falls + back to a ``world <- frame_id`` tf lookup.""" setter = self._pose_setters.get(name) if setter is not None: - return cast("Pose | None", setter(msg)) + result = setter(msg) + if inspect.isawaitable(result): + result = await result + return cast("Pose | None", result) frame_id = getattr(msg, "frame_id", None) or self.config.default_frame_id transform = self.tf.get( self.config.root_frame, frame_id, time_point=ts, time_tolerance=self.config.tf_tolerance diff --git a/dimos/robot/unitree/go2/blueprints/smart/unitree_go2.py b/dimos/robot/unitree/go2/blueprints/smart/unitree_go2.py index 9993e541c4..3351a3dc16 100644 --- a/dimos/robot/unitree/go2/blueprints/smart/unitree_go2.py +++ b/dimos/robot/unitree/go2/blueprints/smart/unitree_go2.py @@ -62,13 +62,16 @@ class Go2Memory(Recorder): _last_odom_pose: Pose | None = None @pose_setter_for("odom") - def _odom_pose(self, msg: PoseStamped) -> Pose | None: + async def _odom_pose(self, msg: PoseStamped) -> Pose | None: self._last_odom_pose = msg return self._last_odom_pose @pose_setter_for("lidar") - def _lidar_pose(self, msg: PointCloud2) -> Pose | None: - return self._last_odom_pose # should always exist (odom alwyas wins the race) + async def _lidar_pose(self, msg: PointCloud2) -> Pose | None: + # Yes, it doesn't make sense to register lidar at the odom pose because the + # go2 lidar is in the world frame, but map.py (for now) needs this. + # TODO: fix map.py to use a transform frame + return getattr(self, "_last_odom_pose", None) unitree_go2_markers = ( From af6a06be3071f37af2f9df7241f8b4e0c899b4f6 Mon Sep 17 00:00:00 2001 From: Jeff Hykin Date: Tue, 23 Jun 2026 23:49:58 -0700 Subject: [PATCH 02/21] recorder: require @pose_setter_for setters to be async Raise TypeError at decoration time if a non-async function is decorated, and always await the setter in _resolve_pose. --- dimos/memory2/module.py | 25 +++++++++++++------------ 1 file changed, 13 insertions(+), 12 deletions(-) diff --git a/dimos/memory2/module.py b/dimos/memory2/module.py index f494a2070d..e16c1527e7 100644 --- a/dimos/memory2/module.py +++ b/dimos/memory2/module.py @@ -274,16 +274,20 @@ class RecorderConfig(MemoryModuleConfig): stream_remapping: dict[str, str] = Field(default_factory=dict) -PoseSetter = Callable[[Any], "Pose | None | Awaitable[Pose | None]"] +PoseSetter = Callable[[Any], "Awaitable[Pose | None]"] def pose_setter_for(*stream_names: str) -> Callable[[Any], Any]: - """Mark a method ``(self, msg) -> Pose | None`` as the pose setter for the - given recorded stream(s). The method may be sync or ``async def`` — the - recorder awaits it if it returns an awaitable. Streams without a setter fall - back to the tf-based ``world <- frame_id`` lookup.""" + """Mark an ``async def`` method ``(self, msg) -> Pose | None`` as the pose + setter for the given recorded stream(s). Streams without a setter fall back + to the tf-based ``world <- frame_id`` lookup.""" def decorate(fn: Any) -> Any: + if not inspect.iscoroutinefunction(fn): + raise TypeError( + f"@pose_setter_for must decorate an `async def` method; " + f"{getattr(fn, '__qualname__', fn)} is not async" + ) fn._pose_setter_for = tuple(stream_names) return fn @@ -406,15 +410,12 @@ def _resolve_ts(self, name: str, msg: Any) -> float: return getattr(msg, "ts", None) or time.time() async def _resolve_pose(self, name: str, msg: Any, ts: float) -> Pose | None: - """Pose to anchor *msg* with. Dispatches to the stream's - ``@pose_setter_for`` if one is defined (awaited when async), else falls - back to a ``world <- frame_id`` tf lookup.""" + """Pose to anchor *msg* with. Dispatches to the stream's (async) + ``@pose_setter_for`` if one is defined, else falls back to a + ``world <- frame_id`` tf lookup.""" setter = self._pose_setters.get(name) if setter is not None: - result = setter(msg) - if inspect.isawaitable(result): - result = await result - return cast("Pose | None", result) + return cast("Pose | None", await setter(msg)) frame_id = getattr(msg, "frame_id", None) or self.config.default_frame_id transform = self.tf.get( self.config.root_frame, frame_id, time_point=ts, time_tolerance=self.config.tf_tolerance From df4d802ebbf26daf0868536c50a5c0c9c3913444 Mon Sep 17 00:00:00 2001 From: Jeff Hykin Date: Wed, 24 Jun 2026 15:10:35 +0800 Subject: [PATCH 03/21] - --- dimos/robot/unitree/go2/blueprints/smart/unitree_go2.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/dimos/robot/unitree/go2/blueprints/smart/unitree_go2.py b/dimos/robot/unitree/go2/blueprints/smart/unitree_go2.py index 3351a3dc16..e50cf8b8e3 100644 --- a/dimos/robot/unitree/go2/blueprints/smart/unitree_go2.py +++ b/dimos/robot/unitree/go2/blueprints/smart/unitree_go2.py @@ -68,8 +68,9 @@ async def _odom_pose(self, msg: PoseStamped) -> Pose | None: @pose_setter_for("lidar") async def _lidar_pose(self, msg: PointCloud2) -> Pose | None: - # Yes, it doesn't make sense to register lidar at the odom pose because the - # go2 lidar is in the world frame, but map.py (for now) needs this. + # go2 lidar (currently) is in world-frame + # so it doesn't make sense to register lidar at the odom pose + # but we do it anyways because map.py (for now) requires it # TODO: fix map.py to use a transform frame return getattr(self, "_last_odom_pose", None) From e8287a2daf448f3ec3546a1d3d329b8408cd9b26 Mon Sep 17 00:00:00 2001 From: Jeff Hykin Date: Wed, 24 Jun 2026 14:51:57 +0800 Subject: [PATCH 04/21] add gsc_pgo loop-closure module pinned to public gsc_pgo repo --- .../components/loop_closure/gsc_pgo/module.py | 231 ++++++++++++++++++ 1 file changed, 231 insertions(+) create mode 100644 dimos/navigation/jnav/components/loop_closure/gsc_pgo/module.py diff --git a/dimos/navigation/jnav/components/loop_closure/gsc_pgo/module.py b/dimos/navigation/jnav/components/loop_closure/gsc_pgo/module.py new file mode 100644 index 0000000000..a0d0c22099 --- /dev/null +++ b/dimos/navigation/jnav/components/loop_closure/gsc_pgo/module.py @@ -0,0 +1,231 @@ +# Copyright 2026 Dimensional Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Native C++ PGO module — faithful reimplementation of the original nav stack PGO. + +Uses GTSAM iSAM2 for pose graph optimization and PCL ICP for loop closure. +""" + +from __future__ import annotations + +from pathlib import Path + +from reactivex.disposable import Disposable + +from dimos.core.core import rpc +from dimos.core.native_module import NativeModule, NativeModuleConfig +from dimos.core.stream import In, Out +from dimos.msgs.geometry_msgs.Transform import Transform +from dimos.msgs.nav_msgs.Odometry import Odometry +from dimos.msgs.sensor_msgs.PointCloud2 import PointCloud2 +from dimos.msgs.vision_msgs.Detection3DArray import Detection3DArray +from dimos.navigation.jnav.msgs.Graph3D import Graph3D +from dimos.navigation.jnav.msgs.GraphDelta3D import GraphDelta3D +from dimos.navigation.jnav.msgs.Landmark import Landmark +from dimos.utils.logging_config import setup_logger + +logger = setup_logger() + + +class PGOConfig(NativeModuleConfig): + # C++ + nix flake live in the standalone repo github.com/jeff-hykin/gsc_pgo. + # Pinned to a rev for reproducibility; bump when the C++ changes. The build + # runs in this module dir and drops a `result` symlink here (gitignored). + cwd: str | None = str(Path(__file__).resolve().parent) + executable: str = "result/bin/pgo" + build_command: str | None = ( + 'nix build "github:jeff-hykin/gsc_pgo/494e7a1d657c3702ec805c9e3d251a2fe8bc9529#default"' + " --no-write-lock-file" + ) + + frame_id: str = "map" + child_frame_id: str = "odom" + body_frame: str = "base_link" + + # Keyframe detection + key_pose_delta_deg: float = 10.0 + key_pose_delta_trans: float = 0.5 + + # Loop closure + loop_search_radius: float = 3.0 + loop_time_thresh: float = 5.0 + loop_score_thresh: float = 0.15 + loop_submap_half_range: int = 5 + submap_resolution: float = 0.1 + min_loop_detect_duration: float = 2.0 + # Feature-poverty gate: skip loop search when the scan's descriptor + # vertical-structure std is below this (open grass can't place itself -> + # PGO no-op). 0 = off. Superseded by loop_min_occupancy/loop_min_degeneracy + # (structure overlaps too much between scenes to threshold cleanly). + min_descriptor_std: float = 0.0 + + # Structure-spread gate: require >= this many occupied Scan-Context cells. + # Open grass clusters returns near the sensor (few rings filled); built + # scenes spread out to range. Calibrated on go2 fastlio (1200-cell 20x60 + # descriptor): grassy ~70 vs gir_park ~88 vs downtown ~120 at equal point + # count -> measures spread, not density. 0 disables. + loop_min_occupancy: int = 80 + # Observability gate (Zhang 2016 / X-ICP degeneracy): reject a candidate + # whose source scan's smallest normalized normal-scatter eigenvalue is below + # this. Planar/degenerate (grass) -> ~0; ICP slides in-plane and reports low + # fitness for a bogus closure. Real scenes (incl. sparse gir_park) sit >0.15. + # 0 disables. + loop_min_degeneracy: float = 0.05 + + # Input mode: transform world-frame scans to body-frame using odom + unregister_input: bool = True + + # Debug global-map publishing — OFF by default. Emitted on the internal + # `_global_map` port (leading underscore) so it never autoconnects to a + # consumer's `global_map` In: the terrain_mapper is the planner's single + # authoritative global_map. Two producers on `global_map` made the costmap + # flicker. Set a rate > 0 only for viz/debug of the PGO's corrected cloud. + global_map_voxel_size: float = 0.1 + global_map_publish_rate: float = 0.0 + + # Scan Context place recognition (used by loop closure search) + use_scan_context: bool = True + scan_context_num_rings: int = 20 + scan_context_num_sectors: int = 60 + scan_context_max_range_m: float = 80.0 + scan_context_top_k: int = 10 + scan_context_match_threshold: float = 0.4 + scan_context_lidar_height_m: float = 2.0 + + # Skip ICP on candidates farther than this (m). 0 disables. + loop_candidate_max_distance_m: float = 30.0 + + # --- Tag (AprilTag/ArUco) loop closure -------------------------------- + use_tag_loop_closure: bool = False + # LCM channel of the static TF tree (dimos "pattern#msg_name" convention). + tf_static_channel: str = "/tf_static#tf2_msgs.TFMessage" + tag_loop_time_thresh: float = 5.0 + tag_assoc_max_dt: float = 0.1 + tag_buffer_window: float = 2.0 + # Anisotropic tag noise (variances, tag frame; normal = +z): in-plane/yaw + # tight, range/out-of-plane loose so tag range error can't distort z. + tag_var_inplane_trans_m2: float = 0.0025 + tag_var_range_trans_m2: float = 0.25 + tag_var_yaw_rot_rad2: float = 0.0025 + tag_var_outplane_rot_rad2: float = 0.04 + # 6-DOF Mahalanobis gate vs current estimate (chi^2 95% = 12.59). 0 = off. + tag_consistency_chi2: float = 0.0 + # Robust (Huber) kernel on all loop factors (lidar + tag). Off = original. + loop_robust_kernel: bool = False + loop_robust_huber_k: float = 1.345 + + # --- Landmark events (decoupled perceiver -> PGO factor-graph manager) ---- + # When set, the PGO ingests Landmark events on the `landmarks` In and + # attaches each as a graph landmark variable + a BetweenFactor(keyframe, + # landmark). Two sightings of the same landmark id share the variable, so a + # revisit closes the loop and GTSAM optimizes it jointly. A separate + # perceiver (utils/apriltag_perceiver.py) does the detection + noise/ + # confidence filtering and emits the Landmark events. Off by default. + use_landmarks: bool = False + # Anisotropic landmark observation noise (variances, landmark frame; normal = + # +z): in-plane/yaw tight, range/out-of-plane loose (mirrors the tag model). + landmark_var_inplane_trans_m2: float = 0.0025 + landmark_var_range_trans_m2: float = 0.25 + landmark_var_yaw_rot_rad2: float = 0.0025 + landmark_var_outplane_rot_rad2: float = 0.04 + landmark_assoc_max_dt: float = 0.2 + landmark_buffer_window: float = 3.0 + + # --- Gravity anchor ------------------------------------------------------ + # Pin keyframe 0 (whose orientation is gravity-aligned by the LIO front end) + # so landmark/loop closures cannot rotate the initial roll/pitch off gravity. + # The full pose is pinned (also the gauge reference); roll/pitch stiffness is + # the gravity component. Variances (smaller = stiffer). + gravity_anchor: bool = True + gravity_anchor_rp_var: float = 1e-12 + gravity_anchor_yaw_var: float = 1e-12 + gravity_anchor_trans_var: float = 1e-12 + # Per-keyframe gravity anchor (roll/pitch-only prior on EVERY keyframe). Anchoring + # only kf0 lets a big loop closure tilt inner keyframes' roll/pitch, which converts + # horizontal travel into vertical and corrupts z by tens of metres. Pinning every + # keyframe's roll/pitch to its gravity-aligned LIO orientation (yaw + translation + # left free) keeps the closure in-plane and preserves the z structure. + # Default OFF: the anisotropic odometry between-factor is the primary gravity- + # preservation mechanism (and still lets landmarks correct slow tilt drift). + # This absolute prior is a harder lock for when the front end's absolute tilt + # is trustworthy (e.g. ZUPT in the LIO estimator). + gravity_anchor_per_keyframe: bool = False + gravity_anchor_kf_rp_var: float = 1e-4 + + # Anisotropic odometry between-factor: the LIO relative roll/pitch is accurate + # (IMU sees gravity each step) but yaw drifts, so roll/pitch are stiff and yaw + # looser. This keeps a loop closure from sloshing its (mostly-yaw) correction + # into roll/pitch — a tilt that converts horizontal travel into vertical and + # corrupts z. Roll/pitch variance is small but nonzero, so landmarks can still + # correct slow tilt drift across the graph ("accurate but not perfect"). + odom_rot_rp_var: float = 1e-8 + odom_rot_yaw_var: float = 1e-5 + odom_trans_xy_var: float = 1e-4 + odom_trans_z_var: float = 1e-6 + + # Bounded FIFO depth: keep at most this many pending scans, dropping the + # oldest when full (<=0 = unbounded). Generous enough that an ack-gated eval + # replay never drops a scan, bounded enough to cap live latency/memory. + max_scan_queue: int = 100 + + debug: bool = False + + +class PGO(NativeModule): + """Pose graph optimization with loop closure using GTSAM iSAM2 + PCL ICP.""" + + config: PGOConfig + + # named "lidar" to match the LoopClosure spec; the binary pairs it with the + # latest odometry pose internally, so a raw sensor-frame scan is expected. + lidar: In[PointCloud2] + odometry: In[Odometry] + # Optional: tag detections (tag-in-optical pose + numeric id) for tag-based + # loop closure. Only consumed when config.use_tag_loop_closure is set. + tag_detections: In[Detection3DArray] + # Optional: decoupled Landmark events from a perceiver (e.g. the AprilTag + # perceiver). Only consumed when config.use_landmarks is set; each becomes a + # graph landmark variable + observation factor that GTSAM optimizes jointly. + landmarks: In[Landmark] + corrected_odometry: Out[Odometry] + correction: Out[Transform] + # Internal/debug only (off by default) — see global_map_publish_rate. Named + # with a leading underscore so autoconnect won't wire it to `global_map` Ins. + _global_map: Out[PointCloud2] + pose_graph: Out[Graph3D] + loop_closure_event: Out[GraphDelta3D] + + @rpc + def start(self) -> None: + super().start() + self.tf.publish( + Transform( + frame_id=self.config.frame_id, + child_frame_id=self.config.child_frame_id, + ) + ) + self.register_disposable( + Disposable( + self.correction.transport.subscribe(self._on_correction_for_tf, self.correction) + ) + ) + if self.config.debug: + logger.info("PGO native module started (C++ iSAM2 + PCL ICP)") + + def _on_correction_for_tf(self, msg: Transform) -> None: + self.tf.publish(msg) + + @rpc + def stop(self) -> None: + super().stop() From f9d3acb8f8e7f7a26c71b696366259a326d0ad29 Mon Sep 17 00:00:00 2001 From: Jeff Hykin Date: Wed, 24 Jun 2026 14:52:11 +0800 Subject: [PATCH 05/21] add jnav PGO message types (Graph3D, GraphDelta3D, Landmark, Marker) --- dimos/navigation/jnav/msgs/Graph3D.hpp | 175 +++++++++++++++ dimos/navigation/jnav/msgs/Graph3D.py | 233 ++++++++++++++++++++ dimos/navigation/jnav/msgs/GraphDelta3D.hpp | 168 ++++++++++++++ dimos/navigation/jnav/msgs/GraphDelta3D.py | 198 +++++++++++++++++ dimos/navigation/jnav/msgs/Landmark.py | 200 +++++++++++++++++ dimos/navigation/jnav/msgs/Marker.py | 97 ++++++++ dimos/navigation/jnav/msgs/test_Landmark.py | 129 +++++++++++ dimos/navigation/jnav/msgs/test_Marker.py | 67 ++++++ 8 files changed, 1267 insertions(+) create mode 100644 dimos/navigation/jnav/msgs/Graph3D.hpp create mode 100644 dimos/navigation/jnav/msgs/Graph3D.py create mode 100644 dimos/navigation/jnav/msgs/GraphDelta3D.hpp create mode 100644 dimos/navigation/jnav/msgs/GraphDelta3D.py create mode 100644 dimos/navigation/jnav/msgs/Landmark.py create mode 100644 dimos/navigation/jnav/msgs/Marker.py create mode 100644 dimos/navigation/jnav/msgs/test_Landmark.py create mode 100644 dimos/navigation/jnav/msgs/test_Marker.py diff --git a/dimos/navigation/jnav/msgs/Graph3D.hpp b/dimos/navigation/jnav/msgs/Graph3D.hpp new file mode 100644 index 0000000000..a440127ae2 --- /dev/null +++ b/dimos/navigation/jnav/msgs/Graph3D.hpp @@ -0,0 +1,175 @@ +// Copyright 2026 Dimensional Inc. +// SPDX-License-Identifier: Apache-2.0 +// +// Typed C++ helper mirroring the Python `dimos.msgs.nav_msgs.Graph3D`. +// Canonical schema lives in `dimos/msgs/nav_msgs/Graph3D.ksy` — keep +// encode() in sync with that file (and with Graph3D.py.lcm_decode). +// +// Wire format (big-endian): +// +// uint64 edge_count +// uint64 node_count +// double timestamp // seconds since epoch +// per node (node_count): +// pose_stamped: +// double ts +// uint32 frame_id_len +// bytes frame_id (utf-8, no terminator) +// 7×double pos_x, pos_y, pos_z, quat_x, quat_y, quat_z, quat_w +// uint64 id +// uint64 metadata_id +// per edge (edge_count): +// uint64 start_id +// uint64 end_id +// double timestamp +// uint64 metadata_id +// +// Edges reference nodes by `id`, not by index. + +#pragma once + +#include +#include +#include +#include +#include + +#include + +namespace dimos { + +namespace graph3d_detail { + +// Host-order → big-endian byte writers. Avoid for portability +// (macOS uses different names) — write byte-by-byte from the top. + +inline void write_u32_be(std::vector& out, uint32_t v) { + out.push_back(static_cast((v >> 24) & 0xFF)); + out.push_back(static_cast((v >> 16) & 0xFF)); + out.push_back(static_cast((v >> 8) & 0xFF)); + out.push_back(static_cast( v & 0xFF)); +} + +inline void write_u64_be(std::vector& out, uint64_t v) { + for (int shift = 56; shift >= 0; shift -= 8) { + out.push_back(static_cast((v >> shift) & 0xFF)); + } +} + +inline void write_double_be(std::vector& out, double v) { + uint64_t bits; + std::memcpy(&bits, &v, sizeof(bits)); + write_u64_be(out, bits); +} + +inline void write_bytes(std::vector& out, const std::string& s) { + out.insert(out.end(), s.begin(), s.end()); +} + +} // namespace graph3d_detail + +class Graph3D { +public: + struct PoseStamped { + double ts = 0.0; + std::string frame_id; + double pos_x = 0.0, pos_y = 0.0, pos_z = 0.0; + double quat_x = 0.0, quat_y = 0.0, quat_z = 0.0, quat_w = 1.0; + }; + + struct Node3D { + PoseStamped pose; + uint64_t id = 0; + uint64_t metadata_id = 0; + }; + + struct Edge { + uint64_t start_id = 0; + uint64_t end_id = 0; + double timestamp = 0.0; + uint64_t metadata_id = 0; + }; + + Graph3D(std::string frame_id, double timestamp) + : frame_id_(std::move(frame_id)), timestamp_(timestamp) {} + + void reserve_nodes(size_t capacity) { nodes_.reserve(capacity); } + void reserve_edges(size_t capacity) { edges_.reserve(capacity); } + + // Add a node. The pose's frame_id defaults to the graph's frame_id — + // override per-node only if a node lives in a different frame. + void add_node(uint64_t id, uint64_t metadata_id, double pose_ts, + double pos_x, double pos_y, double pos_z, + double quat_x, double quat_y, double quat_z, double quat_w, + std::string node_frame_id = "") { + PoseStamped pose; + pose.ts = pose_ts; + pose.frame_id = node_frame_id.empty() ? frame_id_ : std::move(node_frame_id); + pose.pos_x = pos_x; pose.pos_y = pos_y; pose.pos_z = pos_z; + pose.quat_x = quat_x; pose.quat_y = quat_y; pose.quat_z = quat_z; pose.quat_w = quat_w; + nodes_.push_back({pose, id, metadata_id}); + } + + // Position-only convenience (orientation defaults to identity). + void add_node_xyz(uint64_t id, uint64_t metadata_id, double pose_ts, + double pos_x, double pos_y, double pos_z) { + add_node(id, metadata_id, pose_ts, pos_x, pos_y, pos_z, 0.0, 0.0, 0.0, 1.0); + } + + void add_edge(uint64_t start_id, uint64_t end_id, double edge_ts, + uint64_t metadata_id = 0) { + edges_.push_back({start_id, end_id, edge_ts, metadata_id}); + } + + size_t node_count() const { return nodes_.size(); } + size_t edge_count() const { return edges_.size(); } + const std::string& frame_id() const { return frame_id_; } + + std::vector encode() const { + using namespace graph3d_detail; + std::vector out; + // Conservative reservation: header + per-node fixed bytes + per-edge. + // frame_id strings add variable length on top — that just causes a + // realloc, not correctness issues. + out.reserve(24 + nodes_.size() * 84 + edges_.size() * 32); + write_u64_be(out, static_cast(edges_.size())); + write_u64_be(out, static_cast(nodes_.size())); + write_double_be(out, timestamp_); + for (const auto& n : nodes_) { + // pose_stamped first (per Graph3D.ksy) + write_double_be(out, n.pose.ts); + write_u32_be(out, static_cast(n.pose.frame_id.size())); + write_bytes(out, n.pose.frame_id); + write_double_be(out, n.pose.pos_x); + write_double_be(out, n.pose.pos_y); + write_double_be(out, n.pose.pos_z); + write_double_be(out, n.pose.quat_x); + write_double_be(out, n.pose.quat_y); + write_double_be(out, n.pose.quat_z); + write_double_be(out, n.pose.quat_w); + // then id, metadata_id + write_u64_be(out, n.id); + write_u64_be(out, n.metadata_id); + } + for (const auto& e : edges_) { + write_u64_be(out, e.start_id); + write_u64_be(out, e.end_id); + write_double_be(out, e.timestamp); + write_u64_be(out, e.metadata_id); + } + return out; + } + + int publish(lcm::LCM& lcm, const std::string& channel) const { + std::vector bytes = encode(); + return lcm.publish(channel, bytes.data(), static_cast(bytes.size())); + } + +private: + std::string frame_id_; + double timestamp_; + std::vector nodes_; + std::vector edges_; +}; + +} // namespace dimos diff --git a/dimos/navigation/jnav/msgs/Graph3D.py b/dimos/navigation/jnav/msgs/Graph3D.py new file mode 100644 index 0000000000..89d3b51e72 --- /dev/null +++ b/dimos/navigation/jnav/msgs/Graph3D.py @@ -0,0 +1,233 @@ +# Copyright 2026 Dimensional Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Graph3D: pose-graph / visibility-graph message with typed nodes and edges. + +Edges reference nodes by ``id`` (not list index), so producers are free +to reorder or re-emit nodes between snapshots. ``metadata_id`` is a +caller-defined enum — ex: for far_planner: 0=normal, 1=odom, 2=goal +""" + +from __future__ import annotations + +from dataclasses import dataclass +import struct +import time +from typing import TYPE_CHECKING, BinaryIO + +from dimos.msgs.geometry_msgs.PoseStamped import PoseStamped +from dimos.msgs.geometry_msgs.Quaternion import Quaternion +from dimos.msgs.geometry_msgs.Vector3 import Vector3 +from dimos.types.timestamped import Timestamped + +if TYPE_CHECKING: + from rerun._baseclasses import Archetype + + +_DEFAULT_NODE_COLORS: dict[int, tuple[int, int, int, int]] = { + 0: (180, 180, 180, 200), + 1: (0, 255, 0, 255), + 2: (255, 0, 0, 255), + 3: (255, 165, 0, 200), + 4: (0, 200, 255, 200), +} +_DEFAULT_NODE_COLOR = (200, 200, 200, 180) + +_DEFAULT_EDGE_COLORS: dict[int, tuple[int, int, int, int]] = { + 0: (0, 220, 100, 200), # odom / traversable — green + 1: (255, 180, 0, 220), # loop_closure / partial — yellow + 2: (255, 50, 50, 150), # blocked — red +} +_DEFAULT_EDGE_COLOR = (180, 180, 180, 180) + + +class Graph3D(Timestamped): + msg_name = "nav_msgs.Graph3D" + + @dataclass + class Node3D: + pose: PoseStamped + id: int = 0 + metadata_id: int = 0 + + @dataclass + class Edge: + start_id: int + end_id: int + timestamp: float = 0.0 + metadata_id: int = 0 + + ts: float + nodes: list[Node3D] + edges: list[Edge] + + def __init__( + self, + ts: float = 0.0, + nodes: list[Graph3D.Node3D] | None = None, + edges: list[Graph3D.Edge] | None = None, + ) -> None: + self.ts = ts if ts != 0 else time.time() + self.nodes = nodes if nodes is not None else [] + self.edges = edges if edges is not None else [] + + def lcm_encode(self) -> bytes: + # Field order matches Graph3D.ksy: edge_count, node_count, ts, + # nodes[] (pose, id, metadata_id), edges[]. + parts: list[bytes] = [] + parts.append(struct.pack(">QQd", len(self.edges), len(self.nodes), self.ts)) + for node in self.nodes: + frame_id_bytes = node.pose.frame_id.encode("utf-8") + parts.append(struct.pack(">d", node.pose.ts)) + parts.append(struct.pack(">I", len(frame_id_bytes))) + parts.append(frame_id_bytes) + parts.append( + struct.pack( + ">7d", + node.pose.position.x, + node.pose.position.y, + node.pose.position.z, + node.pose.orientation.x, + node.pose.orientation.y, + node.pose.orientation.z, + node.pose.orientation.w, + ) + ) + parts.append(struct.pack(">QQ", node.id, node.metadata_id)) + for edge in self.edges: + parts.append( + struct.pack(">QQdQ", edge.start_id, edge.end_id, edge.timestamp, edge.metadata_id) + ) + return b"".join(parts) + + @classmethod + def lcm_decode(cls, data: bytes | BinaryIO) -> Graph3D: + buf = data if isinstance(data, (bytes, bytearray)) else data.read() + offset = 0 + edge_count, node_count, graph_ts = struct.unpack_from(">QQd", buf, offset) + offset += 24 + + nodes: list[Graph3D.Node3D] = [] + for _ in range(node_count): + (pose_ts,) = struct.unpack_from(">d", buf, offset) + offset += 8 + (frame_id_len,) = struct.unpack_from(">I", buf, offset) + offset += 4 + frame_id = buf[offset : offset + frame_id_len].decode("utf-8") + offset += frame_id_len + px, py, pz, qx, qy, qz, qw = struct.unpack_from(">7d", buf, offset) + offset += 56 + node_id, metadata_id = struct.unpack_from(">QQ", buf, offset) + offset += 16 + pose = PoseStamped( + ts=pose_ts, + frame_id=frame_id, + position=Vector3(px, py, pz), + orientation=Quaternion(qx, qy, qz, qw), + ) + nodes.append(cls.Node3D(pose=pose, id=node_id, metadata_id=metadata_id)) + + edges: list[Graph3D.Edge] = [] + for _ in range(edge_count): + start_id, end_id, edge_ts, edge_metadata_id = struct.unpack_from(">QQdQ", buf, offset) + offset += 32 + edges.append( + cls.Edge( + start_id=start_id, + end_id=end_id, + timestamp=edge_ts, + metadata_id=edge_metadata_id, + ) + ) + + return cls(ts=graph_ts, nodes=nodes, edges=edges) + + def to_rerun( + self, + z_offset: float = 0.0, + radii: float = 0.12, + node_colors: dict[int, tuple[int, int, int, int]] | None = None, + ) -> Archetype: + """Default visualization: ``rr.Points3D`` of just the nodes. + + For nodes + edges in separate entity sub-paths, use + ``to_rerun_multi`` from a ``visual_override`` callback. + """ + import rerun as rr + + nc = node_colors if node_colors is not None else _DEFAULT_NODE_COLORS + positions = [ + [n.pose.position.x, n.pose.position.y, n.pose.position.z + z_offset] for n in self.nodes + ] + colors = [nc.get(n.metadata_id, _DEFAULT_NODE_COLOR) for n in self.nodes] + node_radii = [radii * 2.0 if n.metadata_id in (1, 2) else radii for n in self.nodes] + return rr.Points3D(positions, colors=colors, radii=node_radii) + + def to_rerun_multi( + self, + base_path: str, + z_offset: float = 0.0, + node_radius: float = 0.12, + edge_radius: float = 0.04, + node_colors: dict[int, tuple[int, int, int, int]] | None = None, + edge_colors: dict[int, tuple[int, int, int, int]] | None = None, + ) -> list[tuple[str, Archetype]]: + """Return ``[(base_path/nodes, Points3D), (base_path/edges, LineStrips3D)]``. + + Intended for use from ``visual_override`` callbacks where the + bridge supports the ``RerunMulti`` list-of-tuples form. + """ + import rerun as rr + + nc = node_colors if node_colors is not None else _DEFAULT_NODE_COLORS + ec = edge_colors if edge_colors is not None else _DEFAULT_EDGE_COLORS + + node_positions = [ + [n.pose.position.x, n.pose.position.y, n.pose.position.z + z_offset] for n in self.nodes + ] + node_colors_list = [nc.get(n.metadata_id, _DEFAULT_NODE_COLOR) for n in self.nodes] + node_radii = [ + node_radius * 2.0 if n.metadata_id in (1, 2) else node_radius for n in self.nodes + ] + nodes_archetype = rr.Points3D(node_positions, colors=node_colors_list, radii=node_radii) + + id_to_pose: dict[int, PoseStamped] = {n.id: n.pose for n in self.nodes} + strips: list[list[list[float]]] = [] + edge_colors_list: list[tuple[int, int, int, int]] = [] + for edge in self.edges: + start = id_to_pose.get(edge.start_id) + end = id_to_pose.get(edge.end_id) + if start is None or end is None: + continue + strips.append( + [ + [start.position.x, start.position.y, start.position.z + z_offset], + [end.position.x, end.position.y, end.position.z + z_offset], + ] + ) + edge_colors_list.append(ec.get(edge.metadata_id, _DEFAULT_EDGE_COLOR)) + edges_archetype = rr.LineStrips3D( + strips, colors=edge_colors_list, radii=[edge_radius] * len(strips) + ) + + return [ + (f"{base_path}/nodes", nodes_archetype), + (f"{base_path}/edges", edges_archetype), + ] + + def __len__(self) -> int: + return len(self.nodes) + + def __str__(self) -> str: + return f"Graph3D(nodes={len(self.nodes)}, edges={len(self.edges)})" diff --git a/dimos/navigation/jnav/msgs/GraphDelta3D.hpp b/dimos/navigation/jnav/msgs/GraphDelta3D.hpp new file mode 100644 index 0000000000..4db42eb71c --- /dev/null +++ b/dimos/navigation/jnav/msgs/GraphDelta3D.hpp @@ -0,0 +1,168 @@ +// Copyright 2026 Dimensional Inc. +// SPDX-License-Identifier: Apache-2.0 +// +// Typed C++ helper mirroring the Python `dimos.msgs.nav_msgs.GraphDelta3D`. +// +// Wire format (big-endian): +// +// uint64 node_count +// double timestamp // seconds since epoch +// per node (node_count): +// pose_stamped: // (same as Graph3D's node3d pose) +// double ts +// uint32 frame_id_len +// bytes frame_id (utf-8, no terminator) +// 7×double pos_x, pos_y, pos_z, quat_x, quat_y, quat_z, quat_w +// uint64 id +// uint64 metadata_id +// per transform (node_count): +// 7×double translation_x, translation_y, translation_z, +// rotation_x, rotation_y, rotation_z, rotation_w +// +// Two aligned arrays: ``transforms[i]`` is the SE(3) delta about to +// be applied to ``nodes[i]``. ``post_pose = transforms[i] * nodes[i].pose`` +// is the convention (left-multiply). +// +// `GraphDelta3D.py.lcm_decode` reads exactly this layout — keep in sync. + +#pragma once + +#include +#include +#include +#include +#include + +#include + +namespace dimos { + +namespace graph_delta3d_detail { + +inline void write_u32_be(std::vector& out, uint32_t v) { + out.push_back(static_cast((v >> 24) & 0xFF)); + out.push_back(static_cast((v >> 16) & 0xFF)); + out.push_back(static_cast((v >> 8) & 0xFF)); + out.push_back(static_cast( v & 0xFF)); +} + +inline void write_u64_be(std::vector& out, uint64_t v) { + for (int shift = 56; shift >= 0; shift -= 8) { + out.push_back(static_cast((v >> shift) & 0xFF)); + } +} + +inline void write_double_be(std::vector& out, double v) { + uint64_t bits; + std::memcpy(&bits, &v, sizeof(bits)); + write_u64_be(out, bits); +} + +inline void write_bytes(std::vector& out, const std::string& s) { + out.insert(out.end(), s.begin(), s.end()); +} + +} // namespace graph_delta3d_detail + +class GraphDelta3D { +public: + struct PoseStamped { + double ts = 0.0; + std::string frame_id; + double pos_x = 0.0, pos_y = 0.0, pos_z = 0.0; + double quat_x = 0.0, quat_y = 0.0, quat_z = 0.0, quat_w = 1.0; + }; + + struct Node3D { + PoseStamped pose; + uint64_t id = 0; + uint64_t metadata_id = 0; + }; + + struct Transform { + double translation_x = 0.0, translation_y = 0.0, translation_z = 0.0; + double rotation_x = 0.0, rotation_y = 0.0, rotation_z = 0.0, rotation_w = 1.0; + }; + + GraphDelta3D(std::string frame_id, double timestamp) + : frame_id_(std::move(frame_id)), timestamp_(timestamp) {} + + void reserve(size_t capacity) { + nodes_.reserve(capacity); + transforms_.reserve(capacity); + } + + // Add a node + its SE(3) delta. Pass empty `node_frame_id` to inherit + // the graph's frame_id. + void add(uint64_t id, uint64_t metadata_id, double pose_ts, + double pos_x, double pos_y, double pos_z, + double quat_x, double quat_y, double quat_z, double quat_w, + double translation_x, double translation_y, double translation_z, + double rotation_x, double rotation_y, double rotation_z, double rotation_w, + std::string node_frame_id = "") { + Node3D node; + node.id = id; + node.metadata_id = metadata_id; + node.pose.ts = pose_ts; + node.pose.frame_id = node_frame_id.empty() ? frame_id_ : std::move(node_frame_id); + node.pose.pos_x = pos_x; node.pose.pos_y = pos_y; node.pose.pos_z = pos_z; + node.pose.quat_x = quat_x; node.pose.quat_y = quat_y; + node.pose.quat_z = quat_z; node.pose.quat_w = quat_w; + nodes_.push_back(node); + + Transform tf; + tf.translation_x = translation_x; tf.translation_y = translation_y; tf.translation_z = translation_z; + tf.rotation_x = rotation_x; tf.rotation_y = rotation_y; + tf.rotation_z = rotation_z; tf.rotation_w = rotation_w; + transforms_.push_back(tf); + } + + size_t size() const { return nodes_.size(); } + bool empty() const { return nodes_.empty(); } + const std::string& frame_id() const { return frame_id_; } + + std::vector encode() const { + using namespace graph_delta3d_detail; + std::vector out; + out.reserve(16 + nodes_.size() * (84 + 56)); + write_u64_be(out, static_cast(nodes_.size())); + write_double_be(out, timestamp_); + for (const auto& n : nodes_) { + write_double_be(out, n.pose.ts); + write_u32_be(out, static_cast(n.pose.frame_id.size())); + write_bytes(out, n.pose.frame_id); + write_double_be(out, n.pose.pos_x); + write_double_be(out, n.pose.pos_y); + write_double_be(out, n.pose.pos_z); + write_double_be(out, n.pose.quat_x); + write_double_be(out, n.pose.quat_y); + write_double_be(out, n.pose.quat_z); + write_double_be(out, n.pose.quat_w); + write_u64_be(out, n.id); + write_u64_be(out, n.metadata_id); + } + for (const auto& t : transforms_) { + write_double_be(out, t.translation_x); + write_double_be(out, t.translation_y); + write_double_be(out, t.translation_z); + write_double_be(out, t.rotation_x); + write_double_be(out, t.rotation_y); + write_double_be(out, t.rotation_z); + write_double_be(out, t.rotation_w); + } + return out; + } + + int publish(lcm::LCM& lcm, const std::string& channel) const { + std::vector bytes = encode(); + return lcm.publish(channel, bytes.data(), static_cast(bytes.size())); + } + +private: + std::string frame_id_; + double timestamp_; + std::vector nodes_; + std::vector transforms_; +}; + +} // namespace dimos diff --git a/dimos/navigation/jnav/msgs/GraphDelta3D.py b/dimos/navigation/jnav/msgs/GraphDelta3D.py new file mode 100644 index 0000000000..36fccca459 --- /dev/null +++ b/dimos/navigation/jnav/msgs/GraphDelta3D.py @@ -0,0 +1,198 @@ +# Copyright 2026 Dimensional Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""GraphDelta3D: per-node SE(3) transforms about to be applied to a list of nodes. + +Two aligned arrays: ``nodes[i]`` is the node, ``transforms[i]`` is the +SE(3) delta about to be applied to it. ``post_pose = transforms[i] * +nodes[i].pose`` is the convention (left-multiply). + +Use case: PGO publishes this on ``loop_closure_event`` when iSAM2 +smooths the pose graph — ``nodes[i]`` is the keyframe pre-smooth, +``transforms[i]`` is the delta iSAM2 just applied to it. Consumers can +re-derive post-poses or filter to large deltas. + +Wire format mirrors ``Graph3D`` conventions: big-endian, ``Node3D`` +serialization shared, ``Transform`` is just 7 f8s (translation + +quaternion). Custom binary, dispatched by the ``#nav_msgs.GraphDelta3D`` +channel-name suffix. +""" + +from __future__ import annotations + +from dataclasses import dataclass +import struct +import time +from typing import TYPE_CHECKING, BinaryIO + +from dimos.msgs.geometry_msgs.PoseStamped import PoseStamped +from dimos.msgs.geometry_msgs.Quaternion import Quaternion +from dimos.msgs.geometry_msgs.Vector3 import Vector3 +from dimos.navigation.jnav.msgs.Graph3D import Graph3D +from dimos.types.timestamped import Timestamped + +if TYPE_CHECKING: + from rerun._baseclasses import Archetype + + +class GraphDelta3D(Timestamped): + msg_name = "nav_msgs.GraphDelta3D" + + # Reuse Graph3D's nested Node3D for wire-format consistency. A + # GraphDelta3D[i].node is byte-identical to a Graph3D.nodes[i]. + Node3D = Graph3D.Node3D + + @dataclass + class Transform: + """SE(3) transform — translation + rotation quaternion (xyzw).""" + + translation: Vector3 + rotation: Quaternion + + ts: float + nodes: list[Graph3D.Node3D] + transforms: list[Transform] + + def __init__( + self, + ts: float = 0.0, + nodes: list[Graph3D.Node3D] | None = None, + transforms: list[Transform] | None = None, + ) -> None: + self.ts = ts if ts != 0 else time.time() + self.nodes = nodes if nodes is not None else [] + self.transforms = transforms if transforms is not None else [] + if len(self.nodes) != len(self.transforms): + raise ValueError( + f"nodes ({len(self.nodes)}) and transforms ({len(self.transforms)}) " + "must be the same length — they're aligned arrays" + ) + + def lcm_encode(self) -> bytes: + parts: list[bytes] = [] + parts.append(struct.pack(">Qd", len(self.nodes), self.ts)) + for node in self.nodes: + frame_id_bytes = node.pose.frame_id.encode("utf-8") + parts.append(struct.pack(">d", node.pose.ts)) + parts.append(struct.pack(">I", len(frame_id_bytes))) + parts.append(frame_id_bytes) + parts.append( + struct.pack( + ">7d", + node.pose.position.x, + node.pose.position.y, + node.pose.position.z, + node.pose.orientation.x, + node.pose.orientation.y, + node.pose.orientation.z, + node.pose.orientation.w, + ) + ) + parts.append(struct.pack(">QQ", node.id, node.metadata_id)) + for transform in self.transforms: + parts.append( + struct.pack( + ">7d", + transform.translation.x, + transform.translation.y, + transform.translation.z, + transform.rotation.x, + transform.rotation.y, + transform.rotation.z, + transform.rotation.w, + ) + ) + return b"".join(parts) + + @classmethod + def lcm_decode(cls, data: bytes | BinaryIO) -> GraphDelta3D: + buf = data if isinstance(data, (bytes, bytearray)) else data.read() + offset = 0 + node_count, graph_ts = struct.unpack_from(">Qd", buf, offset) + offset += 16 + + nodes: list[Graph3D.Node3D] = [] + for _ in range(node_count): + (pose_ts,) = struct.unpack_from(">d", buf, offset) + offset += 8 + (frame_id_len,) = struct.unpack_from(">I", buf, offset) + offset += 4 + frame_id = buf[offset : offset + frame_id_len].decode("utf-8") + offset += frame_id_len + px, py, pz, qx, qy, qz, qw = struct.unpack_from(">7d", buf, offset) + offset += 56 + node_id, metadata_id = struct.unpack_from(">QQ", buf, offset) + offset += 16 + pose = PoseStamped( + ts=pose_ts, + frame_id=frame_id, + position=Vector3(px, py, pz), + orientation=Quaternion(qx, qy, qz, qw), + ) + nodes.append(Graph3D.Node3D(pose=pose, id=node_id, metadata_id=metadata_id)) + + transforms: list[GraphDelta3D.Transform] = [] + for _ in range(node_count): + tx, ty, tz, qx, qy, qz, qw = struct.unpack_from(">7d", buf, offset) + offset += 56 + transforms.append( + cls.Transform( + translation=Vector3(tx, ty, tz), + rotation=Quaternion(qx, qy, qz, qw), + ) + ) + + return cls(ts=graph_ts, nodes=nodes, transforms=transforms) + + def to_rerun( + self, + z_offset: float = 0.0, + arrow_scale: float = 1.0, + ) -> Archetype: + """Render each (node, transform) pair as an arrow from node.pose to post_pose. + + The arrow origin is the node's current position; the vector is + the translation component of the transform (scaled by + ``arrow_scale``). Rotation deltas aren't visualized by default — + callers wanting to see those can subclass. + """ + import rerun as rr + + if not self.nodes: + return rr.Arrows3D(origins=[], vectors=[]) + + origins = [] + vectors = [] + for node, transform in zip(self.nodes, self.transforms, strict=True): + origins.append( + [ + node.pose.position.x, + node.pose.position.y, + node.pose.position.z + z_offset, + ] + ) + vectors.append( + [ + transform.translation.x * arrow_scale, + transform.translation.y * arrow_scale, + transform.translation.z * arrow_scale, + ] + ) + return rr.Arrows3D(origins=origins, vectors=vectors) + + def __len__(self) -> int: + return len(self.nodes) + + def __str__(self) -> str: + return f"GraphDelta3D(nodes={len(self.nodes)})" diff --git a/dimos/navigation/jnav/msgs/Landmark.py b/dimos/navigation/jnav/msgs/Landmark.py new file mode 100644 index 0000000000..d299fa3404 --- /dev/null +++ b/dimos/navigation/jnav/msgs/Landmark.py @@ -0,0 +1,200 @@ +# Copyright 2026 Dimensional Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Landmark: a cross-map reference shared between maps by ``id``. + +Two maps that both observe the same ``id`` are bridgeable: MultiMap composes the +relative transform between their frames through the landmark. The pose is stored +relative to the frame it was observed in (a camera frame for an apriltag, an odom +frame for a relocalization event, the nearest cloud's frame for a UI click), NOT +the map root — so the uncertainty stays honest for a future gtsam graph. MultiMap +resolves it to the map root via the map's recorded tf at ``ts``. + +``id`` is URL-like and encodes the exact source so identical tag numbers from +different families/sizes don't false-bridge unrelated maps, e.g. +``apriltag://36h11/40cm/5`` or ``reloc://map0/dim_city``. + +``replacement`` (milliseconds) lets a perceiver publish a *corrective* landmark: +a consumer (e.g. the PGO factor-graph manager) wipes out any earlier landmark of +the same ``id`` observed within ``replacement`` ms before this one's ``ts`` and +keeps this one instead. ``0`` (the default) means "additive" — keep all prior +landmarks, which is what loop closure needs (the first and the revisit sighting +must both survive). A small positive window only supersedes the immediately-prior +noisy estimate of the *same* sighting, never the historical closure observation. + +Confidence is **per-axis**, not a single scalar: ``confidence_x/_y/_z`` (the +observed translation axes) and ``confidence_roll/_pitch/_yaw`` (the rotation +axes), each in ``[0, 1]``. A consumer maps each per-axis confidence to that +axis's measurement weight (variance ~ 1 / confidence), so a landmark can be well +observed on some DOF and weak on others — e.g. a face-on AprilTag pins in-plane +position + yaw tightly but its range and out-of-plane tilt are loose. ``confidence`` +(the scalar) is retained as a coarse overall value; the per-axis fields default +to it when unspecified. +""" + +from __future__ import annotations + +import struct +import time +from typing import BinaryIO + +from dimos.msgs.geometry_msgs.Pose import Pose +from dimos.msgs.geometry_msgs.Quaternion import Quaternion +from dimos.msgs.geometry_msgs.Vector3 import Vector3 +from dimos.types.timestamped import Timestamped + + +class Landmark(Timestamped): + msg_name = "jnav.Landmark" + + ts: float + id: str # URL-like, shared across maps — the bridge key + map_id: str # which map's frame system this is in + frame_id: str # the observation frame the pose is relative to + kind: str # "apriltag" | "aruco" | "reloc" | "ui_click" | "shortcut" + confidence: float # coarse overall confidence; per-axis fields default to it + pose: Pose + replacement: float # ms: wipe same-id landmarks newer than (ts - replacement); 0 = additive + # Per-axis confidence in [0, 1]: how much this landmark constrains each DOF. + confidence_x: float + confidence_y: float + confidence_z: float + confidence_roll: float + confidence_pitch: float + confidence_yaw: float + + def __init__( + self, + id: str = "", + map_id: str = "", + pose: Pose | None = None, + frame_id: str = "", + confidence: float = 0.0, + kind: str = "", + ts: float = 0.0, + replacement: float = 0.0, + confidence_x: float | None = None, + confidence_y: float | None = None, + confidence_z: float | None = None, + confidence_roll: float | None = None, + confidence_pitch: float | None = None, + confidence_yaw: float | None = None, + ) -> None: + self.ts = ts if ts != 0 else time.time() + self.id = id + self.map_id = map_id + self.frame_id = frame_id + self.kind = kind + self.confidence = confidence + self.pose = pose if pose is not None else Pose() + self.replacement = replacement + # Per-axis confidence defaults to the coarse overall confidence. + self.confidence_x = confidence if confidence_x is None else confidence_x + self.confidence_y = confidence if confidence_y is None else confidence_y + self.confidence_z = confidence if confidence_z is None else confidence_z + self.confidence_roll = confidence if confidence_roll is None else confidence_roll + self.confidence_pitch = confidence if confidence_pitch is None else confidence_pitch + self.confidence_yaw = confidence if confidence_yaw is None else confidence_yaw + + def axis_confidence(self) -> tuple[float, float, float, float, float, float]: + """Per-axis confidence as ``(x, y, z, roll, pitch, yaw)``.""" + return ( + self.confidence_x, + self.confidence_y, + self.confidence_z, + self.confidence_roll, + self.confidence_pitch, + self.confidence_yaw, + ) + + def lcm_encode(self) -> bytes: + parts: list[bytes] = [struct.pack(">d", self.ts)] + for text in (self.id, self.map_id, self.frame_id, self.kind): + encoded = text.encode("utf-8") + parts.append(struct.pack(">I", len(encoded))) + parts.append(encoded) + parts.append(struct.pack(">d", self.confidence)) + p = self.pose + parts.append( + struct.pack( + ">7d", + p.position.x, + p.position.y, + p.position.z, + p.orientation.x, + p.orientation.y, + p.orientation.z, + p.orientation.w, + ) + ) + parts.append(struct.pack(">d", self.replacement)) + parts.append( + struct.pack( + ">6d", + self.confidence_x, + self.confidence_y, + self.confidence_z, + self.confidence_roll, + self.confidence_pitch, + self.confidence_yaw, + ) + ) + return b"".join(parts) + + @classmethod + def lcm_decode(cls, data: bytes | BinaryIO) -> Landmark: + buf = data if isinstance(data, (bytes, bytearray)) else data.read() + offset = 0 + (ts,) = struct.unpack_from(">d", buf, offset) + offset += 8 + texts: list[str] = [] + for _ in range(4): + (length,) = struct.unpack_from(">I", buf, offset) + offset += 4 + texts.append(buf[offset : offset + length].decode("utf-8")) + offset += length + id_, map_id, frame_id, kind = texts + (confidence,) = struct.unpack_from(">d", buf, offset) + offset += 8 + px, py, pz, qx, qy, qz, qw = struct.unpack_from(">7d", buf, offset) + offset += 56 + pose = Pose() + pose.position = Vector3(px, py, pz) + pose.orientation = Quaternion(qx, qy, qz, qw) + # Backward-compatible: older encodings omit the trailing replacement field. + replacement = 0.0 + if offset + 8 <= len(buf): + (replacement,) = struct.unpack_from(">d", buf, offset) + offset += 8 + # Backward-compatible: older encodings omit the per-axis confidences; + # they then default to the coarse overall confidence (None -> confidence). + cx = cy = cz = croll = cpitch = cyaw = None + if offset + 48 <= len(buf): + cx, cy, cz, croll, cpitch, cyaw = struct.unpack_from(">6d", buf, offset) + return cls( + id=id_, + map_id=map_id, + pose=pose, + frame_id=frame_id, + confidence=confidence, + kind=kind, + ts=ts, + replacement=replacement, + confidence_x=cx, + confidence_y=cy, + confidence_z=cz, + confidence_roll=croll, + confidence_pitch=cpitch, + confidence_yaw=cyaw, + ) diff --git a/dimos/navigation/jnav/msgs/Marker.py b/dimos/navigation/jnav/msgs/Marker.py new file mode 100644 index 0000000000..a7ed3fef98 --- /dev/null +++ b/dimos/navigation/jnav/msgs/Marker.py @@ -0,0 +1,97 @@ +# Copyright 2026 Dimensional Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Marker: a named navigation marker — a pose in a frame, a marker name, and an +optional map name. + +Drives the objective handler's ``goal_marker`` (navigate to a named marker) and +``save_marker`` (persist the current/given pose under a name) streams, so callers +can reference waypoints by name (e.g. dim_city's ``test_waypoint_*``) and scope +them to a map, instead of passing a bare Point/Pose with no identity. +""" + +from __future__ import annotations + +import struct +import time +from typing import BinaryIO + +from dimos.msgs.geometry_msgs.Pose import Pose +from dimos.msgs.geometry_msgs.Quaternion import Quaternion +from dimos.msgs.geometry_msgs.Vector3 import Vector3 +from dimos.types.timestamped import Timestamped + + +class Marker(Timestamped): + msg_name = "jnav.Marker" + + ts: float + frame_id: str + pose: Pose + marker: str # marker name (identity) + map: str # optional map name this marker belongs to ("" = unspecified) + + def __init__( + self, + pose: Pose | None = None, + marker: str = "", + map: str = "", + ts: float = 0.0, + frame_id: str = "map", + ) -> None: + self.ts = ts if ts != 0 else time.time() + self.frame_id = frame_id + self.pose = pose if pose is not None else Pose() + self.marker = marker + self.map = map + + def lcm_encode(self) -> bytes: + parts: list[bytes] = [struct.pack(">d", self.ts)] + for text in (self.frame_id, self.marker, self.map): + encoded = text.encode("utf-8") + parts.append(struct.pack(">I", len(encoded))) + parts.append(encoded) + p = self.pose + parts.append( + struct.pack( + ">7d", + p.position.x, + p.position.y, + p.position.z, + p.orientation.x, + p.orientation.y, + p.orientation.z, + p.orientation.w, + ) + ) + return b"".join(parts) + + @classmethod + def lcm_decode(cls, data: bytes | BinaryIO) -> Marker: + buf = data if isinstance(data, (bytes, bytearray)) else data.read() + offset = 0 + (ts,) = struct.unpack_from(">d", buf, offset) + offset += 8 + texts: list[str] = [] + for _ in range(3): + (length,) = struct.unpack_from(">I", buf, offset) + offset += 4 + texts.append(buf[offset : offset + length].decode("utf-8")) + offset += length + frame_id, marker, map_name = texts + px, py, pz, qx, qy, qz, qw = struct.unpack_from(">7d", buf, offset) + pose = Pose() + pose.position = Vector3(px, py, pz) + pose.orientation = Quaternion(qx, qy, qz, qw) + return cls(pose=pose, marker=marker, map=map_name, ts=ts, frame_id=frame_id) diff --git a/dimos/navigation/jnav/msgs/test_Landmark.py b/dimos/navigation/jnav/msgs/test_Landmark.py new file mode 100644 index 0000000000..1df849151a --- /dev/null +++ b/dimos/navigation/jnav/msgs/test_Landmark.py @@ -0,0 +1,129 @@ +# Copyright 2026 Dimensional Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Roundtrip + field tests for the jnav Landmark message.""" + +from __future__ import annotations + +from dimos.memory2.codecs.base import codec_for +from dimos.memory2.codecs.lcm import LcmCodec +from dimos.msgs.geometry_msgs.Pose import Pose +from dimos.navigation.jnav.msgs.Landmark import Landmark + + +def _pose(x: float, y: float, z: float) -> Pose: + pose = Pose() + pose.position.x, pose.position.y, pose.position.z = x, y, z + return pose + + +def test_landmark_roundtrip_preserves_all_fields() -> None: + landmark = Landmark( + id="apriltag://36h11/40cm/5", + map_id="dim_city", + pose=_pose(1.5, -2.0, 0.3), + frame_id="camera_optical", + confidence=0.82, + kind="apriltag", + ts=1781565207.5, + ) + decoded = Landmark.lcm_decode(landmark.lcm_encode()) + + assert decoded.id == "apriltag://36h11/40cm/5" + assert decoded.map_id == "dim_city" + assert decoded.frame_id == "camera_optical" + assert decoded.kind == "apriltag" + assert decoded.confidence == 0.82 + assert decoded.ts == landmark.ts + assert decoded.pose.position.x == 1.5 + assert decoded.pose.position.y == -2.0 + assert decoded.pose.position.z == 0.3 + + +def test_landmark_defaults() -> None: + landmark = Landmark() + assert landmark.id == "" + assert landmark.map_id == "" + assert landmark.frame_id == "" + assert landmark.kind == "" + assert landmark.confidence == 0.0 + assert landmark.replacement == 0.0 # additive by default + assert landmark.ts > 0 # auto-stamped + + decoded = Landmark.lcm_decode(landmark.lcm_encode()) + assert decoded.id == "" and decoded.map_id == "" and decoded.kind == "" + assert decoded.replacement == 0.0 + + +def test_landmark_replacement_roundtrips() -> None: + landmark = Landmark( + id="apriltag://36h11/40cm/13", + pose=_pose(0.1, 0.2, 0.5), + kind="apriltag", + ts=1781565207.5, + replacement=2500.0, # ms: corrective landmark supersedes recent same-id ones + ) + decoded = Landmark.lcm_decode(landmark.lcm_encode()) + assert decoded.replacement == 2500.0 + + +def test_landmark_decode_tolerates_legacy_bytes_without_replacement() -> None: + """Bytes written before the replacement + per-axis fields decode to defaults.""" + landmark = Landmark(id="x", pose=_pose(0.0, 0.0, 0.0), confidence=0.7, replacement=999.0) + # Strip the trailing replacement double + the 6 per-axis confidence doubles. + legacy = landmark.lcm_encode()[: -(8 + 48)] + decoded = Landmark.lcm_decode(legacy) + assert decoded.id == "x" + assert decoded.replacement == 0.0 + # Per-axis confidences fall back to the coarse overall confidence. + assert decoded.axis_confidence() == (0.7, 0.7, 0.7, 0.7, 0.7, 0.7) + + +def test_per_axis_confidence_defaults_to_overall() -> None: + lm = Landmark(id="t", confidence=0.6) + assert lm.axis_confidence() == (0.6, 0.6, 0.6, 0.6, 0.6, 0.6) + + +def test_per_axis_confidence_roundtrips() -> None: + lm = Landmark( + id="apriltag://36h11/40cm/13", + pose=_pose(0.1, 0.2, 0.5), + kind="apriltag", + confidence=0.5, + confidence_x=0.9, + confidence_y=0.8, + confidence_z=0.7, + confidence_roll=0.2, + confidence_pitch=0.1, + confidence_yaw=0.95, + ) + d = Landmark.lcm_decode(lm.lcm_encode()) + assert d.axis_confidence() == (0.9, 0.8, 0.7, 0.2, 0.1, 0.95) + assert d.confidence == 0.5 # coarse overall preserved independently + + +def test_per_axis_confidence_can_be_silent_on_some_dof() -> None: + """A landmark can be confident on some DOF and zero on others.""" + lm = Landmark(id="t", pose=_pose(0.0, 0.0, 0.0), confidence_z=0.9, confidence_x=0.0) + d = Landmark.lcm_decode(lm.lcm_encode()) + assert d.confidence_z == 0.9 and d.confidence_x == 0.0 + + +def test_landmark_uses_lcm_codec_in_memory2() -> None: + codec = codec_for(Landmark) + assert isinstance(codec, LcmCodec) + landmark = Landmark(id="reloc://map0/dim_city", map_id="map0", confidence=0.6, kind="reloc") + decoded = codec.decode(codec.encode(landmark)) + assert decoded.id == "reloc://map0/dim_city" + assert decoded.confidence == 0.6 diff --git a/dimos/navigation/jnav/msgs/test_Marker.py b/dimos/navigation/jnav/msgs/test_Marker.py new file mode 100644 index 0000000000..61394999d7 --- /dev/null +++ b/dimos/navigation/jnav/msgs/test_Marker.py @@ -0,0 +1,67 @@ +# Copyright 2026 Dimensional Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Copyright 2026 Dimensional Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. + +"""Roundtrip + field tests for the jnav Marker message.""" + +from __future__ import annotations + +from dimos.msgs.geometry_msgs.Pose import Pose +from dimos.navigation.jnav.msgs.Marker import Marker + + +def _pose(x: float, y: float, z: float) -> Pose: + pose = Pose() + pose.position.x, pose.position.y, pose.position.z = x, y, z + return pose + + +def test_marker_roundtrip_preserves_all_fields() -> None: + marker = Marker( + pose=_pose(-52.93, -55.95, 0.4), + marker="test_waypoint_2", + map="dim_city", + ts=1781565207.5, + frame_id="map", + ) + decoded = Marker.lcm_decode(marker.lcm_encode()) + + assert decoded.marker == "test_waypoint_2" + assert decoded.map == "dim_city" + assert decoded.frame_id == "map" + assert decoded.ts == marker.ts + assert decoded.pose.position.x == -52.93 + assert decoded.pose.position.y == -55.95 + assert decoded.pose.position.z == 0.4 + + +def test_marker_defaults() -> None: + marker = Marker() + assert marker.marker == "" + assert marker.map == "" + assert marker.frame_id == "map" + assert marker.ts > 0 # auto-stamped + # empty optional strings survive the roundtrip + decoded = Marker.lcm_decode(marker.lcm_encode()) + assert decoded.marker == "" and decoded.map == "" + + +def test_marker_unicode_name() -> None: + decoded = Marker.lcm_decode(Marker(marker="café_door", map="map_α").lcm_encode()) # noqa: RUF001 + assert decoded.marker == "café_door" + assert decoded.map == "map_α" # noqa: RUF001 From 617e5f7172b3bb21364695d50cfd155fbbb113f9 Mon Sep 17 00:00:00 2001 From: Jeff Hykin Date: Wed, 24 Jun 2026 14:52:23 +0800 Subject: [PATCH 06/21] add jnav utils for PGO (apriltags, recording_db, trajectory_metrics, kitti, voxel_map, module_loading) --- .../jnav/utils/apriltag_agreement.py | 156 ++++ dimos/navigation/jnav/utils/apriltags.py | 665 ++++++++++++++++++ dimos/navigation/jnav/utils/kitti.py | 94 +++ dimos/navigation/jnav/utils/module_loading.py | 57 ++ dimos/navigation/jnav/utils/recording_db.py | 107 +++ .../jnav/utils/test_apriltag_agreement.py | 74 ++ dimos/navigation/jnav/utils/test_kitti.py | 60 ++ .../jnav/utils/trajectory_metrics.py | 268 +++++++ dimos/navigation/jnav/utils/voxel_map.py | 66 ++ 9 files changed, 1547 insertions(+) create mode 100644 dimos/navigation/jnav/utils/apriltag_agreement.py create mode 100644 dimos/navigation/jnav/utils/apriltags.py create mode 100644 dimos/navigation/jnav/utils/kitti.py create mode 100644 dimos/navigation/jnav/utils/module_loading.py create mode 100644 dimos/navigation/jnav/utils/recording_db.py create mode 100644 dimos/navigation/jnav/utils/test_apriltag_agreement.py create mode 100644 dimos/navigation/jnav/utils/test_kitti.py create mode 100644 dimos/navigation/jnav/utils/trajectory_metrics.py create mode 100644 dimos/navigation/jnav/utils/voxel_map.py diff --git a/dimos/navigation/jnav/utils/apriltag_agreement.py b/dimos/navigation/jnav/utils/apriltag_agreement.py new file mode 100644 index 0000000000..bab26e2305 --- /dev/null +++ b/dimos/navigation/jnav/utils/apriltag_agreement.py @@ -0,0 +1,156 @@ +# Copyright 2026 Dimensional Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""April-tag agreement: a drift-quality metric for a corrected trajectory. + +A fixed April tag observed many times along a trajectory should map to ONE world +position. Odometry drift scatters those estimates (the same tag lands in a +different spot each lap); a good loop-closure / PGO correction pulls them back +together. So the spread of a tag's repeated world-position estimates is a +ground-truth-free measure of trajectory consistency — and the *drop* in spread +from raw odometry to PGO-corrected odometry is how much PGO helped. + +Pure functions over plain arrays; no PGO, sim, or I/O here (the benchmark harness +in gsc_pgo/pgo_apriltag_benchmark.py feeds these from real hk_village data). +""" + +from __future__ import annotations + +from dataclasses import dataclass + +import numpy as np + +from dimos.navigation.jnav.utils.trajectory_metrics import PoseLookup + +# sightings further apart than this are treated as separate visits +VISIT_GAP_S = 20.0 + + +@dataclass(frozen=True) +class TagAgreement: + """Per-tag spread of repeated world-position estimates (metres).""" + + tag_id: int + observations: int + spread: float # RMS distance of estimates to their centroid + + +@dataclass(frozen=True) +class AgreementReport: + """Agreement across all multiply-observed tags.""" + + per_tag: tuple[TagAgreement, ...] + mean_spread: float # mean per-tag spread (metres); lower = better agreement + total_observations: int + + @property + def tag_count(self) -> int: + return len(self.per_tag) + + +def tag_spread(positions: np.ndarray) -> float: + """RMS distance of a tag's world-position estimates to their centroid.""" + if len(positions) < 2: + return 0.0 + centroid = positions.mean(axis=0) + return float(np.sqrt(np.mean(np.sum((positions - centroid) ** 2, axis=1)))) + + +def agreement_report( + tag_positions: dict[int, np.ndarray], *, min_observations: int = 2 +) -> AgreementReport: + """Score per-tag agreement from ``tag_id -> (N, 3) world positions``. + + Tags seen fewer than ``min_observations`` times carry no agreement signal and + are excluded from ``mean_spread``. + """ + per_tag: list[TagAgreement] = [] + total = 0 + for tag_id in sorted(tag_positions): + positions = np.asarray(tag_positions[tag_id], dtype=np.float64).reshape(-1, 3) + total += len(positions) + if len(positions) < min_observations: + continue + per_tag.append(TagAgreement(tag_id, len(positions), tag_spread(positions))) + mean_spread = float(np.mean([t.spread for t in per_tag])) if per_tag else 0.0 + return AgreementReport(tuple(per_tag), mean_spread, total) + + +def agreement_improvement(raw: AgreementReport, corrected: AgreementReport) -> float: + """Fractional drop in mean spread from ``raw`` to ``corrected`` (1.0 = perfect). + + Positive means the correction tightened tag agreement; negative means it made + it worse. Returns 0.0 if there's no raw spread to improve on. + """ + if raw.mean_spread <= 0.0: + return 0.0 + return (raw.mean_spread - corrected.mean_spread) / raw.mean_spread + + +def tag_world_positions( + sightings: dict[int, list[float]], pose_lookup: PoseLookup +) -> dict[int, np.ndarray]: + """Map each tag's sighting times to robot world positions (the proxy estimate).""" + positions: dict[int, np.ndarray] = {} + for tag_id, times in sightings.items(): + located = [p for p in (pose_lookup(t) for t in times) if p is not None] + if located: + positions[tag_id] = np.vstack(located) + return positions + + +def split_visits(times: list[float], *, gap_s: float) -> list[list[float]]: + """Cluster sighting timestamps into visits separated by gaps > ``gap_s``.""" + visits: list[list[float]] = [] + for timestamp in sorted(times): + if visits and timestamp - visits[-1][-1] <= gap_s: + visits[-1].append(timestamp) + else: + visits.append([timestamp]) + return visits + + +def paired_tag_visit_positions( + sightings: dict[int, list[float]], + raw_lookup: PoseLookup, + corrected_lookup: PoseLookup, + *, + gap_s: float, +) -> tuple[dict[int, np.ndarray], dict[int, np.ndarray]]: + """One robot position per tag VISIT under both pose sources, visits paired. + + Outdoors a tag stays visible while the robot walks tens of metres, so + per-sighting spread is dominated by viewing distance, not drift. Visits are + clustered on timestamps only, and a visit is kept only when BOTH pose + sources can place it — so raw and corrected reports always score the exact + same visit set, and a visit outside the pose graph's coverage drops out + instead of skewing one side. + """ + raw_positions: dict[int, np.ndarray] = {} + corrected_positions: dict[int, np.ndarray] = {} + for tag_id, times in sightings.items(): + raw_medians: list[np.ndarray] = [] + corrected_medians: list[np.ndarray] = [] + for visit_times in split_visits(times, gap_s=gap_s): + raw_located = [p for p in (raw_lookup(t) for t in visit_times) if p is not None] + corrected_located = [ + p for p in (corrected_lookup(t) for t in visit_times) if p is not None + ] + if raw_located and corrected_located: + raw_medians.append(np.median(np.vstack(raw_located), axis=0)) + corrected_medians.append(np.median(np.vstack(corrected_located), axis=0)) + if raw_medians: + raw_positions[tag_id] = np.vstack(raw_medians) + corrected_positions[tag_id] = np.vstack(corrected_medians) + return raw_positions, corrected_positions diff --git a/dimos/navigation/jnav/utils/apriltags.py b/dimos/navigation/jnav/utils/apriltags.py new file mode 100644 index 0000000000..9bf22cdf82 --- /dev/null +++ b/dimos/navigation/jnav/utils/apriltags.py @@ -0,0 +1,665 @@ +# Copyright 2026 Dimensional Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""AprilTag detection over a mem2.db color stream. + +Detects tags in the `color_image` stream and rejects bad glimpses through several +independent gates — motion blur (per-tag sharpness), PnP misfit (reprojection +error), too-small / too-far / too-oblique views, and fast camera motion — then +clusters same-id detections in time and reduces each cluster to one robust pose +via a Huber-weighted refinement seeded at the cluster medoid. (Re)writes the +`april_tags` PoseStamped stream of tag-in-camera relative poses (solvePnP, +marker_id in each observation's tags) and returns those representatives for the +GTSAM solve. +""" + +from __future__ import annotations + +from collections import defaultdict +from collections.abc import Callable, Iterable +from copy import copy +from dataclasses import dataclass, field +from itertools import pairwise +import json +import math +from pathlib import Path +import time +from typing import Any + +import cv2 +import numpy as np +from scipy.spatial.transform import Rotation + +from dimos.msgs.geometry_msgs.PoseStamped import PoseStamped +from dimos.msgs.sensor_msgs.Image import Image + +DEFAULT_MAX_DISTANCE_M = 1.0 +DEFAULT_MAX_VIEW_ANGLE_DEG = 45.0 +DEFAULT_CLUSTER_GAP_SEC = 5.0 +DEFAULT_ROTATION_WEIGHT_M_PER_RAD = 0.5 +# A blurry tag still solves a pose; these reject the bad glimpses up front. +DEFAULT_MIN_SHARPNESS = 60.0 # Laplacian variance over the tag ROI +DEFAULT_MAX_REPROJ_PX = 2.0 # RMS solvePnP corner reprojection error +DEFAULT_MIN_TAG_PX = 24.0 # tag side length in pixels (sqrt of quad area) +DEFAULT_MAX_LINEAR_SPEED_MPS = 0.5 +DEFAULT_MAX_ANGULAR_SPEED_DPS = 50.0 +DEFAULT_MIN_OBSERVATIONS = 3 # clusters thinner than this are unreliable +DEFAULT_HUBER_DELTA_M = 0.05 # residual past which a sample is down-weighted +_HUBER_ITERATIONS = 5 + +# One tag observation: ts, marker_id, t_cam_marker (xyz + xyzw quat), and the +# per-glimpse quality fields (sharpness, reproj_px, tag_px, speed). +Detection = dict[str, Any] + + +def make_detector(dictionary_name: str) -> cv2.aruco.ArucoDetector: + d = cv2.aruco.getPredefinedDictionary(getattr(cv2.aruco, dictionary_name)) + return cv2.aruco.ArucoDetector(d, cv2.aruco.DetectorParameters()) + + +def _object_points(marker_length_m: float) -> np.ndarray: + h = marker_length_m / 2.0 + return np.array([[-h, h, 0.0], [h, h, 0.0], [h, -h, 0.0], [-h, -h, 0.0]], dtype=np.float32) + + +def estimate_marker_pose( + corners_pixels: np.ndarray, + marker_length_m: float, + intrinsics: np.ndarray, + distortion: np.ndarray, +) -> tuple[np.ndarray, np.ndarray] | None: + """solvePnP a single tag -> (rotation_vector, translation_vector) in the + camera_optical frame, or None if it failed.""" + image_corners = corners_pixels.reshape(4, 1, 2).astype(np.float32) + found, rotation_vector, translation_vector = cv2.solvePnP( + _object_points(marker_length_m), + image_corners, + intrinsics, + distortion, + flags=cv2.SOLVEPNP_IPPE_SQUARE, + ) + return (rotation_vector, translation_vector) if found else None + + +def view_quality(t_cam_marker: list[float]) -> tuple[float, float]: + """(distance_m, view_angle_deg) for a tag pose in the camera optical frame. + + distance is the camera->tag range; view_angle is the angle between the line + of sight and the tag's surface normal (0 = perfectly head-on).""" + translation = np.array(t_cam_marker[:3], dtype=np.float64) + distance = float(np.linalg.norm(translation)) + normal = Rotation.from_quat(t_cam_marker[3:7]).as_matrix()[:, 2] + line_of_sight = translation / (distance + 1e-9) + cos_angle = abs(float(np.dot(line_of_sight, normal))) + view_angle = math.degrees(math.acos(min(1.0, cos_angle))) + return distance, view_angle + + +def cluster_by_time(detections: list[Detection], gap_sec: float) -> list[list[Detection]]: + """Group same-marker detections into clusters. A new cluster begins whenever + the time gap to the previous same-marker detection exceeds gap_sec.""" + clusters: list[list[Detection]] = [] + by_marker: dict[int, list[Detection]] = defaultdict(list) + for detection in detections: + by_marker[detection["marker_id"]].append(detection) + for marker_detections in by_marker.values(): + marker_detections.sort(key=lambda detection: detection["ts"]) + current = [marker_detections[0]] + for detection in marker_detections[1:]: + if detection["ts"] - current[-1]["ts"] > gap_sec: + clusters.append(current) + current = [detection] + else: + current.append(detection) + clusters.append(current) + return clusters + + +def _pose_distance(a: list[float], b: list[float], rotation_weight_m_per_rad: float) -> float: + translation = float(np.linalg.norm(np.array(a[:3]) - np.array(b[:3]))) + rotation = 2.0 * math.acos(min(1.0, abs(float(np.dot(a[3:7], b[3:7]))))) + return translation + rotation_weight_m_per_rad * rotation + + +def cluster_medoid(cluster: list[Detection], rotation_weight_m_per_rad: float) -> Detection: + """The detection whose pose is most central (min total spatial+rotational + distance to the rest) — a robust representative of the cluster.""" + poses = [detection["t_cam_marker"] for detection in cluster] + best_index, best_cost = 0, float("inf") + for i in range(len(poses)): + cost = sum( + _pose_distance(poses[i], poses[j], rotation_weight_m_per_rad) + for j in range(len(poses)) + if j != i + ) + if cost < best_cost: + best_cost, best_index = cost, i + return cluster[best_index] + + +def reprojection_error_px( + corners_pixels: np.ndarray, + rotation_vector: np.ndarray, + translation_vector: np.ndarray, + marker_length_m: float, + intrinsics: np.ndarray, + distortion: np.ndarray, +) -> float: + """RMS pixel distance between detected corners and the solvePnP pose reprojected + back onto the image — a direct measure of how well the pose explains the tag.""" + projected, _ = cv2.projectPoints( + _object_points(marker_length_m), rotation_vector, translation_vector, intrinsics, distortion + ) + measured = corners_pixels.reshape(4, 2).astype(np.float64) + diff = projected.reshape(4, 2) - measured + return float(np.sqrt(np.mean(np.sum(diff * diff, axis=1)))) + + +def tag_pixel_size(corners_pixels: np.ndarray) -> float: + """Tag side length in pixels (sqrt of the quad's image area); small = unreliable.""" + quad = corners_pixels.reshape(4, 2).astype(np.float32) + return float(math.sqrt(abs(cv2.contourArea(quad)))) + + +def tag_sharpness(gray: np.ndarray, corners_pixels: np.ndarray) -> float: + """Laplacian variance over the tag's bounding box — low under motion blur.""" + quad = corners_pixels.reshape(4, 2) + x_min, y_min = np.floor(quad.min(0)).astype(int) + x_max, y_max = np.ceil(quad.max(0)).astype(int) + height, width = gray.shape[:2] + x_min, y_min = max(int(x_min), 0), max(int(y_min), 0) + x_max, y_max = min(int(x_max), width), min(int(y_max), height) + if x_max - x_min < 2 or y_max - y_min < 2: + return 0.0 + return float(cv2.Laplacian(gray[y_min:y_max, x_min:x_max], cv2.CV_64F).var()) + + +def _pose_xyz_quat(pose: Any) -> np.ndarray | None: + """Best-effort (x,y,z,qx,qy,qz,qw) from an observation pose (tuple/list or a msg + with .position/.orientation); None if it isn't a usable pose.""" + if pose is None: + return None + if hasattr(pose, "position") and hasattr(pose, "orientation"): + position, orientation = pose.position, pose.orientation + return np.array( + [ + position.x, + position.y, + position.z, + orientation.x, + orientation.y, + orientation.z, + orientation.w, + ] + ) + try: + values = [float(component) for component in pose] + except TypeError: + return None + return np.array(values[:7]) if len(values) >= 7 else None + + +def _camera_speeds(images: list[Any]) -> tuple[dict[float, tuple[float, float]], bool]: + """Per-image (linear m/s, angular deg/s) from consecutive posed frames. The second + return is False when too few frames carry poses to estimate motion (gate disabled).""" + posed = [ + (float(obs.ts), pose) + for obs in images + if (pose := _pose_xyz_quat(getattr(obs, "pose", None))) is not None + ] + posed.sort(key=lambda item: item[0]) + speeds: dict[float, tuple[float, float]] = {} + for (timestamp_a, pose_a), (timestamp_b, pose_b) in pairwise(posed): + dt = timestamp_b - timestamp_a + if dt <= 0: + continue + linear = float(np.linalg.norm(pose_b[:3] - pose_a[:3])) / dt + cos_half = min(1.0, abs(float(np.dot(pose_a[3:7], pose_b[3:7])))) + angular = math.degrees(2.0 * math.acos(cos_half)) / dt + speeds[timestamp_b] = (linear, angular) + return speeds, len(posed) >= 2 + + +def _huber_weights(residuals: np.ndarray, delta: float) -> np.ndarray: + """IRLS Huber weights: 1 inside `delta`, decaying as delta/r past it.""" + weights = np.ones_like(residuals) + outside = residuals > delta + weights[outside] = delta / residuals[outside] + return weights + + +def robust_cluster_pose( + cluster: list[Detection], rotation_weight_m_per_rad: float, huber_delta_m: float +) -> Detection: + """Cluster representative: the medoid, then refined by Huber-weighted IRLS — a + weighted-mean translation and weighted quaternion mean (Markley eigen method), + re-weighting each iteration so a lingering bad glimpse keeps losing influence.""" + medoid = cluster_medoid(cluster, rotation_weight_m_per_rad) + if len(cluster) < 2: + return medoid + poses = np.array([detection["t_cam_marker"] for detection in cluster], dtype=np.float64) + translations, quaternions = poses[:, :3], poses[:, 3:7] + reference = np.array(medoid["t_cam_marker"][3:7]) + signs = np.sign(quaternions @ reference) + signs[signs == 0] = 1.0 + quaternions = quaternions * signs[:, None] + estimate_translation = np.array(medoid["t_cam_marker"][:3]) + estimate_quaternion = reference.copy() + delta_rad = huber_delta_m / max(rotation_weight_m_per_rad, 1e-9) + for _ in range(_HUBER_ITERATIONS): + weights_t = _huber_weights( + np.linalg.norm(translations - estimate_translation, axis=1), huber_delta_m + ) + estimate_translation = (weights_t[:, None] * translations).sum(0) / weights_t.sum() + angular_residual = 2.0 * np.arccos( + np.clip(np.abs(quaternions @ estimate_quaternion), 0.0, 1.0) + ) + weights_r = _huber_weights(angular_residual, delta_rad) + scatter = ( + weights_r[:, None, None] * np.einsum("ni,nj->nij", quaternions, quaternions) + ).sum(0) + estimate_quaternion = np.linalg.eigh(scatter)[1][:, -1] + if estimate_quaternion @ reference < 0: + estimate_quaternion = -estimate_quaternion + return { + **medoid, + "t_cam_marker": [*estimate_translation.tolist(), *estimate_quaternion.tolist()], + } + + +def detect_apriltags( + store: Any, + intrinsics: np.ndarray, + distortion: np.ndarray, + image_stream: str = "color_image", + stream_name: str = "april_tags", + marker_length: float = 0.10, + dictionary: str = "DICT_APRILTAG_36h11", + *, + max_distance_m: float = DEFAULT_MAX_DISTANCE_M, + max_view_angle_deg: float = DEFAULT_MAX_VIEW_ANGLE_DEG, + cluster_gap_sec: float = DEFAULT_CLUSTER_GAP_SEC, + rotation_weight_m_per_rad: float = DEFAULT_ROTATION_WEIGHT_M_PER_RAD, + min_sharpness: float = DEFAULT_MIN_SHARPNESS, + max_reproj_px: float = DEFAULT_MAX_REPROJ_PX, + min_tag_px: float = DEFAULT_MIN_TAG_PX, + max_linear_speed_mps: float = DEFAULT_MAX_LINEAR_SPEED_MPS, + max_angular_speed_dps: float = DEFAULT_MAX_ANGULAR_SPEED_DPS, + min_observations: int = DEFAULT_MIN_OBSERVATIONS, + huber_delta_m: float = DEFAULT_HUBER_DELTA_M, +) -> list[Detection]: + """Detect tags in `image_stream`, reject bad glimpses (blur, PnP misfit, small/ + far/oblique views, fast motion), cluster same-id detections by time, drop thin + clusters, and (re)write the `april_tags` stream from one Huber-refined medoid + representative per cluster. Returns that list of representatives.""" + detector = make_detector(dictionary) + raw_detections: list[Detection] = [] + images = store.stream(image_stream, Image).to_list() + speed_by_ts, speed_available = _camera_speeds(images) + for image_obs in images: + image = image_obs.data + bgr = image.numpy() if hasattr(image, "numpy") else np.asarray(image.data) + gray = cv2.cvtColor(bgr, cv2.COLOR_BGR2GRAY) if bgr.ndim == 3 else bgr + all_corners, marker_ids, _ = detector.detectMarkers(bgr) + if marker_ids is None: + continue + for corners, marker_id in zip(all_corners, marker_ids.flatten(), strict=False): + pose = estimate_marker_pose(corners, marker_length, intrinsics, distortion) + if pose is None: + continue + rotation_vector, translation_vector = pose + quaternion = Rotation.from_rotvec(rotation_vector.reshape(3)).as_quat() # x,y,z,w + translation = translation_vector.reshape(3) + tag_in_camera = [ + float(translation[0]), + float(translation[1]), + float(translation[2]), + float(quaternion[0]), + float(quaternion[1]), + float(quaternion[2]), + float(quaternion[3]), + ] + raw_detections.append( + { + "ts": float(image_obs.ts), + "marker_id": int(marker_id), + "t_cam_marker": tag_in_camera, + "sharpness": tag_sharpness(gray, corners), + "reproj_px": reprojection_error_px( + corners, + rotation_vector, + translation_vector, + marker_length, + intrinsics, + distortion, + ), + "tag_px": tag_pixel_size(corners), + "speed": speed_by_ts.get(float(image_obs.ts)), + } + ) + + # Per-glimpse gates; count rejections per reason so thresholds are tunable. + rejected: dict[str, int] = defaultdict(int) + kept: list[Detection] = [] + for detection in raw_detections: + if detection["sharpness"] < min_sharpness: + rejected["blur"] += 1 + continue + if detection["reproj_px"] > max_reproj_px: + rejected["reproj"] += 1 + continue + if detection["tag_px"] < min_tag_px: + rejected["small"] += 1 + continue + distance, view_angle = view_quality(detection["t_cam_marker"]) + if distance > max_distance_m: + rejected["far"] += 1 + continue + if view_angle > max_view_angle_deg: + rejected["oblique"] += 1 + continue + speed = detection["speed"] + if speed is not None and ( + speed[0] > max_linear_speed_mps or speed[1] > max_angular_speed_dps + ): + rejected["motion"] += 1 + continue + kept.append(detection) + + # One Huber-refined representative per time-clustered group; drop thin clusters. + detections: list[Detection] = [] + thin_clusters = 0 + for cluster in cluster_by_time(kept, cluster_gap_sec): + if len(cluster) < min_observations: + thin_clusters += 1 + continue + detections.append( + { + **robust_cluster_pose(cluster, rotation_weight_m_per_rad, huber_delta_m), + "n_observations": len(cluster), + } + ) + detections.sort(key=lambda detection: detection["ts"]) + + if stream_name in store.list_streams(): + store.delete_stream(stream_name) + april_tag_stream = store.stream(stream_name, PoseStamped) + for detection in detections: + tag_in_camera = detection["t_cam_marker"] + pose_stamped = PoseStamped( + ts=detection["ts"], position=tag_in_camera[:3], orientation=tag_in_camera[3:] + ) + april_tag_stream.append( + pose_stamped, + ts=detection["ts"], + pose=tuple(tag_in_camera), + tags={"marker_id": detection["marker_id"]}, + ) + + found_ids = sorted({detection["marker_id"] for detection in detections}) + gate_summary = ", ".join(f"{reason}={count}" for reason, count in sorted(rejected.items())) + if not speed_available: + gate_summary += (", " if gate_summary else "") + "motion-gate-off(no poses)" + print( + f" april_tags: {len(raw_detections)} raw -> {len(kept)} in-spec " + f"-> {len(detections)} clusters (dropped {thin_clusters} thin), " + f"markers {found_ids} (over {len(images)} images)" + ) + if gate_summary: + print(f" april_tags rejected: {gate_summary}") + return detections + + +# --------------------------------------------------------------------------- +# Streaming / incremental API +# --------------------------------------------------------------------------- + +_DEFAULT_MARKER_LENGTH = 0.10 +_DEFAULT_DICTIONARY = "DICT_APRILTAG_36h11" + + +@dataclass +class TagInfo: + tag_number: float + confidence: float + pose: list[float] # [x, y, z, qx, qy, qz, qw] relative to camera + ts: float + camera_frame_id: str = "" + + +@dataclass +class GroupingOptions: + tag_ids_to_ignore: list[float] = field(default_factory=list) + time_window: float = DEFAULT_CLUSTER_GAP_SEC + max_distance: float = DEFAULT_MAX_DISTANCE_M + min_confidence: float = 0.0 + + +class AprilTagger: + def __init__( + self, + camera_intrinsics: Any, + grouping_options: Any, + tags_ids_to_ignore: list[float], + ) -> None: + self.camera_intrinsics = camera_intrinsics + self.grouping_options = grouping_options + self.tags_ids_to_ignore = tags_ids_to_ignore + + def get_tag(self, img: Any) -> list[TagInfo]: + return pure_get_tags(img, self.camera_intrinsics) + + def iter_april_tag_groups( + self, + next_observation: Any, + options: Any = None, + state: dict[str, Any] | None = None, + ) -> tuple[list[TagInfo], dict[float, TagInfo], dict[float, TagInfo]]: + if state is None: + state = {} + if options is None: + options = self.grouping_options + recent_tags, tag_estimates, finished_tags = pure_iter_april_tag_groups( + next_observation, + self.camera_intrinsics, + options, + state.get("recent_tags", []), + state.get("tag_estimates", {}), + ) + state["recent_tags"] = recent_tags + state["tag_estimates"] = tag_estimates + return recent_tags, tag_estimates, finished_tags + + +def pure_get_tags(img: Any, camera_intrinsics: Any) -> list[TagInfo]: + """Detect AprilTags in an image and return relative poses via solvePnP. + + img: ndarray or an Image msg (frame_id/ts are carried onto each TagInfo). + camera_intrinsics: dict with "intrinsics" (3x3 ndarray), and optionally + "distortion", "marker_length", "dictionary". + """ + intrinsics = camera_intrinsics["intrinsics"] + distortion = camera_intrinsics.get("distortion", np.zeros(5)) + marker_length = camera_intrinsics.get("marker_length", _DEFAULT_MARKER_LENGTH) + dictionary = camera_intrinsics.get("dictionary", _DEFAULT_DICTIONARY) + + camera_frame_id = getattr(img, "frame_id", "") + ts = getattr(img, "ts", None) + if ts is None: + ts = time.time() + + if isinstance(img, np.ndarray): + bgr = img + elif hasattr(img, "numpy"): + bgr = img.numpy() + elif hasattr(img, "data"): + bgr = np.asarray(img.data) + else: + bgr = np.asarray(img) + if bgr.ndim == 2: + bgr = cv2.cvtColor(bgr, cv2.COLOR_GRAY2BGR) + + detector = make_detector(dictionary) + all_corners, marker_ids, _ = detector.detectMarkers(bgr) + if marker_ids is None: + return [] + + tags: list[TagInfo] = [] + for corners, marker_id in zip(all_corners, marker_ids.flatten(), strict=False): + pose_result = estimate_marker_pose(corners, marker_length, intrinsics, distortion) + if pose_result is None: + continue + rotation_vector, translation_vector = pose_result + quaternion = Rotation.from_rotvec(rotation_vector.reshape(3)).as_quat() + translation = translation_vector.reshape(3) + pose = [ + float(translation[0]), + float(translation[1]), + float(translation[2]), + float(quaternion[0]), + float(quaternion[1]), + float(quaternion[2]), + float(quaternion[3]), + ] + reproj = reprojection_error_px( + corners, + rotation_vector, + translation_vector, + marker_length, + intrinsics, + distortion, + ) + confidence = max(0.0, 1.0 - reproj / DEFAULT_MAX_REPROJ_PX) + tags.append( + TagInfo( + tag_number=float(marker_id), + confidence=confidence, + pose=pose, + ts=float(ts), + camera_frame_id=camera_frame_id, + ) + ) + return tags + + +def pure_iter_april_tag_groups( + next_observation: Any, + camera_intrinsics: Any, + options: Any, + recent_tags: list[TagInfo], + tag_estimates: dict[float, TagInfo], +) -> tuple[list[TagInfo], dict[float, TagInfo], dict[float, TagInfo]]: + """Returns (recent_tags, tag_estimates, finished_tags). + + finished_tags holds estimates whose tag id aged out of the time window this + iteration — no fresher observation can revise them, so they are final.""" + tag_estimates = copy(tag_estimates) + recent_tags = list(recent_tags) + + tags = pure_get_tags(next_observation.color_image, camera_intrinsics) + recent_tags.extend(tags) + + tag_ids_to_ignore = set(getattr(options, "tag_ids_to_ignore", [])) + time_window = getattr(options, "time_window", DEFAULT_CLUSTER_GAP_SEC) + max_distance = getattr(options, "max_distance", DEFAULT_MAX_DISTANCE_M) + min_confidence = getattr(options, "min_confidence", 0.0) + + # Purge tags outside the time window + if recent_tags: + latest_ts = max(tag.ts for tag in recent_tags) + recent_tags = [tag for tag in recent_tags if latest_ts - tag.ts <= time_window] + + all_recent_tag_ids = set( + tag.tag_number for tag in recent_tags if tag.tag_number not in tag_ids_to_ignore + ) + + # Tag ids with an estimate but no surviving observations are finalized + finished_tags = { + tag_id: estimate + for tag_id, estimate in tag_estimates.items() + if tag_id not in all_recent_tag_ids + } + for tag_id in finished_tags: + del tag_estimates[tag_id] + + for each_tag_id in all_recent_tag_ids: + tags_for_id = [tag for tag in recent_tags if tag.tag_number == each_tag_id] + + # Filter by distance and confidence + filtered = [ + tag + for tag in tags_for_id + if tag.confidence >= min_confidence + and float(np.linalg.norm(tag.pose[:3])) <= max_distance + ] + if not filtered: + continue + + # Pick the medoid (most central observation by pose distance) + if len(filtered) == 1: + tag_estimates[each_tag_id] = filtered[0] + else: + best_index = 0 + best_cost = float("inf") + for i, tag_i in enumerate(filtered): + cost = sum( + _pose_distance(tag_i.pose, tag_j.pose, DEFAULT_ROTATION_WEIGHT_M_PER_RAD) + for j, tag_j in enumerate(filtered) + if j != i + ) + if cost < best_cost: + best_cost = cost + best_index = i + tag_estimates[each_tag_id] = filtered[best_index] + + return recent_tags, tag_estimates, finished_tags + + +def load_intrinsics_json(path: Path) -> dict[str, Any]: + raw = json.loads(path.read_text()) + config: dict[str, Any] = { + "intrinsics": np.array(raw["intrinsics"], dtype=np.float64).reshape(3, 3), + "distortion": np.array(raw.get("distortion", []), dtype=np.float64), + } + for key in ("marker_length", "dictionary"): + if key in raw: + config[key] = raw[key] + return config + + +def sightings_from_observations( + observations: Iterable[tuple[int, float]], +) -> dict[int, list[float]]: + """Group ``(marker_id, timestamp)`` tag observations into ``tag_id -> [timestamps]``.""" + sightings: dict[int, list[float]] = {} + for marker_id, timestamp in observations: + sightings.setdefault(int(marker_id), []).append(float(timestamp)) + return sightings + + +def load_or_detect_sightings( + stored: Iterable[tuple[int, float]], + detect: Callable[[], Iterable[tuple[int, float]]], +) -> tuple[dict[int, list[float]], str]: + """``tag_id -> [sighting timestamps]`` from already-stored tag observations, + falling back to ``detect()`` when none are stored. + + Only marker_id + ts are used — tag positions are taken relative to the odom + stream, so stored poses are never read. The caller supplies the stored + iterable and the detect thunk, so this stays db-free. Returns the sightings + plus a source label (``"stream"`` or ``"detected"``).""" + sightings = sightings_from_observations(stored) + if sightings: + return sightings, "stream" + return sightings_from_observations(detect()), "detected" diff --git a/dimos/navigation/jnav/utils/kitti.py b/dimos/navigation/jnav/utils/kitti.py new file mode 100644 index 0000000000..d7358a976b --- /dev/null +++ b/dimos/navigation/jnav/utils/kitti.py @@ -0,0 +1,94 @@ +# Copyright 2026 Dimensional Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Official KITTI odometry error metric (translational % + rotational deg/m). + +The KITTI leaderboard metric: for every sub-sequence length in {100,200,...,800}m, +take every start frame, find the end frame ~that path-length away, and measure the +relative pose error between estimated and ground-truth. Average translational +error (as a fraction of length) and rotational error (rad per metre) over all +(start, length) pairs. Reported as translational % and rotational deg/m. This is +the devkit's `evaluate_odometry` algorithm. + +Poses are 4x4 body->world transforms (frame 0 = identity), one per frame. +""" + +from __future__ import annotations + +import numpy as np + +LENGTHS = (100.0, 200.0, 300.0, 400.0, 500.0, 600.0, 700.0, 800.0) +STEP = 10 # evaluate every 10th frame as a start (devkit convention) + + +def trajectory_distances(poses: list[np.ndarray]) -> list[float]: + """Cumulative path length at each frame.""" + distances = [0.0] + for index in range(1, len(poses)): + delta = poses[index][:3, 3] - poses[index - 1][:3, 3] + distances.append(distances[-1] + float(np.linalg.norm(delta))) + return distances + + +def _last_frame_for_length(distances: list[float], first: int, length: float) -> int: + target = distances[first] + length + for index in range(first, len(distances)): + if distances[index] >= target: + return index + return -1 + + +def _rotation_error(pose_error: np.ndarray) -> float: + trace = pose_error[0, 0] + pose_error[1, 1] + pose_error[2, 2] + return float(np.arccos(np.clip((trace - 1.0) / 2.0, -1.0, 1.0))) + + +def _translation_error(pose_error: np.ndarray) -> float: + return float(np.linalg.norm(pose_error[:3, 3])) + + +def kitti_odometry_error( + estimated: list[np.ndarray], ground_truth: list[np.ndarray] +) -> dict[str, float]: + """Average translational (%) and rotational (deg/m) error, devkit-style.""" + count = min(len(estimated), len(ground_truth)) + estimated, ground_truth = estimated[:count], ground_truth[:count] + distances = trajectory_distances(ground_truth) + + translational_errors: list[float] = [] + rotational_errors: list[float] = [] + for first in range(0, count, STEP): + for length in LENGTHS: + last = _last_frame_for_length(distances, first, length) + if last < 0: + continue + # Relative pose error: how far the estimated motion from first->last + # diverges from ground truth's. + gt_delta = np.linalg.inv(ground_truth[first]) @ ground_truth[last] + estimated_delta = np.linalg.inv(estimated[first]) @ estimated[last] + pose_error = np.linalg.inv(gt_delta) @ estimated_delta + translational_errors.append(_translation_error(pose_error) / length) + rotational_errors.append(_rotation_error(pose_error) / length) + + if not translational_errors: + return { + "translational_percent": float("nan"), + "rotational_deg_per_m": float("nan"), + "pairs": 0, + } + return { + "translational_percent": float(np.mean(translational_errors)) * 100.0, + "rotational_deg_per_m": float(np.degrees(np.mean(rotational_errors))), + "pairs": len(translational_errors), + } diff --git a/dimos/navigation/jnav/utils/module_loading.py b/dimos/navigation/jnav/utils/module_loading.py new file mode 100644 index 0000000000..3295abdea1 --- /dev/null +++ b/dimos/navigation/jnav/utils/module_loading.py @@ -0,0 +1,57 @@ +# Copyright 2026 Dimensional Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Dynamically load a Module class by file path and filter its config. + +Lets the loop-closure eval drivers swap in any module-under-test from a +``--module-path``/``--module-name`` pair without importing it statically. +""" + +from __future__ import annotations + +import importlib +from pathlib import Path +from typing import Any, get_type_hints + + +def load_module_class(module_path: Path, module_name: str) -> type: + """Import the class `module_name` from a python file. + + Resolved through the normal package system (path -> dotted name rooted at + the last `dimos` directory) so the class pickles/deploys into workers.""" + parts = module_path.resolve().with_suffix("").parts + if "dimos" not in parts: + raise SystemExit(f"--module-path must live inside the dimos package tree: {module_path}") + package_root = len(parts) - 1 - parts[::-1].index("dimos") + dotted = ".".join(parts[package_root:]) + module = importlib.import_module(dotted) + if not hasattr(module, module_name): + raise SystemExit(f"no class {module_name!r} in {dotted} ({module_path})") + return getattr(module, module_name) # type: ignore[no-any-return] + + +def filter_config_for_module(module_class: type, config: dict[str, Any]) -> dict[str, Any]: + """Drop config keys the module's Config class doesn't declare. + + DEFAULT_PGO_CONFIG carries cmu-specific knobs (use_scan_context, ...); + other loop-closure modules shouldn't crash on them.""" + try: + config_class = get_type_hints(module_class)["config"] + known = set(config_class.model_fields) + except Exception: + return config + dropped = sorted(set(config) - known) + if dropped: + print(f"config keys not in {config_class.__name__} (dropped): {dropped}") + return {key: value for key, value in config.items() if key in known} diff --git a/dimos/navigation/jnav/utils/recording_db.py b/dimos/navigation/jnav/utils/recording_db.py new file mode 100644 index 0000000000..c71ea06a4d --- /dev/null +++ b/dimos/navigation/jnav/utils/recording_db.py @@ -0,0 +1,107 @@ +# Copyright 2026 Dimensional Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Read recording streams (mem2.db) for loop-closure evaluation. + +SqliteStore access keyed by path (one shared store per db), stream iteration, +and a nearest-pose lookup over an odometry stream. Stream names are always +passed in by the caller — nothing here is tied to a particular recording layout. +""" + +from __future__ import annotations + +from collections.abc import Iterator +from pathlib import Path +from typing import Any + +import numpy as np + +from dimos.memory2.store.sqlite import SqliteStore +from dimos.msgs.geometry_msgs.Pose import Pose +from dimos.navigation.jnav.utils.trajectory_metrics import PoseLookup, trajectory_lookup + +# max |Δt| pairing a query time to an odom sample +ODOM_MATCH_TOLERANCE_S = 0.2 + +# Fixed-rate replay pacing + sizing caps (used by the eval replay drivers). +REPLAY_PUBLISH_HZ = 50.0 +REPLAY_DRAIN_MARGIN_S = 30.0 +MAX_REPLAY_SCANS = 4000 +MAX_REPLAY_ODOM = 16000 + +# One shared store per db path (SqliteStore can't take absolute paths through the +# replay adapter, so streams are read directly). +_stores: dict[str, SqliteStore] = {} + + +def store(db_path: Path) -> SqliteStore: + key = str(db_path) + cached = _stores.get(key) + if cached is None: + cached = SqliteStore(path=key, must_exist=True) + cached.start() + _stores[key] = cached + return cached + + +def list_streams(db_path: Path) -> list[str]: + return store(db_path).list_streams() + + +def stream_count(db_path: Path, stream_name: str) -> int: + return int(store(db_path).stream(stream_name).count()) + + +def iterate_stream( + db_path: Path, stream_name: str, *, stride: int = 1 +) -> Iterator[tuple[float, Any]]: + """Yield ``(timestamp, decoded message)``, decoding only kept rows.""" + stream: Any = store(db_path).stream(stream_name) + for index, observation in enumerate(stream): + if stride > 1 and index % stride: + continue + yield (float(observation.ts), observation.data) + + +def payload_pose(payload: Any) -> Pose: + """Normalize a recorded odometry payload to a ``Pose``. + + Handles both shapes found in recordings: ``Odometry`` (``.pose``) and + ``PoseStamped`` (flat position + ``.orientation``).""" + if hasattr(payload, "pose"): # Odometry + return payload.pose # type: ignore[no-any-return] + return Pose( # PoseStamped + payload.x, + payload.y, + payload.z, + payload.orientation.x, + payload.orientation.y, + payload.orientation.z, + payload.orientation.w, + ) + + +def odometry_lookup(db_path: Path, stream_name: str) -> PoseLookup: + """Nearest-position (``[x, y, z]``) lookup over a recording's odometry stream.""" + times: list[float] = [] + positions: list[list[float]] = [] + for timestamp, payload in iterate_stream(db_path, stream_name): + pose = payload_pose(payload) + times.append(timestamp) + positions.append([pose.position.x, pose.position.y, pose.position.z]) + return trajectory_lookup( + np.asarray(times, dtype=np.float64), + np.asarray(positions, dtype=np.float64), + ODOM_MATCH_TOLERANCE_S, + ) diff --git a/dimos/navigation/jnav/utils/test_apriltag_agreement.py b/dimos/navigation/jnav/utils/test_apriltag_agreement.py new file mode 100644 index 0000000000..fc177b3d3d --- /dev/null +++ b/dimos/navigation/jnav/utils/test_apriltag_agreement.py @@ -0,0 +1,74 @@ +# Copyright 2026 Dimensional Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Tests for the April-tag agreement metric.""" + +from __future__ import annotations + +import numpy as np + +from dimos.navigation.jnav.utils.apriltag_agreement import ( + agreement_improvement, + agreement_report, + tag_spread, +) + + +def test_tag_spread_zero_for_identical() -> None: + assert tag_spread(np.array([[1.0, 2.0, 3.0]] * 4)) == 0.0 + + +def test_tag_spread_grows_with_scatter() -> None: + tight = tag_spread(np.array([[0.0, 0, 0], [0.1, 0, 0]])) + loose = tag_spread(np.array([[0.0, 0, 0], [2.0, 0, 0]])) + assert loose > tight + + +def test_single_observation_excluded() -> None: + report = agreement_report({7: np.array([[0.0, 0, 0]])}) + assert report.tag_count == 0 # one sighting carries no agreement signal + assert report.total_observations == 1 + assert report.mean_spread == 0.0 + + +def test_report_mean_spread() -> None: + report = agreement_report( + { + 1: np.array([[0.0, 0, 0], [0.0, 0, 0]]), # spread 0 + 2: np.array([[0.0, 0, 0], [2.0, 0, 0]]), # spread 1.0 + } + ) + assert report.tag_count == 2 + assert report.total_observations == 4 + assert abs(report.mean_spread - 0.5) < 1e-9 + + +def test_improvement_positive_when_corrected_tighter() -> None: + # Drifted: same tag scattered 4 m apart across laps. Corrected: pulled together. + raw = agreement_report({1: np.array([[0.0, 0, 0], [4.0, 0, 0]])}) + corrected = agreement_report({1: np.array([[0.0, 0, 0], [0.4, 0, 0]])}) + improvement = agreement_improvement(raw, corrected) + assert 0.85 < improvement <= 1.0 + + +def test_improvement_negative_when_corrected_worse() -> None: + raw = agreement_report({1: np.array([[0.0, 0, 0], [0.4, 0, 0]])}) + corrected = agreement_report({1: np.array([[0.0, 0, 0], [4.0, 0, 0]])}) + assert agreement_improvement(raw, corrected) < 0.0 + + +def test_improvement_zero_when_no_raw_spread() -> None: + raw = agreement_report({1: np.array([[0.0, 0, 0], [0.0, 0, 0]])}) + corrected = agreement_report({1: np.array([[0.0, 0, 0], [1.0, 0, 0]])}) + assert agreement_improvement(raw, corrected) == 0.0 diff --git a/dimos/navigation/jnav/utils/test_kitti.py b/dimos/navigation/jnav/utils/test_kitti.py new file mode 100644 index 0000000000..756e403cb6 --- /dev/null +++ b/dimos/navigation/jnav/utils/test_kitti.py @@ -0,0 +1,60 @@ +# Copyright 2026 Dimensional Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Devkit-verified self-tests for the official KITTI odometry error metric.""" + +from __future__ import annotations + +import numpy as np +from scipy.spatial.transform import Rotation + +from dimos.navigation.jnav.utils.kitti import kitti_odometry_error + +ZERO_ERROR_TOLERANCE = 1e-6 + + +def _straight_trajectory(scale: float = 1.0) -> list[np.ndarray]: + """A 1000 m straight run along x, one frame per metre, scaled along-track.""" + poses = [] + for index in range(1001): + pose = np.eye(4) + pose[0, 3] = float(index) * scale + poses.append(pose) + return poses + + +def test_identical_trajectory_has_zero_error() -> None: + ground_truth = _straight_trajectory() + result = kitti_odometry_error([pose.copy() for pose in ground_truth], ground_truth) + assert result["translational_percent"] < ZERO_ERROR_TOLERANCE + assert result["rotational_deg_per_m"] < ZERO_ERROR_TOLERANCE + + +def test_one_percent_scale_drift_reads_one_percent() -> None: + ground_truth = _straight_trajectory() + scaled = _straight_trajectory(scale=1.01) + result = kitti_odometry_error(scaled, ground_truth) + assert 0.8 < result["translational_percent"] < 1.2 + + +def test_constant_yaw_rate_has_nonzero_rotational_error() -> None: + ground_truth = _straight_trajectory() + yawed = [] + for index in range(1001): + pose = np.eye(4) + pose[:3, :3] = Rotation.from_euler("z", index * 0.01, degrees=True).as_matrix() + pose[0, 3] = float(index) + yawed.append(pose) + result = kitti_odometry_error(yawed, ground_truth) + assert result["rotational_deg_per_m"] > 0.0 diff --git a/dimos/navigation/jnav/utils/trajectory_metrics.py b/dimos/navigation/jnav/utils/trajectory_metrics.py new file mode 100644 index 0000000000..8701eb4785 --- /dev/null +++ b/dimos/navigation/jnav/utils/trajectory_metrics.py @@ -0,0 +1,268 @@ +# Copyright 2026 Dimensional Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Pure pose/trajectory math for loop-closure evaluation. + +Drift injection + correction and ground-truth(-free) trajectory metrics, all +operating on plain arrays and lookup callables (no db reads) so they can be +shared across the eval drivers. +""" + +from __future__ import annotations + +from collections.abc import Callable, Iterable +from typing import Any + +import numpy as np +from scipy.spatial.transform import Rotation + +from dimos.navigation.jnav.utils.voxel_map import VoxelMap + +# (ts, x, y, z, qx, qy, qz, qw) per keyframe +GraphPose = tuple[float, float, float, float, float, float, float, float] +# time -> nearest pose: 3-vec [x,y,z] (PoseLookup) or 7-vec +quat (PoseLookup7) +PoseLookup = Callable[[float], "np.ndarray | None"] +PoseLookup7 = Callable[[float], "np.ndarray | None"] + + +def trajectory_lookup(times: np.ndarray, positions: np.ndarray, tolerance: float) -> PoseLookup: + """A ``time -> [x, y, z]`` nearest-sample lookup over a (ts, position) trajectory.""" + if len(times) == 0: + return lambda _timestamp: None + + def lookup(timestamp: float) -> np.ndarray | None: + index = int(np.argmin(np.abs(times - timestamp))) + if abs(float(times[index]) - timestamp) > tolerance: + return None + return np.asarray(positions[index], dtype=np.float64) + + return lookup + + +def graph_lookup(graph: list[tuple[float, float, float, float]]) -> PoseLookup: + """Nearest-keyframe lookup over an optimized pose graph (no tolerance). + + Keyframes only spawn on motion, so a parked robot legitimately maps to an + old keyframe — that keyframe IS its position (within the keyframe delta). + Unbounded nearest-in-time is sound when the graph is never truncated.""" + times = np.asarray([node[0] for node in graph], dtype=np.float64) + positions = np.asarray([[node[1], node[2], node[3]] for node in graph], dtype=np.float64) + return trajectory_lookup(times, positions, float("inf")) + + +# Voxel-agreement sampling: cap per-scan points so the map fits in memory +# without holding the whole recording at once. +VOXEL_SIZE_M = 0.2 +VOXEL_MAX_POINTS_PER_SCAN = 4000 + + +def drift_offset( + timestamp: float, t0: float, drift_per_sec: list[float] | np.ndarray +) -> np.ndarray: + """World translation injected at ``timestamp`` (grows linearly from ``t0``).""" + return np.asarray(drift_per_sec, dtype=np.float64) * (timestamp - t0) + + +def has_drift(drift_per_sec: list[float] | np.ndarray) -> bool: + return bool(np.any(np.asarray(drift_per_sec, dtype=np.float64))) + + +def drifted_lookup( + base_lookup: Callable[[float], np.ndarray | None], + drift_per_sec: list[float], + drift_t0: float, +) -> Callable[[float], np.ndarray | None]: + """Wrap a pose lookup so its xyz gets the same drift the module was fed. + + Pass-through when drift is zero. Works for both the 3-vec (xyz) and 7-vec + (xyz+quat) lookups — only the first three components are shifted.""" + if not has_drift(drift_per_sec): + return base_lookup + drift = np.asarray(drift_per_sec, dtype=np.float64) + + def lookup(timestamp: float) -> np.ndarray | None: + pose = base_lookup(timestamp) + if pose is None: + return None + shifted = np.array(pose, dtype=np.float64) + shifted[:3] += drift * (timestamp - drift_t0) + return shifted + + return lookup + + +def pose7_lookup(times: np.ndarray, poses: np.ndarray, tolerance: float) -> PoseLookup7: + """time -> nearest [x,y,z,qx,qy,qz,qw], or None past the tolerance.""" + + def lookup(timestamp: float) -> np.ndarray | None: + if len(times) == 0: + return None + index = int(np.argmin(np.abs(times - timestamp))) + if abs(float(times[index]) - timestamp) > tolerance: + return None + return np.asarray(poses[index], dtype=np.float64) + + return lookup + + +def drift_delta_lookup( + graph: list[GraphPose], raw_lookup: PoseLookup7 +) -> Callable[[float], tuple[np.ndarray, np.ndarray] | None]: + """time -> nearest keyframe's drift correction (R_delta, t_delta). + + The delta is computed at the KEYFRAME's own timestamp — + T_corrected(kf) * T_raw(kf_ts)^-1 — so raw and corrected poses describe the + same instant. (Comparing a keyframe pose against the raw pose at a nearby + scan's timestamp snaps scans onto keyframes and fakes a tighter map: an + identity correction must yield an identity delta.)""" + times: list[float] = [] + rotations: list[np.ndarray] = [] + translations: list[np.ndarray] = [] + for node in graph: + raw_pose = raw_lookup(node[0]) + if raw_pose is None: + continue + rotation_raw = Rotation.from_quat(raw_pose[3:7]).as_matrix() + rotation_corrected = Rotation.from_quat(node[4:8]).as_matrix() + rotation_delta = rotation_corrected @ rotation_raw.T + translation_delta = np.asarray(node[1:4]) - rotation_delta @ raw_pose[:3] + times.append(node[0]) + rotations.append(rotation_delta) + translations.append(translation_delta) + times_array = np.asarray(times, dtype=np.float64) + + def lookup(timestamp: float) -> tuple[np.ndarray, np.ndarray] | None: + if len(times_array) == 0: + return None + index = int(np.argmin(np.abs(times_array - timestamp))) + return rotations[index], translations[index] + + return lookup + + +def rigid_align_rmse(source: np.ndarray, target: np.ndarray) -> float: + """Absolute trajectory error: RMSE of ``source`` to ``target`` after a + best-fit rigid (rotation+translation, no scale) alignment (Kabsch/Umeyama). + Both are (N, 3). The alignment removes the gauge freedom — two trajectories + of the same shape in different world frames score 0.""" + if len(source) < 3: + return 0.0 + source_centroid = source.mean(axis=0) + target_centroid = target.mean(axis=0) + source_centered = source - source_centroid + target_centered = target - target_centroid + covariance = source_centered.T @ target_centered + u_matrix, _, vt_matrix = np.linalg.svd(covariance) + # Reflection fix so the result is a proper rotation (det = +1). + reflection = np.sign(np.linalg.det(vt_matrix.T @ u_matrix.T)) + correction = np.diag([1.0, 1.0, reflection]) + rotation = vt_matrix.T @ correction @ u_matrix.T + aligned = (rotation @ source_centered.T).T + target_centroid + return float(np.sqrt(np.mean(np.sum((aligned - target) ** 2, axis=1)))) + + +def trajectory_recovery_error( + graph: list[GraphPose], + gt_lookup: Callable[[float], np.ndarray | None], + drift_per_sec: list[float], + drift_t0: float, +) -> dict[str, float] | None: + """Drift-recovery ATE: how close the module's corrected keyframe trajectory + gets to the *un-drifted* ground-truth, vs the drifted input it was given. + + Only meaningful with injected drift (then GT = the un-drifted odom the drift + was added to). This is the right metric where tag/voxel agreement is weak — + e.g. KITTI's long single-loop trajectory. Returns None when drift is off or + too few keyframes resolve. ``trajectory_improvement`` = fraction of the drift + ATE removed (1.0 = perfect recovery, 0 = no help, negative = worse).""" + if not has_drift(drift_per_sec): + return None + drift = np.asarray(drift_per_sec, dtype=np.float64) + corrected_points: list[list[float]] = [] + gt_points: list[np.ndarray] = [] + drifted_points: list[np.ndarray] = [] + for node in graph: + timestamp = node[0] + gt_pose = gt_lookup(timestamp) + if gt_pose is None: + continue + gt_xyz = np.asarray(gt_pose, dtype=np.float64)[:3] + corrected_points.append([node[1], node[2], node[3]]) + gt_points.append(gt_xyz) + drifted_points.append(gt_xyz + drift * (timestamp - drift_t0)) + if len(gt_points) < 3: + return None + gt_array = np.asarray(gt_points) + drifted_ate = rigid_align_rmse(np.asarray(drifted_points), gt_array) + corrected_ate = rigid_align_rmse(np.asarray(corrected_points), gt_array) + improvement = (drifted_ate - corrected_ate) / drifted_ate if drifted_ate > 1e-9 else 0.0 + return { + "drifted_ate_m": drifted_ate, + "corrected_ate_m": corrected_ate, + "trajectory_improvement": improvement, + } + + +def lidar_voxel_agreement( + scans: Iterable[tuple[float, np.ndarray]], + raw_lookup: PoseLookup7, + graph: list[GraphPose], + *, + voxel_size: float = VOXEL_SIZE_M, + max_points_per_scan: int = VOXEL_MAX_POINTS_PER_SCAN, + drift_per_sec: list[float] | None = None, + drift_t0: float = 0.0, +) -> dict[str, Any]: + """Occupied-voxel counts of the lidar map, raw vs corrected. + + ``scans`` yields ``(timestamp, points)`` where ``points`` is an (N, 3+) array + registered in the raw odom world frame. Each scan is re-anchored by its + nearest keyframe's drift correction (see `drift_delta_lookup`), so a good + correction collapses doubled walls and the corrected map occupies fewer + voxels. ``improvement`` is the fractional voxel drop (positive = tighter). + When ``drift_per_sec`` is set the raw scans are first shifted into the same + drifted world the module solved in.""" + drift = np.asarray(drift_per_sec or [0.0, 0.0, 0.0], dtype=np.float64) + apply_drift = has_drift(drift) + delta_lookup = drift_delta_lookup(graph, raw_lookup) + raw_clouds: list[np.ndarray] = [] + corrected_clouds: list[np.ndarray] = [] + used = 0 + for timestamp, scan_points in scans: + delta = delta_lookup(timestamp) + if delta is None or raw_lookup(timestamp) is None: + continue + rotation_delta, translation_delta = delta + points = np.asarray(scan_points, dtype=np.float64)[:, :3] + if len(points) > max_points_per_scan: + points = points[:: -(-len(points) // max_points_per_scan)] + if apply_drift: + points = points + drift * (timestamp - drift_t0) + raw_clouds.append(points) + corrected_clouds.append(points @ rotation_delta.T + translation_delta) + used += 1 + + if not raw_clouds: + return {"status": "skipped: no scans matched both pose sources"} + raw_voxels = VoxelMap.from_points(np.vstack(raw_clouds), voxel_size).count + corrected_voxels = VoxelMap.from_points(np.vstack(corrected_clouds), voxel_size).count + improvement = (raw_voxels - corrected_voxels) / raw_voxels if raw_voxels else 0.0 + return { + "status": "ok", + "raw_voxels": raw_voxels, + "corrected_voxels": corrected_voxels, + "improvement": improvement, + "voxel_size_m": voxel_size, + "scans_used": used, + } diff --git a/dimos/navigation/jnav/utils/voxel_map.py b/dimos/navigation/jnav/utils/voxel_map.py new file mode 100644 index 0000000000..c3055d8c87 --- /dev/null +++ b/dimos/navigation/jnav/utils/voxel_map.py @@ -0,0 +1,66 @@ +# Copyright 2026 Dimensional Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Force a point cloud onto a discrete voxel grid for occupancy comparisons. + +A ``VoxelMap`` is just the set of occupied integer voxel keys at a fixed voxel +size. ``diff`` (symmetric-difference count, lower == better aligned) and the raw +occupied ``count`` are the building blocks for map-quality scores: drift smears a +map into more, mis-aligned voxels; a good loop closure collapses it back. +""" + +from __future__ import annotations + +from dataclasses import dataclass + +import numpy as np + +from dimos.msgs.sensor_msgs.PointCloud2 import PointCloud2 + + +@dataclass(frozen=True) +class VoxelMap: + voxel_size: float + keys: frozenset[tuple[int, int, int]] + + @classmethod + def from_points(cls, points: np.ndarray, voxel_size: float) -> VoxelMap: + if points.shape[0] == 0: + return cls(voxel_size=voxel_size, keys=frozenset()) + indices = np.floor(np.asarray(points)[:, :3] / voxel_size).astype(np.int64) + unique = np.unique(indices, axis=0) + return cls(voxel_size=voxel_size, keys=frozenset(map(tuple, unique))) + + @classmethod + def from_pointcloud(cls, cloud: PointCloud2, voxel_size: float) -> VoxelMap: + return cls.from_points(cloud.points_f32(), voxel_size) + + @property + def count(self) -> int: + """Number of occupied voxels.""" + return len(self.keys) + + def __len__(self) -> int: + return len(self.keys) + + def diff(self, other: VoxelMap) -> int: + """Symmetric-difference count: voxels occupied in exactly one of the two.""" + return len(self.keys ^ other.keys) + + def intersection(self, other: VoxelMap) -> int: + return len(self.keys & other.keys) + + def iou(self, other: VoxelMap) -> float: + union = len(self.keys | other.keys) + return len(self.keys & other.keys) / union if union else 0.0 From ac693ad6bfb190a421b26e9406d922c2fd1e0e29 Mon Sep 17 00:00:00 2001 From: Jeff Hykin Date: Wed, 24 Jun 2026 14:52:31 +0800 Subject: [PATCH 07/21] add ivan_pgo, ivan_pgo_transformer and unrefined_pgo loop-closure modules --- .../loop_closure/gsc_pgo/.gitignore | 2 + .../gsc_pgo/hyperparam_analysis.md | 134 ++++ .../loop_closure/ivan_pgo/module.py | 637 ++++++++++++++++++ .../ivan_pgo_transformer/module.py | 263 ++++++++ .../unrefined_pgo/cpp/CMakeLists.txt | 49 ++ .../unrefined_pgo/cpp/commons.cpp | 8 + .../loop_closure/unrefined_pgo/cpp/commons.h | 29 + .../unrefined_pgo/cpp/dimos_native_module.hpp | 97 +++ .../loop_closure/unrefined_pgo/cpp/flake.lock | 144 ++++ .../loop_closure/unrefined_pgo/cpp/flake.nix | 70 ++ .../loop_closure/unrefined_pgo/cpp/main.cpp | 299 ++++++++ .../unrefined_pgo/cpp/point_cloud_utils.hpp | 170 +++++ .../unrefined_pgo/cpp/simple_pgo.cpp | 211 ++++++ .../unrefined_pgo/cpp/simple_pgo.h | 78 +++ .../loop_closure/unrefined_pgo/module.py | 286 ++++++++ 15 files changed, 2477 insertions(+) create mode 100644 dimos/navigation/jnav/components/loop_closure/gsc_pgo/.gitignore create mode 100644 dimos/navigation/jnav/components/loop_closure/gsc_pgo/hyperparam_analysis.md create mode 100644 dimos/navigation/jnav/components/loop_closure/ivan_pgo/module.py create mode 100644 dimos/navigation/jnav/components/loop_closure/ivan_pgo_transformer/module.py create mode 100644 dimos/navigation/jnav/components/loop_closure/unrefined_pgo/cpp/CMakeLists.txt create mode 100644 dimos/navigation/jnav/components/loop_closure/unrefined_pgo/cpp/commons.cpp create mode 100644 dimos/navigation/jnav/components/loop_closure/unrefined_pgo/cpp/commons.h create mode 100644 dimos/navigation/jnav/components/loop_closure/unrefined_pgo/cpp/dimos_native_module.hpp create mode 100644 dimos/navigation/jnav/components/loop_closure/unrefined_pgo/cpp/flake.lock create mode 100644 dimos/navigation/jnav/components/loop_closure/unrefined_pgo/cpp/flake.nix create mode 100644 dimos/navigation/jnav/components/loop_closure/unrefined_pgo/cpp/main.cpp create mode 100644 dimos/navigation/jnav/components/loop_closure/unrefined_pgo/cpp/point_cloud_utils.hpp create mode 100644 dimos/navigation/jnav/components/loop_closure/unrefined_pgo/cpp/simple_pgo.cpp create mode 100644 dimos/navigation/jnav/components/loop_closure/unrefined_pgo/cpp/simple_pgo.h create mode 100644 dimos/navigation/jnav/components/loop_closure/unrefined_pgo/module.py diff --git a/dimos/navigation/jnav/components/loop_closure/gsc_pgo/.gitignore b/dimos/navigation/jnav/components/loop_closure/gsc_pgo/.gitignore new file mode 100644 index 0000000000..750baebf41 --- /dev/null +++ b/dimos/navigation/jnav/components/loop_closure/gsc_pgo/.gitignore @@ -0,0 +1,2 @@ +result +result-* diff --git a/dimos/navigation/jnav/components/loop_closure/gsc_pgo/hyperparam_analysis.md b/dimos/navigation/jnav/components/loop_closure/gsc_pgo/hyperparam_analysis.md new file mode 100644 index 0000000000..8e7f608201 --- /dev/null +++ b/dimos/navigation/jnav/components/loop_closure/gsc_pgo/hyperparam_analysis.md @@ -0,0 +1,134 @@ +# gsc_pgo hyperparameter analysis + online tuning — sketch + +## The problem (from the eval campaign) + +cmu wins on big real-drift loops but **over-fires** elsewhere: +- `grassy_field` (05-32): **-0.369** tag with 110 closures — should be a no-op (+0.00x). + Open grass = feature-poor; ICP matches are garbage but get accepted. +- `gir_park1_2`: +0.578 with 23 closures, but unrefined got **+0.749 with 2 closures**. + cmu's extra closures are net noise — fewer, better closures would win. + +Goal: (a) **detect feature-poor / high-disagreement scans online and back off** +(ideally make PGO a no-op in grassy_field), and (b) accept **fewer, more +consistent** closures. Both reduce to: *trust loops less when the environment +can't support them, and only commit mutually-consistent ones.* + +## Signals cmu already computes (cheap to expose) + +Every candidate already produces these inside `searchForLoopPairs` / +`scan_context.cpp` — instrument the binary to log them per candidate (accepted +and rejected), keyed by ts: + +| signal | where | meaning | +|---|---|---| +| scan-context min cosine distance | `best_distance()` | place-match quality; high = no good match (new place OR feature-poor) | +| ICP fitness (`getFitnessScore`) | `searchForLoopPairs` | mean-sq inlier dist; high = bad geometric alignment | +| ICP converged + inlier ratio | PCL ICP | low inlier ratio = lots of disagreement (crowd/dynamic/degenerate) | +| candidate global distance | `loop_candidate_max_distance_m` gate | drift magnitude at the candidate | +| ring-key occupancy/entropy | scan-context descriptor | **feature richness** — near-uniform/empty descriptor = feature-poor | + +Two signals are *not* computed yet but are the highest-value adds: +- **registration Hessian eigenvalues** (J^T J of the ICP cost) — the canonical + degeneracy detector (below). +- **per-point residual distribution** of the scan-to-submap match — heavy tails = + dynamic objects (crowd). + +## Literature: detecting feature-poor / degenerate environments + +1. **Degeneracy factor (Zhang, Kaess, Singh — ICRA 2016, "On Degeneracy of + Optimization-based State Estimation").** THE standard. Eigen-decompose the + scan-matching information matrix `H = J^T J`; its smallest eigenvalue (or + `λ_min / λ_max` condition number) measures how well-constrained the solve is. + A featureless corridor/field has a small eigenvalue along the unconstrained + axis. → online "is this scan match trustworthy" scalar. Their **solution + remapping** projects the update out of degenerate directions; LOAM/LeGO-LOAM + use it. +2. **X-ICP (Tuna et al., T-RO 2023) — "Localizability-Aware ICP."** Per-axis + localizability from the optimization constraints; selectively adds soft + constraints only in observable directions. More principled than a scalar gate. +3. **Eigenvalue geometric features (Demantké 2011 / Weinmann 2015):** local + neighborhood covariance → linearity/planarity/scattering. Open grass scores + high "scattering" (3D-spread, no structure) — a direct feature-poverty flag. +4. **Scan-context descriptor entropy/occupancy** (cheap, already have the + descriptor): a low-entropy / sparsely-occupied ring-key = feature-poor scene + where cosine matching is unreliable. Raise the match bar or refuse loops. + +## Literature: scan disagreement / dynamic crowds + +5. **Robust kernels (Huber / Cauchy / Geman-McClure)** on the ICP residual — + down-weight the moving-people points. cmu currently uses raw fitness; a robust + cost + reporting the *inlier ratio* exposes "crowd present." +6. **Removert (Kim & Kim, IROS 2020):** range-image visibility differencing + removes dynamic points before matching. Strong for crowds. +7. **Stationarity / scan-to-scan consistency:** points that move between + consecutive frames are dynamic; a high dynamic-fraction → distrust this scan + for loop closure. (Cheap online proxy: residual after rigid alignment of + consecutive scans.) +8. **Learned moving-object segmentation (4DMOS, LMNet, MotionSeg3D)** — heavier; + masks people directly. Overkill for now but the ceiling. + +## Literature: loop-closure outlier rejection (the over-firing fix) + +The single highest-leverage change — cmu accepts each loop independently: + +9. **PCM — Pairwise Consistency Maximization (Mangelson et al., ICRA 2018).** + Build a consistency graph over candidate loops; keep only the **maximal + mutually-consistent clique**, reject the rest. Directly attacks "23 closures + where 2 good ones win." Cheap, backend-agnostic, no tuning. +10. **GNC — Graduated Non-Convexity (Yang et al., RA-L 2020) / Cauchy-IRLS.** + Backend robustly down-weights outlier loops during optimization. iSAM2-friendly. +11. **Switchable Constraints (Sünderhauf & Protzel 2012) / Dynamic Covariance + Scaling (Agarwal 2013):** the optimizer learns a switch/weight per loop and + can turn bad ones off. Lighter than PCM, online-friendly. + +## Online tuning design (signal → knob) + +Frame cmu's static thresholds as **initial values adapted by a measured +"environment trust" `τ ∈ [0,1]`** computed per keyframe from the signals above: + +``` +τ = f( degeneracy_factor, ringkey_entropy, icp_inlier_ratio, dynamic_fraction ) + # low when feature-poor / crowded / degenerate +``` + +Then drive the gates from `τ` (running statistics, not magic numbers): + +| knob (currently static) | online rule | +|---|---| +| `loop_score_thresh` (ICP gate) | tighten as `τ` drops; track running median/MAD of accepted-loop fitness and gate at `median − k·MAD` | +| `scan_context_match_threshold` | raise when ring-key entropy is low (cosine less discriminative in feature-poor scenes) | +| **accept/suppress** the loop | hard-gate: if `degeneracy_factor < ε` (Zhang) → **suppress** → PGO no-op in directions it can't observe (the grassy_field fix) | +| loop **batch** acceptance | replace independent accept with **PCM** over the recent candidate set → only commit the mutually-consistent subset (the gir_park1_2 fix) | +| loop noise model | already `Σ = fitness·I`; inflate further by `1/τ` so low-trust loops barely move the graph | + +Online-tuning techniques that fit (cheapest first): +- **Adaptive thresholding on running stats** (median/MAD, EWMA) — robust, no + training, the right default. +- **Switchable constraints / DCS** — lets the *optimizer* do the tuning; minimal code. +- **PCM** — not "tuning" but the structural fix for over-firing; do this first. +- (Avoid online Bayesian-opt/bandits for the fast gates — too slow/unstable to + converge within one run; reserve for offline meta-params.) + +## Proposed analysis (to validate before coding the online controller) + +1. **Instrument the binary** to emit per-candidate diagnostics (the table above) + to a sidecar jsonl. One pass per recording → the dataset that grounds `f(·)`. +2. **Static sweep** (via the new eval cell-caching + `--drift-per-sec`): vary + `loop_score_thresh`, `scan_context_match_threshold`, + `loop_candidate_max_distance_m`, `min_loop_detect_duration` per recording; + plot tag/voxel improvement & closure count. Expect grassy_field to be + monotone-better as the gate tightens (→ confirms "no-op is optimal there"). +3. **Correlate** accepted-vs-rejected closures with the signals: does + degeneracy_factor / ring-key entropy separate grassy_field's bad closures + from gir_park1_2's good ones? If yes, `τ` is learnable as a simple threshold. +4. **Prototype** the cheapest controller (PCM + degeneracy-gate + adaptive + `loop_score_thresh`) and re-run the full eval; target: grassy_field → ~0, + gir_park1_2 → ≥ unrefined's +0.749, no regression on the pointlio wins. + +## First concrete step + +Add the per-candidate diagnostic logging to `simple_pgo.cpp` (scan-context +distance, ICP fitness, inlier ratio, candidate distance) + compute the Zhang +degeneracy factor from the ICP Hessian. That single instrumentation pass +produces the data to decide which signals actually separate good/bad closures +here — everything else (the `f(·)` and the gates) follows from it. diff --git a/dimos/navigation/jnav/components/loop_closure/ivan_pgo/module.py b/dimos/navigation/jnav/components/loop_closure/ivan_pgo/module.py new file mode 100644 index 0000000000..7c86f6dfb5 --- /dev/null +++ b/dimos/navigation/jnav/components/loop_closure/ivan_pgo/module.py @@ -0,0 +1,637 @@ +# Copyright 2026 Dimensional Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Pure-Python PGO ported from Ivan's `ivan/feat/go2loopclosure` branch +(`dimos/mapping/pgo.py`), adapted to the jnav LoopClosure spec. + +GTSAM iSAM2 pose graph + Open3D point-to-plane ICP loop verification + +KD-tree loop candidate search. Differences from the original: + * input renamed `registered_scan` -> `lidar` (LoopClosure spec) + * publishes `pose_graph: Out[Graph3D]` (optimized keyframes, odometry + + loop edges) and `loop_closure_event: Out[GraphDelta3D]` so the eval + harness can capture the corrected trajectory and count closures + * offline experiment helpers (transformers, two-pass voxel pipelines) + were left behind — this is just the runtime module +""" + +from __future__ import annotations + +from dataclasses import dataclass +import threading +import time +from typing import Any + +import gtsam # type: ignore[import-untyped] +import numpy as np +import open3d as o3d # type: ignore[import-untyped] +import open3d.core as o3c # type: ignore[import-untyped] +from reactivex.disposable import Disposable +from scipy.spatial import KDTree +from scipy.spatial.transform import Rotation + +from dimos.constants import DEFAULT_THREAD_JOIN_TIMEOUT +from dimos.core.core import rpc +from dimos.core.module import Module, ModuleConfig +from dimos.core.stream import In, Out +from dimos.msgs.geometry_msgs.Pose import Pose +from dimos.msgs.geometry_msgs.PoseStamped import PoseStamped +from dimos.msgs.geometry_msgs.Quaternion import Quaternion +from dimos.msgs.geometry_msgs.Transform import Transform +from dimos.msgs.geometry_msgs.Vector3 import Vector3 +from dimos.msgs.nav_msgs.Odometry import Odometry +from dimos.msgs.sensor_msgs.PointCloud2 import PointCloud2 +from dimos.navigation.jnav.components.loop_closure.spec import LoopClosure +from dimos.navigation.jnav.msgs.Graph3D import Graph3D +from dimos.navigation.jnav.msgs.GraphDelta3D import GraphDelta3D +from dimos.utils.logging_config import setup_logger + +FRAME_MAP = "map" +FRAME_ODOM = "odom" +FRAME_BODY = "base_link" + +logger = setup_logger() + + +class PGOConfig(ModuleConfig): + world_frame: str = FRAME_MAP + + # Keyframe detection + key_pose_delta_trans: float = 0.5 + key_pose_delta_deg: float = 10.0 + + # Loop closure + loop_search_radius: float = 2.0 + loop_time_thresh: float = 20.0 + loop_score_thresh: float = 0.3 + loop_submap_half_range: int = 10 + min_icp_inliers: int = 10 + min_keyframes_for_loop_search: int = 10 + loop_closure_extra_iterations: int = 4 + submap_resolution: float = 0.2 + min_loop_detect_duration: float = 5.0 + + # Input mode + unregister_input: bool = True # Transform world-frame scans to body-frame using odom + + # Global map + publish_global_map: bool = True + global_map_publish_rate: float = 0.5 + global_map_voxel_size: float = 0.15 + + # ICP + max_icp_iterations: int = 50 + max_icp_correspondence_dist: float = 1.0 + + +@dataclass +class _KeyPose: + r_local: np.ndarray # 3x3 rotation in local/odom frame + t_local: np.ndarray # 3-vec translation in local/odom frame + r_global: np.ndarray # 3x3 corrected rotation + t_global: np.ndarray # 3-vec corrected translation + timestamp: float + body_cloud: np.ndarray # Nx3 points in body frame + + +def _icp( + source: np.ndarray, + target: np.ndarray, + max_iter: int = 50, + max_dist: float = 1.0, + tol: float = 1e-6, + min_inliers: int = 10, + init: np.ndarray | None = None, +) -> tuple[np.ndarray, float]: + """Point-to-plane ICP using Open3D's tensor pipeline. + + Returns ``(T, fitness)`` where ``fitness`` is mean squared inlier + distance (m²).""" + if len(source) < min_inliers or len(target) < min_inliers: + return np.eye(4), float("inf") + + cpu = o3c.Device("CPU:0") + src_pcd = o3d.t.geometry.PointCloud(o3c.Tensor(source.astype(np.float32), device=cpu)) + tgt_pcd = o3d.t.geometry.PointCloud(o3c.Tensor(target.astype(np.float32), device=cpu)) + + # Normals on the target enable point-to-plane ICP, which converges + # tighter than point-to-point on indoor scenes (walls give unambiguous + # normals that resolve the slide-along-wall ambiguity). + tgt_pcd.estimate_normals(max_nn=30, radius=0.3) + + init_T = ( + o3c.Tensor(init.astype(np.float64), dtype=o3c.float64, device=cpu) + if init is not None + else o3c.Tensor.eye(4, dtype=o3c.float64, device=cpu) + ) + + # Silence Open3D's "0 correspondence" warning — we deliberately use a + # tight max_correspondence_distance and reject loops with poor fitness. + with o3d.utility.VerbosityContextManager(o3d.utility.VerbosityLevel.Error): + result = o3d.t.pipelines.registration.icp( + source=src_pcd, + target=tgt_pcd, + max_correspondence_distance=max_dist, + init_source_to_target=init_T, + estimation_method=o3d.t.pipelines.registration.TransformationEstimationPointToPlane(), + criteria=o3d.t.pipelines.registration.ICPConvergenceCriteria( + relative_fitness=tol, + relative_rmse=tol, + max_iteration=max_iter, + ), + ) + + fitness_inlier_frac = float(result.fitness) + if fitness_inlier_frac == 0.0: + return np.eye(4), float("inf") + + rmse = float(result.inlier_rmse) + T = result.transformation.numpy() + return T, rmse * rmse + + +def _voxel_downsample(pts: np.ndarray, voxel_size: float) -> np.ndarray: + if len(pts) == 0 or voxel_size <= 0: + return pts + keys = np.floor(pts / voxel_size).astype(np.int32) + _, idx = np.unique(keys, axis=0, return_index=True) + return pts[idx] + + +class _SimplePGO: + def __init__(self, config: PGOConfig) -> None: + self._cfg = config + self._key_poses: list[_KeyPose] = [] + self._history_pairs: list[tuple[int, int]] = [] + self._cache_pairs: list[dict[str, Any]] = [] + self._r_offset = np.eye(3) + self._t_offset = np.zeros(3) + + params = gtsam.ISAM2Params() + params.setRelinearizeThreshold(0.01) + params.relinearizeSkip = 1 + self._isam2 = gtsam.ISAM2(params) + self._graph = gtsam.NonlinearFactorGraph() + self._values = gtsam.Values() + + def is_key_pose(self, r: np.ndarray, t: np.ndarray) -> bool: + if not self._key_poses: + return True + last = self._key_poses[-1] + delta_trans = np.linalg.norm(t - last.t_local) + # Angular distance via quaternion dot product + q_cur = Rotation.from_matrix(r).as_quat() # [x,y,z,w] + q_last = Rotation.from_matrix(last.r_local).as_quat() + dot = abs(np.dot(q_cur, q_last)) + delta_deg = np.degrees(2.0 * np.arccos(min(dot, 1.0))) + return bool( + delta_trans > self._cfg.key_pose_delta_trans or delta_deg > self._cfg.key_pose_delta_deg + ) + + def add_key_pose( + self, r_local: np.ndarray, t_local: np.ndarray, timestamp: float, body_cloud: np.ndarray + ) -> bool: + if not self.is_key_pose(r_local, t_local): + return False + + idx = len(self._key_poses) + init_r = self._r_offset @ r_local + init_t = self._r_offset @ t_local + self._t_offset + + pose = gtsam.Pose3(gtsam.Rot3(init_r), gtsam.Point3(init_t)) + self._values.insert(idx, pose) + + if idx == 0: + noise = gtsam.noiseModel.Diagonal.Variances(np.full(6, 1e-12)) + self._graph.add(gtsam.PriorFactorPose3(idx, pose, noise)) + else: + last = self._key_poses[-1] + r_between = last.r_local.T @ r_local + t_between = last.r_local.T @ (t_local - last.t_local) + noise = gtsam.noiseModel.Diagonal.Variances( + np.array([1e-6, 1e-6, 1e-6, 1e-4, 1e-4, 1e-6]) + ) + self._graph.add( + gtsam.BetweenFactorPose3( + idx - 1, idx, gtsam.Pose3(gtsam.Rot3(r_between), gtsam.Point3(t_between)), noise + ) + ) + + kp = _KeyPose( + r_local=r_local.copy(), + t_local=t_local.copy(), + r_global=init_r.copy(), + t_global=init_t.copy(), + timestamp=timestamp, + body_cloud=_voxel_downsample(body_cloud, self._cfg.submap_resolution), + ) + self._key_poses.append(kp) + return True + + def _get_submap(self, idx: int, half_range: int) -> np.ndarray: + lo = max(0, idx - half_range) + hi = min(len(self._key_poses) - 1, idx + half_range) + parts = [] + for i in range(lo, hi + 1): + kp = self._key_poses[i] + world = (kp.r_global @ kp.body_cloud.T).T + kp.t_global + parts.append(world) + if not parts: + return np.empty((0, 3)) + cloud = np.vstack(parts) + return _voxel_downsample(cloud, self._cfg.submap_resolution) + + def search_for_loops(self) -> None: + if len(self._key_poses) < self._cfg.min_keyframes_for_loop_search: + return + + # Rate limit + if self._history_pairs: + cur_time = self._key_poses[-1].timestamp + last_time = self._key_poses[self._history_pairs[-1][1]].timestamp + if cur_time - last_time < self._cfg.min_loop_detect_duration: + return + + cur_idx = len(self._key_poses) - 1 + cur_kp = self._key_poses[-1] + + # Build KD-tree of previous keyframe positions + positions = np.array([kp.t_global for kp in self._key_poses[:-1]]) + tree = KDTree(positions) + + idxs = tree.query_ball_point(cur_kp.t_global, self._cfg.loop_search_radius) + if not idxs: + return + + # Pick the spatially closest keyframe that's also old enough in time. + # query_ball_point doesn't sort, so we sort by distance ourselves. + candidates = [ + (float(np.linalg.norm(self._key_poses[i].t_global - cur_kp.t_global)), i) + for i in idxs + if abs(cur_kp.timestamp - self._key_poses[i].timestamp) > self._cfg.loop_time_thresh + ] + if not candidates: + return + candidates.sort() + loop_idx = candidates[0][1] + + # ICP verification + target = self._get_submap(loop_idx, self._cfg.loop_submap_half_range) + source = self._get_submap(cur_idx, 0) + + transform, fitness = _icp( + source, + target, + max_iter=self._cfg.max_icp_iterations, + max_dist=self._cfg.max_icp_correspondence_dist, + min_inliers=self._cfg.min_icp_inliers, + ) + if fitness > self._cfg.loop_score_thresh: + return + + # Compute relative pose + R_icp = transform[:3, :3] + t_icp = transform[:3, 3] + r_refined = R_icp @ cur_kp.r_global + t_refined = R_icp @ cur_kp.t_global + t_icp + r_offset = self._key_poses[loop_idx].r_global.T @ r_refined + t_offset = self._key_poses[loop_idx].r_global.T @ ( + t_refined - self._key_poses[loop_idx].t_global + ) + + self._cache_pairs.append( + { + "source": cur_idx, + "target": loop_idx, + "r_offset": r_offset, + "t_offset": t_offset, + "score": fitness, + } + ) + self._history_pairs.append((loop_idx, cur_idx)) + logger.info( + "Loop closure detected", + source=cur_idx, + target=loop_idx, + score=round(fitness, 4), + ) + + def smooth_and_update(self) -> None: + has_loop = bool(self._cache_pairs) + + for pair in self._cache_pairs: + # Pose3 noise model is [rx, ry, rz, x, y, z]. Use ICP fitness as + # the translation variance and a generous fixed rotation variance — + # loops shouldn't be trusted to fix rotation tightly. + trans_var = max(0.01, float(pair["score"])) # >= sigma_trans = 10 cm + rot_var = 0.05 # sigma_rot ~= 13 deg + noise = gtsam.noiseModel.Diagonal.Variances( + np.array([rot_var, rot_var, rot_var, trans_var, trans_var, trans_var]) + ) + self._graph.add( + gtsam.BetweenFactorPose3( + pair["target"], + pair["source"], + gtsam.Pose3(gtsam.Rot3(pair["r_offset"]), gtsam.Point3(pair["t_offset"])), + noise, + ) + ) + self._cache_pairs.clear() + + self._isam2.update(self._graph, self._values) + self._isam2.update() + if has_loop: + for _ in range(self._cfg.loop_closure_extra_iterations): + self._isam2.update() + self._graph = gtsam.NonlinearFactorGraph() + self._values = gtsam.Values() + + estimates = self._isam2.calculateBestEstimate() + for i in range(len(self._key_poses)): + pose = estimates.atPose3(i) + self._key_poses[i].r_global = pose.rotation().matrix() + self._key_poses[i].t_global = pose.translation() + + last = self._key_poses[-1] + self._r_offset = last.r_global @ last.r_local.T + self._t_offset = last.t_global - self._r_offset @ last.t_local + + def get_corrected_pose( + self, r_local: np.ndarray, t_local: np.ndarray + ) -> tuple[np.ndarray, np.ndarray]: + return self._r_offset @ r_local, self._r_offset @ t_local + self._t_offset + + def build_global_map(self, voxel_size: float) -> np.ndarray: + if not self._key_poses: + return np.empty((0, 3), dtype=np.float32) + parts = [] + for kp in self._key_poses: + world = (kp.r_global @ kp.body_cloud.T).T + kp.t_global + parts.append(world) + cloud = np.vstack(parts).astype(np.float32) + return _voxel_downsample(cloud, voxel_size) + + @property + def num_key_poses(self) -> int: + return len(self._key_poses) + + +def process_scan( + pgo: _SimplePGO, + cloud: PointCloud2, + r_local: np.ndarray, + t_local: np.ndarray, + ts: float, + unregister_input: bool, +) -> tuple[Odometry, Transform, bool] | None: + """Add a keyframe, run loop closure, return (corrected odom, map->odom tf, + keyframe_added) — or None on empty cloud. + + Caller must hold ``pgo``'s lock during this call.""" + points, _ = cloud.as_numpy() + if len(points) == 0: + return None + + if unregister_input: + # registered_scan is world-frame; transform back to body-frame. + body_pts = (r_local.T @ (points[:, :3].T - t_local[:, None])).T + else: + body_pts = points[:, :3] + + added = pgo.add_key_pose(r_local, t_local, ts, body_pts) + if added: + pgo.search_for_loops() + pgo.smooth_and_update() + + r_corr, t_corr = pgo.get_corrected_pose(r_local, t_local) + return ( + build_corrected_odometry(r_corr, t_corr, ts), + build_map_odom_tf(pgo._r_offset.copy(), pgo._t_offset.copy(), ts), + added, + ) + + +def build_corrected_odometry( + r: np.ndarray, + t: np.ndarray, + ts: float, + world_frame: str = FRAME_MAP, +) -> Odometry: + q = Rotation.from_matrix(r).as_quat() # [x,y,z,w] + return Odometry( + ts=ts, + frame_id=world_frame, + child_frame_id=FRAME_BODY, + pose=Pose( + position=[float(t[0]), float(t[1]), float(t[2])], + orientation=[float(q[0]), float(q[1]), float(q[2]), float(q[3])], + ), + ) + + +def build_map_odom_tf( + r_offset: np.ndarray, + t_offset: np.ndarray, + ts: float, + world_frame: str = FRAME_MAP, + odom_frame: str = FRAME_ODOM, +) -> Transform: + q = Rotation.from_matrix(r_offset).as_quat() # [x,y,z,w] + return Transform( + frame_id=world_frame, + child_frame_id=odom_frame, + translation=Vector3(float(t_offset[0]), float(t_offset[1]), float(t_offset[2])), + rotation=Quaternion(float(q[0]), float(q[1]), float(q[2]), float(q[3])), + ts=ts, + ) + + +def _keyframe_node(index: int, key_pose: _KeyPose, world_frame: str) -> Graph3D.Node3D: + q = Rotation.from_matrix(key_pose.r_global).as_quat() # [x,y,z,w] + return Graph3D.Node3D( + pose=PoseStamped( + ts=key_pose.timestamp, + frame_id=world_frame, + position=[float(v) for v in key_pose.t_global], + orientation=[float(q[0]), float(q[1]), float(q[2]), float(q[3])], + ), + id=index, + ) + + +class PGO(Module, LoopClosure): + """Pose graph optimization with loop closure (pure Python). + + Detects keyframes, performs loop closure via ICP + KD-tree search, and + optimizes the pose graph with GTSAM iSAM2. Publishes corrected odometry, + the optimized pose graph, loop-closure events, and an accumulated + global map.""" + + config: PGOConfig + + lidar: In[PointCloud2] + odometry: In[Odometry] + corrected_odometry: Out[Odometry] + pose_graph: Out[Graph3D] + loop_closure_event: Out[GraphDelta3D] + global_map: Out[PointCloud2] + + def __init__(self, **kwargs: Any) -> None: + super().__init__(**kwargs) + self._running = False + self._thread: threading.Thread | None = None + self._pgo: _SimplePGO | None = None + self._latest_r = np.eye(3) + self._latest_t = np.zeros(3) + self._latest_time = 0.0 + self._has_odom = False + self._last_global_map_time = 0.0 + self._published_loops = 0 + self._lock = threading.Lock() + # Protects _pgo mutations (add_key_pose, search_for_loops, + # smooth_and_update, build_global_map) against concurrent access + # from _on_scan and _publish_loop threads. + self._pgo_lock = threading.Lock() + + @rpc + def start(self) -> None: + super().start() + self._pgo = _SimplePGO(self.config) + # Identity map -> odom so consumers querying map -> body get a result + # before any loop-closure correction exists. + self.tf.publish(build_map_odom_tf(np.eye(3), np.zeros(3), time.time())) + self.register_disposable(Disposable(self.odometry.subscribe(self._on_odom))) + self.register_disposable(Disposable(self.lidar.subscribe(self._on_scan))) + self._running = True + if self.config.publish_global_map: + self._thread = threading.Thread(target=self._publish_global_map_loop, daemon=True) + self._thread.start() + logger.info( + "PGO module started (gtsam iSAM2, pure python)", + publish_global_map=self.config.publish_global_map, + ) + + @rpc + def stop(self) -> None: + self._running = False + if self._thread: + self._thread.join(timeout=DEFAULT_THREAD_JOIN_TIMEOUT) + super().stop() + + def _on_odom(self, msg: Odometry) -> None: + q = [ + msg.pose.orientation.x, + msg.pose.orientation.y, + msg.pose.orientation.z, + msg.pose.orientation.w, + ] + r = Rotation.from_quat(q).as_matrix() + t = np.array([msg.pose.position.x, msg.pose.position.y, msg.pose.position.z]) + with self._lock: + self._latest_r = r + self._latest_t = t + self._latest_time = msg.ts if msg.ts else time.time() + self._has_odom = True + + def _on_scan(self, cloud: PointCloud2) -> None: + with self._lock: + if not self._has_odom: + return + r_local = self._latest_r.copy() + t_local = self._latest_t.copy() + ts = self._latest_time + + pgo = self._pgo + assert pgo is not None + + with self._pgo_lock: + result = process_scan(pgo, cloud, r_local, t_local, ts, self.config.unregister_input) + if result is None: + return + corrected_odom, tf_msg, keyframe_added = result + if keyframe_added: + graph_msg, loop_events = self._snapshot_graph(pgo, ts) + else: + graph_msg, loop_events = None, [] + + self.corrected_odometry.publish(corrected_odom) + self.tf.publish(tf_msg) + if graph_msg is not None: + self.pose_graph.publish(graph_msg) + for event in loop_events: + self.loop_closure_event.publish(event) + + def _snapshot_graph(self, pgo: _SimplePGO, ts: float) -> tuple[Graph3D, list[GraphDelta3D]]: + """The optimized graph (odometry chain + loop edges) and one + GraphDelta3D per loop pair not yet published. + + Caller must hold ``_pgo_lock``.""" + world_frame = self.config.world_frame + nodes = [ + _keyframe_node(index, key_pose, world_frame) + for index, key_pose in enumerate(pgo._key_poses) + ] + edges = [ + Graph3D.Edge( + start_id=index - 1, end_id=index, timestamp=pgo._key_poses[index].timestamp + ) + for index in range(1, len(pgo._key_poses)) + ] + edges += [ + Graph3D.Edge(start_id=target, end_id=source, metadata_id=1) + for target, source in pgo._history_pairs + ] + graph_msg = Graph3D(ts=ts, nodes=nodes, edges=edges) + + loop_events: list[GraphDelta3D] = [] + identity = GraphDelta3D.Transform( + translation=Vector3(0.0, 0.0, 0.0), rotation=Quaternion(0.0, 0.0, 0.0, 1.0) + ) + for target, source in pgo._history_pairs[self._published_loops :]: + loop_events.append( + GraphDelta3D( + ts=ts, + nodes=[ + _keyframe_node(target, pgo._key_poses[target], world_frame), + _keyframe_node(source, pgo._key_poses[source], world_frame), + ], + transforms=[identity, identity], + ) + ) + self._published_loops = len(pgo._history_pairs) + return graph_msg, loop_events + + def _publish_global_map_loop(self) -> None: + pgo = self._pgo + assert pgo is not None + rate = self.config.global_map_publish_rate + interval = 1.0 / rate if rate > 0 else 2.0 + + while self._running: + t0 = time.monotonic() + + if t0 - self._last_global_map_time > interval and pgo.num_key_poses > 0: + with self._pgo_lock: + cloud_np = pgo.build_global_map(self.config.global_map_voxel_size) + if len(cloud_np) > 0: + now = time.time() + self.global_map.publish( + PointCloud2.from_numpy( + cloud_np, frame_id=self.config.world_frame, timestamp=now + ) + ) + self._last_global_map_time = t0 + + elapsed = time.monotonic() - t0 + sleep_time = max(DEFAULT_THREAD_JOIN_TIMEOUT, interval - elapsed) + time.sleep(sleep_time) diff --git a/dimos/navigation/jnav/components/loop_closure/ivan_pgo_transformer/module.py b/dimos/navigation/jnav/components/loop_closure/ivan_pgo_transformer/module.py new file mode 100644 index 0000000000..17e7c84511 --- /dev/null +++ b/dimos/navigation/jnav/components/loop_closure/ivan_pgo_transformer/module.py @@ -0,0 +1,263 @@ +# Copyright 2026 Dimensional Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Ivan's CURRENT PGO (dimos/mapping/loop_closure/pgo.py, main, June 2026) +forced into an online LoopClosure module. + +That code ships as an offline memory2 Transformer, but its `_PGOState` core is +already incremental — one `process(pose, ts, world_cloud)` call per frame. This +wrapper brute-forces the online shape: every arriving scan is paired with the +latest odometry pose and pushed straight into `_PGOState`, then the corrected +odometry / pose graph / loop events are read back out of its (private) state. +No changes to the mapping code itself; this module owns the coupling. + +`_PGOState.process` expects world-frame registered scans (it unregisters them +internally via the odom pose) — set `input_is_registered: false` for raw +body-frame scans and they'll be pre-registered here first.""" + +from __future__ import annotations + +import threading +import time +from typing import Any + +import numpy as np +from scipy.spatial.transform import Rotation + +from dimos.core.core import rpc +from dimos.core.module import Module, ModuleConfig +from dimos.core.stream import In, Out +from dimos.mapping.loop_closure.pgo import ( + PGOConfig as MappingPGOConfig, + _PGOState, + _pose3_to_transform, +) +from dimos.msgs.geometry_msgs.Pose import Pose +from dimos.msgs.geometry_msgs.PoseStamped import PoseStamped +from dimos.msgs.geometry_msgs.Quaternion import Quaternion +from dimos.msgs.geometry_msgs.Transform import Transform +from dimos.msgs.geometry_msgs.Vector3 import Vector3 +from dimos.msgs.nav_msgs.Odometry import Odometry +from dimos.msgs.sensor_msgs.PointCloud2 import PointCloud2 +from dimos.navigation.jnav.components.loop_closure.spec import LoopClosure +from dimos.navigation.jnav.msgs.Graph3D import Graph3D +from dimos.navigation.jnav.msgs.GraphDelta3D import GraphDelta3D +from dimos.utils.logging_config import setup_logger + +logger = setup_logger() + + +class PGOConfig(ModuleConfig): + world_frame: str = "map" + odom_frame: str = "odom" + body_frame: str = "base_link" + # World-frame registered input (fastlio); false = raw body-frame scans. + input_is_registered: bool = True + + # Mirrors mapping/loop_closure PGOConfig — forwarded verbatim. + key_pose_delta_trans: float = 0.5 + key_pose_delta_deg: float = 10.0 + loop_search_radius: float = 2.0 + loop_time_thresh: float = 20.0 + loop_score_thresh: float = 0.3 + loop_submap_half_range: int = 10 + min_icp_inliers: int = 10 + min_keyframes_for_loop_search: int = 10 + loop_closure_extra_iterations: int = 4 + submap_resolution: float = 0.2 + min_loop_detect_duration: float = 5.0 + max_icp_iterations: int = 50 + max_icp_correspondence_dist: float = 1.0 + odom_rot_var: float = 1e-6 + odom_trans_var_xy: float = 1e-4 + odom_trans_var_z: float = 1e-6 + loop_rot_var: float = 0.05 + + +def _mapping_config(config: PGOConfig) -> MappingPGOConfig: + fields = {name: getattr(config, name) for name in MappingPGOConfig.model_fields} + return MappingPGOConfig(**fields) + + +def _pose3_node(index: int, pose: Any, ts: float, world_frame: str) -> Graph3D.Node3D: + translation = np.asarray(pose.translation()) + quaternion = Rotation.from_matrix(pose.rotation().matrix()).as_quat() # [x,y,z,w] + return Graph3D.Node3D( + pose=PoseStamped( + ts=ts, + frame_id=world_frame, + position=[float(v) for v in translation], + orientation=[float(v) for v in quaternion], + ), + id=index, + ) + + +class PGO(Module, LoopClosure): + """Online wrapper over Ivan's current incremental PGO core (`_PGOState`).""" + + config: PGOConfig + + lidar: In[PointCloud2] + odometry: In[Odometry] + corrected_odometry: Out[Odometry] + pose_graph: Out[Graph3D] + loop_closure_event: Out[GraphDelta3D] + + def __init__(self, **kwargs: Any) -> None: + super().__init__(**kwargs) + self._state: _PGOState | None = None + self._latest_odom: Odometry | None = None + self._published_loops = 0 + self._odom_lock = threading.Lock() + self._state_lock = threading.Lock() + self._unsub_odom: Any = None + self._unsub_lidar: Any = None + + @rpc + def start(self) -> None: + super().start() + self._state = _PGOState(_mapping_config(self.config)) + # Identity map -> odom so consumers querying map -> body get a result + # before any correction exists. + self.tf.publish(self._correction_tf(np.eye(4), time.time())) + self._unsub_odom = self.odometry.subscribe(self._on_odom) + self._unsub_lidar = self.lidar.subscribe(self._on_scan) + logger.info("PGO (ivan transformer core) started") + + @rpc + def stop(self) -> None: + if self._unsub_odom is not None: + self._unsub_odom.dispose() + if self._unsub_lidar is not None: + self._unsub_lidar.dispose() + super().stop() + + def _on_odom(self, msg: Odometry) -> None: + with self._odom_lock: + self._latest_odom = msg + + def _on_scan(self, cloud: PointCloud2) -> None: + import gtsam # type: ignore[import-untyped] + + with self._odom_lock: + odom = self._latest_odom + if odom is None or len(cloud) == 0: + return + state = self._state + assert state is not None + + position = odom.pose.position + orientation = odom.pose.orientation + local_pose = gtsam.Pose3( + gtsam.Rot3.Quaternion(orientation.w, orientation.x, orientation.y, orientation.z), + gtsam.Point3(position.x, position.y, position.z), + ) + ts = odom.ts if odom.ts else time.time() + + if not self.config.input_is_registered: + cloud = cloud.transform( + _pose3_to_transform( + local_pose, + ts=ts, + frame_id=self.config.world_frame, + child_frame_id=self.config.body_frame, + ) + ) + + with self._state_lock: + state.process(local_pose, ts, cloud) + keyframe_count = len(state._key_poses) + correction = state._world_correction + corrected = correction.compose(local_pose) + graph_msg = self._snapshot_graph(state, ts) if keyframe_count else None + loop_events = self._new_loop_events(state, ts) + + self._publish_corrected_odometry(corrected, ts) + self.tf.publish(self._correction_tf(correction.matrix(), ts)) + if graph_msg is not None: + self.pose_graph.publish(graph_msg) + for event in loop_events: + self.loop_closure_event.publish(event) + + def _publish_corrected_odometry(self, pose: Any, ts: float) -> None: + translation = np.asarray(pose.translation()) + quaternion = Rotation.from_matrix(pose.rotation().matrix()).as_quat() + self.corrected_odometry.publish( + Odometry( + ts=ts, + frame_id=self.config.world_frame, + child_frame_id=self.config.body_frame, + pose=Pose( + position=[float(v) for v in translation], + orientation=[float(v) for v in quaternion], + ), + ) + ) + + def _correction_tf(self, matrix: np.ndarray, ts: float) -> Transform: + quaternion = Rotation.from_matrix(matrix[:3, :3]).as_quat() + return Transform( + frame_id=self.config.world_frame, + child_frame_id=self.config.odom_frame, + translation=Vector3(float(matrix[0, 3]), float(matrix[1, 3]), float(matrix[2, 3])), + rotation=Quaternion(*[float(v) for v in quaternion]), + ts=ts, + ) + + def _snapshot_graph(self, state: _PGOState, ts: float) -> Graph3D: + """Optimized keyframes + odometry-chain and loop edges. + + Caller must hold ``_state_lock``.""" + world_frame = self.config.world_frame + nodes = [ + _pose3_node(index, key_pose.optimized, key_pose.timestamp, world_frame) + for index, key_pose in enumerate(state._key_poses) + ] + edges = [ + Graph3D.Edge( + start_id=index - 1, end_id=index, timestamp=state._key_poses[index].timestamp + ) + for index in range(1, len(state._key_poses)) + ] + edges += [ + Graph3D.Edge(start_id=pair.target, end_id=pair.source, metadata_id=1) + for pair in state._accepted_loops + ] + return Graph3D(ts=ts, nodes=nodes, edges=edges) + + def _new_loop_events(self, state: _PGOState, ts: float) -> list[GraphDelta3D]: + """One GraphDelta3D per accepted loop not yet published. + + Caller must hold ``_state_lock``.""" + world_frame = self.config.world_frame + identity = GraphDelta3D.Transform( + translation=Vector3(0.0, 0.0, 0.0), rotation=Quaternion(0.0, 0.0, 0.0, 1.0) + ) + events: list[GraphDelta3D] = [] + for pair in state._accepted_loops[self._published_loops :]: + source = state._key_poses[pair.source] + target = state._key_poses[pair.target] + events.append( + GraphDelta3D( + ts=ts, + nodes=[ + _pose3_node(pair.target, target.optimized, target.timestamp, world_frame), + _pose3_node(pair.source, source.optimized, source.timestamp, world_frame), + ], + transforms=[identity, identity], + ) + ) + self._published_loops = len(state._accepted_loops) + return events diff --git a/dimos/navigation/jnav/components/loop_closure/unrefined_pgo/cpp/CMakeLists.txt b/dimos/navigation/jnav/components/loop_closure/unrefined_pgo/cpp/CMakeLists.txt new file mode 100644 index 0000000000..8c7b6d5b94 --- /dev/null +++ b/dimos/navigation/jnav/components/loop_closure/unrefined_pgo/cpp/CMakeLists.txt @@ -0,0 +1,49 @@ +cmake_minimum_required(VERSION 3.14) +project(pgo CXX) + +set(CMAKE_CXX_STANDARD 17) +set(CMAKE_CXX_STANDARD_REQUIRED ON) +set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O3") + +include(FetchContent) +FetchContent_Declare(dimos_lcm + GIT_REPOSITORY https://github.com/dimensionalOS/dimos-lcm.git + GIT_TAG main + GIT_SHALLOW TRUE +) +FetchContent_MakeAvailable(dimos_lcm) + +find_package(PkgConfig REQUIRED) +pkg_check_modules(LCM REQUIRED lcm) +find_package(Eigen3 REQUIRED) +find_package(PCL 1.8 REQUIRED COMPONENTS common filters kdtree registration io) +find_package(GTSAM REQUIRED) + +add_definitions(-DUSE_PCL) + +add_executable(pgo + main.cpp + simple_pgo.cpp + commons.cpp +) + +target_include_directories(pgo PRIVATE + ${CMAKE_CURRENT_SOURCE_DIR} + ${dimos_lcm_SOURCE_DIR}/generated/cpp_lcm_msgs + ${LCM_INCLUDE_DIRS} + ${EIGEN3_INCLUDE_DIR} + ${PCL_INCLUDE_DIRS} + ${GTSAM_INCLUDE_DIR} +) + +target_link_libraries(pgo PRIVATE + ${LCM_LIBRARIES} + ${PCL_LIBRARIES} + gtsam +) + +target_link_directories(pgo PRIVATE + ${LCM_LIBRARY_DIRS} +) + +install(TARGETS pgo DESTINATION bin) diff --git a/dimos/navigation/jnav/components/loop_closure/unrefined_pgo/cpp/commons.cpp b/dimos/navigation/jnav/components/loop_closure/unrefined_pgo/cpp/commons.cpp new file mode 100644 index 0000000000..f309a97a85 --- /dev/null +++ b/dimos/navigation/jnav/components/loop_closure/unrefined_pgo/cpp/commons.cpp @@ -0,0 +1,8 @@ +#include "commons.h" + +void PoseWithTime::setTime(int32_t _sec, uint32_t _nsec) +{ + sec = _sec; + nsec = _nsec; + second = static_cast(sec) + static_cast(nsec) / 1e9; +} diff --git a/dimos/navigation/jnav/components/loop_closure/unrefined_pgo/cpp/commons.h b/dimos/navigation/jnav/components/loop_closure/unrefined_pgo/cpp/commons.h new file mode 100644 index 0000000000..2f9244ede6 --- /dev/null +++ b/dimos/navigation/jnav/components/loop_closure/unrefined_pgo/cpp/commons.h @@ -0,0 +1,29 @@ +#pragma once +#include +#include +#include + +using PointType = pcl::PointXYZI; +using CloudType = pcl::PointCloud; +using PointVec = std::vector>; + +using M3D = Eigen::Matrix3d; +using V3D = Eigen::Vector3d; +using M3F = Eigen::Matrix3f; +using V3F = Eigen::Vector3f; +using M4F = Eigen::Matrix4f; +using V4F = Eigen::Vector4f; + +struct PoseWithTime { + V3D t; + M3D r; + int32_t sec; + uint32_t nsec; + double second; + void setTime(int32_t sec, uint32_t nsec); +}; + +struct CloudWithPose { + CloudType::Ptr cloud; + PoseWithTime pose; +}; diff --git a/dimos/navigation/jnav/components/loop_closure/unrefined_pgo/cpp/dimos_native_module.hpp b/dimos/navigation/jnav/components/loop_closure/unrefined_pgo/cpp/dimos_native_module.hpp new file mode 100644 index 0000000000..c22cca4326 --- /dev/null +++ b/dimos/navigation/jnav/components/loop_closure/unrefined_pgo/cpp/dimos_native_module.hpp @@ -0,0 +1,97 @@ +// SmartNav Native Module helpers. +// Re-exports dimos NativeModule patterns for CLI arg parsing and LCM helpers. +// Based on dimos/hardware/sensors/lidar/common/dimos_native_module.hpp + +#pragma once + +#include +#include +#include +#include + +#include "std_msgs/Header.hpp" +#include "std_msgs/Time.hpp" + +namespace dimos { + +class NativeModule { +public: + NativeModule(int argc, char** argv) { + for (int i = 1; i < argc; ++i) { + std::string arg(argv[i]); + if (arg.size() > 2 && arg[0] == '-' && arg[1] == '-' && i + 1 < argc) { + args_[arg.substr(2)] = argv[++i]; + } + } + } + + /// Get the full LCM channel string for a declared port. + const std::string& topic(const std::string& port) const { + auto it = args_.find(port); + if (it == args_.end()) { + throw std::runtime_error("NativeModule: no topic for port '" + port + "'"); + } + return it->second; + } + + /// Get a string arg value, or a default if not present. + std::string arg(const std::string& key, const std::string& default_val = "") const { + auto it = args_.find(key); + return it != args_.end() ? it->second : default_val; + } + + /// Get a float arg value, or a default if not present. + float arg_float(const std::string& key, float default_val = 0.0f) const { + auto it = args_.find(key); + return it != args_.end() ? std::stof(it->second) : default_val; + } + + /// Get an int arg value, or a default if not present. + int arg_int(const std::string& key, int default_val = 0) const { + auto it = args_.find(key); + return it != args_.end() ? std::stoi(it->second) : default_val; + } + + /// Get a bool arg value, or a default if not present. + /// Present-but-unparseable values throw, matching arg_int/arg_float's + /// std::stoi/std::stof behaviour — a typo'd value or empty string is a + /// misconfiguration we want to surface immediately, not silently coerce + /// to false. + bool arg_bool(const std::string& key, bool default_val = false) const { + auto it = args_.find(key); + if (it == args_.end()) return default_val; + if (it->second == "true" || it->second == "1") return true; + if (it->second == "false" || it->second == "0") return false; + throw std::runtime_error( + "NativeModule: arg '--" + key + "' has unparseable bool value '" + + it->second + "' (expected true/false or 1/0)"); + } + + /// Check if a port/arg was provided. + bool has(const std::string& key) const { + return args_.count(key) > 0; + } + +private: + std::map args_; +}; + +/// Convert seconds (double) to a ROS-style Time message. +inline std_msgs::Time time_from_seconds(double t) { + std_msgs::Time ts; + ts.sec = static_cast(t); + ts.nsec = static_cast((t - ts.sec) * 1e9); + return ts; +} + +/// Build a stamped Header with auto-incrementing sequence number. +inline std_msgs::Header make_header(const std::string& frame_id, double ts) { + static std::atomic seq{0}; + std_msgs::Header h; + h.seq = seq.fetch_add(1, std::memory_order_relaxed); + h.stamp = time_from_seconds(ts); + h.frame_id = frame_id; + return h; +} + +} // namespace dimos diff --git a/dimos/navigation/jnav/components/loop_closure/unrefined_pgo/cpp/flake.lock b/dimos/navigation/jnav/components/loop_closure/unrefined_pgo/cpp/flake.lock new file mode 100644 index 0000000000..16898a2c2d --- /dev/null +++ b/dimos/navigation/jnav/components/loop_closure/unrefined_pgo/cpp/flake.lock @@ -0,0 +1,144 @@ +{ + "nodes": { + "dimos-lcm": { + "flake": false, + "locked": { + "lastModified": 1769774949, + "narHash": "sha256-icRK7jerqNlwK1WZBrnIP04I2WozzFqTD7qsmnPxQuo=", + "owner": "dimensionalOS", + "repo": "dimos-lcm", + "rev": "0aa72b7b1bd3a65f50f5c03485ee9b728df56afe", + "type": "github" + }, + "original": { + "owner": "dimensionalOS", + "ref": "main", + "repo": "dimos-lcm", + "type": "github" + } + }, + "flake-utils": { + "inputs": { + "systems": "systems" + }, + "locked": { + "lastModified": 1731533236, + "narHash": "sha256-l0KFg5HjrsfsO/JpG+r7fRrqm12kzFHyUHqHCVpMMbI=", + "owner": "numtide", + "repo": "flake-utils", + "rev": "11707dc2f618dd54ca8739b309ec4fc024de578b", + "type": "github" + }, + "original": { + "owner": "numtide", + "repo": "flake-utils", + "type": "github" + } + }, + "gtsam-extended": { + "inputs": { + "flake-utils": [ + "flake-utils" + ], + "nixpkgs": [ + "nixpkgs" + ], + "nixpkgs-stable": "nixpkgs-stable" + }, + "locked": { + "lastModified": 1775168452, + "narHash": "sha256-fsUWPQIw+lk4VEQUOniiPLwiPoldWh8ZdnIdos202+I=", + "owner": "jeff-hykin", + "repo": "gtsam-extended", + "rev": "f4572a80b6339181693aee6029ca28153e59a993", + "type": "github" + }, + "original": { + "owner": "jeff-hykin", + "repo": "gtsam-extended", + "type": "github" + } + }, + "lcm-extended": { + "inputs": { + "flake-utils": [ + "flake-utils" + ], + "nixpkgs": [ + "nixpkgs" + ] + }, + "locked": { + "lastModified": 1774902379, + "narHash": "sha256-gRFvEkbXCEoG4jEmsT+i0bMZ5kDHOtAaPsrbStXjdu4=", + "owner": "jeff-hykin", + "repo": "lcm_extended", + "rev": "7d12ad8546d3daae30528a6c28f2c9ff5b10baf7", + "type": "github" + }, + "original": { + "owner": "jeff-hykin", + "repo": "lcm_extended", + "type": "github" + } + }, + "nixpkgs": { + "locked": { + "lastModified": 1777954456, + "narHash": "sha256-hGdgeU2Nk87RAuZyYjyDjFL6LK7dAZN5RE9+hrDTkDU=", + "owner": "NixOS", + "repo": "nixpkgs", + "rev": "549bd84d6279f9852cae6225e372cc67fb91a4c1", + "type": "github" + }, + "original": { + "owner": "NixOS", + "ref": "nixos-unstable", + "repo": "nixpkgs", + "type": "github" + } + }, + "nixpkgs-stable": { + "locked": { + "lastModified": 1751274312, + "narHash": "sha256-/bVBlRpECLVzjV19t5KMdMFWSwKLtb5RyXdjz3LJT+g=", + "owner": "NixOS", + "repo": "nixpkgs", + "rev": "50ab793786d9de88ee30ec4e4c24fb4236fc2674", + "type": "github" + }, + "original": { + "owner": "NixOS", + "ref": "nixos-24.11", + "repo": "nixpkgs", + "type": "github" + } + }, + "root": { + "inputs": { + "dimos-lcm": "dimos-lcm", + "flake-utils": "flake-utils", + "gtsam-extended": "gtsam-extended", + "lcm-extended": "lcm-extended", + "nixpkgs": "nixpkgs" + } + }, + "systems": { + "locked": { + "lastModified": 1681028828, + "narHash": "sha256-Vy1rq5AaRuLzOxct8nz4T6wlgyUR7zLU309k9mBC768=", + "owner": "nix-systems", + "repo": "default", + "rev": "da67096a3b9bf56a91d16901293e51ba5b49a27e", + "type": "github" + }, + "original": { + "owner": "nix-systems", + "repo": "default", + "type": "github" + } + } + }, + "root": "root", + "version": 7 +} diff --git a/dimos/navigation/jnav/components/loop_closure/unrefined_pgo/cpp/flake.nix b/dimos/navigation/jnav/components/loop_closure/unrefined_pgo/cpp/flake.nix new file mode 100644 index 0000000000..ea8eae1327 --- /dev/null +++ b/dimos/navigation/jnav/components/loop_closure/unrefined_pgo/cpp/flake.nix @@ -0,0 +1,70 @@ +{ + description = "SmartNav PGO native module (pose graph optimization with iSAM2 + PCL ICP)"; + + inputs = { + nixpkgs.url = "github:NixOS/nixpkgs/nixos-unstable"; + flake-utils.url = "github:numtide/flake-utils"; + lcm-extended = { + url = "github:jeff-hykin/lcm_extended"; + inputs.nixpkgs.follows = "nixpkgs"; + inputs.flake-utils.follows = "flake-utils"; + }; + dimos-lcm = { + url = "github:dimensionalOS/dimos-lcm/main"; + flake = false; + }; + gtsam-extended = { + url = "github:jeff-hykin/gtsam-extended"; + inputs.nixpkgs.follows = "nixpkgs"; + inputs.flake-utils.follows = "flake-utils"; + }; + }; + + outputs = { self, nixpkgs, flake-utils, lcm-extended, dimos-lcm, gtsam-extended, ... }: + flake-utils.lib.eachDefaultSystem (system: + let + pkgs = import nixpkgs { inherit system; }; + lcm = lcm-extended.packages.${system}.lcm; + + gtsam-base = gtsam-extended.packages.${system}.gtsam-cpp; + gtsam = gtsam-base.overrideAttrs (_old: { + src = pkgs.fetchFromGitHub { + owner = "borglab"; + repo = "gtsam"; + rev = "1a9792a7ede244850a413739557635b606f295c0"; + sha256 = "sha256-zxm5TGVPW1vipFVpw01zcvKRw4mkh+5ZBCR1n6G466o="; + }; + env.NIX_CFLAGS_COMPILE = "-Wno-error=array-bounds"; + }); + in { + packages.default = pkgs.stdenv.mkDerivation { + pname = "smartnav-pgo"; + version = "0.1.0"; + src = ./.; + + nativeBuildInputs = [ pkgs.cmake pkgs.pkg-config ]; + buildInputs = [ + lcm + pkgs.glib + pkgs.eigen + pkgs.boost + pkgs.pcl + gtsam + ]; + + env.NIX_CFLAGS_COMPILE = "-Wno-error=array-bounds"; + + cmakeFlags = [ + "-DCMAKE_POLICY_VERSION_MINIMUM=3.5" + "-DFETCHCONTENT_SOURCE_DIR_DIMOS_LCM=${dimos-lcm}" + ]; + + # On macOS, libgtsam.4.dylib is referenced via @rpath but the binary + # has no LC_RPATH entries, so it fails to load at runtime. Add one + # pointing at the gtsam lib dir. + postInstall = pkgs.lib.optionalString pkgs.stdenv.isDarwin '' + ${pkgs.darwin.cctools}/bin/install_name_tool -add_rpath ${gtsam}/lib $out/bin/pgo + ''; + }; + }); +} diff --git a/dimos/navigation/jnav/components/loop_closure/unrefined_pgo/cpp/main.cpp b/dimos/navigation/jnav/components/loop_closure/unrefined_pgo/cpp/main.cpp new file mode 100644 index 0000000000..38314d4390 --- /dev/null +++ b/dimos/navigation/jnav/components/loop_closure/unrefined_pgo/cpp/main.cpp @@ -0,0 +1,299 @@ +// PGO NativeModule — faithful port of pgo_node.cpp from ROS2 to LCM. +// Subscribes to registered_scan + odometry, runs SimplePGO (iSAM2 + PCL ICP), +// publishes corrected_odometry, global_map, and TF correction offset. + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include "commons.h" +#include "simple_pgo.h" +#include "dimos_native_module.hpp" +#include "point_cloud_utils.hpp" + +#include "nav_msgs/Odometry.hpp" +#include "sensor_msgs/PointCloud2.hpp" +#include "geometry_msgs/Pose.hpp" +#include "geometry_msgs/Quaternion.hpp" +#include "geometry_msgs/Point.hpp" + +static std::atomic g_running{true}; +static void signal_handler(int) { g_running.store(false); } + +// Shared state between LCM callbacks and main loop +static std::mutex g_buffer_mutex; +static std::queue g_cloud_buffer; +static double g_last_message_time = 0.0; + +// Latest odometry for non-keyframe TF broadcasting +static std::mutex g_odom_mutex; +static M3D g_latest_r = M3D::Identity(); +static V3D g_latest_t = V3D::Zero(); +static double g_latest_time = 0.0; +static bool g_has_odom = false; + +class Handlers { +public: + void on_odometry(const lcm::ReceiveBuffer*, const std::string&, + const nav_msgs::Odometry* msg) { + M3D r = Eigen::Quaterniond( + msg->pose.pose.orientation.w, + msg->pose.pose.orientation.x, + msg->pose.pose.orientation.y, + msg->pose.pose.orientation.z + ).toRotationMatrix(); + + V3D t(msg->pose.pose.position.x, + msg->pose.pose.position.y, + msg->pose.pose.position.z); + + double ts = msg->header.stamp.sec + msg->header.stamp.nsec / 1e9; + + std::lock_guard lock(g_odom_mutex); + g_latest_r = r; + g_latest_t = t; + g_latest_time = ts; + g_has_odom = true; + } + + void on_registered_scan(const lcm::ReceiveBuffer*, const std::string&, + const sensor_msgs::PointCloud2* msg) { + std::lock_guard odom_lock(g_odom_mutex); + if (!g_has_odom) + return; + + double ts = g_latest_time; + + // Reject out-of-order messages + if (ts < g_last_message_time) + return; + g_last_message_time = ts; + + CloudWithPose cp; + cp.pose.r = g_latest_r; + cp.pose.t = g_latest_t; + cp.pose.setTime(static_cast(ts), + static_cast((ts - static_cast(ts)) * 1e9)); + + // Parse PointCloud2 to PCL + cp.cloud = CloudType::Ptr(new CloudType); + smartnav::to_pcl(*msg, *cp.cloud); + + std::lock_guard buf_lock(g_buffer_mutex); + g_cloud_buffer.push(cp); + } +}; + +static nav_msgs::Odometry build_odometry(const M3D& r, const V3D& t, double ts, + const std::string& frame_id, + const std::string& child_frame_id) { + nav_msgs::Odometry odom; + odom.header = dimos::make_header(frame_id, ts); + odom.child_frame_id = child_frame_id; + + Eigen::Quaterniond q(r); + odom.pose.pose.position.x = t.x(); + odom.pose.pose.position.y = t.y(); + odom.pose.pose.position.z = t.z(); + odom.pose.pose.orientation.x = q.x(); + odom.pose.pose.orientation.y = q.y(); + odom.pose.pose.orientation.z = q.z(); + odom.pose.pose.orientation.w = q.w(); + + return odom; +} + +int main(int argc, char** argv) +{ + signal(SIGTERM, signal_handler); + signal(SIGINT, signal_handler); + + dimos::NativeModule mod(argc, argv); + + // Port topics + std::string scan_topic = mod.topic("registered_scan"); + std::string odom_topic = mod.topic("odometry"); + std::string corrected_odom_topic = mod.topic("corrected_odometry"); + std::string global_map_topic = mod.topic("global_map"); + std::string tf_topic = mod.topic("pgo_tf"); + + // Config parameters + Config config; + config.key_pose_delta_deg = mod.arg_float("key_pose_delta_deg", 10.0f); + config.key_pose_delta_trans = mod.arg_float("key_pose_delta_trans", 0.5f); + config.loop_search_radius = mod.arg_float("loop_search_radius", 1.0f); + config.loop_time_tresh = mod.arg_float("loop_time_thresh", 60.0f); + config.loop_score_tresh = mod.arg_float("loop_score_thresh", 0.15f); + config.loop_submap_half_range = mod.arg_int("loop_submap_half_range", 5); + config.submap_resolution = mod.arg_float("submap_resolution", 0.1f); + config.min_loop_detect_duration = mod.arg_float("min_loop_detect_duration", 5.0f); + + // Node-level config + std::string world_frame = mod.arg("world_frame", "map"); + std::string local_frame = mod.arg("local_frame", "odom"); + float global_map_voxel_size = mod.arg_float("global_map_voxel_size", 0.1f); + float global_map_publish_rate = mod.arg_float("global_map_publish_rate", 1.0f); + // rate <= 0 disables global_map entirely (it's a big cloud that can overflow + // LCM's single-message limit and congest the bus). Consumers that only need + // corrected_odometry / pose_graph should turn it off. + bool publish_global_map = global_map_publish_rate > 0; + double global_map_interval = publish_global_map ? 1.0 / global_map_publish_rate : 0.0; + + // Unregister mode: transform world-frame scans to body-frame + bool unregister_input = mod.arg_bool("unregister_input", true); + + bool debug = mod.arg_bool("debug", false); + + pcl::console::setVerbosityLevel( + debug ? pcl::console::L_INFO : pcl::console::L_ERROR); + + SimplePGO pgo(config); + + lcm::LCM lcm; + if (!lcm.good()) { + fprintf(stderr, "PGO: LCM init failed\n"); + return 1; + } + + Handlers handlers; + lcm.subscribe(odom_topic, &Handlers::on_odometry, &handlers); + lcm.subscribe(scan_topic, &Handlers::on_registered_scan, &handlers); + + if (debug) { + fprintf(stderr, "PGO native module started\n"); + fprintf(stderr, " registered_scan: %s\n", scan_topic.c_str()); + fprintf(stderr, " odometry: %s\n", odom_topic.c_str()); + fprintf(stderr, " corrected_odometry: %s\n", corrected_odom_topic.c_str()); + fprintf(stderr, " global_map: %s\n", global_map_topic.c_str()); + fprintf(stderr, " pgo_tf: %s\n", tf_topic.c_str()); + } + + double last_global_map_time = 0.0; + int timer_period_ms = 50; // 20 Hz, matching original + + while (g_running.load()) { + // Drain all pending LCM messages + while (lcm.handleTimeout(0) > 0) {} + + // Check buffer + CloudWithPose cp; + bool has_data = false; + { + std::lock_guard lock(g_buffer_mutex); + if (!g_cloud_buffer.empty()) { + cp = g_cloud_buffer.front(); + // Drain entire queue (matching original: process oldest, discard rest) + while (!g_cloud_buffer.empty()) { + g_cloud_buffer.pop(); + } + has_data = true; + } + } + + if (!has_data) { + std::this_thread::sleep_for(std::chrono::milliseconds(timer_period_ms)); + continue; + } + + // Optionally transform world-frame scan to body-frame + if (unregister_input && cp.cloud && cp.cloud->size() > 0) { + CloudType::Ptr body_cloud(new CloudType); + // body = R_odom^T * (world_pts - t_odom) + M3D r_inv = cp.pose.r.transpose(); + for (const auto& pt : *cp.cloud) { + V3D world_pt(pt.x, pt.y, pt.z); + V3D body_pt = r_inv * (world_pt - cp.pose.t); + PointType bp; + bp.x = static_cast(body_pt.x()); + bp.y = static_cast(body_pt.y()); + bp.z = static_cast(body_pt.z()); + bp.intensity = pt.intensity; + body_cloud->push_back(bp); + } + cp.cloud = body_cloud; + } + + double cur_time = cp.pose.second; + + if (!pgo.addKeyPose(cp)) { + // Not a keyframe — still broadcast TF and corrected odom + M3D corr_r = pgo.offsetR() * cp.pose.r; + V3D corr_t = pgo.offsetR() * cp.pose.t + pgo.offsetT(); + + nav_msgs::Odometry corrected = build_odometry( + corr_r, corr_t, cur_time, world_frame, "base_link"); + lcm.publish(corrected_odom_topic, &corrected); + + nav_msgs::Odometry tf_msg = build_odometry( + pgo.offsetR(), pgo.offsetT(), cur_time, world_frame, local_frame); + lcm.publish(tf_topic, &tf_msg); + + std::this_thread::sleep_for(std::chrono::milliseconds(timer_period_ms)); + continue; + } + + // Keyframe added + pgo.searchForLoopPairs(); + pgo.smoothAndUpdate(); + + if (debug) { + fprintf(stderr, "PGO: keyframe %zu at (%.1f, %.1f, %.1f)\n", + pgo.keyPoses().size(), + cp.pose.t.x(), cp.pose.t.y(), cp.pose.t.z()); + } + + // Publish corrected odometry + M3D corr_r = pgo.offsetR() * cp.pose.r; + V3D corr_t = pgo.offsetR() * cp.pose.t + pgo.offsetT(); + nav_msgs::Odometry corrected = build_odometry( + corr_r, corr_t, cur_time, world_frame, "base_link"); + lcm.publish(corrected_odom_topic, &corrected); + + // Publish TF correction (map -> odom offset) + nav_msgs::Odometry tf_msg = build_odometry( + pgo.offsetR(), pgo.offsetT(), cur_time, world_frame, local_frame); + lcm.publish(tf_topic, &tf_msg); + + // Publish global map (throttled) + double now = cur_time; + if (publish_global_map && now - last_global_map_time >= global_map_interval) { + last_global_map_time = now; + + if (!pgo.keyPoses().empty()) { + CloudType::Ptr global_cloud(new CloudType); + for (size_t i = 0; i < pgo.keyPoses().size(); i++) { + CloudType::Ptr world_cloud(new CloudType); + pcl::transformPointCloud( + *pgo.keyPoses()[i].body_cloud, + *world_cloud, + pgo.keyPoses()[i].t_global, + Eigen::Quaterniond(pgo.keyPoses()[i].r_global)); + *global_cloud += *world_cloud; + } + + // Voxel downsample + CloudType::Ptr filtered(new CloudType); + pcl::VoxelGrid voxel; + voxel.setInputCloud(global_cloud); + voxel.setLeafSize(global_map_voxel_size, global_map_voxel_size, global_map_voxel_size); + voxel.filter(*filtered); + + sensor_msgs::PointCloud2 map_msg = smartnav::from_pcl(*filtered, world_frame, now); + lcm.publish(global_map_topic, &map_msg); + } + } + + std::this_thread::sleep_for(std::chrono::milliseconds(timer_period_ms)); + } + + if (debug) fprintf(stderr, "PGO native module shutting down\n"); + return 0; +} diff --git a/dimos/navigation/jnav/components/loop_closure/unrefined_pgo/cpp/point_cloud_utils.hpp b/dimos/navigation/jnav/components/loop_closure/unrefined_pgo/cpp/point_cloud_utils.hpp new file mode 100644 index 0000000000..0970e1f8de --- /dev/null +++ b/dimos/navigation/jnav/components/loop_closure/unrefined_pgo/cpp/point_cloud_utils.hpp @@ -0,0 +1,170 @@ +// Point cloud utility functions for SmartNav native modules. +// Provides PointCloud2 building/parsing helpers that work with dimos-lcm types. +// When USE_PCL is defined, also provides PCL interop utilities. + +#pragma once + +#include +#include +#include + +#include "sensor_msgs/PointCloud2.hpp" +#include "sensor_msgs/PointField.hpp" +#include "std_msgs/Header.hpp" + +#include "dimos_native_module.hpp" + +#ifdef USE_PCL +#include +#include +#include +#endif + +namespace smartnav { + +// Simple XYZI point structure (no PCL dependency) +struct PointXYZI { + float x, y, z, intensity; +}; + +// Build PointCloud2 from vector of XYZI points +inline sensor_msgs::PointCloud2 build_pointcloud2( + const std::vector& points, + const std::string& frame_id, + double timestamp +) { + sensor_msgs::PointCloud2 pc; + pc.header = dimos::make_header(frame_id, timestamp); + pc.height = 1; + pc.width = static_cast(points.size()); + pc.is_bigendian = 0; + pc.is_dense = 1; + + // Fields: x, y, z, intensity (all float32) + pc.fields_length = 4; + pc.fields.resize(4); + auto make_field = [](const std::string& name, int32_t offset) { + sensor_msgs::PointField f; + f.name = name; + f.offset = offset; + f.datatype = sensor_msgs::PointField::FLOAT32; + f.count = 1; + return f; + }; + pc.fields[0] = make_field("x", 0); + pc.fields[1] = make_field("y", 4); + pc.fields[2] = make_field("z", 8); + pc.fields[3] = make_field("intensity", 12); + + pc.point_step = 16; + pc.row_step = pc.point_step * pc.width; + pc.data_length = pc.row_step; + pc.data.resize(pc.data_length); + + for (size_t i = 0; i < points.size(); ++i) { + float* dst = reinterpret_cast(pc.data.data() + i * 16); + dst[0] = points[i].x; + dst[1] = points[i].y; + dst[2] = points[i].z; + dst[3] = points[i].intensity; + } + + return pc; +} + +// Parse PointCloud2 into vector of XYZI points +inline std::vector parse_pointcloud2(const sensor_msgs::PointCloud2& pc) { + std::vector points; + if (pc.width == 0 || pc.height == 0) return points; + + int num_points = pc.width * pc.height; + points.reserve(num_points); + + // Find field offsets + int x_off = -1, y_off = -1, z_off = -1, i_off = -1; + for (const auto& f : pc.fields) { + if (f.name == "x") x_off = f.offset; + else if (f.name == "y") y_off = f.offset; + else if (f.name == "z") z_off = f.offset; + else if (f.name == "intensity") i_off = f.offset; + } + + if (x_off < 0 || y_off < 0 || z_off < 0) return points; + + for (int n = 0; n < num_points; ++n) { + if (static_cast((n + 1) * pc.point_step) > pc.data.size()) break; + const uint8_t* base = pc.data.data() + n * pc.point_step; + PointXYZI p; + std::memcpy(&p.x, base + x_off, sizeof(float)); + std::memcpy(&p.y, base + y_off, sizeof(float)); + std::memcpy(&p.z, base + z_off, sizeof(float)); + if (i_off >= 0) std::memcpy(&p.intensity, base + i_off, sizeof(float)); + else p.intensity = 0.0f; + points.push_back(p); + } + + return points; +} + +// Get timestamp from PointCloud2 header +inline double get_timestamp(const sensor_msgs::PointCloud2& pc) { + return pc.header.stamp.sec + pc.header.stamp.nsec / 1e9; +} + +#ifdef USE_PCL +// Convert dimos-lcm PointCloud2 to PCL point cloud +inline void to_pcl(const sensor_msgs::PointCloud2& pc, + pcl::PointCloud& cloud) { + auto points = parse_pointcloud2(pc); + cloud.clear(); + cloud.reserve(points.size()); + for (const auto& p : points) { + pcl::PointXYZI pt; + pt.x = p.x; + pt.y = p.y; + pt.z = p.z; + pt.intensity = p.intensity; + cloud.push_back(pt); + } + cloud.width = cloud.size(); + cloud.height = 1; + cloud.is_dense = true; +} + +// Convert PCL point cloud to dimos-lcm PointCloud2 +inline sensor_msgs::PointCloud2 from_pcl( + const pcl::PointCloud& cloud, + const std::string& frame_id, + double timestamp +) { + std::vector points; + points.reserve(cloud.size()); + for (const auto& pt : cloud) { + points.push_back({pt.x, pt.y, pt.z, pt.intensity}); + } + return build_pointcloud2(points, frame_id, timestamp); +} +#endif + +// Quaternion to RPY conversion +inline void quat_to_rpy(double qx, double qy, double qz, double qw, + double& roll, double& pitch, double& yaw) { + // Roll (x-axis rotation) + double sinr_cosp = 2.0 * (qw * qx + qy * qz); + double cosr_cosp = 1.0 - 2.0 * (qx * qx + qy * qy); + roll = std::atan2(sinr_cosp, cosr_cosp); + + // Pitch (y-axis rotation) + double sinp = 2.0 * (qw * qy - qz * qx); + if (std::abs(sinp) >= 1.0) + pitch = std::copysign(M_PI / 2, sinp); + else + pitch = std::asin(sinp); + + // Yaw (z-axis rotation) + double siny_cosp = 2.0 * (qw * qz + qx * qy); + double cosy_cosp = 1.0 - 2.0 * (qy * qy + qz * qz); + yaw = std::atan2(siny_cosp, cosy_cosp); +} + +} // namespace smartnav diff --git a/dimos/navigation/jnav/components/loop_closure/unrefined_pgo/cpp/simple_pgo.cpp b/dimos/navigation/jnav/components/loop_closure/unrefined_pgo/cpp/simple_pgo.cpp new file mode 100644 index 0000000000..5fc18bf0e7 --- /dev/null +++ b/dimos/navigation/jnav/components/loop_closure/unrefined_pgo/cpp/simple_pgo.cpp @@ -0,0 +1,211 @@ +#include "simple_pgo.h" + +SimplePGO::SimplePGO(const Config &config) : m_config(config) +{ + gtsam::ISAM2Params isam2_params; + isam2_params.relinearizeThreshold = 0.01; + isam2_params.relinearizeSkip = 1; + m_isam2 = std::make_shared(isam2_params); + m_initial_values.clear(); + m_graph.resize(0); + m_r_offset.setIdentity(); + m_t_offset.setZero(); + + m_icp.setMaximumIterations(50); + m_icp.setMaxCorrespondenceDistance(10); + m_icp.setTransformationEpsilon(1e-6); + m_icp.setEuclideanFitnessEpsilon(1e-6); + m_icp.setRANSACIterations(0); +} + +bool SimplePGO::isKeyPose(const PoseWithTime &pose) +{ + if (m_key_poses.size() == 0) + return true; + const KeyPoseWithCloud &last_item = m_key_poses.back(); + double delta_trans = (pose.t - last_item.t_local).norm(); + double delta_deg = Eigen::Quaterniond(pose.r).angularDistance(Eigen::Quaterniond(last_item.r_local)) * 57.324; + if (delta_trans > m_config.key_pose_delta_trans || delta_deg > m_config.key_pose_delta_deg) + return true; + return false; +} +bool SimplePGO::addKeyPose(const CloudWithPose &cloud_with_pose) +{ + bool is_key_pose = isKeyPose(cloud_with_pose.pose); + if (!is_key_pose) + return false; + size_t idx = m_key_poses.size(); + M3D init_r = m_r_offset * cloud_with_pose.pose.r; + V3D init_t = m_r_offset * cloud_with_pose.pose.t + m_t_offset; + // 添加初始值 + m_initial_values.insert(idx, gtsam::Pose3(gtsam::Rot3(init_r), gtsam::Point3(init_t))); + if (idx == 0) + { + // 添加先验约束 + gtsam::noiseModel::Diagonal::shared_ptr noise = gtsam::noiseModel::Diagonal::Variances(gtsam::Vector6::Ones() * 1e-12); + m_graph.add(gtsam::PriorFactor(idx, gtsam::Pose3(gtsam::Rot3(init_r), gtsam::Point3(init_t)), noise)); + } + else + { + // 添加里程计约束 + const KeyPoseWithCloud &last_item = m_key_poses.back(); + M3D r_between = last_item.r_local.transpose() * cloud_with_pose.pose.r; + V3D t_between = last_item.r_local.transpose() * (cloud_with_pose.pose.t - last_item.t_local); + gtsam::noiseModel::Diagonal::shared_ptr noise = gtsam::noiseModel::Diagonal::Variances((gtsam::Vector(6) << 1e-6, 1e-6, 1e-6, 1e-4, 1e-4, 1e-6).finished()); + m_graph.add(gtsam::BetweenFactor(idx - 1, idx, gtsam::Pose3(gtsam::Rot3(r_between), gtsam::Point3(t_between)), noise)); + } + KeyPoseWithCloud item; + item.time = cloud_with_pose.pose.second; + item.r_local = cloud_with_pose.pose.r; + item.t_local = cloud_with_pose.pose.t; + item.body_cloud = cloud_with_pose.cloud; + item.r_global = init_r; + item.t_global = init_t; + m_key_poses.push_back(item); + return true; +} + +CloudType::Ptr SimplePGO::getSubMap(int idx, int half_range, double resolution) +{ + assert(idx >= 0 && idx < static_cast(m_key_poses.size())); + int min_idx = std::max(0, idx - half_range); + int max_idx = std::min(static_cast(m_key_poses.size()) - 1, idx + half_range); + + CloudType::Ptr ret(new CloudType); + for (int i = min_idx; i <= max_idx; i++) + { + + CloudType::Ptr body_cloud = m_key_poses[i].body_cloud; + CloudType::Ptr global_cloud(new CloudType); + pcl::transformPointCloud(*body_cloud, *global_cloud, m_key_poses[i].t_global, Eigen::Quaterniond(m_key_poses[i].r_global)); + *ret += *global_cloud; + } + if (resolution > 0) + { + pcl::VoxelGrid voxel_grid; + voxel_grid.setLeafSize(resolution, resolution, resolution); + voxel_grid.setInputCloud(ret); + voxel_grid.filter(*ret); + } + return ret; +} + +void SimplePGO::searchForLoopPairs() +{ + if (m_key_poses.size() < 10) + return; + if (m_config.min_loop_detect_duration > 0.0) + { + if (m_history_pairs.size() > 0) + { + double current_time = m_key_poses.back().time; + double last_time = m_key_poses[m_history_pairs.back().second].time; + if (current_time - last_time < m_config.min_loop_detect_duration) + return; + } + } + + size_t cur_idx = m_key_poses.size() - 1; + const KeyPoseWithCloud &last_item = m_key_poses.back(); + pcl::PointXYZ last_pose_pt; + last_pose_pt.x = last_item.t_global(0); + last_pose_pt.y = last_item.t_global(1); + last_pose_pt.z = last_item.t_global(2); + + pcl::PointCloud::Ptr key_poses_cloud(new pcl::PointCloud); + for (size_t i = 0; i < m_key_poses.size() - 1; i++) + { + pcl::PointXYZ pt; + pt.x = m_key_poses[i].t_global(0); + pt.y = m_key_poses[i].t_global(1); + pt.z = m_key_poses[i].t_global(2); + key_poses_cloud->push_back(pt); + } + pcl::KdTreeFLANN kdtree; + kdtree.setInputCloud(key_poses_cloud); + std::vector ids; + std::vector sqdists; + int neighbors = kdtree.radiusSearch(last_pose_pt, m_config.loop_search_radius, ids, sqdists); + if (neighbors == 0) + return; + + int loop_idx = -1; + for (size_t i = 0; i < ids.size(); i++) + { + int idx = ids[i]; + if (std::abs(last_item.time - m_key_poses[idx].time) > m_config.loop_time_tresh) + { + loop_idx = idx; + break; + } + } + + if (loop_idx == -1) + return; + + CloudType::Ptr target_cloud = getSubMap(loop_idx, m_config.loop_submap_half_range, m_config.submap_resolution); + CloudType::Ptr source_cloud = getSubMap(m_key_poses.size() - 1, 0, m_config.submap_resolution); + CloudType::Ptr align_cloud(new CloudType); + + m_icp.setInputSource(source_cloud); + m_icp.setInputTarget(target_cloud); + m_icp.align(*align_cloud); + + if (!m_icp.hasConverged() || m_icp.getFitnessScore() > m_config.loop_score_tresh) + return; + + M4F loop_transform = m_icp.getFinalTransformation(); + + LoopPair one_pair; + one_pair.source_id = cur_idx; + one_pair.target_id = loop_idx; + one_pair.score = m_icp.getFitnessScore(); + M3D r_refined = loop_transform.block<3, 3>(0, 0).cast() * m_key_poses[cur_idx].r_global; + V3D t_refined = loop_transform.block<3, 3>(0, 0).cast() * m_key_poses[cur_idx].t_global + loop_transform.block<3, 1>(0, 3).cast(); + one_pair.r_offset = m_key_poses[loop_idx].r_global.transpose() * r_refined; + one_pair.t_offset = m_key_poses[loop_idx].r_global.transpose() * (t_refined - m_key_poses[loop_idx].t_global); + m_cache_pairs.push_back(one_pair); + m_history_pairs.emplace_back(one_pair.target_id, one_pair.source_id); +} + +void SimplePGO::smoothAndUpdate() +{ + bool has_loop = !m_cache_pairs.empty(); + // 添加回环因子 + if (has_loop) + { + for (LoopPair &pair : m_cache_pairs) + { + m_graph.add(gtsam::BetweenFactor(pair.target_id, pair.source_id, + gtsam::Pose3(gtsam::Rot3(pair.r_offset), + gtsam::Point3(pair.t_offset)), + gtsam::noiseModel::Diagonal::Variances(gtsam::Vector6::Ones() * pair.score))); + } + std::vector().swap(m_cache_pairs); + } + // smooth and mapping + m_isam2->update(m_graph, m_initial_values); + m_isam2->update(); + if (has_loop) + { + m_isam2->update(); + m_isam2->update(); + m_isam2->update(); + m_isam2->update(); + } + m_graph.resize(0); + m_initial_values.clear(); + + // update key poses + gtsam::Values estimate_values = m_isam2->calculateBestEstimate(); + for (size_t i = 0; i < m_key_poses.size(); i++) + { + gtsam::Pose3 pose = estimate_values.at(i); + m_key_poses[i].r_global = pose.rotation().matrix().cast(); + m_key_poses[i].t_global = pose.translation().matrix().cast(); + } + // update offset + const KeyPoseWithCloud &last_item = m_key_poses.back(); + m_r_offset = last_item.r_global * last_item.r_local.transpose(); + m_t_offset = last_item.t_global - m_r_offset * last_item.t_local; +} diff --git a/dimos/navigation/jnav/components/loop_closure/unrefined_pgo/cpp/simple_pgo.h b/dimos/navigation/jnav/components/loop_closure/unrefined_pgo/cpp/simple_pgo.h new file mode 100644 index 0000000000..7f80c5b09a --- /dev/null +++ b/dimos/navigation/jnav/components/loop_closure/unrefined_pgo/cpp/simple_pgo.h @@ -0,0 +1,78 @@ +#pragma once +#include "commons.h" +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +struct KeyPoseWithCloud +{ + M3D r_local; + V3D t_local; + M3D r_global; + V3D t_global; + double time; + CloudType::Ptr body_cloud; +}; +struct LoopPair +{ + size_t source_id; + size_t target_id; + M3D r_offset; + V3D t_offset; + double score; +}; + +struct Config +{ + double key_pose_delta_deg = 10; + double key_pose_delta_trans = 1.0; + double loop_search_radius = 1.0; + double loop_time_tresh = 60.0; + double loop_score_tresh = 0.15; + int loop_submap_half_range = 5; + double submap_resolution = 0.1; + double min_loop_detect_duration = 10.0; +}; + +class SimplePGO +{ +public: + SimplePGO(const Config &config); + + bool isKeyPose(const PoseWithTime &pose); + + bool addKeyPose(const CloudWithPose &cloud_with_pose); + + bool hasLoop(){return m_cache_pairs.size() > 0;} + + void searchForLoopPairs(); + + void smoothAndUpdate(); + + CloudType::Ptr getSubMap(int idx, int half_range, double resolution); + std::vector> &historyPairs() { return m_history_pairs; } + std::vector &keyPoses() { return m_key_poses; } + + M3D offsetR() { return m_r_offset; } + V3D offsetT() { return m_t_offset; } + +private: + Config m_config; + std::vector m_key_poses; + std::vector> m_history_pairs; + std::vector m_cache_pairs; + M3D m_r_offset; + V3D m_t_offset; + std::shared_ptr m_isam2; + gtsam::Values m_initial_values; + gtsam::NonlinearFactorGraph m_graph; + pcl::IterativeClosestPoint m_icp; +}; diff --git a/dimos/navigation/jnav/components/loop_closure/unrefined_pgo/module.py b/dimos/navigation/jnav/components/loop_closure/unrefined_pgo/module.py new file mode 100644 index 0000000000..73a5fc59e4 --- /dev/null +++ b/dimos/navigation/jnav/components/loop_closure/unrefined_pgo/module.py @@ -0,0 +1,286 @@ +# Copyright 2026 Dimensional Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""The unrefined PGO — a frozen standalone snapshot of main's cmu_nav PGO +(GTSAM iSAM2 + PCL ICP C++ binary, the original gsc_pgo was refined from), +adapted to the LoopClosure spec without touching the binary. + +The cpp/ directory here is a copy of `cmu_nav/modules/pgo/cpp` at the time +of the snapshot, so later cmu_nav changes don't silently move this baseline. + +Spec adaptations, all Python-side: + * `lidar` — the binary expects a `registered_scan` topic; `_collect_topics` + aliases the lidar port's topic onto that arg name. + * `pose_graph` — the binary doesn't expose its internal graph, only the + current map->odom offset (`pgo_tf`) and corrected odometry. The wrapper + keyframes the RAW odometry stream (same delta gates as the binary) and + re-applies the LATEST offset to every keyframe on each correction update. + A single global offset can't reproduce iSAM2's per-keyframe smoothing — + but that offset is exactly what this PGO exposes to consumers, so the + synthesized graph is an honest picture of its output. + * `loop_closure_event` — emitted when the offset jumps by more than the + `_LOOP_EVENT_*` thresholds (the offset only moves materially when a loop + closure lands).""" + +from __future__ import annotations + +from dataclasses import dataclass +import math +from pathlib import Path +import threading +import time + +import numpy as np +from reactivex.disposable import Disposable +from scipy.spatial.transform import Rotation + +from dimos.core.core import rpc +from dimos.core.native_module import NativeModule, NativeModuleConfig +from dimos.core.stream import In, Out +from dimos.msgs.geometry_msgs.PoseStamped import PoseStamped +from dimos.msgs.geometry_msgs.Quaternion import Quaternion +from dimos.msgs.geometry_msgs.Transform import Transform +from dimos.msgs.geometry_msgs.Vector3 import Vector3 +from dimos.msgs.nav_msgs.Odometry import Odometry +from dimos.msgs.sensor_msgs.PointCloud2 import PointCloud2 +from dimos.navigation.jnav.components.loop_closure.spec import LoopClosure +from dimos.navigation.jnav.msgs.Graph3D import Graph3D +from dimos.navigation.jnav.msgs.GraphDelta3D import GraphDelta3D +from dimos.utils.logging_config import setup_logger + +logger = setup_logger() + +# Offset jumps below these are smoothing noise, not loop closures. +_LOOP_EVENT_MIN_TRANS_M = 0.05 +_LOOP_EVENT_MIN_ROT_DEG = 1.0 + + +class PGOConfig(NativeModuleConfig): + cwd: str | None = str(Path(__file__).resolve().parent / "cpp") + # Absolute so the exists() check works from any worker cwd (skips rebuild). + executable: str = str(Path(__file__).resolve().parent / "cpp/result/bin/pgo") + # path:$PWD makes nix see this (git-untracked) copied directory. + build_command: str | None = 'nix build "path:$PWD#default" --no-write-lock-file' + + # Frame names + world_frame: str = "map" + local_frame: str = "odom" + + # Keyframe detection + key_pose_delta_deg: float = 10.0 + key_pose_delta_trans: float = 0.5 + + # Loop closure + loop_search_radius: float = 1.0 + loop_time_thresh: float = 60.0 + loop_score_thresh: float = 0.15 + loop_submap_half_range: int = 5 + submap_resolution: float = 0.1 + min_loop_detect_duration: float = 5.0 + + # Input mode: transform world-frame scans to body-frame using odom + unregister_input: bool = True + + # Global map publishing + global_map_voxel_size: float = 0.1 + global_map_publish_rate: float = 1.0 + + debug: bool = False + + +@dataclass +class _RawKeyframe: + ts: float + translation: np.ndarray # (3,) + rotation: np.ndarray # 3x3 + + +class PGO(NativeModule, LoopClosure): + """Pose graph optimization with loop closure using GTSAM iSAM2 + PCL ICP.""" + + config: PGOConfig + + lidar: In[PointCloud2] + odometry: In[Odometry] + corrected_odometry: Out[Odometry] + pose_graph: Out[Graph3D] + loop_closure_event: Out[GraphDelta3D] + global_map: Out[PointCloud2] + pgo_tf: Out[Odometry] + + def __init__(self, **kwargs: object) -> None: + super().__init__(**kwargs) + self._keyframes: list[_RawKeyframe] = [] + self._offset_rotation = np.eye(3) + self._offset_translation = np.zeros(3) + self._graph_lock = threading.Lock() + + def _collect_topics(self) -> dict[str, str]: + topics = super()._collect_topics() + # The binary asks for --registered_scan; feed it the lidar topic. + if "lidar" in topics: + topics["registered_scan"] = topics["lidar"] + return topics + + @rpc + def start(self) -> None: + super().start() + self.register_disposable( + Disposable(self.pgo_tf.transport.subscribe(self._on_tf_correction, self.pgo_tf)) + ) + self.register_disposable( + Disposable(self.odometry.transport.subscribe(self._on_raw_odometry, self.odometry)) + ) + # Seed identity TF so consumers can query map->body immediately. + self._publish_tf( + translation=(0.0, 0.0, 0.0), + rotation=(0.0, 0.0, 0.0, 1.0), + ts=time.time(), + ) + if self.config.debug: + logger.info("unrefined PGO native module started (C++ iSAM2 + PCL ICP)") + + @rpc + def stop(self) -> None: + super().stop() + + # --- TF passthrough (same as the cmu_nav wrapper) ----------------------- + + def _publish_tf( + self, + translation: tuple[float, float, float], + rotation: tuple[float, float, float, float], + ts: float, + ) -> None: + self.tf.publish( + Transform( + frame_id=self.config.world_frame, + child_frame_id=self.config.local_frame, + translation=Vector3(*translation), + rotation=Quaternion(*rotation), + ts=ts, + ) + ) + + def _on_tf_correction(self, msg: Odometry) -> None: + self._publish_tf( + translation=(msg.pose.position.x, msg.pose.position.y, msg.pose.position.z), + rotation=( + msg.pose.orientation.x, + msg.pose.orientation.y, + msg.pose.orientation.z, + msg.pose.orientation.w, + ), + ts=msg.ts or time.time(), + ) + + new_rotation = Rotation.from_quat( + [ + msg.pose.orientation.x, + msg.pose.orientation.y, + msg.pose.orientation.z, + msg.pose.orientation.w, + ] + ).as_matrix() + new_translation = np.array([msg.pose.position.x, msg.pose.position.y, msg.pose.position.z]) + + with self._graph_lock: + delta_trans = float(np.linalg.norm(new_translation - self._offset_translation)) + cos_theta = float( + np.clip((np.trace(self._offset_rotation.T @ new_rotation) - 1.0) / 2.0, -1.0, 1.0) + ) + delta_deg = math.degrees(math.acos(cos_theta)) + self._offset_rotation = new_rotation + self._offset_translation = new_translation + is_loop = ( + delta_trans > _LOOP_EVENT_MIN_TRANS_M or delta_deg > _LOOP_EVENT_MIN_ROT_DEG + ) and bool(self._keyframes) + graph_msg = self._build_graph(msg.ts) if self._keyframes else None + event = self._build_loop_event(msg.ts) if is_loop else None + + if graph_msg is not None: + self.pose_graph.publish(graph_msg) + if event is not None: + self.loop_closure_event.publish(event) + + # --- synthesized pose graph (the binary doesn't expose its own) ---------- + + def _on_raw_odometry(self, msg: Odometry) -> None: + rotation = Rotation.from_quat( + [ + msg.pose.orientation.x, + msg.pose.orientation.y, + msg.pose.orientation.z, + msg.pose.orientation.w, + ] + ).as_matrix() + translation = np.array([msg.pose.position.x, msg.pose.position.y, msg.pose.position.z]) + + with self._graph_lock: + if not self._is_keyframe(rotation, translation): + return + self._keyframes.append( + _RawKeyframe(ts=msg.ts, translation=translation, rotation=rotation) + ) + graph_msg = self._build_graph(msg.ts) + self.pose_graph.publish(graph_msg) + + def _is_keyframe(self, rotation: np.ndarray, translation: np.ndarray) -> bool: + if not self._keyframes: + return True + last = self._keyframes[-1] + delta_trans = float(np.linalg.norm(translation - last.translation)) + cos_theta = float(np.clip((np.trace(last.rotation.T @ rotation) - 1.0) / 2.0, -1.0, 1.0)) + delta_deg = math.degrees(math.acos(cos_theta)) + return ( + delta_trans > self.config.key_pose_delta_trans + or delta_deg > self.config.key_pose_delta_deg + ) + + def _corrected_node(self, index: int, keyframe: _RawKeyframe) -> Graph3D.Node3D: + rotation = self._offset_rotation @ keyframe.rotation + translation = self._offset_rotation @ keyframe.translation + self._offset_translation + quaternion = Rotation.from_matrix(rotation).as_quat() + return Graph3D.Node3D( + pose=PoseStamped( + ts=keyframe.ts, + frame_id=self.config.world_frame, + position=[float(v) for v in translation], + orientation=[float(v) for v in quaternion], + ), + id=index, + ) + + def _build_graph(self, ts: float) -> Graph3D: + """Caller must hold ``_graph_lock``.""" + nodes = [ + self._corrected_node(index, keyframe) for index, keyframe in enumerate(self._keyframes) + ] + edges = [ + Graph3D.Edge(start_id=index - 1, end_id=index, timestamp=self._keyframes[index].ts) + for index in range(1, len(self._keyframes)) + ] + return Graph3D(ts=ts, nodes=nodes, edges=edges) + + def _build_loop_event(self, ts: float) -> GraphDelta3D: + """Caller must hold ``_graph_lock``.""" + latest_index = len(self._keyframes) - 1 + identity = GraphDelta3D.Transform( + translation=Vector3(0.0, 0.0, 0.0), rotation=Quaternion(0.0, 0.0, 0.0, 1.0) + ) + return GraphDelta3D( + ts=ts, + nodes=[self._corrected_node(latest_index, self._keyframes[latest_index])], + transforms=[identity], + ) From 13f97dae5c3e7e0bfd1a6e4a22980ef71734eb67 Mon Sep 17 00:00:00 2001 From: Jeff Hykin Date: Wed, 24 Jun 2026 14:52:43 +0800 Subject: [PATCH 08/21] add loop-closure eval/benchmark scripts and spec --- .../loop_closure/benchmark_table.py | 343 +++++++ .../jnav/components/loop_closure/eval.py | 924 ++++++++++++++++++ .../jnav/components/loop_closure/eval_all.py | 366 +++++++ .../loop_closure/eval_ground_truth_tag.py | 309 ++++++ .../components/loop_closure/eval_kitti.py | 192 ++++ .../components/loop_closure/eval_kitti_all.py | 122 +++ .../jnav/components/loop_closure/spec.py | 33 + 7 files changed, 2289 insertions(+) create mode 100644 dimos/navigation/jnav/components/loop_closure/benchmark_table.py create mode 100644 dimos/navigation/jnav/components/loop_closure/eval.py create mode 100644 dimos/navigation/jnav/components/loop_closure/eval_all.py create mode 100644 dimos/navigation/jnav/components/loop_closure/eval_ground_truth_tag.py create mode 100644 dimos/navigation/jnav/components/loop_closure/eval_kitti.py create mode 100644 dimos/navigation/jnav/components/loop_closure/eval_kitti_all.py create mode 100644 dimos/navigation/jnav/components/loop_closure/spec.py diff --git a/dimos/navigation/jnav/components/loop_closure/benchmark_table.py b/dimos/navigation/jnav/components/loop_closure/benchmark_table.py new file mode 100644 index 0000000000..102ebfdc25 --- /dev/null +++ b/dimos/navigation/jnav/components/loop_closure/benchmark_table.py @@ -0,0 +1,343 @@ +# Copyright 2026 Dimensional Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Resumable PGO benchmark over (environment x implementation/config). + +Fills one big table: every environment (go2 recordings, hk_village; kitti once +converted) scored against every PGO column (gsc_pgo in several configs, the +basic unrefined_pgo, ivan_pgo, ivan_pgo_transformer). Each cell is one eval.py +subprocess (sequential — they share the isolated LCM bus). + +CHECKPOINTED: a cell is skipped when its summary.json already exists and its +fingerprint still matches the db (size+mtime) and EVAL_VERSION. Kill it any +time and rerun — only missing/stale cells recompute. `--force` recomputes all. + +The universal score is **voxel agreement** (re-anchoring scans onto the +corrected trajectory should collapse double walls — ground-truth-free and +needs no camera), so tagless environments (kitti, hk_village) still get a +real number. April-tag agreement is reported additionally wherever a camera + +intrinsics sidecar exists. + +Usage: + uv run python dimos/navigation/jnav/components/loop_closure/benchmark_table.py + ... [--go2-root ~/datasets/go2_recordings] [--with-hk-village] [--force] + ... [--only-env NAME] [--only-col NAME] [--table-only] +""" + +from __future__ import annotations + +import argparse +from dataclasses import dataclass, field +import json +from pathlib import Path +import subprocess +import sys +import time +from typing import Any + +LOOP_CLOSURE_DIR = Path(__file__).resolve().parent +EVAL_PY = LOOP_CLOSURE_DIR / "eval.py" +RESULTS_DIR = LOOP_CLOSURE_DIR / "eval_results" +TABLE_PATH = RESULTS_DIR / "benchmark_table.md" + +DEFAULT_GO2_ROOT = Path("~/datasets/go2_recordings").expanduser() +# loop_closure/modules/jnav/navigation/dimos/ -> parents[4] is repo root. +LFS_DATA_DIR = LOOP_CLOSURE_DIR.parents[4] / "data" # repo_root/data (hk_village*.db) + +# Shared loop-closure thresholds for the gsc_pgo configs (mirrors recordings_eval). +_CMU_BASE: dict[str, Any] = { + "loop_search_radius": 3.0, + "loop_time_thresh": 5.0, + "min_loop_detect_duration": 2.0, + "key_pose_delta_trans": 0.5, +} + + +@dataclass(frozen=True) +class Column: + """One implementation+config = one table column.""" + + name: str # column label + results-suffix (disambiguates same-class modules) + module_dir: str # subdir under loop_closure/ holding module.py (class PGO) + overrides: dict[str, Any] = field(default_factory=dict) + + +COLUMNS: list[Column] = [ + Column("cmu_stock", "gsc_pgo", {}), + Column("cmu_scan_context", "gsc_pgo", {**_CMU_BASE, "use_scan_context": True}), + Column("cmu_radius", "gsc_pgo", {**_CMU_BASE, "use_scan_context": False}), + Column( + "cmu_scan_context_far", + "gsc_pgo", + { + **_CMU_BASE, + "use_scan_context": True, + "loop_candidate_max_distance_m": 0.0, + "loop_score_thresh": 10000.0, + }, + ), + Column("unrefined", "unrefined_pgo", {}), + Column("ivan", "ivan_pgo", {"publish_global_map": False}), + Column("ivan_transformer", "ivan_pgo_transformer", {}), +] + + +@dataclass(frozen=True) +class Environment: + """One dataset = one table row.""" + + name: str # results-dir recording key + db_path: Path + odom_stream: str + lidar_stream: str + camera_stream: str | None = None + intrinsics_json: Path | None = None + + +def discover_go2(root: Path) -> list[Environment]: + environments = [] + for db_path in sorted(root.glob("*/mem2.db")): + recording = db_path.parent + sidecar = recording / "camera_intrinsics.json" + environments.append( + Environment( + name=recording.name, + db_path=db_path, + odom_stream="fastlio_odometry", + lidar_stream="fastlio_lidar", + camera_stream="color_image", + intrinsics_json=sidecar if sidecar.exists() else None, + ) + ) + return environments + + +def discover_hk_village(data_dir: Path) -> list[Environment]: + # hk_village LFS dbs publish world-frame lidar + PoseStamped odom; no camera + # intrinsics sidecar, so they score on voxel agreement alone. + environments = [] + for db_path in sorted(data_dir.glob("hk_village*.db")): + environments.append( + Environment( + name=db_path.stem, + db_path=db_path, + odom_stream="odom", + lidar_stream="lidar", + camera_stream=None, + intrinsics_json=None, + ) + ) + return environments + + +def cell_dir(environment: Environment, column: Column) -> Path: + # Mirrors eval.py's out_dir formula: __.PGO[.]. + return RESULTS_DIR / f"{environment.name}__{column.module_dir}.PGO.{column.name}" + + +def cell_is_fresh(environment: Environment, column: Column) -> bool: + summary_path = cell_dir(environment, column) / "summary.json" + if not summary_path.exists(): + return False + try: + summary = json.loads(summary_path.read_text()) + except (json.JSONDecodeError, OSError): + return False + fingerprint = summary.get("fingerprint", {}) + stat = environment.db_path.stat() + return ( + fingerprint.get("db_bytes") == stat.st_size + and fingerprint.get("db_mtime") == int(stat.st_mtime) + and fingerprint.get("version") is not None + ) + + +def _kill_zombies() -> None: + """Clear leftover native processes / workers that can wedge the next cell.""" + subprocess.run( + "lsof -ti tcp:7766 2>/dev/null | xargs kill -9 2>/dev/null;" + ' pkill -9 -f "bin/pgo|scene_lidar" 2>/dev/null', + shell=True, + check=False, + ) + + +def run_cell(environment: Environment, column: Column) -> bool: + command = [ + sys.executable, + "-u", + str(EVAL_PY), + "--db-path", + str(environment.db_path), + "--odom-stream", + environment.odom_stream, + "--lidar-stream", + environment.lidar_stream, + "--module-path", + str(LOOP_CLOSURE_DIR / column.module_dir / "module.py"), + "--module-name", + "PGO", + "--recording-name", + environment.name, + "--results-suffix", + column.name, + "--with-rrd", + "false", + "--lockstep", + "true", + ] + if environment.camera_stream is not None: + command += ["--camera-stream", environment.camera_stream] + if environment.intrinsics_json is not None: + command += ["--camera-intrinsics-json-path", str(environment.intrinsics_json)] + if column.overrides: + command += ["--pgo-config-json", json.dumps(column.overrides)] + print(f"\n=== {environment.name} x {column.name} ===", flush=True) + result = subprocess.run(command, check=False) + print(f"=== {environment.name} x {column.name} exit: {result.returncode} ===", flush=True) + return result.returncode == 0 + + +def _fmt(value: float | None, places: int = 3, signed: bool = True) -> str: + if value is None: + return "—" + return f"{value:+.{places}f}" if signed else f"{value:.{places}f}" + + +def render_table(environments: list[Environment]) -> Path: + cells: dict[tuple[str, str], dict[str, Any]] = {} + for summary_path in RESULTS_DIR.glob("*/summary.json"): + recording, _, module_key = summary_path.parent.name.rpartition("__") + try: + summary = json.loads(summary_path.read_text()) + except (json.JSONDecodeError, OSError): + continue + cells[(recording, module_key)] = summary.get("scores", {}) + + column_keys = [f"{column.module_dir}.PGO.{column.name}" for column in COLUMNS] + header = "| environment | " + " | ".join(column.name for column in COLUMNS) + " |" + sep = "|" + "---|" * (len(COLUMNS) + 1) + lines = [ + "# PGO benchmark — environments x implementations", + "", + "Each cell: **voxel improvement** (fractional drop in occupied 0.2 m voxels " + "after re-anchoring scans onto the corrected trajectory; the universal, " + "ground-truth-free score) — then `tag:` where a camera " + "exists, and `cl`. Higher is better; `—` = not yet run / N/A.", + "", + header, + sep, + ] + for environment in environments: + row_cells = [] + for column_key in column_keys: + scores = cells.get((environment.name, column_key)) + if scores is None: + row_cells.append("—") + continue + voxel = _fmt(scores.get("voxel_improvement")) + tag = scores.get("tag_improvement") + closures = scores.get("closures") + text = voxel + if tag is not None: + text += f" tag:{tag:+.2f}" + if closures is not None: + text += f" cl{closures}" + row_cells.append(text) + lines.append(f"| {environment.name} | " + " | ".join(row_cells) + " |") + + # Per-column mean voxel improvement (over environments that have a number). + lines += ["", "## Mean voxel improvement per column", ""] + lines.append("| " + " | ".join(column.name for column in COLUMNS) + " |") + lines.append("|" + "---|" * len(COLUMNS)) + means = [] + for column_key in column_keys: + values = [ + cells[(environment.name, column_key)]["voxel_improvement"] + for environment in environments + if (environment.name, column_key) in cells + and cells[(environment.name, column_key)].get("voxel_improvement") is not None + ] + means.append(f"{sum(values) / len(values):+.3f}" if values else "—") + lines.append("| " + " | ".join(means) + " |") + lines.append("") + + RESULTS_DIR.mkdir(exist_ok=True) + TABLE_PATH.write_text("\n".join(lines) + "\n") + return TABLE_PATH + + +def main() -> None: + parser = argparse.ArgumentParser(description=__doc__) + parser.add_argument("--go2-root", type=Path, default=DEFAULT_GO2_ROOT) + parser.add_argument("--with-hk-village", action="store_true") + parser.add_argument("--data-dir", type=Path, default=LFS_DATA_DIR) + parser.add_argument("--only-env", help="comma-separated environment names") + parser.add_argument("--only-col", help="comma-separated column names") + parser.add_argument("--force", action="store_true", help="recompute fresh cells too") + parser.add_argument( + "--attempts", type=int, default=2, help="retries per cell on transient RPC timeouts" + ) + parser.add_argument("--table-only", action="store_true", help="render from cache, run nothing") + args = parser.parse_args() + + environments = discover_go2(args.go2_root.expanduser()) + if args.with_hk_village: + environments += discover_hk_village(args.data_dir.expanduser()) + if args.only_env: + wanted = {name.strip() for name in args.only_env.split(",")} + environments = [environment for environment in environments if environment.name in wanted] + + columns = COLUMNS + if args.only_col: + wanted = {name.strip() for name in args.only_col.split(",")} + columns = [column for column in COLUMNS if column.name in wanted] + + if args.table_only: + print(f"table -> {render_table(environments)}") + return + + total = len(environments) * len(columns) + print(f"benchmark: {len(environments)} environments x {len(columns)} columns = {total} cells") + done = skipped = failed = 0 + for environment in environments: + for column in columns: + if not args.force and cell_is_fresh(environment, column): + skipped += 1 + print(f"skip (fresh): {environment.name} x {column.name}", flush=True) + continue + # Retry transient macOS LCM startup-RPC timeouts; a fresh process + # almost always gets past them. Kill zombies between attempts. + ok = False + for attempt in range(1, args.attempts + 1): + ok = run_cell(environment, column) + if ok: + break + _kill_zombies() + if attempt < args.attempts: + print( + f"retry {attempt + 1}/{args.attempts}: {environment.name} x {column.name}" + ) + time.sleep(5) + done += 1 if ok else 0 + failed += 0 if ok else 1 + render_table(environments) # refresh after every cell — live + crash-safe + + table = render_table(environments) + print(f"\ncells: {done} ran, {skipped} cached, {failed} failed") + print(f"table -> {table}") + + +if __name__ == "__main__": + main() diff --git a/dimos/navigation/jnav/components/loop_closure/eval.py b/dimos/navigation/jnav/components/loop_closure/eval.py new file mode 100644 index 0000000000..02afb44749 --- /dev/null +++ b/dimos/navigation/jnav/components/loop_closure/eval.py @@ -0,0 +1,924 @@ +# Copyright 2026 Dimensional Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Evaluate a loop-closure module against a recording. + +Two ground-truth-free scores, before vs after correction: + * April-tag agreement — a fixed tag re-seen along the run should map to one + world position; the spread of its per-visit robot positions measures drift. + Tags are taken as relative to the chosen odom stream (sighting time -> + nearest odom pose), so no static transforms or stored db poses are needed. + * Lidar-voxel agreement — re-anchoring the registered scans onto the + corrected trajectory should collapse double walls, so the corrected map + should occupy FEWER voxels than the raw one. + +Pipeline: + 1. April tags: read the db's `april_tags` stream (ts + marker_id only), or + detect them with sane defaults (medoid, blur/reproj/size/distance gates). + 2. Raw agreement over the raw odometry. + 3. Replay lidar + odom through the module (loaded dynamically from + --module-path/--module-name), capture its optimized pose graph. + 4. Corrected agreement + voxel agreement, written to + eval_results/__/summary.json (and an eval.rrd with the + raw + corrected trajectories when --with-rrd true). + +Usage: + uv run python dimos/navigation/jnav/components/loop_closure/eval.py \\ + --db-path ~/datasets/go2_recordings/2026-06-04_12-56pm-PST/mem2.db \\ + --odom-stream fastlio_odometry \\ + --camera-stream color_image \\ + --camera-intrinsics-json-path \\ + ~/datasets/go2_recordings/2026-06-04_12-56pm-PST/camera_intrinsics.json \\ + --module-path dimos/navigation/jnav/components/loop_closure/gsc_pgo/module.py \\ + --module-name PGO \\ + --pgo-config-json '{"use_scan_context": true}' \\ + --with-rrd true +""" + +from __future__ import annotations + +import argparse +import asyncio +from collections.abc import AsyncGenerator, Iterable +import importlib +import json +from pathlib import Path +import tempfile +import time +from typing import Any + +import numpy as np + +from dimos.core.coordination.blueprints import autoconnect +from dimos.core.coordination.module_coordinator import ModuleCoordinator +from dimos.core.module import Module, ModuleConfig +from dimos.core.stream import In, Out +from dimos.msgs.geometry_msgs.Pose import Pose +from dimos.msgs.nav_msgs.Odometry import Odometry +from dimos.msgs.sensor_msgs.PointCloud2 import PointCloud2 +from dimos.navigation.jnav.msgs.Graph3D import Graph3D +from dimos.navigation.jnav.msgs.GraphDelta3D import GraphDelta3D +from dimos.navigation.jnav.utils.apriltag_agreement import ( + VISIT_GAP_S, + AgreementReport, + agreement_improvement, + agreement_report, + paired_tag_visit_positions, +) +from dimos.navigation.jnav.utils.apriltags import ( + detect_apriltags, + load_intrinsics_json, + load_or_detect_sightings, +) +from dimos.navigation.jnav.utils.module_loading import ( + filter_config_for_module, + load_module_class, +) +from dimos.navigation.jnav.utils.recording_db import ( + MAX_REPLAY_ODOM, + MAX_REPLAY_SCANS, + ODOM_MATCH_TOLERANCE_S, + REPLAY_DRAIN_MARGIN_S, + REPLAY_PUBLISH_HZ, + iterate_stream, + list_streams, + odometry_lookup, + store, + stream_count, +) +from dimos.navigation.jnav.utils.trajectory_metrics import ( + GraphPose, + PoseLookup7, + drifted_lookup, + graph_lookup, + has_drift, + lidar_voxel_agreement, + pose7_lookup, + trajectory_recovery_error, +) + +RESULTS_DIR = Path(__file__).resolve().parent / "eval_results" +APRIL_TAGS_STREAM = "april_tags" +_RRD_MAX_PATH_POINTS = 5000 + +# Cap replayed scans fed to voxel agreement so the map fits in memory. +VOXEL_MAX_SCANS = 300 + +# Bump to invalidate every cached cell (scoring/replay semantics changed). +EVAL_VERSION = 1 + + +def cell_fingerprint( + db_path: Path, + pgo_config: dict[str, Any], + lidar_stream: str, + odom_stream: str, + drift_per_sec: list[float] | None = None, +) -> dict[str, Any]: + """Identity of a completed cell — the driver re-runs only when this changes + (db edited, config changed, streams changed, drift changed, or version).""" + stat = db_path.stat() + return { + "db_bytes": stat.st_size, + "db_mtime": int(stat.st_mtime), + "pgo_config": pgo_config, + "lidar_stream": lidar_stream, + "odom_stream": odom_stream, + "drift_per_sec": list(drift_per_sec or [0.0, 0.0, 0.0]), + "version": EVAL_VERSION, + } + + +# A known-good PGO config for replayed recordings: revisit gates loose enough +# for walks where the same spot is re-seen tens of seconds later. These now +# match gsc_pgo's own defaults (so it's a no-op for gsc_pgo); kept here to apply +# the same gates to the other PGO modules, which have different defaults. +DEFAULT_PGO_CONFIG: dict[str, Any] = { + "loop_search_radius": 3.0, + "loop_time_thresh": 5.0, + "min_loop_detect_duration": 2.0, + "key_pose_delta_trans": 0.5, + "use_scan_context": True, +} + + +class GraphCaptureConfig(ModuleConfig): + output_path: str = "" + + +class GraphCapture(Module): + """Captures the module's optimized pose graph WITH orientations + closures. + + Results are handed back via a JSON file written on teardown (modules run in + separate worker processes).""" + + config: GraphCaptureConfig + + pose_graph: In[Graph3D] + loop_closure_event: In[GraphDelta3D] + + def __init__(self, **kwargs: Any) -> None: + super().__init__(**kwargs) + self._graph: list[GraphPose] = [] + self._closures = 0 + + async def handle_pose_graph(self, msg: Graph3D) -> None: + self._graph = [ + ( + node.pose.ts, + node.pose.position.x, + node.pose.position.y, + node.pose.position.z, + node.pose.orientation.x, + node.pose.orientation.y, + node.pose.orientation.z, + node.pose.orientation.w, + ) + for node in msg.nodes + ] + + async def handle_loop_closure_event(self, msg: GraphDelta3D) -> None: + self._closures += 1 + + async def main(self) -> AsyncGenerator[None, None]: + yield + Path(self.config.output_path).write_text( + json.dumps({"graph": self._graph, "closures": self._closures}) + ) + + +class LockstepReplayConfig(ModuleConfig): + db: str = "" + lidar_stream: str = "lidar" + odometry_stream: str = "odom" + lidar_stride: int = 1 + odometry_stride: int = 2 + odom_publish_hz: float = 500.0 + ack_timeout_s: float = 30.0 + done_path: str = "" + # Artificial odometry drift: a constant-velocity world offset added to both + # odom poses and lidar clouds at time t (offset = drift_per_sec * (t - t0)). + # Consistent per-instant, so the trajectory warps over time — exactly the + # accumulating error loop closure is supposed to fix. [0,0,0] = no drift. + drift_per_sec: list[float] = [0.0, 0.0, 0.0] + drift_t0: float = 0.0 + + +class LockstepReplay(Module): + """Closed-loop replay: after each scan, wait for the module's + corrected_odometry ack before sending the next. + + Every module under test sees 100% of the (strided) scans regardless of + machine speed — wall clock varies, the data the module processes doesn't. + Odometry messages are cheap latest-state updates and stay fire-and-forget + (lightly paced). Writes a done-marker JSON (ack timeout count) at the end + so the host knows when to tear down. + + odom and lidar are merged into one time-sorted stream, so playback runs in + bursts: all odoms whose timestamps fall before the next scan are emitted + fire-and-forget (paced by odom_publish_hz), then one scan is sent and the + loop blocks on its ack. The only guarantee is one ack-wait per scan; the + odom burst size per gap is data-dependent (~ odom_rate / lidar_rate).""" + + config: LockstepReplayConfig + + lidar: Out[PointCloud2] + odometry: Out[Odometry] + corrected_odometry: In[Odometry] + + def __init__(self, **kwargs: Any) -> None: + super().__init__(**kwargs) + self._ack_count = 0 + self._ack_event: asyncio.Event | None = None + + async def handle_corrected_odometry(self, msg: Odometry) -> None: + self._ack_count += 1 + if self._ack_event is not None: + self._ack_event.set() + + def _load(self) -> list[tuple[float, str, Any]]: + db_path = Path(self.config.db) + merged: list[tuple[float, str, Any]] = [] + for timestamp, pose in iterate_stream( + db_path, self.config.odometry_stream, stride=self.config.odometry_stride + ): + merged.append((timestamp, "odom", pose)) + for timestamp, cloud in iterate_stream( + db_path, self.config.lidar_stream, stride=self.config.lidar_stride + ): + merged.append((timestamp, "lidar", cloud)) + merged.sort(key=lambda item: item[0]) + return merged + + async def main(self) -> AsyncGenerator[None, None]: + messages = await asyncio.to_thread(self._load) + self._task = asyncio.create_task(self._replay(messages)) + yield + self._task.cancel() + + async def _replay(self, messages: list[tuple[float, str, Any]]) -> None: + odom_period = 1.0 / self.config.odom_publish_hz + timeouts = 0 + scans_sent = 0 + drift = np.asarray(self.config.drift_per_sec, dtype=np.float64) + t0 = self.config.drift_t0 + apply_drift = has_drift(drift) + # Timestamps of scans the module never acked — the frames it (likely) + # skipped. Recorded for reproducibility of partial runs. + skipped_scan_ts: list[float] = [] + for timestamp, kind, payload in messages: + if kind == "odom": + pose = RateReplay._payload_pose(payload) + if apply_drift: + offset = drift * (timestamp - t0) + pose = Pose( + position=[ + pose.position.x + offset[0], + pose.position.y + offset[1], + pose.position.z + offset[2], + ], + orientation=[ + pose.orientation.x, + pose.orientation.y, + pose.orientation.z, + pose.orientation.w, + ], + ) + self.odometry.publish( + Odometry( + ts=timestamp, + frame_id="map", + child_frame_id="base_link", + pose=pose, + ) + ) + await asyncio.sleep(odom_period) + continue + + acks_before = self._ack_count + self._ack_event = asyncio.Event() + points = payload.points_f32() + if apply_drift: + points = points + (drift * (timestamp - t0)).astype(np.float32) + self.lidar.publish(PointCloud2.from_numpy(points, frame_id="map", timestamp=timestamp)) + scans_sent += 1 + deadline = time.monotonic() + self.config.ack_timeout_s + while self._ack_count == acks_before: + remaining = deadline - time.monotonic() + if remaining <= 0: + timeouts += 1 + skipped_scan_ts.append(timestamp) + break + try: + await asyncio.wait_for(self._ack_event.wait(), timeout=remaining) + except TimeoutError: + continue + self._ack_event.clear() + if scans_sent % _PROGRESS_EVERY_N_SCANS == 0: + # Periodic progress so a capped run still reports coverage. + Path(self.config.done_path + ".progress").write_text( + json.dumps(self._stats(timeouts, scans_sent, skipped_scan_ts)) + ) + + Path(self.config.done_path).write_text( + json.dumps(self._stats(timeouts, scans_sent, skipped_scan_ts)) + ) + + @staticmethod + def _stats(timeouts: int, scans_sent: int, skipped_scan_ts: list[float]) -> dict[str, Any]: + return { + "timeouts": timeouts, + "scans_sent": scans_sent, + "skipped_scan_ts": skipped_scan_ts, + } + + +class RateReplayConfig(ModuleConfig): + db: str = "" + lidar_stream: str = "lidar" + odometry_stream: str = "odom" + lidar_stride: int = 1 + odometry_stride: int = 2 + publish_hz: float = 40.0 + + +class RateReplay(Module): + """Legacy fixed-rate replay: publishes world-frame lidar + odometry at a set + Hz with timestamps preserved (no ack pacing — wall-clock dependent). + + Works for both odometry payload shapes found in recordings: ``Odometry`` + (go2 ``fastlio_odometry``) and ``PoseStamped`` (hk_village ``odom``). + """ + + config: RateReplayConfig + + lidar: Out[PointCloud2] + odometry: Out[Odometry] + + def __init__(self, **kwargs: Any) -> None: + super().__init__(**kwargs) + self.done = False + + def _load(self) -> list[tuple[float, str, Any]]: + db_path = Path(self.config.db) + merged: list[tuple[float, str, Any]] = [] + for timestamp, pose in iterate_stream( + db_path, self.config.odometry_stream, stride=self.config.odometry_stride + ): + merged.append((timestamp, "odom", pose)) + for timestamp, cloud in iterate_stream( + db_path, self.config.lidar_stream, stride=self.config.lidar_stride + ): + merged.append((timestamp, "lidar", cloud)) + merged.sort(key=lambda item: item[0]) + return merged + + @staticmethod + def _payload_pose(payload: Any) -> Pose: + if hasattr(payload, "pose"): # Odometry + return payload.pose # type: ignore[no-any-return] + return Pose( # PoseStamped + payload.x, + payload.y, + payload.z, + payload.orientation.x, + payload.orientation.y, + payload.orientation.z, + payload.orientation.w, + ) + + async def main(self) -> AsyncGenerator[None, None]: + messages = await asyncio.to_thread(self._load) + self._task = asyncio.create_task(self._replay(messages)) + yield + self._task.cancel() + + async def _replay(self, messages: list[tuple[float, str, Any]]) -> None: + period = 1.0 / self.config.publish_hz + for timestamp, kind, payload in messages: + if kind == "odom": + self.odometry.publish( + Odometry( + ts=timestamp, + frame_id="map", + child_frame_id="base_link", + pose=self._payload_pose(payload), + ) + ) + else: + self.lidar.publish( + PointCloud2.from_numpy( + payload.points_f32(), frame_id="map", timestamp=timestamp + ) + ) + await asyncio.sleep(period) + self.done = True + + +# Run cap scales with the workload: a per-scan budget (well above any sane +# processing time, below the 30s ack timeout) plus fixed startup overhead. +LOCKSTEP_PER_SCAN_BUDGET_S = 2.0 +LOCKSTEP_BASE_OVERHEAD_S = 120.0 +LOCKSTEP_POLL_S = 5.0 +LOCKSTEP_DRAIN_S = 10.0 +_PROGRESS_EVERY_N_SCANS = 200 + + +def run_module_graph( + db_path: Path, + module_class: type, + config_overrides: dict[str, Any], + *, + lidar_stream: str, + odom_stream: str, + lockstep: bool = True, + drift_per_sec: list[float] | None = None, + drift_t0: float = 0.0, +) -> tuple[list[GraphPose], int, dict[str, Any]]: + """Replay the recording through the module; return its optimized pose graph + (with orientations), loop-closure count, and replay stats. + + lockstep=True (default) paces scans on the module's corrected_odometry + acks — machine-speed independent. lockstep=False is the legacy fixed-rate + wall-clock replay. drift_per_sec injects a constant-velocity world offset + into the replayed odom+lidar (see LockstepReplayConfig).""" + drift_per_sec = drift_per_sec or [0.0, 0.0, 0.0] + output_path = Path(tempfile.gettempdir()) / f"jnav_lc_eval_{db_path.parent.name}.json" + output_path.unlink(missing_ok=True) + done_path = Path(tempfile.gettempdir()) / f"jnav_lc_eval_done_{db_path.parent.name}.json" + done_path.unlink(missing_ok=True) + Path(str(done_path) + ".progress").unlink(missing_ok=True) + lidar_stride = max(1, -(-stream_count(db_path, lidar_stream) // MAX_REPLAY_SCANS)) + odometry_stride = max(1, -(-stream_count(db_path, odom_stream) // MAX_REPLAY_ODOM)) + n_messages = stream_count(db_path, odom_stream) // odometry_stride + n_messages += stream_count(db_path, lidar_stream) // lidar_stride + + if lockstep: + replay_blueprint = LockstepReplay.blueprint( + db=str(db_path), + lidar_stream=lidar_stream, + odometry_stream=odom_stream, + lidar_stride=lidar_stride, + odometry_stride=odometry_stride, + done_path=str(done_path), + drift_per_sec=drift_per_sec, + drift_t0=drift_t0, + ) + else: + replay_blueprint = RateReplay.blueprint( + db=str(db_path), + lidar_stream=lidar_stream, + odometry_stream=odom_stream, + lidar_stride=lidar_stride, + odometry_stride=odometry_stride, + publish_hz=REPLAY_PUBLISH_HZ, + ) + + blueprint = autoconnect( + replay_blueprint, + module_class.blueprint(**config_overrides), # type: ignore[attr-defined] + GraphCapture.blueprint(output_path=str(output_path)), + ) + coordinator = ModuleCoordinator.build(blueprint) + mode = "lockstep" if lockstep else f"fixed-rate {REPLAY_PUBLISH_HZ}Hz" + print( + f"replaying {n_messages} messages through {module_class.__name__}" + f" ({mode}, lidar stride {lidar_stride}, odom stride {odometry_stride})" + ) + replay_stats: dict[str, Any] = {"mode": mode} + try: + if lockstep: + # Per-frame budget: the cap scales with how many scans are fed. + n_scans = stream_count(db_path, lidar_stream) // lidar_stride + max_run_s = n_scans * LOCKSTEP_PER_SCAN_BUDGET_S + LOCKSTEP_BASE_OVERHEAD_S + started = time.monotonic() + while not done_path.exists(): + elapsed = time.monotonic() - started + if elapsed > max_run_s: + replay_stats["hit_max_run_s"] = max_run_s + print( + f"lockstep replay hit the per-frame cap" + f" ({n_scans} scans x {LOCKSTEP_PER_SCAN_BUDGET_S}s" + f" + {LOCKSTEP_BASE_OVERHEAD_S}s = {round(max_run_s)}s) — stopping early" + ) + break + if int(elapsed) % 60 < LOCKSTEP_POLL_S and elapsed > LOCKSTEP_POLL_S: + print(f" ... lockstep replay running ({round(elapsed)}s)") + time.sleep(LOCKSTEP_POLL_S) + progress_path = Path(str(done_path) + ".progress") + if done_path.exists(): + replay_stats.update(json.loads(done_path.read_text())) + elif progress_path.exists(): + # Capped run: last periodic progress still tells us coverage + # and which frames the module never acked. + replay_stats.update(json.loads(progress_path.read_text())) + replay_stats["partial"] = True + progress_path.unlink(missing_ok=True) + time.sleep(LOCKSTEP_DRAIN_S) + else: + time.sleep(n_messages / REPLAY_PUBLISH_HZ + REPLAY_DRAIN_MARGIN_S) + finally: + coordinator.stop() + + if not output_path.exists(): + raise SystemExit(f"{module_class.__name__} produced no pose graph output") + data = json.loads(output_path.read_text()) + graph = [tuple(row) for row in data["graph"]] + return graph, int(data["closures"]), replay_stats # type: ignore[return-value] + + +def odometry_pose7_lookup(db_path: Path, odom_stream: str) -> PoseLookup7: + times: list[float] = [] + poses: list[list[float]] = [] + for timestamp, payload in iterate_stream(db_path, odom_stream): + pose = RateReplay._payload_pose(payload) + times.append(timestamp) + poses.append( + [ + pose.position.x, + pose.position.y, + pose.position.z, + pose.orientation.x, + pose.orientation.y, + pose.orientation.z, + pose.orientation.w, + ] + ) + return pose7_lookup( + np.asarray(times, dtype=np.float64), + np.asarray(poses, dtype=np.float64), + ODOM_MATCH_TOLERANCE_S, + ) + + +def _subsampled_path(positions: np.ndarray) -> np.ndarray: + stride = max(1, len(positions) // _RRD_MAX_PATH_POINTS) + return positions[::stride] + + +def write_trajectory_rrd(rrd_path: Path, raw_positions: np.ndarray, graph: list[GraphPose]) -> None: + """Raw + corrected trajectories (no tag rendering — tags are scored as + odom-relative, not placed in 3D).""" + import rerun as rr + + rr.init("jnav_loop_closure_eval", spawn=False) + rr.save(str(rrd_path)) + rr.log( + "trajectory/raw_odom", + rr.LineStrips3D([_subsampled_path(raw_positions)], colors=[[200, 90, 90]]), + static=True, + ) + corrected = np.asarray([[node[1], node[2], node[3]] for node in graph], dtype=np.float64) + rr.log( + "trajectory/corrected", + rr.LineStrips3D([_subsampled_path(corrected)], colors=[[90, 200, 120]]), + static=True, + ) + + +def _report_dict(report: AgreementReport) -> dict[str, Any]: + return { + "mean_spread_m": report.mean_spread, + "total_observations": report.total_observations, + "per_tag": [ + {"tag_id": tag.tag_id, "observations": tag.observations, "spread_m": tag.spread} + for tag in report.per_tag + ], + } + + +def evaluate( + db_path: Path, + *, + odom_stream: str, + camera_stream: str | None, + intrinsics_json: Path | None, + module_path: Path, + module_name: str, + pgo_config: dict[str, Any], + with_rrd: bool, + lidar_stream: str, + lockstep: bool = True, + results_suffix: str = "", + recording_name: str | None = None, + drift_per_sec: list[float] | None = None, + ignore_tags: set[int] | None = None, +) -> dict[str, Any]: + streams = list_streams(db_path) + for required in (odom_stream, lidar_stream): + if required not in streams: + raise SystemExit(f"no stream {required!r} in {db_path} (have: {streams})") + + module_class = load_module_class(module_path, module_name) + pgo_config = filter_config_for_module(module_class, pgo_config) + + # Artificial drift: the module is fed odom+lidar with a constant-velocity + # world offset added at each time; the raw-baseline scoring must apply the + # SAME offset so it compares against what the module actually saw. + drift_per_sec = drift_per_sec or [0.0, 0.0, 0.0] + drift_t0 = next(iterate_stream(db_path, odom_stream))[0] if has_drift(drift_per_sec) else 0.0 + + # April-tag agreement needs a camera + intrinsics; voxel agreement does not. + # Datasets without either (kitti-360, bare lidar recordings) still score on + # voxel agreement alone, so the same harness fills every table cell. + sightings: dict[int, list[float]] = {} + tag_source = "none" + have_camera = camera_stream is not None and camera_stream in streams + if have_camera and intrinsics_json is not None and intrinsics_json.exists(): + assert camera_stream is not None # narrowed by have_camera + camera = camera_stream + intrinsics_config = load_intrinsics_json(intrinsics_json) + db_store = store(db_path) + stored_stream = ( + db_store.stream(APRIL_TAGS_STREAM) + if APRIL_TAGS_STREAM in db_store.list_streams() + else [] + ) + stored = ((int(obs.tags["marker_id"]), float(obs.ts)) for obs in stored_stream) + + def detect() -> Iterable[tuple[int, float]]: + detections = detect_apriltags( + db_store, + intrinsics_config["intrinsics"], + intrinsics_config["distortion"], + image_stream=camera, + stream_name=APRIL_TAGS_STREAM, + marker_length=intrinsics_config.get("marker_length", 0.10), + dictionary=intrinsics_config.get("dictionary", "DICT_APRILTAG_36h11"), + ) + return ((int(d["marker_id"]), float(d["ts"])) for d in detections) + + sightings, tag_source = load_or_detect_sightings(stored, detect) + # Drop dynamic/unreliable tags (e.g. a tag on a moving object) so their + # motion isn't mistaken for trajectory drift. huge_loop_realsense tag #17 is + # dynamic; all others are static. + if ignore_tags: + dropped = sorted(tag_id for tag_id in sightings if tag_id in ignore_tags) + for tag_id in dropped: + del sightings[tag_id] + if dropped: + print(f"ignoring tags {dropped} (declared dynamic/unreliable)") + n_sightings = sum(len(times) for times in sightings.values()) + if sightings: + print(f"april tags ({tag_source}): {n_sightings} sightings across ids {sorted(sightings)}") + else: + print("no April tags (camera/intrinsics absent or none detected) — voxel agreement only") + + started = time.monotonic() + graph, closures, replay_stats = run_module_graph( + db_path, + module_class, + pgo_config, + lidar_stream=lidar_stream, + odom_stream=odom_stream, + lockstep=lockstep, + drift_per_sec=drift_per_sec, + drift_t0=drift_t0, + ) + runtime_s = time.monotonic() - started + if not graph: + raise SystemExit(f"{module_name} produced an empty pose graph") + + # The module solved on drifted input, so its graph lives in the drifted + # world; the raw baselines must be drifted to match (see drift_per_sec). + raw_xyz_lookup = drifted_lookup(odometry_lookup(db_path, odom_stream), drift_per_sec, drift_t0) + raw_pose7_lookup = drifted_lookup( + odometry_pose7_lookup(db_path, odom_stream), drift_per_sec, drift_t0 + ) + + xyz_graph = [(node[0], node[1], node[2], node[3]) for node in graph] + if sightings: + raw_tag_positions, corrected_tag_positions = paired_tag_visit_positions( + sightings, + raw_xyz_lookup, + graph_lookup(xyz_graph), + gap_s=VISIT_GAP_S, + ) + raw_report = agreement_report(raw_tag_positions) + corrected_report = agreement_report(corrected_tag_positions) + improvement: float | None = agreement_improvement(raw_report, corrected_report) + else: + raw_report = agreement_report({}) + corrected_report = agreement_report({}) + improvement = None # no tags — tag agreement is N/A for this cell + + voxel_stride = max(1, -(-stream_count(db_path, lidar_stream) // VOXEL_MAX_SCANS)) + voxel = lidar_voxel_agreement( + ( + (timestamp, cloud.points_f32()) + for timestamp, cloud in iterate_stream(db_path, lidar_stream, stride=voxel_stride) + ), + raw_pose7_lookup, + graph, + drift_per_sec=drift_per_sec, + drift_t0=drift_t0, + ) + + # Drift-recovery ATE: corrected trajectory vs the UN-drifted ground truth + # (the odom before drift was injected). Only meaningful with --drift-per-sec; + # the right metric where tag/voxel agreement is weak (e.g. KITTI's long loop). + trajectory = trajectory_recovery_error( + graph, odometry_lookup(db_path, odom_stream), drift_per_sec, drift_t0 + ) + if trajectory is not None: + print( + f" drift recovery: {trajectory['drifted_ate_m']:.2f}" + f" -> {trajectory['corrected_ate_m']:.2f} m ATE" + f" ({trajectory['trajectory_improvement']:+.3f})" + ) + + # Key by package + class — several loop-closure modules are all named PGO. + # results_suffix (dot-joined, NOT "__" which delimits the recording name) + # separates runs that differ in inputs, e.g. fastlio vs pointlio odometry. + module_package = module_class.__module__.rsplit(".", 2)[-2] + module_key = f"{module_package}.{module_name}" + ( + f".{results_suffix}" if results_suffix else "" + ) + # db.parent.name is the recording dir for go2; LFS dbs (hk_village) sit + # directly in data/, so an explicit recording_name avoids cell collisions. + out_dir = RESULTS_DIR / f"{recording_name or db_path.parent.name}__{module_key}" + out_dir.mkdir(parents=True, exist_ok=True) + rrd_path = out_dir / "eval.rrd" + if with_rrd: + raw_positions = np.asarray( + [ + [pose.position.x, pose.position.y, pose.position.z] + for _, payload in iterate_stream(db_path, odom_stream) + for pose in [RateReplay._payload_pose(payload)] + ], + dtype=np.float64, + ) + write_trajectory_rrd(rrd_path, raw_positions, graph) + + summary = { + "db": str(db_path), + "odom_stream": odom_stream, + "camera_stream": camera_stream, + "lidar_stream": lidar_stream, + "module": {"path": str(module_path), "name": module_name}, + "pgo_config": pgo_config, + "drift_per_sec": list(drift_per_sec), + "fingerprint": cell_fingerprint( + db_path, pgo_config, lidar_stream, odom_stream, drift_per_sec + ), + "replay": replay_stats, + "april_tags": { + "source": tag_source, + "sightings": n_sightings, + "ids": sorted(sightings), + }, + "scores": { + "raw_spread_m": raw_report.mean_spread if sightings else None, + "corrected_spread_m": corrected_report.mean_spread if sightings else None, + "tag_improvement": improvement, + "voxel_improvement": voxel.get("improvement"), + "trajectory_improvement": trajectory["trajectory_improvement"] if trajectory else None, + "drifted_ate_m": trajectory["drifted_ate_m"] if trajectory else None, + "corrected_ate_m": trajectory["corrected_ate_m"] if trajectory else None, + "closures": closures, + "keyframes": len(graph), + "runtime_s": round(runtime_s, 1), + }, + "raw_agreement": _report_dict(raw_report), + "corrected_agreement": _report_dict(corrected_report), + "voxel_agreement": voxel, + "rrd": str(rrd_path) if with_rrd else None, + "evaluated_at": time.strftime("%Y-%m-%d %H:%M:%S"), + } + (out_dir / "summary.json").write_text(json.dumps(summary, indent=2) + "\n") + + print(f"\nresults -> {out_dir / 'summary.json'}") + if sightings: + print( + f" tag spread: {raw_report.mean_spread:.3f}" + f" -> {corrected_report.mean_spread:.3f} m" + ) + print(f" tag improvement: {improvement:+.3f} (1.0 = perfect)") + else: + print(" tag improvement: n/a (no tags)") + if voxel.get("status") == "ok": + print( + f" voxel agreement: {voxel['raw_voxels']} -> {voxel['corrected_voxels']} voxels" + f" ({voxel['improvement']:+.3f}, {voxel['scans_used']} scans @ {voxel['voxel_size_m']}m)" + ) + else: + print(f" voxel agreement: {voxel.get('status')}") + print(f" closures: {closures}, keyframes: {len(graph)}") + if with_rrd: + print(f" rrd: {rrd_path}") + return summary + + +def main() -> None: + parser = argparse.ArgumentParser(description=__doc__) + parser.add_argument("--db-path", type=Path, required=True) + parser.add_argument("--odom-stream", required=True) + parser.add_argument( + "--camera-stream", default=None, help="omit for tagless datasets (voxel agreement only)" + ) + parser.add_argument( + "--camera-intrinsics-json-path", + type=Path, + default=None, + help="omit for tagless datasets (voxel agreement only)", + ) + parser.add_argument("--module-path", type=Path, required=True) + parser.add_argument("--module-name", required=True) + parser.add_argument( + "--pgo-config-json", + help="inline JSON of module config overrides (default: scan_context variant)", + ) + parser.add_argument("--with-rrd", default="false", choices=["true", "false"]) + parser.add_argument( + "--lidar-stream", + default="fastlio_lidar", + help="lidar stream replayed into the module alongside the odometry", + ) + parser.add_argument( + "--lockstep", + default="true", + choices=["true", "false"], + help="pace scans on corrected_odometry acks (machine-independent); false = fixed-rate", + ) + parser.add_argument( + "--results-suffix", + default="", + help="extra results-dir key for runs with different inputs (e.g. pointlio)", + ) + parser.add_argument( + "--recording-name", + default=None, + help="results-dir recording key (default: db parent dir name)", + ) + parser.add_argument( + "--drift-per-sec", + default=None, + help="inject odom drift as a constant world velocity 'x,y,z' in m/s " + "(offset = this * (t - t0), added to odom+lidar). e.g. '0.01,0,0'", + ) + parser.add_argument( + "--ignore-tags", + default=None, + help="comma-separated April-tag ids to drop from scoring (dynamic/unreliable " + "tags whose motion would look like drift). e.g. '17'", + ) + args = parser.parse_args() + + drift_per_sec = ( + [float(v) for v in args.drift_per_sec.split(",")] if args.drift_per_sec else None + ) + if drift_per_sec is not None and len(drift_per_sec) != 3: + raise SystemExit(f"--drift-per-sec must be 'x,y,z', got {args.drift_per_sec!r}") + + ignore_tags = ( + {int(tag_id) for tag_id in args.ignore_tags.split(",")} if args.ignore_tags else None + ) + + db_path = args.db_path.expanduser() + if not db_path.exists(): + raise SystemExit(f"no such db: {db_path}") + intrinsics_json = ( + args.camera_intrinsics_json_path.expanduser() + if args.camera_intrinsics_json_path is not None + else None + ) + if intrinsics_json is not None and not intrinsics_json.exists(): + raise SystemExit(f"no such intrinsics json: {intrinsics_json}") + + pgo_config = dict(DEFAULT_PGO_CONFIG) + if args.pgo_config_json: + pgo_config.update(json.loads(args.pgo_config_json)) + + evaluate( + db_path, + odom_stream=args.odom_stream, + camera_stream=args.camera_stream, + intrinsics_json=intrinsics_json, + module_path=args.module_path, + module_name=args.module_name, + pgo_config=pgo_config, + with_rrd=args.with_rrd == "true", + lidar_stream=args.lidar_stream, + lockstep=args.lockstep == "true", + results_suffix=args.results_suffix, + recording_name=args.recording_name, + drift_per_sec=drift_per_sec, + ignore_tags=ignore_tags, + ) + + +if __name__ == "__main__": + # Re-import under the canonical dotted name so module classes defined here + # (GraphCapture) deploy into workers with a picklable __module__. + importlib.import_module("dimos.navigation.jnav.components.loop_closure.eval").main() diff --git a/dimos/navigation/jnav/components/loop_closure/eval_all.py b/dimos/navigation/jnav/components/loop_closure/eval_all.py new file mode 100644 index 0000000000..47df33d081 --- /dev/null +++ b/dimos/navigation/jnav/components/loop_closure/eval_all.py @@ -0,0 +1,366 @@ +# Copyright 2026 Dimensional Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Run the loop-closure eval over every PGO module and render a comparison table. + +Each module is evaluated in its own subprocess (sequentially — they share LCM) +via eval.py with lockstep replay, then all eval_results summaries are rendered +to eval_results/comparison.md. A failed module doesn't stop the rest. + +Usage: + uv run python dimos/navigation/jnav/components/loop_closure/eval_all.py \\ + --db-path ~/datasets/go2_recordings//mem2.db \\ + [--camera-intrinsics-json-path ] # default: sidecar next to db + [--only gsc_pgo,ivan_pgo] # subset by directory name + [--with-rrd true] [--lockstep true] +""" + +from __future__ import annotations + +import argparse +from collections import defaultdict +import json +from pathlib import Path +import subprocess +import sys +from typing import Any + +LOOP_CLOSURE_DIR = Path(__file__).resolve().parent +RESULTS_DIR = LOOP_CLOSURE_DIR / "eval_results" +TABLE_PATH = RESULTS_DIR / "comparison.md" +SIDECAR_NAME = "camera_intrinsics.json" + +# (directory, per-module config overrides). All classes are named PGO. +# global_map is disabled everywhere — nothing in the eval consumes it, and the +# native modules' big global_map clouds overflow LCM and congest the +# corrected_odometry ack channel the lockstep replay waits on (rate 0 = off +# for the native binaries; publish_global_map=False for the python ivan). +MODULES: list[tuple[str, dict[str, Any]]] = [ + ("gsc_pgo", {"use_scan_context": True, "global_map_publish_rate": 0.0}), + ("ivan_pgo", {"publish_global_map": False}), + ("ivan_pgo_transformer", {}), + ("unrefined_pgo", {"global_map_publish_rate": 0.0}), +] + + +def _fmt_spread(s: dict[str, Any]) -> str: + raw, corrected = s.get("raw_spread_m"), s.get("corrected_spread_m") + if raw is None or corrected is None: # tagless dataset (KITTI, bare lidar) + return "—" + return f"{raw:.2f} -> {corrected:.2f}" + + +TABLE_COLUMNS = [ + ("tag spread (m)", _fmt_spread), + ( + "tag improvement", + lambda s: f"{s['tag_improvement']:+.3f}" if s.get("tag_improvement") is not None else "—", + ), + ( + "voxel improvement", + lambda s: f"{s['voxel_improvement']:+.3f}" + if s.get("voxel_improvement") is not None + else "—", + ), + ( + "drift recovery", # ATE improvement vs un-drifted GT (only with --drift-per-sec) + lambda s: f"{s['trajectory_improvement']:+.3f}" + if s.get("trajectory_improvement") is not None + else "—", + ), + ("closures", lambda s: str(s["closures"])), + ("keyframes", lambda s: str(s["keyframes"])), + ("runtime (s)", lambda s: str(s["runtime_s"])), +] + + +def run_one( + module_dir: str, + overrides: dict[str, Any], + *, + db_path: Path, + odom_stream: str, + camera_stream: str, + lidar_stream: str, + intrinsics_json: Path | None, + with_rrd: str, + lockstep: str, + results_suffix: str = "", + drift_per_sec: str = "", + ignore_tags: str = "", +) -> bool: + command = [ + sys.executable, + "-u", + str(LOOP_CLOSURE_DIR / "eval.py"), + "--db-path", + str(db_path), + "--odom-stream", + odom_stream, + "--camera-stream", + camera_stream, + "--lidar-stream", + lidar_stream, + "--module-path", + str(LOOP_CLOSURE_DIR / module_dir / "module.py"), + "--module-name", + "PGO", + "--with-rrd", + with_rrd, + "--lockstep", + lockstep, + ] + if intrinsics_json is not None: + command += ["--camera-intrinsics-json-path", str(intrinsics_json)] + if drift_per_sec: + command += ["--drift-per-sec", drift_per_sec] + if ignore_tags: + command += ["--ignore-tags", ignore_tags] + if results_suffix: + command += ["--results-suffix", results_suffix] + if overrides: + command += ["--pgo-config-json", json.dumps(overrides)] + print(f"\n=== {module_dir} ===", flush=True) + result = subprocess.run(command, check=False) + print(f"=== {module_dir} exit: {result.returncode} ===", flush=True) + return result.returncode == 0 + + +def _row_label(module_key: str, replay: dict[str, Any]) -> str: + label = module_key + scans = replay.get("scans_sent") + timeouts = replay.get("timeouts") + if scans is not None: + label += f" ({scans} scans, {timeouts} ack-timeouts)" if timeouts else f" ({scans} scans)" + if replay.get("hit_max_run_s"): + label += " [hit run cap]" + return label + + +def _section_for(recording: str, module_key: str) -> str: + """Which breakdown section a result belongs to.""" + if recording.startswith("hk_village"): + return "hk_village" + if "drift" in module_key: + return "drift" + return "real" + + +def _tag_improvement_key(labelled_row: tuple[str, dict[str, Any]]) -> float: + # Best first; fall back to drift-recovery for tagless rows, then sink to the + # bottom if neither is present. + scores = labelled_row[1] + value = scores.get("tag_improvement") + if value is None: + value = scores.get("trajectory_improvement") + return float(value) if value is not None else float("-inf") + + +def _render_section( + by_recording: dict[str, list[tuple[str, dict[str, Any]]]], sort_reverse: bool +) -> list[str]: + lines: list[str] = [] + for recording in sorted(by_recording): + rows = sorted(by_recording[recording], key=_tag_improvement_key, reverse=sort_reverse) + lines += [f"### {recording}", ""] + lines.append("| module | " + " | ".join(name for name, _ in TABLE_COLUMNS) + " | replay |") + lines.append("|" + "---|" * (len(TABLE_COLUMNS) + 2)) + for module_label, scores in rows: + cells = [render(scores) for _, render in TABLE_COLUMNS] + lines.append(f"| {module_label} | " + " | ".join(cells) + f" | {scores['_mode']} |") + lines.append("") + return lines + + +def _base_module(module_key: str) -> str: + """`gsc_pgo.PGO.drift0p05_0_0` -> `gsc_pgo`.""" + return module_key.split(".")[0] + + +def _conclusion(real: dict[str, list[tuple[str, dict[str, Any]]]]) -> list[str]: + """Data-driven summary: count per-recording tag-improvement wins per module.""" + wins: dict[str, int] = defaultdict(int) + tagged_recordings = 0 + for rows in real.values(): + tagged = [(label, s) for label, s in rows if s.get("tag_improvement") is not None] + if not tagged: + continue + tagged_recordings += 1 + best_label = max(tagged, key=lambda item: item[1]["tag_improvement"])[0] + wins[_base_module(best_label)] += 1 + if not tagged_recordings: + return [] + ranking = sorted(wins.items(), key=lambda item: item[1], reverse=True) + leader = ranking[0][0] if ranking else "n/a" + lines = ["## Conclusion", ""] + lines.append( + f"Across {tagged_recordings} tagged real recordings, the best per-recording " + f"April-tag improvement was won by:" + ) + lines.append("") + for module, count in ranking: + lines.append(f"- **{module}** — best on {count}/{tagged_recordings}") + lines += [ + "", + f"`{leader}` is the strongest loop-closure PGO overall on real recordings; " + "`unrefined_pgo` (the pass-through baseline) is the floor. On tagless data " + "(hk_village) modules are ranked by voxel improvement, and under artificial " + "drift by drift-recovery (fraction of injected ATE removed).", + "", + ] + return lines + + +def render_table() -> Path: + buckets: dict[str, dict[str, list[tuple[str, dict[str, Any]]]]] = { + "real": defaultdict(list), + "hk_village": defaultdict(list), + "drift": defaultdict(list), + } + for summary_path in sorted(RESULTS_DIR.glob("*/summary.json")): + summary = json.loads(summary_path.read_text()) + # Skip non-PGO summaries (e.g. the cross-recording ground-truth tag eval, + # which has a different shape and its own write-up). + if "scores" not in summary: + continue + recording, _, module_key = summary_path.parent.name.rpartition("__") + replay = summary.get("replay", {}) + row = {**summary["scores"], "_mode": replay.get("mode", "fixed-rate (pre-lockstep)")} + section = _section_for(recording, module_key) + buckets[section][recording].append((_row_label(module_key, replay), row)) + + lines = [ + "# Loop-closure PGO comparison", + "", + "Higher improvement = better. **Tag improvement**: fractional drop in per-visit", + "April-tag position spread (1.0 = perfect). **Voxel improvement**: fractional drop", + "in occupied 0.2 m voxels after re-anchoring scans onto the corrected trajectory.", + "**Drift recovery**: fraction of injected ATE removed vs the un-drifted ground truth.", + "", + ] + if buckets["real"]: + lines += ["## Real recordings (clean input)", ""] + lines += _render_section(buckets["real"], sort_reverse=True) + if buckets["hk_village"]: + lines += ["## hk_village (tagless — voxel agreement only)", ""] + lines += _render_section(buckets["hk_village"], sort_reverse=True) + if buckets["drift"]: + lines += ["## Artificial drift robustness", ""] + lines += _render_section(buckets["drift"], sort_reverse=True) + + kitti_table = RESULTS_DIR / "kitti_comparison.md" + if kitti_table.exists(): + lines += [ + "## KITTI (official odometry error)", + "", + "Scored with the official KITTI translational (%) / rotational (deg/m) error " + "(lower = better); see [kitti_comparison.md](kitti_comparison.md).", + "", + ] + + lines += _conclusion(buckets["real"]) + RESULTS_DIR.mkdir(exist_ok=True) + TABLE_PATH.write_text("\n".join(lines)) + return TABLE_PATH + + +def main() -> None: + parser = argparse.ArgumentParser(description=__doc__) + parser.add_argument("--db-path", type=Path, required=True) + parser.add_argument("--odom-stream", default="fastlio_odometry") + parser.add_argument("--camera-stream", default="color_image") + parser.add_argument("--lidar-stream", default="fastlio_lidar") + parser.add_argument( + "--camera-intrinsics-json-path", + type=Path, + help=f"default: {SIDECAR_NAME} next to the db", + ) + parser.add_argument("--only", help="comma-separated module directory names") + parser.add_argument("--with-rrd", default="true", choices=["true", "false"]) + parser.add_argument("--lockstep", default="true", choices=["true", "false"]) + parser.add_argument( + "--results-suffix", + default="", + help="extra results-dir key for runs with different inputs (e.g. pointlio)", + ) + parser.add_argument( + "--drift-per-sec", + default="", + help="inject constant-velocity world drift 'x,y,z' (m/s) into odom+lidar; " + "stress-tests how each PGO corrects known drift. Auto-tags the results dir.", + ) + parser.add_argument( + "--ignore-tags", + default="", + help="comma-separated April-tag ids to drop from scoring (e.g. '17' for the " + "dynamic tag in huge_loop_realsense)", + ) + args = parser.parse_args() + + # Keep drifted runs in their own results dirs so they don't clobber the + # clean-input comparison (e.g. drift 0.1,0,0 -> suffix '...drift0p1x'). + results_suffix = args.results_suffix + if args.drift_per_sec: + drift_tag = "drift" + args.drift_per_sec.replace(".", "p").replace(",", "_") + results_suffix = f"{results_suffix}.{drift_tag}" if results_suffix else drift_tag + + db_path = args.db_path.expanduser() + if not db_path.exists(): + raise SystemExit(f"no such db: {db_path}") + # Tagless datasets (KITTI, bare lidar): empty --camera-stream -> no intrinsics + # needed, voxel-agreement scoring only. + tagless = not args.camera_stream + intrinsics_json: Path | None = None + if not tagless: + sidecar: Path = args.camera_intrinsics_json_path or db_path.parent / SIDECAR_NAME + intrinsics_json = sidecar.expanduser() + if not intrinsics_json.exists(): + raise SystemExit(f"no intrinsics json: {intrinsics_json}") + + selected = MODULES + if args.only: + wanted = {name.strip() for name in args.only.split(",")} + selected = [(name, overrides) for name, overrides in MODULES if name in wanted] + missing = wanted - {name for name, _ in selected} + if missing: + raise SystemExit( + f"unknown modules: {sorted(missing)} (have: {[m for m, _ in MODULES]})" + ) + + outcomes = {} + for module_dir, overrides in selected: + outcomes[module_dir] = run_one( + module_dir, + overrides, + db_path=db_path, + odom_stream=args.odom_stream, + camera_stream=args.camera_stream, + lidar_stream=args.lidar_stream, + intrinsics_json=intrinsics_json, + with_rrd=args.with_rrd, + lockstep=args.lockstep, + results_suffix=results_suffix, + drift_per_sec=args.drift_per_sec, + ignore_tags=args.ignore_tags, + ) + + table = render_table() + print(f"\ntable -> {table}") + failed = [name for name, ok in outcomes.items() if not ok] + if failed: + raise SystemExit(f"failed modules: {failed}") + + +if __name__ == "__main__": + main() diff --git a/dimos/navigation/jnav/components/loop_closure/eval_ground_truth_tag.py b/dimos/navigation/jnav/components/loop_closure/eval_ground_truth_tag.py new file mode 100644 index 0000000000..5644fb8add --- /dev/null +++ b/dimos/navigation/jnav/components/loop_closure/eval_ground_truth_tag.py @@ -0,0 +1,309 @@ +# Copyright 2026 Dimensional Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Cross-recording ground-truth tag eval (todo item 4). + +Run the best PGO on huge_loop_realsense to get corrected April-tag world +positions (treated as ground truth, since cmu reaches ~0.14 m tag spread there), +then measure how close each PGO moves huge_loop_go2's tags toward those GT +locations — a CROSS-recording metric, unlike the within-recording agreement. + +Both recordings see the same physical tags. The pipeline per recording: + 1. run a PGO -> corrected keyframe trajectory (graph). + 2. read each tag sighting's pose-in-optical from the recorded april_tags + stream (PoseStamped: t_optical_tag). + 3. place each sighting in the world: + tag_world = T_world_base(corrected, t) · T_base_optical · t_optical_tag + where T_base_optical = the recording's `optical_in_base` extrinsic. + 4. average each tag's world positions -> one centroid per tag (the estimate). +Then Umeyama-align go2's tag constellation onto the realsense GT constellation +over shared tag ids; residual RMSE = how well the PGO recovered the true tag +geometry (lower = better; an uncorrected / wrong PGO leaves higher residual). + +Tag 17 is dynamic on huge_loop_realsense; it is dropped via --ignore-tags. + +Results are written to eval_results/__ground_truth_tag/summary.json +(per-module residual + closures, plus the GT tag ids), and the frame-composition +core is covered by --self-test. +""" + +from __future__ import annotations + +import argparse +from collections import defaultdict +from collections.abc import Callable +import json +from pathlib import Path +from typing import Any + +import numpy as np +from scipy.spatial.transform import Rotation + +from dimos.navigation.jnav.components.loop_closure.eval import ( + odometry_pose7_lookup, + run_module_graph, +) +from dimos.navigation.jnav.utils.module_loading import ( + filter_config_for_module, + load_module_class, +) +from dimos.navigation.jnav.utils.recording_db import store +from dimos.navigation.jnav.utils.trajectory_metrics import ( + drift_delta_lookup, + rigid_align_rmse, +) + + +def pose7_to_matrix(pose7: np.ndarray | list[float]) -> np.ndarray: + """[x,y,z, qx,qy,qz,qw] -> 4x4 homogeneous transform.""" + pose = np.asarray(pose7, dtype=np.float64) + transform = np.eye(4) + transform[:3, :3] = Rotation.from_quat(pose[3:7]).as_matrix() + transform[:3, 3] = pose[:3] + return transform + + +def tag_world_position( + robot_pose7: np.ndarray | list[float], + optical_in_base7: np.ndarray | list[float], + tag_in_optical7: np.ndarray | list[float], +) -> np.ndarray: + """World xyz of a tag from the corrected robot pose, the camera->base + extrinsic, and the tag-in-optical detection. Composes + T_world_base · T_base_optical · T_optical_tag and returns the translation.""" + transform = ( + pose7_to_matrix(robot_pose7) + @ pose7_to_matrix(optical_in_base7) + @ pose7_to_matrix(tag_in_optical7) + ) + return transform[:3, 3] + + +def read_optical_in_base(intrinsics_json: Path) -> list[float]: + raw = json.loads(Path(intrinsics_json).read_text()) + extrinsic = raw.get("optical_in_base") + if extrinsic is None: + raise SystemExit(f"no optical_in_base (camera->base extrinsic) in {intrinsics_json}") + return list(extrinsic) + + +def tag_in_camera_sightings(db_path: Path) -> list[tuple[float, int, list[float]]]: + """(ts, marker_id, tag-in-optical pose7) per April-tag observation, read + straight from the recorded april_tags PoseStamped stream (no re-detection).""" + sightings: list[tuple[float, int, list[float]]] = [] + for observation in store(db_path).stream("april_tags"): + pose_tuple = observation.pose_tuple + if pose_tuple is None: + continue + sightings.append( + (float(observation.ts), int(observation.tags["marker_id"]), list(pose_tuple)) + ) + return sightings + + +def corrected_pose7_at( + timestamp: float, + raw_pose7_lookup: Callable[[float], np.ndarray | None], + delta_lookup: Callable[[float], tuple[np.ndarray, np.ndarray] | None], +) -> np.ndarray | None: + """Corrected robot pose7 at a time: apply the nearest keyframe's drift + correction (R_delta, t_delta) to the raw odom pose.""" + raw = raw_pose7_lookup(timestamp) + delta = delta_lookup(timestamp) + if raw is None or delta is None: + return None + rotation_delta, translation_delta = delta + raw = np.asarray(raw, dtype=np.float64) + rotation_corrected = rotation_delta @ Rotation.from_quat(raw[3:7]).as_matrix() + translation_corrected = rotation_delta @ raw[:3] + translation_delta + return np.array([*translation_corrected, *Rotation.from_matrix(rotation_corrected).as_quat()]) + + +def tag_constellation( + db_path: Path, + module_path: Path, + module_name: str, + config: dict[str, Any], + *, + lidar_stream: str, + odom_stream: str, + optical_in_base: list[float], + ignore_tags: set[int], +) -> tuple[dict[int, np.ndarray], int]: + """Run the PGO, then place each tag sighting in the world via the corrected + trajectory + extrinsic, averaging per tag. Returns {marker_id: world centroid} + and the closure count.""" + module_class = load_module_class(module_path, module_name) + config = filter_config_for_module(module_class, config) + graph, closures, _ = run_module_graph( + db_path, + module_class, + config, + lidar_stream=lidar_stream, + odom_stream=odom_stream, + lockstep=True, + ) + raw_pose7_lookup = odometry_pose7_lookup(db_path, odom_stream) + delta_lookup = drift_delta_lookup(graph, raw_pose7_lookup) + by_tag: dict[int, list[np.ndarray]] = defaultdict(list) + for timestamp, marker_id, tag_pose7 in tag_in_camera_sightings(db_path): + if marker_id in ignore_tags: + continue + robot_pose7 = corrected_pose7_at(timestamp, raw_pose7_lookup, delta_lookup) + if robot_pose7 is None: + continue + by_tag[marker_id].append(tag_world_position(robot_pose7, optical_in_base, tag_pose7)) + constellation = {tag: np.mean(positions, axis=0) for tag, positions in by_tag.items()} + return constellation, closures + + +def constellation_residual( + gt: dict[int, np.ndarray], test: dict[int, np.ndarray] +) -> tuple[float, list[int]]: + """Umeyama-align the test tag constellation onto the GT; residual RMSE over + shared tags (lower = the PGO recovered the true tag geometry better).""" + shared = sorted(set(gt) & set(test)) + if len(shared) < 3: + return float("nan"), shared + gt_points = np.asarray([gt[tag] for tag in shared]) + test_points = np.asarray([test[tag] for tag in shared]) + return rigid_align_rmse(test_points, gt_points), shared + + +def _self_test() -> None: + identity = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0] + # 1) everything identity, tag 2m ahead in optical -> world = 2m ahead. + world = tag_world_position(identity, identity, [0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 1.0]) + assert np.allclose(world, [0.0, 0.0, 2.0]), world + + # 2) robot translated by (10,5,0), no rotation: tag world shifts by that. + world = tag_world_position( + [10.0, 5.0, 0.0, 0.0, 0.0, 0.0, 1.0], identity, [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0] + ) + assert np.allclose(world, [11.0, 5.0, 0.0]), world + + # 3) robot yawed 90deg (+z): a tag 1m ahead in base (x) lands 1m to +y of robot. + yaw90 = [0.0, 0.0, 0.0, *Rotation.from_euler("z", 90, degrees=True).as_quat()] + world = tag_world_position(yaw90, identity, [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0]) + assert np.allclose(world, [0.0, 1.0, 0.0], atol=1e-9), world + + # 4) optical->base rotation (-0.5,0.5,-0.5,0.5) maps optical +z (forward) to + # base +x (forward). Tag 3m ahead in optical -> 3m ahead in base/world. + optical_in_base = [0.0, 0.0, 0.0, -0.5, 0.5, -0.5, 0.5] + world = tag_world_position(identity, optical_in_base, [0.0, 0.0, 3.0, 0.0, 0.0, 0.0, 1.0]) + assert np.allclose(world, [3.0, 0.0, 0.0], atol=1e-9), world + + print("frame-composition self-test PASSED (4 cases)") + + +LOOP_CLOSURE_DIR = Path(__file__).resolve().parent +DEFAULT_GT_CONFIG = {"use_scan_context": True, "global_map_publish_rate": 0} + + +def main() -> None: + parser = argparse.ArgumentParser(description=__doc__) + parser.add_argument( + "--gt-db", type=Path, required=True, help="recording for ground truth (realsense)" + ) + parser.add_argument("--gt-intrinsics", type=Path, required=True) + parser.add_argument("--gt-odom-stream", default="fastlio_odometry") + parser.add_argument("--gt-lidar-stream", default="fastlio_lidar") + parser.add_argument("--test-db", type=Path, required=True, help="recording to score (go2)") + parser.add_argument("--test-intrinsics", type=Path, required=True) + parser.add_argument("--test-odom-stream", default="fastlio_odometry") + parser.add_argument("--test-lidar-stream", default="fastlio_lidar") + parser.add_argument("--gt-module", default="gsc_pgo", help="PGO dir used to build GT") + parser.add_argument("--modules", default="gsc_pgo,ivan_pgo,ivan_pgo_transformer,unrefined_pgo") + parser.add_argument("--ignore-tags", default="17", help="dynamic tags to drop") + args = parser.parse_args() + + ignore_tags = {int(t) for t in args.ignore_tags.split(",")} if args.ignore_tags else set() + gt_extrinsic = read_optical_in_base(args.gt_intrinsics) + test_extrinsic = read_optical_in_base(args.test_intrinsics) + + def module_py(module_dir: str) -> Path: + return LOOP_CLOSURE_DIR / module_dir / "module.py" + + print(f"building GT constellation: {args.gt_module} on {args.gt_db.parent.name}") + gt_constellation, _ = tag_constellation( + args.gt_db, + module_py(args.gt_module), + "PGO", + dict(DEFAULT_GT_CONFIG), + lidar_stream=args.gt_lidar_stream, + odom_stream=args.gt_odom_stream, + optical_in_base=gt_extrinsic, + ignore_tags=ignore_tags, + ) + print(f" GT tags: {sorted(gt_constellation)}") + + print(f"\n{'module':24s} {'tag-to-GT residual (m)':>22s} {'shared tags':>12s} {'closures':>9s}") + print("-" * 70) + rows: list[dict[str, Any]] = [] + for module_dir in [m.strip() for m in args.modules.split(",")]: + try: + test_constellation, closures = tag_constellation( + args.test_db, + module_py(module_dir), + "PGO", + dict(DEFAULT_GT_CONFIG), + lidar_stream=args.test_lidar_stream, + odom_stream=args.test_odom_stream, + optical_in_base=test_extrinsic, + ignore_tags=ignore_tags, + ) + residual, shared = constellation_residual(gt_constellation, test_constellation) + rows.append( + { + "module": module_dir, + "residual_m": residual, + "shared_tags": len(shared), + "closures": closures, + } + ) + print(f"{module_dir:24s} {residual:>22.3f} {len(shared):>12d} {closures:>9d}") + except Exception as error: + print(f"{module_dir:24s} FAILED: {type(error).__name__}: {error}") + rows.append({"module": module_dir, "residual_m": None, "error": str(error)}) + + scored = [row for row in rows if row.get("residual_m") is not None] + best_module = min(scored, key=lambda row: row["residual_m"])["module"] if scored else None + if best_module is not None: + best = next(row for row in scored if row["module"] == best_module) + print( + f"\nbest (lowest residual to GT tag geometry): {best_module} ({best['residual_m']:.3f} m)" + ) + + out_dir = LOOP_CLOSURE_DIR / "eval_results" / f"{args.test_db.parent.name}__ground_truth_tag" + out_dir.mkdir(parents=True, exist_ok=True) + summary = { + "gt_db": str(args.gt_db), + "gt_module": args.gt_module, + "test_db": str(args.test_db), + "ignore_tags": sorted(ignore_tags), + "gt_tags": sorted(int(tag) for tag in gt_constellation), + "best_module": best_module, + "rows": rows, + } + (out_dir / "summary.json").write_text(json.dumps(summary, indent=2) + "\n") + print(f"\nresults -> {out_dir / 'summary.json'}") + + +if __name__ == "__main__": + import sys + + if "--self-test" in sys.argv: + _self_test() + else: + main() diff --git a/dimos/navigation/jnav/components/loop_closure/eval_kitti.py b/dimos/navigation/jnav/components/loop_closure/eval_kitti.py new file mode 100644 index 0000000000..cf7e53fe32 --- /dev/null +++ b/dimos/navigation/jnav/components/loop_closure/eval_kitti.py @@ -0,0 +1,192 @@ +# Copyright 2026 Dimensional Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Evaluate one PGO on a KITTI db with the official KITTI odometry error. + +Parallels eval.py, but for the kitti_to_db.py recordings. Runs the module over +the db's drifty ICP odometry (fastlio_odometry) + registered scans (fastlio_lidar), +reads the ground truth from the db's gt_odometry stream, and reports translational +(%) and rotational (deg/m) error — the leaderboard metric — for the corrected +trajectory and the raw-odometry baseline. + +Usage: + uv run python dimos/navigation/jnav/components/loop_closure/eval_kitti.py \ + --db-path ~/datasets/kitti/kitti_seq07/mem2.db \ + --module-path dimos/navigation/jnav/components/loop_closure/gsc_pgo/module.py \ + [--module-name PGO] [--pgo-config-json '{...}'] +""" + +from __future__ import annotations + +import argparse +import json +from pathlib import Path +from typing import Any + +import numpy as np +from scipy.spatial.transform import Rotation + +from dimos.navigation.jnav.components.loop_closure.eval import run_module_graph +from dimos.navigation.jnav.utils.kitti import kitti_odometry_error +from dimos.navigation.jnav.utils.module_loading import ( + filter_config_for_module, + load_module_class, +) +from dimos.navigation.jnav.utils.recording_db import iterate_stream + +DEFAULT_CONFIG = {"use_scan_context": True, "global_map_publish_rate": 0} + +# Stream names produced by kitti_to_db (overridable on the CLI). +DEFAULT_LIDAR_STREAM = "fastlio_lidar" +DEFAULT_ODOM_STREAM = "fastlio_odometry" +DEFAULT_GT_STREAM = "gt_odometry" + + +def poses_from_stream(db_path: Path, stream: str) -> tuple[list[np.ndarray], list[float]]: + """Read an Odometry stream as (4x4 poses, timestamps).""" + poses: list[np.ndarray] = [] + times: list[float] = [] + for timestamp, message in iterate_stream(db_path, stream): + orientation = message.pose.orientation + position = message.pose.position + transform = np.eye(4) + transform[:3, :3] = Rotation.from_quat( + [orientation.x, orientation.y, orientation.z, orientation.w] + ).as_matrix() + transform[:3, 3] = [position.x, position.y, position.z] + poses.append(transform) + times.append(timestamp) + return poses, times + + +def _gt_at( + gt_poses: list[np.ndarray], gt_times: list[float], query: list[float] +) -> list[np.ndarray]: + times = np.asarray(gt_times) + return [gt_poses[int(np.argmin(np.abs(times - t)))] for t in query] + + +def corrected_trajectory( + db_path: Path, + module_path: Path, + module_name: str, + config: dict[str, Any], + *, + lidar_stream: str, + odom_stream: str, +): + module_class = load_module_class(module_path, module_name) + config = filter_config_for_module(module_class, config) + graph, closures, _ = run_module_graph( + db_path, + module_class, + config, + lidar_stream=lidar_stream, + odom_stream=odom_stream, + lockstep=True, + ) + poses, times = [], [] + for node in graph: + transform = np.eye(4) + transform[:3, :3] = Rotation.from_quat(node[4:8]).as_matrix() + transform[:3, 3] = node[1:4] + poses.append(transform) + times.append(node[0]) + return poses, times, closures + + +def evaluate( + db_path: Path, + module_path: Path, + module_name: str, + config: dict[str, Any], + results_suffix: str = "", + *, + lidar_stream: str = DEFAULT_LIDAR_STREAM, + odom_stream: str = DEFAULT_ODOM_STREAM, + gt_stream: str = DEFAULT_GT_STREAM, +) -> dict[str, Any]: + gt_poses, gt_times = poses_from_stream(db_path, gt_stream) + odom_poses, _ = poses_from_stream(db_path, odom_stream) + if not gt_poses: + raise SystemExit(f"no {gt_stream!r} stream in {db_path}") + baseline = kitti_odometry_error(odom_poses, gt_poses) + + corrected, corrected_times, closures = corrected_trajectory( + db_path, + module_path, + module_name, + config, + lidar_stream=lidar_stream, + odom_stream=odom_stream, + ) + error = kitti_odometry_error(corrected, _gt_at(gt_poses, gt_times, corrected_times)) + + print( + f"raw odometry: {baseline['translational_percent']:.2f}% transl / " + f"{baseline['rotational_deg_per_m']:.4f} deg/m" + ) + print( + f"after {module_path.parent.name}: {error['translational_percent']:.2f}% transl / " + f"{error['rotational_deg_per_m']:.4f} deg/m ({closures} closures)" + ) + + module_key = module_path.parent.name + (f".{results_suffix}" if results_suffix else "") + summary = { + "db": str(db_path), + "module": module_key, + "scores": { + "translational_percent": error["translational_percent"], + "rotational_deg_per_m": error["rotational_deg_per_m"], + "baseline_translational_percent": baseline["translational_percent"], + "baseline_rotational_deg_per_m": baseline["rotational_deg_per_m"], + "closures": closures, + "keyframes": len(corrected), + }, + } + recording = db_path.parent.name + out_dir = Path(__file__).resolve().parent / "eval_results" / f"{recording}__{module_key}" + out_dir.mkdir(parents=True, exist_ok=True) + (out_dir / "kitti_summary.json").write_text(json.dumps(summary, indent=2)) + return summary + + +def main() -> None: + parser = argparse.ArgumentParser(description=__doc__) + parser.add_argument("--db-path", type=Path, required=True) + parser.add_argument("--module-path", type=Path, required=True) + parser.add_argument("--module-name", default="PGO") + parser.add_argument("--pgo-config-json", default="") + parser.add_argument("--results-suffix", default="") + parser.add_argument("--lidar-stream", default=DEFAULT_LIDAR_STREAM) + parser.add_argument("--odom-stream", default=DEFAULT_ODOM_STREAM) + parser.add_argument("--gt-stream", default=DEFAULT_GT_STREAM) + args = parser.parse_args() + config = dict(DEFAULT_CONFIG) + if args.pgo_config_json: + config.update(json.loads(args.pgo_config_json)) + evaluate( + args.db_path.expanduser(), + args.module_path, + args.module_name, + config, + args.results_suffix, + lidar_stream=args.lidar_stream, + odom_stream=args.odom_stream, + gt_stream=args.gt_stream, + ) + + +if __name__ == "__main__": + main() diff --git a/dimos/navigation/jnav/components/loop_closure/eval_kitti_all.py b/dimos/navigation/jnav/components/loop_closure/eval_kitti_all.py new file mode 100644 index 0000000000..3a16b799d1 --- /dev/null +++ b/dimos/navigation/jnav/components/loop_closure/eval_kitti_all.py @@ -0,0 +1,122 @@ +# Copyright 2026 Dimensional Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Run every PGO on KITTI db(s) and render an official-error comparison table. + +Parallels eval_all.py. For each KITTI db (one sequence) it evaluates every PGO +module in its own subprocess (they share LCM) via eval_kitti.py, then renders +translational (%) / rotational (deg/m) error per module to kitti_comparison.md, +sorted best-first (lower error = better), alongside the raw-odometry baseline. + +Usage: + uv run python dimos/navigation/jnav/components/loop_closure/eval_kitti_all.py \ + --recordings-dir ~/datasets/kitti/sequences # all kitti_seq*/mem2.db + [--db-path ~/datasets/kitti/sequences/kitti_seq07/mem2.db] # or one + [--only gsc_pgo,ivan_pgo] +""" + +from __future__ import annotations + +import argparse +import json +from pathlib import Path +import subprocess +import sys +from typing import Any + +LOOP_CLOSURE_DIR = Path(__file__).resolve().parent +RESULTS_DIR = LOOP_CLOSURE_DIR / "eval_results" +TABLE_PATH = RESULTS_DIR / "kitti_comparison.md" +MODULES = ("gsc_pgo", "ivan_pgo", "ivan_pgo_transformer", "unrefined_pgo") + + +def run_one(db_path: Path, module_dir: str) -> bool: + command = [ + sys.executable, + "-u", + str(LOOP_CLOSURE_DIR / "eval_kitti.py"), + "--db-path", + str(db_path), + "--module-path", + str(LOOP_CLOSURE_DIR / module_dir / "module.py"), + "--module-name", + "PGO", + ] + print(f"\n=== {db_path.parent.name} / {module_dir} ===", flush=True) + return subprocess.run(command, check=False).returncode == 0 + + +def render_table() -> Path: + by_recording: dict[str, list[dict[str, Any]]] = {} + for summary_path in sorted(RESULTS_DIR.glob("*/kitti_summary.json")): + summary = json.loads(summary_path.read_text()) + recording = Path(summary["db"]).parent.name + by_recording.setdefault(recording, []).append( + {"module": summary["module"], **summary["scores"]} + ) + + lines = ["# KITTI official-error PGO comparison", ""] + lines += [ + "Translational error (%) and rotational error (deg/m), the KITTI", + "leaderboard metric (avg relative pose error over 100..800m sub-sequences).", + "**Lower is better.** raw odometry = the scan-to-scan ICP input the PGO refines.", + "", + ] + for recording in sorted(by_recording): + rows = sorted(by_recording[recording], key=lambda r: r["translational_percent"]) + lines += [f"## {recording}", ""] + lines.append("| module | transl % | rot deg/m | closures | keyframes |") + lines.append("|---|---|---|---|---|") + baseline = rows[0] + lines.append( + f"| _raw odometry_ | {baseline['baseline_translational_percent']:.2f} | " + f"{baseline['baseline_rotational_deg_per_m']:.4f} | — | — |" + ) + for row in rows: + lines.append( + f"| {row['module']} | {row['translational_percent']:.2f} | " + f"{row['rotational_deg_per_m']:.4f} | {row['closures']} | {row['keyframes']} |" + ) + lines.append("") + RESULTS_DIR.mkdir(exist_ok=True) + TABLE_PATH.write_text("\n".join(lines)) + return TABLE_PATH + + +def main() -> None: + parser = argparse.ArgumentParser(description=__doc__) + parser.add_argument("--recordings-dir", type=Path, default=Path("~/datasets/kitti/sequences")) + parser.add_argument( + "--db-path", type=Path, default=None, help="a single kitti db (overrides dir)" + ) + parser.add_argument("--only", help="comma-separated module dirs") + args = parser.parse_args() + + if args.db_path: + dbs = [args.db_path.expanduser()] + else: + dbs = sorted(args.recordings_dir.expanduser().glob("kitti_seq*/mem2.db")) + if not dbs: + raise SystemExit("no KITTI dbs found (run kitti_to_db.py first)") + modules = [m.strip() for m in args.only.split(",")] if args.only else list(MODULES) + + for db_path in dbs: + for module_dir in modules: + run_one(db_path, module_dir) + table = render_table() + print(f"\ntable -> {table}") + + +if __name__ == "__main__": + main() diff --git a/dimos/navigation/jnav/components/loop_closure/spec.py b/dimos/navigation/jnav/components/loop_closure/spec.py new file mode 100644 index 0000000000..d571192266 --- /dev/null +++ b/dimos/navigation/jnav/components/loop_closure/spec.py @@ -0,0 +1,33 @@ +# Copyright 2026 Dimensional Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from typing import Protocol + +from dimos.core.stream import In, Out +from dimos.msgs.nav_msgs.Odometry import Odometry +from dimos.msgs.sensor_msgs.PointCloud2 import PointCloud2 +from dimos.navigation.jnav.msgs.Graph3D import Graph3D +from dimos.navigation.jnav.msgs.GraphDelta3D import GraphDelta3D + + +class LoopClosure(Protocol): + # frame:sensor_link + lidar: In[PointCloud2] + odometry: In[Odometry] + + corrected_odometry: Out[Odometry] + # frame:map + pose_graph: Out[Graph3D] + # frame:map + loop_closure_event: Out[GraphDelta3D] From f17a86957e31e741d08e9159b584e1978373351d Mon Sep 17 00:00:00 2001 From: Jeff Hykin Date: Wed, 24 Jun 2026 14:52:51 +0800 Subject: [PATCH 09/21] add tf tree support to memory2 store (DbTf, write_tf_tree, lazy store.tf) --- dimos/memory2/db_tf.py | 180 ++++++++++++++++++++++++++++++++++++ dimos/memory2/store/base.py | 15 +++ 2 files changed, 195 insertions(+) create mode 100644 dimos/memory2/db_tf.py diff --git a/dimos/memory2/db_tf.py b/dimos/memory2/db_tf.py new file mode 100644 index 0000000000..60b40242d4 --- /dev/null +++ b/dimos/memory2/db_tf.py @@ -0,0 +1,180 @@ +# Copyright 2026 Dimensional Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Transform lookups over the transforms recorded in a store. + +A store's ``tf`` member lazily reads every transform recorded under the ``tf`` +(and ``tf_static``) streams into a :class:`MultiTBuffer`, then answers +``store.tf.get(target_frame, source_frame, time)`` — composing multi-hop chains +(e.g. ``world -> map -> odom -> base_link -> mid360_link``) and interpolating to +the nearest recorded sample. This makes world-registration a real transform +lookup instead of assuming a single baked-in pose. + +``write_tf_tree`` populates those streams for a recording that lacks them. +""" + +from __future__ import annotations + +from typing import TYPE_CHECKING + +import numpy as np + +from dimos.msgs.geometry_msgs.Quaternion import Quaternion +from dimos.msgs.geometry_msgs.Transform import Transform +from dimos.msgs.geometry_msgs.Vector3 import Vector3 +from dimos.msgs.tf2_msgs.TFMessage import TFMessage +from dimos.protocol.tf.tf import MultiTBuffer + +if TYPE_CHECKING: + from dimos.memory2.store.base import Store + +TF_STREAMS = ("tf", "tf_static") +# Larger than any single recording's span so the buffer never prunes loaded +# transforms (MultiTBuffer drops samples older than ts - buffer_size). +_NO_PRUNE = 1.0e15 + + +class DbTf: + """Transform lookups backed by the ``tf``/``tf_static`` streams of a store.""" + + def __init__(self, store: Store, stream_names: tuple[str, ...] = TF_STREAMS) -> None: + self._store = store + self._stream_names = stream_names + self._buffer: MultiTBuffer | None = None + + def _ensure_loaded(self) -> MultiTBuffer: + if self._buffer is not None: + return self._buffer + buffer = MultiTBuffer(buffer_size=_NO_PRUNE) + available = set(self._store.list_streams()) + for name in self._stream_names: + if name not in available: + continue + for observation in self._store.stream(name, TFMessage): + message = observation.data + transforms = getattr(message, "transforms", None) + if transforms is None: + transforms = [message] + buffer.receive_transform(*transforms) + self._buffer = buffer + return buffer + + def has_transforms(self) -> bool: + return bool(self._ensure_loaded().buffers) + + def get( + self, + target_frame: str, + source_frame: str, + time_point: float | None = None, + time_tolerance: float | None = None, + ) -> Transform | None: + """Transform that maps a point in ``source_frame`` into ``target_frame``. + + Returns ``None`` if no chain connects the two frames. Uses the buffer's + non-warning lookup so per-scan misses don't spam the log. + """ + buffer = self._ensure_loaded() + return buffer._get(target_frame, source_frame, time_point, time_tolerance) + + +def transform_matrix(transform: Transform) -> tuple[np.ndarray, np.ndarray]: + """Return ``(R, t)`` (3x3, 3) for ``transform`` so ``p_target = p_source @ R.T + t``.""" + rotation = transform.rotation + rotation_matrix = np.asarray(rotation.to_rotation_matrix(), float).reshape(3, 3) + translation = np.array( + [transform.translation.x, transform.translation.y, transform.translation.z], float + ) + return rotation_matrix, translation + + +def write_tf_tree( + store: Store, + *, + odom_stream: str, + odom_parent: str = "odom", + odom_child: str = "base_link", + root_links: tuple[tuple[str, str], ...] = (("world", "map"), ("map", "odom")), + sensor_child: str = "mid360_link", + sensor_translation: tuple[float, float, float] = (0.0, 0.0, 0.0), + sensor_rotation: tuple[float, float, float, float] = (0.0, 0.0, 0.0, 1.0), + static_period: float = 0.45, + stream_name: str = "tf", +) -> int: + """Populate ``store``'s tf stream from an odometry stream. + + - ``root_links`` and ``odom_child -> sensor_child`` are emitted as identity / + fixed transforms every ``static_period`` seconds across the recording span. + - ``odom_parent -> odom_child`` is emitted once per odometry sample, taken + from each observation's pose. + + Returns the number of tf observations written. + """ + import sqlite3 + + db_path = store.config.path + connection = sqlite3.connect(f"file:{db_path}?mode=ro&immutable=1", uri=True) + odom = np.array( + list( + connection.execute( + "select ts,pose_x,pose_y,pose_z,pose_qx,pose_qy,pose_qz,pose_qw " + f"from {odom_stream} order by ts" + ) + ), + float, + ) + connection.close() + if not len(odom): + raise ValueError(f"odom stream {odom_stream!r} is empty; cannot build tf tree") + + tf_stream = store.stream(stream_name, TFMessage) + written = 0 + + # dynamic: odom_parent -> odom_child, one per odometry sample + for row in odom: + ts = float(row[0]) + transform = Transform( + translation=Vector3(row[1], row[2], row[3]), + rotation=Quaternion(row[4], row[5], row[6], row[7]), + frame_id=odom_parent, + child_frame_id=odom_child, + ts=ts, + ) + tf_stream.append(TFMessage(transform), ts=ts) + written += 1 + + # static: root links + sensor mount, resampled every static_period + t0 = float(odom[0, 0]) + t1 = float(odom[-1, 0]) + + def statics_at(ts: float) -> list[Transform]: + links = [ + Transform(frame_id=parent, child_frame_id=child, ts=ts) for parent, child in root_links + ] + links.append( + Transform( + translation=Vector3(*sensor_translation), + rotation=Quaternion(*sensor_rotation), + frame_id=odom_child, + child_frame_id=sensor_child, + ts=ts, + ) + ) + return links + + for ts in np.arange(t0, t1 + static_period, static_period): + tf_stream.append(TFMessage(*statics_at(float(ts))), ts=float(ts)) + written += 1 + + return written diff --git a/dimos/memory2/store/base.py b/dimos/memory2/store/base.py index 7a7162a6d1..46be8b8cb5 100644 --- a/dimos/memory2/store/base.py +++ b/dimos/memory2/store/base.py @@ -29,6 +29,7 @@ from dimos.protocol.service.spec import BaseConfig, Configurable if TYPE_CHECKING: + from dimos.memory2.db_tf import DbTf from dimos.memory2.replay import Replay T = TypeVar("T") @@ -105,12 +106,26 @@ def __init__(self, **kwargs: Any) -> None: Configurable.__init__(self, **kwargs) CompositeResource.__init__(self) self._streams: dict[str, Stream[Any]] = {} + self._tf: DbTf | None = None @property def streams(self) -> StreamAccessor[Stream[Any]]: """Attribute-style access to streams: ``store.streams.name``.""" return StreamAccessor(self) + @property + def tf(self) -> DbTf: + """Transform lookups over the recording's ``tf``/``tf_static`` streams. + + ``store.tf.get(target_frame, source_frame, ts)`` composes multi-hop chains + (e.g. ``world -> ... -> mid360_link``) from the recorded transforms. + """ + if self._tf is None: + from dimos.memory2.db_tf import DbTf + + self._tf = DbTf(self) + return self._tf + def replay( self, *, From 1fb8536a97564cb3581aabf749ecd31b2d4519b2 Mon Sep 17 00:00:00 2001 From: Jeff Hykin Date: Wed, 24 Jun 2026 14:52:58 +0800 Subject: [PATCH 10/21] add map post-processing pipeline (post_process, add_april, detect_tags, make_rrd) --- .../loop_closure/gsc_pgo/add_april.py | 216 +++++++ .../loop_closure/gsc_pgo/detect_tags.py | 175 +++++ .../loop_closure/gsc_pgo/make_rrd.py | 184 ++++++ .../loop_closure/gsc_pgo/post_process.py | 604 ++++++++++++++++++ 4 files changed, 1179 insertions(+) create mode 100644 dimos/navigation/jnav/components/loop_closure/gsc_pgo/add_april.py create mode 100644 dimos/navigation/jnav/components/loop_closure/gsc_pgo/detect_tags.py create mode 100644 dimos/navigation/jnav/components/loop_closure/gsc_pgo/make_rrd.py create mode 100644 dimos/navigation/jnav/components/loop_closure/gsc_pgo/post_process.py diff --git a/dimos/navigation/jnav/components/loop_closure/gsc_pgo/add_april.py b/dimos/navigation/jnav/components/loop_closure/gsc_pgo/add_april.py new file mode 100644 index 0000000000..010a02fc08 --- /dev/null +++ b/dimos/navigation/jnav/components/loop_closure/gsc_pgo/add_april.py @@ -0,0 +1,216 @@ +# Copyright 2026 Dimensional Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Build the raw_april_tags + april_tags streams into a recording's mem2.db and +record the outcome in summary.json. --summary recomputes only the result section. + +Usage: + python dimos/navigation/jnav/components/loop_closure/gsc_pgo/add_april.py --rec PATH + [--summary] [--output PATH] [--camera color_image] [--intrinsics PATH] + [--tag-size 0.10] [--dict DICT_APRILTAG_36h11] [--dynamic 17] +""" + +import argparse +import json +from pathlib import Path +from typing import Any + +from dimos.navigation.jnav.utils import recording_db as rdb +from dimos.navigation.jnav.utils.apriltag_agreement import ( + VISIT_GAP_S, + split_visits, +) +from dimos.navigation.jnav.utils.apriltags import ( + ensure_april_streams, + gate_params, + load_intrinsics_json, +) + +RAW_STREAM = "raw_april_tags" +FILTERED_STREAM = "april_tags" +SUMMARY_NAME = "summary.json" +MIN_REVISITS = 2 # a tag seen on fewer visits than this carries no agreement signal +DEFAULT_MARKER_LENGTH_M = 0.10 +DEFAULT_DICTIONARY = "DICT_APRILTAG_36h11" + + +def _parse_dynamic(raw: str | None) -> list[int]: + if not raw: + return [] + return sorted({int(token) for token in raw.replace(",", " ").split()}) + + +def _times_by_tag(store: Any, stream_name: str) -> dict[int, list[float]]: + by_tag: dict[int, list[float]] = {} + for observation in store.stream(stream_name): + by_tag.setdefault(int(observation.tags["marker_id"]), []).append(float(observation.ts)) + return by_tag + + +def summarize(store: Any) -> dict[str, Any]: + """Per-tag raw detections + filtered visits from the existing streams, flagging + never-revisited tags. Prints the table and returns the result for summary.json.""" + streams = set(store.list_streams()) + raw_available = RAW_STREAM in streams + raw = _times_by_tag(store, RAW_STREAM) if raw_available else {} + filtered = _times_by_tag(store, FILTERED_STREAM) if FILTERED_STREAM in streams else {} + tag_ids = sorted(set(raw) | set(filtered)) + + print( + f" {'tag':>5} {'raw':>6} {'filtered':>9} {'revisits':>9} (visit gap {VISIT_GAP_S:.0f}s)" + ) + tags: list[dict[str, Any]] = [] + not_revisited: list[int] = [] + for tag_id in tag_ids: + raw_count = len(raw.get(tag_id, [])) + filtered_times = sorted(filtered.get(tag_id, [])) + visits = len(split_visits(filtered_times, gap_s=VISIT_GAP_S)) if filtered_times else 0 + revisited = visits >= MIN_REVISITS + if not revisited: + not_revisited.append(tag_id) + tags.append( + { + "tag_id": tag_id, + "raw": raw_count if raw_available else None, + "filtered": len(filtered_times), + "revisits": visits, + "revisited": revisited, + } + ) + raw_display = raw_count if raw_available else "-" + flag = " <-- NOT revisited" if not revisited else "" + print(f" {tag_id:>5} {raw_display!s:>6} {len(filtered_times):>9} {visits:>9}{flag}") + + print( + f" totals: {len(tag_ids)} tags | {len(tag_ids) - len(not_revisited)} revisited" + f" (>={MIN_REVISITS} visits) | {len(not_revisited)} not revisited" + ) + if not_revisited: + print(f" NOT revisited: {not_revisited}") + if not raw_available: + print(f" (no '{RAW_STREAM}' yet — raw counts N/A; run without --summary to build it)") + + return { + "visit_gap_s": VISIT_GAP_S, + "min_revisits": MIN_REVISITS, + "all_unfiltered_tag_ids": sorted(raw) if raw_available else sorted(filtered), + "total_tags": len(tag_ids), + "revisited": len(tag_ids) - len(not_revisited), + "not_revisited": not_revisited, + "raw_available": raw_available, + "tags": tags, + } + + +def _update_summary_json( + path: Path, + *, + filter_parameters: dict[str, Any] | None = None, + result: dict[str, Any] | None = None, +) -> Path: + """Merge the april_tags section into summary.json, preserving every other key.""" + data: dict[str, Any] = json.loads(path.read_text()) if path.exists() else {} + section: dict[str, Any] = data.get("april_tags", {}) + if filter_parameters is not None: + section["filter_parameters"] = filter_parameters + if result is not None: + section["result"] = result + data["april_tags"] = section + path.write_text(json.dumps(data, indent=2)) + return path + + +def main() -> None: + parser = argparse.ArgumentParser( + description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter + ) + parser.add_argument("--rec", type=Path, required=True, help="recording dir or mem2.db path") + parser.add_argument("--camera", default="color_image") + parser.add_argument("--intrinsics", type=Path, default=None) + parser.add_argument("--tag-size", type=float, default=None, help="marker length (m)") + parser.add_argument("--dict", dest="dictionary", default=None) + parser.add_argument( + "--summary", + action="store_true", + help="read-only on streams: recompute and write ONLY april_tags.result (a subset of" + " the full run, which also rebuilds streams + filter_parameters)", + ) + parser.add_argument( + "--output", + type=Path, + default=None, + help="summary.json path to write (default: /summary.json) — pick another" + " location to avoid overwriting an existing one", + ) + parser.add_argument( + "--dynamic", + default=None, + help="comma/space-separated tag ids on moving objects — kept in raw, dropped from filtered", + ) + args = parser.parse_args() + + recording = args.rec.expanduser() + db_path = recording if recording.name == "mem2.db" else recording / "mem2.db" + if not db_path.exists(): + parser.error(f"no mem2.db at {db_path}") + store = rdb.store(db_path) + summary_path = args.output.expanduser() if args.output else (db_path.parent / SUMMARY_NAME) + print(f"=== {db_path.parent.name} ===") + + if args.summary: + result = summarize(store) + _update_summary_json(summary_path, result=result) + print(f" updated {summary_path} april_tags.result") + return + + # Rebuild both streams and overwrite filter_parameters + result. + dynamic_tags = _parse_dynamic(args.dynamic) + intrinsics_path = (args.intrinsics or (db_path.parent / "camera_intrinsics.json")).expanduser() + config = load_intrinsics_json(intrinsics_path) + marker_length = ( + args.tag_size + if args.tag_size is not None + else config.get("marker_length", DEFAULT_MARKER_LENGTH_M) + ) + dictionary = args.dictionary or config.get("dictionary", DEFAULT_DICTIONARY) + ensure_april_streams( + store, + config["intrinsics"], + config["distortion"], + image_stream=args.camera, + marker_length=marker_length, + dictionary=dictionary, + raw_stream=RAW_STREAM, + filtered_stream=FILTERED_STREAM, + exclude_tags=dynamic_tags, + force=True, + ) + filter_parameters = { + "gates": gate_params(), + "marker_length_m": marker_length, + "dictionary": dictionary, + "camera_stream": args.camera, + "raw_stream": RAW_STREAM, + "filtered_stream": FILTERED_STREAM, + "dynamic_tags_excluded": dynamic_tags, + } + result = summarize(store) + _update_summary_json(summary_path, filter_parameters=filter_parameters, result=result) + print( + f" updated {summary_path} april_tags (filter_parameters + result); dynamic={dynamic_tags}" + ) + + +if __name__ == "__main__": + main() diff --git a/dimos/navigation/jnav/components/loop_closure/gsc_pgo/detect_tags.py b/dimos/navigation/jnav/components/loop_closure/gsc_pgo/detect_tags.py new file mode 100644 index 0000000000..71b11201cc --- /dev/null +++ b/dimos/navigation/jnav/components/loop_closure/gsc_pgo/detect_tags.py @@ -0,0 +1,175 @@ +# Copyright 2026 Dimensional Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Untyped analysis script: gtsam/open3d/cv2 lack type stubs. +# mypy: ignore-errors +"""Build the raw AprilTag stream: EVERY detection over the camera image, NO filtering whatsoever +(no blur/reproj/distance/angle/motion/size gate, no time-clustering). One row per per-frame +detection that yields a valid PnP pose. Each row carries its gate diagnostics in tags +(sharpness, reproj_px, tag_px, distance_m, view_angle_deg, lin_speed, ang_speed) so downstream +gate tuning in post_process.py needs no re-detection. + +Prints the raw per-marker histogram + visit structure (visit = sightings >30s apart). + +Usage: python dimos/navigation/jnav/components/loop_closure/gsc_pgo/detect_tags.py --rec=PATH + [--camera=color_image] [--tag-size=0.10] + [--dict=DICT_APRILTAG_36h11] [--intrinsics=PATH] [--out=raw_april_tags] +""" + +import json +from pathlib import Path +import sys + +import cv2 +import numpy as np +from scipy.spatial.transform import Rotation + +from dimos.msgs.geometry_msgs.PoseStamped import PoseStamped +from dimos.msgs.sensor_msgs.Image import Image +from dimos.navigation.jnav.utils import recording_db as rdb +from dimos.navigation.jnav.utils.apriltags import ( + _camera_speeds, + estimate_marker_pose, + make_detector, + reprojection_error_px, + tag_pixel_size, + tag_sharpness, + view_quality, +) + + +def arg(flag, default=None): + return next((a.split("=", 1)[1] for a in sys.argv if a.startswith(flag + "=")), default) + + +REC_ARG = arg("--rec") +if not REC_ARG: + sys.exit( + "usage: python dimos/navigation/jnav/components/loop_closure/gsc_pgo/detect_tags.py --rec=PATH [--camera=...] [--tag-size=...] " + "[--dict=...] [--intrinsics=PATH] [--out=...] (--rec is required)" + ) +REC = Path(REC_ARG).expanduser() +CAMERA = arg("--camera", "color_image") # camera image stream the tags are detected on +MARKER_LENGTH_M = float(arg("--tag-size", "0.10")) +DICTIONARY = arg("--dict", "DICT_APRILTAG_36h11") +STREAM = arg("--out", "raw_april_tags") +VISIT_GAP_S = 30.0 + +DB = REC / "mem2.db" +intr_path = Path(arg("--intrinsics", str(REC / "camera_intrinsics.json"))).expanduser() +intr = json.loads(intr_path.read_text()) +K = np.array(intr["intrinsics"], float).reshape(3, 3) +dist = np.array(intr.get("distortion", []), float) +st = rdb.store(DB) +if CAMERA not in st.list_streams(): + sys.exit(f"!! camera stream '{CAMERA}' not in db — available: {st.list_streams()}") + +detector = make_detector(DICTIONARY) +print(f"loading '{CAMERA}' frames...") +images = st.stream(CAMERA, Image).to_list() +speed_by_ts, speed_available = _camera_speeds(images) +print( + f"detecting over {len(images)} frames (unfiltered), tag_size={MARKER_LENGTH_M} m, dict={DICTIONARY}..." +) + +rows = [] +for image_obs in images: + image = image_obs.data + bgr = image.numpy() if hasattr(image, "numpy") else np.asarray(image.data) + gray = cv2.cvtColor(bgr, cv2.COLOR_BGR2GRAY) if bgr.ndim == 3 else bgr + all_corners, marker_ids, _ = detector.detectMarkers(bgr) + if marker_ids is None: + continue + for corners, marker_id in zip(all_corners, marker_ids.flatten(), strict=False): + pose = estimate_marker_pose(corners, MARKER_LENGTH_M, K, dist) + if pose is None: + continue + rvec, tvec = pose + quat = Rotation.from_rotvec(rvec.reshape(3)).as_quat() # x,y,z,w + t = tvec.reshape(3) + tcm = [ + float(t[0]), + float(t[1]), + float(t[2]), + float(quat[0]), + float(quat[1]), + float(quat[2]), + float(quat[3]), + ] + distance, view_angle = view_quality(tcm) + speed = speed_by_ts.get(float(image_obs.ts)) + rows.append( + { + "ts": float(image_obs.ts), + "marker_id": int(marker_id), + "tcm": tcm, + "sharpness": float(tag_sharpness(gray, corners)), + "reproj_px": float( + reprojection_error_px(corners, rvec, tvec, MARKER_LENGTH_M, K, dist) + ), + "tag_px": float(tag_pixel_size(corners)), + "distance_m": float(distance), + "view_angle_deg": float(view_angle), + "lin_speed": float(speed[0]) if speed else -1.0, + "ang_speed": float(speed[1]) if speed else -1.0, + } + ) +rows.sort(key=lambda r: r["ts"]) + +if STREAM in st.list_streams(): + st.delete_stream(STREAM) +out = st.stream(STREAM, PoseStamped) +for r in rows: + tcm = r["tcm"] + out.append( + PoseStamped(ts=r["ts"], position=tcm[:3], orientation=tcm[3:]), + ts=r["ts"], + pose=tuple(tcm), + tags={ + k: r[k] + for k in ( + "marker_id", + "sharpness", + "reproj_px", + "tag_px", + "distance_m", + "view_angle_deg", + "lin_speed", + "ang_speed", + ) + }, + ) +print(f"\nwrote {STREAM}: {len(rows)} unfiltered detections") + +by = {} +for r in rows: + by.setdefault(r["marker_id"], []).append(r) +print(f"\n=== RAW per-marker (visit = >{VISIT_GAP_S:.0f}s apart) ===") +print( + f"{'tag':>4} {'det':>4} {'visits':>6} {'dist_m':>12} {'sharp>=60%':>10} {'reproj<=2%':>10} {'span_s':>7}" +) +for mid in sorted(by): + rs = sorted(by[mid], key=lambda r: r["ts"]) + times = [r["ts"] for r in rs] + visits = [[times[0]]] + for tt in times[1:]: + (visits[-1].append(tt) if tt - visits[-1][-1] <= VISIT_GAP_S else visits.append([tt])) + d = [r["distance_m"] for r in rs] + sharp_ok = 100 * np.mean([r["sharpness"] >= 60 for r in rs]) + reproj_ok = 100 * np.mean([r["reproj_px"] <= 2 for r in rs]) + print( + f"{mid:>4} {len(rs):>4} {len(visits):>6} {min(d):5.2f}-{max(d):5.2f} " + f"{sharp_ok:9.0f}% {reproj_ok:9.0f}% {times[-1] - times[0]:7.0f}" + ) +print("\nmarkers present:", sorted(by)) diff --git a/dimos/navigation/jnav/components/loop_closure/gsc_pgo/make_rrd.py b/dimos/navigation/jnav/components/loop_closure/gsc_pgo/make_rrd.py new file mode 100644 index 0000000000..bfd2db540a --- /dev/null +++ b/dimos/navigation/jnav/components/loop_closure/gsc_pgo/make_rrd.py @@ -0,0 +1,184 @@ +# Copyright 2026 Dimensional Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Untyped analysis script: gtsam/open3d/cv2 lack type stubs. +# mypy: ignore-errors +"""Combined comparison rrd: raw lidar cloud + EVERY gt_*_lidar version present in the db, each as +its own colored entity, plus AprilTag landmarks + trajectories. Re-run after adding a new GT method +and it picks the new stream up automatically. + +Importable: `build(...)` writes the rrd and returns its path (used by post_process.py). +Standalone: python dimos/navigation/jnav/components/loop_closure/gsc_pgo/make_rrd.py --rec=PATH [--lidar=...] [--odom=...] [--tags=...] [--out=...] +""" + +import json +from pathlib import Path +import sys + +from gtsam import Point3, Pose3, Rot3 +import numpy as np +import rerun as rr + +from dimos.navigation.jnav.utils import recording_db as rdb + +SCAN_STRIDE, VOXEL = 8, 0.10 +COLORS = {"raw": [220, 60, 60]} +PALETTE = [ + [60, 120, 230], + [60, 210, 90], + [230, 180, 50], + [200, 80, 220], + [80, 220, 220], + [240, 130, 60], +] +# same relaxed gates post_process uses, for placing landmark markers +GATE = dict(s=25.0, r=3.5, px=12.0, d=1.5, a=65.0, lv=1.5, av=150.0) + + +def build( + rec, + lidar_stream="pointlio_lidar", + odom_stream="pointlio_odometry", + tag_stream="raw_april_tags", + out_name="gt_compare.rrd", +): + rec = Path(rec).expanduser() + db = rec / "mem2.db" + out = rec / out_name + st = rdb.store(db) + intr = json.loads((rec / "camera_intrinsics.json").read_text()) + ext = np.array(intr["optical_in_base"], float) + Tbo = Pose3(Rot3.Quaternion(ext[6], ext[3], ext[4], ext[5]), Point3(ext[0], ext[1], ext[2])) + + def accumulate(name): + pts = [] + for i, o in enumerate(st.stream(name)): + if i % SCAN_STRIDE: + continue + xyz = np.asarray(o.data.points_f32()) + if len(xyz): + pts.append(xyz[::3]) + a = np.concatenate(pts, 0) + _, idx = np.unique(np.floor(a / VOXEL).astype(np.int64), axis=0, return_index=True) + return a[idx] + + def traj(name): + return np.array( + [ + [o.data.pose.position.x, o.data.pose.position.y, o.data.pose.position.z] + for o in st.stream(name) + ], + np.float32, + ) + + def landmarks(gt_odom): + gt = [ + ( + o.ts, + Pose3( + Rot3.Quaternion( + o.data.pose.orientation.w, + o.data.pose.orientation.x, + o.data.pose.orientation.y, + o.data.pose.orientation.z, + ), + Point3(o.data.pose.position.x, o.data.pose.position.y, o.data.pose.position.z), + ), + ) + for o in st.stream(gt_odom) + ] + gts = np.array([t for t, _ in gt]) + pos = {} + for obs in st.stream(tag_stream): + t = obs.tags + if not ( + float(t["sharpness"]) >= GATE["s"] + and float(t["reproj_px"]) <= GATE["r"] + and float(t["tag_px"]) >= GATE["px"] + and float(t["distance_m"]) <= GATE["d"] + and float(t["view_angle_deg"]) <= GATE["a"] + and (float(t["lin_speed"]) < 0 or float(t["lin_speed"]) <= GATE["lv"]) + and (float(t["ang_speed"]) < 0 or float(t["ang_speed"]) <= GATE["av"]) + ): + continue + ps = obs.data + cb = gt[int(np.argmin(np.abs(gts - float(obs.ts))))][1] + Tw = cb.compose(Tbo).compose( + Pose3( + Rot3.Quaternion( + ps.orientation.w, ps.orientation.x, ps.orientation.y, ps.orientation.z + ), + Point3(ps.x, ps.y, ps.z), + ) + ) + pos.setdefault(int(t["marker_id"]), []).append(np.asarray(Tw.translation())) + means = [np.mean(value, 0) for key, value in sorted(pos.items())] + lbl = [f"tag{key}" for key in sorted(pos)] + return np.array(means), lbl + + streams = st.list_streams() + gt_lidars = sorted(s for s in streams if s.startswith("gt_") and "_lidar" in s) + print("raw + GT lidar streams:", gt_lidars) + + rr.init("gt_compare") + rr.save(str(out)) + rr.log( + "raw/cloud", + rr.Points3D(accumulate(lidar_stream), colors=COLORS["raw"], radii=0.02), + static=True, + ) + rr.log( + "raw/trajectory", rr.LineStrips3D([traj(odom_stream)], colors=[255, 120, 120]), static=True + ) + for k, name in enumerate(gt_lidars): + color = PALETTE[k % len(PALETTE)] + cloud = accumulate(name) + rr.log(f"{name}/cloud", rr.Points3D(cloud, colors=color, radii=0.02), static=True) + print(f" logged {name}: {len(cloud):,} pts") + odom = name.replace("_lidar", "_odometry") + if odom in streams: + rr.log(f"{name}/trajectory", rr.LineStrips3D([traj(odom)], colors=color), static=True) + # landmarks placed against the first available gt odometry + gt_odoms = sorted(s for s in streams if s.startswith("gt_") and "_odometry" in s) + if gt_odoms: + lm, lbl = landmarks(gt_odoms[0]) + if len(lm): + rr.log( + "landmarks", + rr.Points3D(lm, colors=[255, 230, 0], radii=0.25, labels=lbl), + static=True, + ) + print(f" logged {len(lbl)} landmarks") + print("wrote", out) + return out + + +def _arg(flag, default=None): + return next((a.split("=", 1)[1] for a in sys.argv if a.startswith(flag + "=")), default) + + +if __name__ == "__main__": + rec_arg = _arg("--rec") + if not rec_arg: + sys.exit( + "usage: python dimos/navigation/jnav/components/loop_closure/gsc_pgo/make_rrd.py --rec=PATH [--lidar=...] [--odom=...] " + "[--tags=...] [--out=...] (--rec is required)" + ) + build( + rec_arg, + lidar_stream=_arg("--lidar", "pointlio_lidar"), + odom_stream=_arg("--odom", "pointlio_odometry"), + tag_stream=_arg("--tags", "raw_april_tags"), + out_name=_arg("--out", "gt_compare.rrd"), + ) diff --git a/dimos/navigation/jnav/components/loop_closure/gsc_pgo/post_process.py b/dimos/navigation/jnav/components/loop_closure/gsc_pgo/post_process.py new file mode 100644 index 0000000000..609b80f93b --- /dev/null +++ b/dimos/navigation/jnav/components/loop_closure/gsc_pgo/post_process.py @@ -0,0 +1,604 @@ +# Copyright 2026 Dimensional Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Untyped analysis script: gtsam/open3d/cv2 lack type stubs. +# mypy: ignore-errors +"""AprilTag-loop-closed + ICP-refined ground-truth post-processing for a go2 recording. + +Source of tags = the UNFILTERED tag stream (build it first with detect_tags.py). Each raw +detection carries its gate diagnostics, so gates are applied here post-hoc (no re-detection) +and are easy to relax. Factors: one robust (best-reproj) observation per keyframe x marker -> +denser, balanced loop closure than one-medoid-per-visit. + +Two-stage solve: (1) GTSAM tag PGO -- anisotropic odometry between-factors (stiff roll/pitch + z +anchor gravity, loose yaw) + quality-weighted AprilTag landmark factors fix macro drift; +(2) ICP loop closures between spatially-close / temporally-distant lidar submaps anchor local +geometry. Writes _odometry / _lidar back into the recording db, optionally a .pc2.lcm +log of the corrected cloud, and opens a comparison rrd. + +Usage: python dimos/navigation/jnav/components/loop_closure/gsc_pgo/post_process.py [odom|lidar|both] --rec=PATH + [--lidar=pointlio_lidar] [--odom=pointlio_odometry] [--tags=raw_april_tags] + [--out=gt_pointlio] [--suffix=...] [--ignore-tags=17] [--no-icp] [--no-lcm] [--no-rrd] +""" + +import json +from pathlib import Path +import sqlite3 +import sys +import time + +from gtsam import ( + BetweenFactorPose3, + LevenbergMarquardtOptimizer, + LevenbergMarquardtParams, + NonlinearFactorGraph, + Point3, + Pose3, + PriorFactorPose3, + Rot3, + Symbol, + Values, + noiseModel, +) +import numpy as np + +from dimos.msgs.geometry_msgs.Pose import Pose +from dimos.msgs.nav_msgs.Odometry import Odometry +from dimos.msgs.sensor_msgs.PointCloud2 import PointCloud2 +from dimos.navigation.jnav.utils import recording_db as rdb +from dimos.navigation.jnav.utils.apriltags import ( + DEFAULT_MAX_ANGULAR_SPEED_DPS, + DEFAULT_MAX_DISTANCE_M, + DEFAULT_MAX_LINEAR_SPEED_MPS, + DEFAULT_MAX_REPROJ_PX, + DEFAULT_MAX_VIEW_ANGLE_DEG, + DEFAULT_MIN_SHARPNESS, + DEFAULT_MIN_TAG_PX, +) + +VISIT_GAP_S = 30.0 +WHAT = sys.argv[1] if len(sys.argv) > 1 and not sys.argv[1].startswith("-") else "both" + + +def arg(flag, default=""): + return next((a.split("=", 1)[1] for a in sys.argv if a.startswith(flag + "=")), default) + + +REC_ARG = arg("--rec") +SUFFIX = arg("--suffix") +LIDAR_STREAM = arg("--lidar", "pointlio_lidar") # input lidar stream (world-registered scans) +ODOM_STREAM = arg("--odom", "pointlio_odometry") # input odometry stream (keyframe source) +RAW_STREAM = arg("--tags", "raw_april_tags") # input unfiltered AprilTag stream +IGNORE_TAGS = { + int(x) for x in arg("--ignore-tags").replace(",", " ").split() +} # dynamic/moving tags +OUT_PREFIX = arg("--out", "gt_pointlio") # output prefix -> _odometry / _lidar +WRITE_LCM = "--no-lcm" not in sys.argv # also emit _lidar.pc2.lcm of the corrected cloud +OPEN_RRD = "--no-rrd" not in sys.argv # build + open a comparison rrd at the end +LCM_VOXEL = float(arg("--lcm-voxel", "0.05")) # voxel size for the aggregated .pc2.lcm cloud +LCM_OUTLIER_NN = 20 # statistical outlier removal: neighbor count +LCM_OUTLIER_STD = 2.0 # ...and std-ratio threshold (lower = more aggressive) +LIDAR_FRAME = arg("--lidar-frame", "mid360_link") # frame the raw lidar scans live in +WORLD_FRAME = arg("--world-frame", "world") # frame to register scans into +USE_TF = "--no-tf" not in sys.argv # world-register via recording tf (fallback: obs.pose) +TF_TOL = float(arg("--tf-tol", "0.5")) # max seconds to the nearest recorded transform + +# Per-glimpse gates (speed == -1 means "unknown" and always passes). +GATE = dict( + min_sharpness=DEFAULT_MIN_SHARPNESS, + max_reproj_px=DEFAULT_MAX_REPROJ_PX, + min_tag_px=DEFAULT_MIN_TAG_PX, + max_distance_m=DEFAULT_MAX_DISTANCE_M, + max_view_angle_deg=DEFAULT_MAX_VIEW_ANGLE_DEG, + max_lin_speed=DEFAULT_MAX_LINEAR_SPEED_MPS, + max_ang_speed=DEFAULT_MAX_ANGULAR_SPEED_DPS, +) + +if not REC_ARG: + sys.exit( + "usage: python dimos/navigation/jnav/components/loop_closure/gsc_pgo/post_process.py [odom|lidar|both] --rec=PATH " + "[--lidar=...] [--odom=...] [--tags=...] [--out=...] [--suffix=...] " + "[--no-icp] [--no-lcm] [--no-rrd] (--rec is required: path to the recording dir)" + ) +REC = Path(REC_ARG).expanduser() +DB = REC / "mem2.db" +intr = json.loads((REC / "camera_intrinsics.json").read_text()) +ext = np.array(intr["optical_in_base"], float) +Tbo = Pose3(Rot3.Quaternion(ext[6], ext[3], ext[4], ext[5]), Point3(ext[0], ext[1], ext[2])) +st = rdb.store(DB) +if RAW_STREAM not in st.list_streams(): + sys.exit( + f"!! {RAW_STREAM} missing -- run detect_tags.py first to build the unfiltered tag stream." + ) +from dimos.memory2.db_tf import transform_matrix + +_TF_AVAILABLE = USE_TF and st.tf.has_transforms() + + +def world_points(obs): + """Nx3 world-registered points for a lidar observation. + + Primary: the recording's tf (``world <- LIDAR_FRAME``) at the scan time. + Fallback: the observation's stored pose as the transform. Otherwise the + scan is assumed already world-registered (legacy recordings). + """ + pts = np.asarray(obs.data.points_f32()) + if not len(pts): + return pts + if _TF_AVAILABLE: + tf = st.tf.get(WORLD_FRAME, LIDAR_FRAME, float(obs.ts), TF_TOL) + if tf is not None: + rot, trans = transform_matrix(tf) + return pts @ rot.T + trans + pose = getattr(obs, "pose", None) + if isinstance(pose, (tuple, list)) and len(pose) >= 7: + rot = Rot3.Quaternion(pose[6], pose[3], pose[4], pose[5]).matrix() + return pts @ rot.T + np.array(pose[:3], float) + return pts # already world-registered + + +print(f"recording: {REC}", flush=True) +if _TF_AVAILABLE: + print(f"world-registering {LIDAR_STREAM} via tf ({WORLD_FRAME} <- {LIDAR_FRAME})", flush=True) +print( + f"streams: tags={RAW_STREAM} odom={ODOM_STREAM} lidar={LIDAR_STREAM} -> out={OUT_PREFIX}{SUFFIX}", + flush=True, +) + + +def passes(t): + return ( + t["sharpness"] >= GATE["min_sharpness"] + and t["reproj_px"] <= GATE["max_reproj_px"] + and t["tag_px"] >= GATE["min_tag_px"] + and t["distance_m"] <= GATE["max_distance_m"] + and t["view_angle_deg"] <= GATE["max_view_angle_deg"] + and (t["lin_speed"] < 0 or t["lin_speed"] <= GATE["max_lin_speed"]) + and (t["ang_speed"] < 0 or t["ang_speed"] <= GATE["max_ang_speed"]) + ) + + +# read raw detections (pose + diagnostics) +print("reading tag detections...", flush=True) +raw = [] +for obs in st.stream(RAW_STREAM): + ps = obs.data + tg = obs.tags + raw.append( + dict( + ts=float(obs.ts), + marker_id=int(tg["marker_id"]), + T_cam_tag=Pose3( + Rot3.Quaternion( + ps.orientation.w, ps.orientation.x, ps.orientation.y, ps.orientation.z + ), + Point3(ps.x, ps.y, ps.z), + ), + reproj_px=float(tg["reproj_px"]), + **{ + k: float(tg[k]) + for k in ( + "sharpness", + "tag_px", + "distance_m", + "view_angle_deg", + "lin_speed", + "ang_speed", + ) + }, + ) + ) +gated = [t for t in raw if passes(t) and t["marker_id"] not in IGNORE_TAGS] + +# keyframes from raw odometry +con = sqlite3.connect(f"file:{DB}?mode=ro", uri=True) +fo = np.array( + list( + con.execute( + "select ts,pose_x,pose_y,pose_z,pose_qx,pose_qy,pose_qz,pose_qw " + f"from {ODOM_STREAM} order by ts" + ) + ), + float, +) +con.close() + + +def pr(r): + return Rot3.Quaternion(r[7], r[4], r[5], r[6]), np.array(r[1:4]) + + +ki = [0] +prev_rot, prev_pos = pr(fo[0]) +for i in range(1, len(fo)): + rot, pos = pr(fo[i]) + if ( + np.linalg.norm(pos - prev_pos) > 0.5 + or np.degrees(np.linalg.norm(Rot3.Logmap(prev_rot.inverse() * rot))) > 10 + ): + ki.append(i) + prev_rot, prev_pos = rot, pos +kfs = [pr(fo[i]) for i in ki] +kts = fo[ki, 0] +N = len(kfs) + +# one factor per keyframe x marker: keep the best-reproj detection in each bucket +bucket = {} +for t in gated: + kf = int(np.argmin(np.abs(kts - t["ts"]))) + key = (kf, t["marker_id"]) + if key not in bucket or t["reproj_px"] < bucket[key]["reproj_px"]: + bucket[key] = t + +# revisit report +raw_by, vis_by = {}, {} +for t in raw: + raw_by.setdefault(t["marker_id"], 0) + raw_by[t["marker_id"]] += 1 +for (_kf, mid), t in bucket.items(): + vis_by.setdefault(mid, []).append(t["ts"]) + + +def n_visits(times): + times = sorted(times) + visits = [[times[0]]] + for tt in times[1:]: + (visits[-1].append(tt) if tt - visits[-1][-1] <= VISIT_GAP_S else visits.append([tt])) + return len(visits) + + +print(f"gates: {GATE}") +print( + f"raw detections {len(raw)} -> {len(gated)} pass gates -> {len(bucket)} keyframe-tag factors\n" +) +print(f"{'tag':>4} | {'raw viewings':>12} | {'filtered revisits':>17}") +not_revisited = [] +for mid in sorted(raw_by): + nv = n_visits(vis_by[mid]) if mid in vis_by else 0 + flag = "" if nv >= 2 else " <-- NOT REVISITED" + print(f"{mid:>4} | {raw_by[mid]:>12} | {nv:>10} visit(s){flag}") + if nv < 2: + not_revisited.append(mid) +print( + f"\ntags NOT revisited (no loop-closure constraint): {not_revisited if not_revisited else 'none'}\n" +) + +# factor graph + solve +odom = noiseModel.Diagonal.Variances(np.array([1e-8, 1e-8, 1e-5, 1e-4, 1e-4, 1e-6])) +grav0 = noiseModel.Diagonal.Variances(np.array([1e-8, 1e-8, 1e-6, 1e-8, 1e-8, 1e-8])) + + +# quality weighting: planar-PnP pose error grows ~quadratically with range, and reproj_px is a +# direct misfit proxy. Inflate a glimpse's covariance by (dist/REF_D)^2 * (reproj/REF_R)^2 so a +# far/oblique/blurry tag pose contributes almost nothing while close, sharp ones dominate. +REF_D, REF_R = 0.4, 1.0 + + +def tn(Rbt, distance_m=REF_D, reproj_px=REF_R): + s = max((max(distance_m, 0.2) / REF_D) ** 2 * (max(reproj_px, 0.5) / REF_R) ** 2, 0.25) + Rm = Rbt.matrix() + c = np.zeros((6, 6)) + c[:3, :3] = Rm @ np.diag([0.04, 0.04, 0.0025]) @ Rm.T + c[3:, 3:] = Rm @ np.diag([0.0025, 0.0025, 0.25]) @ Rm.T + return noiseModel.Gaussian.Covariance(c * s) + + +print(f"building factor graph over {N} keyframes...", flush=True) +g = NonlinearFactorGraph() +v = Values() +for i in range(N): + rot, pos = kfs[i] + v.insert(i, Pose3(rot, Point3(pos))) + if i == 0: + g.add(PriorFactorPose3(0, Pose3(rot, Point3(pos)), grav0)) + else: + rot_prev, pos_prev = kfs[i - 1] + g.add( + BetweenFactorPose3( + i - 1, + i, + Pose3( + rot_prev.inverse() * rot, + Point3(rot_prev.inverse().rotate(Point3(pos - pos_prev))), + ), + odom, + ) + ) +seen = set() +for (kf, mid), t in sorted(bucket.items()): + rot, pos = kfs[kf] + kp = Pose3(rot, Point3(pos)) + T = Tbo.compose(t["T_cam_tag"]) + if mid not in seen: + seen.add(mid) + v.insert(Symbol("l", mid).key(), kp.compose(T)) + g.add( + BetweenFactorPose3( + kf, Symbol("l", mid).key(), T, tn(T.rotation(), t["distance_m"], t["reproj_px"]) + ) + ) + +print("solving stage 1 (tag PGO)...", flush=True) +lm_params = LevenbergMarquardtParams() +lm_params.setMaxIterations(200) +est = LevenbergMarquardtOptimizer(g, v, lm_params).optimize() +raw_kf = [Pose3(kfs[i][0], Point3(kfs[i][1])) for i in range(N)] + +# STAGE 2: ICP loop-closure refinement (tags=macro, lidar ICP=local anchor) +# Tag PGO has pulled revisits roughly together; now ICP the lidar submaps of spatially-close, +# temporally-distant keyframe pairs to add precise 6-DOF relative constraints, then re-solve. +ICP = "--no-icp" not in sys.argv +if ICP: + import open3d as o3d + from scipy.spatial import cKDTree + + ICP_RADIUS_M = 4.0 # tag-corrected positions must be within this to be a revisit candidate + ICP_MIN_DT_S = 25.0 # ...and at least this far apart in time (a real revisit, not adjacency) + ICP_MAX_CORR_M = 0.6 # ICP correspondence distance + ICP_VOXEL = 0.15 + ICP_FIT_MIN, ICP_RMSE_MAX = 0.45, 0.25 + SUBMAP_HALF_S = 1.0 # accumulate scans within +/- this of a keyframe time into its submap + + corr = [est.atPose3(i) for i in range(N)] + cpos = np.array([np.asarray(p.translation()) for p in corr]) + + # revisit candidate pairs + tree = cKDTree(cpos) + pairs = set() + for i, j in tree.query_pairs(ICP_RADIUS_M): + if abs(kts[i] - kts[j]) >= ICP_MIN_DT_S: + pairs.add((min(i, j), max(i, j))) + pairs = sorted(pairs, key=lambda p: np.linalg.norm(cpos[p[0]] - cpos[p[1]])) + involved = {k for p in pairs for k in p} + print( + f"ICP stage: {len(pairs)} revisit candidate pairs over {len(involved)} keyframes", + flush=True, + ) + + # build per-(involved)-keyframe body-frame submaps from the input lidar (odom-registered) + print("ICP stage: reading lidar submaps...", flush=True) + submap = {k: [] for k in involved} + scan_n = 0 + t_sub = time.time() + for obs in st.stream(LIDAR_STREAM): + scan_n += 1 + if scan_n % 20000 == 0: + print(f" read {scan_n} scans, {time.time() - t_sub:.0f}s", flush=True) + ts = float(obs.ts) + k = int(np.argmin(np.abs(kts - ts))) + if k not in submap or abs(kts[k] - ts) > SUBMAP_HALF_S: + continue + rot_k, pos_k = kfs[k] + world = world_points(obs) + submap[k].append((world - pos_k) @ rot_k.matrix()) # world -> kf-k body frame + pcd = {} + for k, chunks in submap.items(): + if not chunks: + continue + p = o3d.geometry.PointCloud() + p.points = o3d.utility.Vector3dVector(np.concatenate(chunks, 0).astype(np.float64)) + p = p.voxel_down_sample(ICP_VOXEL) + p.estimate_normals(o3d.geometry.KDTreeSearchParamHybrid(radius=0.5, max_nn=30)) + pcd[k] = p + print(f"ICP stage: built {len(pcd)} submaps, registering {len(pairs)} pairs...", flush=True) + + icp_noise = noiseModel.Robust.Create( + noiseModel.mEstimator.Huber.Create(1.345), + noiseModel.Diagonal.Variances(np.array([4e-4, 4e-4, 4e-4, 2.5e-3, 2.5e-3, 2.5e-3])), + ) + added = 0 + t_icp = time.time() + for pair_n, (i, j) in enumerate(pairs): + if pair_n and pair_n % 5000 == 0: + print( + f" registered {pair_n}/{len(pairs)} pairs, {added} accepted, {time.time() - t_icp:.0f}s", + flush=True, + ) + if i not in pcd or j not in pcd: + continue + init = (corr[i].inverse() * corr[j]).matrix() # i<-j initial guess from tag correction + res = o3d.pipelines.registration.registration_icp( + pcd[j], + pcd[i], + ICP_MAX_CORR_M, + init, + o3d.pipelines.registration.TransformationEstimationPointToPlane(), + ) + if res.fitness >= ICP_FIT_MIN and res.inlier_rmse <= ICP_RMSE_MAX: + T = res.transformation + g.add(BetweenFactorPose3(i, j, Pose3(Rot3(T[:3, :3]), Point3(T[:3, 3])), icp_noise)) + added += 1 + print( + f"ICP stage: accepted {added}/{len(pairs)} loop closures " + f"(fit>={ICP_FIT_MIN}, rmse<={ICP_RMSE_MAX}m)", + flush=True, + ) + if added: + # est already holds every key (keyframe + landmark poses); warm-start from it + print("solving stage 2 (tag PGO + ICP closures)...", flush=True) + est = LevenbergMarquardtOptimizer(g, est, lm_params).optimize() + +C = [est.atPose3(i).compose(raw_kf[i].inverse()) for i in range(N)] +shift = max(float(np.linalg.norm(np.asarray(C[i].translation()))) for i in range(N)) +print( + f"PGO: N={N} keyframes, {len(bucket)} tag factors over {len(seen)} markers, " + f"max correction shift {shift:.1f} m", + flush=True, +) + + +def C_at(ts): + if ts <= kts[0]: + return C[0] + if ts >= kts[-1]: + return C[-1] + j = int(np.searchsorted(kts, ts)) + a, b = j - 1, j + al = (ts - kts[a]) / (kts[b] - kts[a]) + return C[a].compose(Pose3.Expmap(al * Pose3.Logmap(C[a].between(C[b])))) + + +def pose_tuple(P): + t = P.translation() + q = P.rotation().toQuaternion() + return (t[0], t[1], t[2], q.x(), q.y(), q.z(), q.w()) + + +if WHAT in ("odom", "both"): + name = f"{OUT_PREFIX}_odometry{SUFFIX}" + if name in st.list_streams(): + st.delete_stream(name) + out = st.stream(name, Odometry) + print(f"writing {name} ({len(fo)} poses)...", flush=True) + n = 0 + t0 = time.time() + for r in fo: + ts = float(r[0]) + P = C_at(ts).compose( + Pose3(Rot3.Quaternion(r[7], r[4], r[5], r[6]), Point3(r[1], r[2], r[3])) + ) + x, y, zz, qx, qy, qz, qw = pose_tuple(P) + out.append( + Odometry( + ts=ts, + frame_id="odom", + child_frame_id="base_link", + pose=Pose(x, y, zz, qx, qy, qz, qw), + ), + ts=ts, + pose=(x, y, zz, qx, qy, qz, qw), + ) + n += 1 + if n % 20000 == 0: + print(f" {n}/{len(fo)} poses, {time.time() - t0:.0f}s", flush=True) + print(f"wrote {name}: {len(fo)} poses in {time.time() - t0:.0f}s", flush=True) + +if WHAT in ("lidar", "both"): + name = f"{OUT_PREFIX}_lidar{SUFFIX}" + fo_ts = fo[:, 0] + + def base_pose(ts): + j = int(np.searchsorted(fo_ts, ts)) + j = min(max(j, 0), len(fo) - 1) + if j > 0 and abs(fo_ts[j - 1] - ts) < abs(fo_ts[j] - ts): + j -= 1 + r = fo[j] + return Pose3(Rot3.Quaternion(r[7], r[4], r[5], r[6]), Point3(r[1], r[2], r[3])) + + if name in st.list_streams(): + st.delete_stream(name) + out = st.stream(name, PointCloud2) + + # The db stream stays per-scan, but the .pc2.lcm is ONE aggregated cloud (voxel-downsampled + + # statistical-outlier-removed), not 5184 per-scan events. Intensity rides through open3d's + # voxel averaging via the color channel. Chunks are collapsed every CHUNK scans to bound memory. + if WRITE_LCM: + import open3d as o3d + + CHUNK = 1000 + agg_xyz, agg_i = [], [] # incrementally voxel-downsampled chunks + buf_xyz, buf_i = [], [] + have_inten = False + + def collapse(xyz_list, i_list, voxel): + pc = o3d.geometry.PointCloud() + pc.points = o3d.utility.Vector3dVector(np.concatenate(xyz_list).astype(np.float64)) + carry = bool(i_list) + if carry: + inten_col = np.concatenate(i_list).astype(np.float64)[:, None] + pc.colors = o3d.utility.Vector3dVector(np.repeat(inten_col, 3, axis=1)) + pc = pc.voxel_down_sample(voxel) + dx = np.asarray(pc.points, np.float32) + di = np.asarray(pc.colors, np.float32)[:, 0] if carry else None + return dx, di + + print(f"writing {name} (corrected lidar)...", flush=True) + n = 0 + t0 = time.time() + for obs in st.stream(LIDAR_STREAM): + ts = float(obs.ts) + Cts = C_at(ts) + Rm = Cts.rotation().matrix() + tv = np.asarray(Cts.translation()) + xyz = world_points(obs) + inten = obs.data.intensities_f32() + xyz2 = (xyz @ Rm.T + tv).astype(np.float32) + m = PointCloud2.from_numpy( + xyz2, frame_id="odom", intensities=(np.asarray(inten) if inten is not None else None) + ) + m.ts = ts # stamp the cloud (lcm_encode needs a non-None ts) + out.append(m, ts=ts, pose=pose_tuple(Cts.compose(base_pose(ts)))) + if WRITE_LCM: + buf_xyz.append(xyz2) + if inten is not None: + have_inten = True + buf_i.append(np.asarray(inten, np.float32)) + if len(buf_xyz) >= CHUNK: + dx, di = collapse(buf_xyz, buf_i if have_inten else [], LCM_VOXEL) + agg_xyz.append(dx) + if di is not None: + agg_i.append(di) + buf_xyz, buf_i = [], [] + n += 1 + if n % 2000 == 0: + print(f" {n} scans, {time.time() - t0:.0f}s", flush=True) + print(f"wrote {name}: {n} scans in {time.time() - t0:.0f}s", flush=True) + + if WRITE_LCM: + if buf_xyz: # flush remainder + dx, di = collapse(buf_xyz, buf_i if have_inten else [], LCM_VOXEL) + agg_xyz.append(dx) + if di is not None: + agg_i.append(di) + # final unified voxel pass over the per-chunk results, then statistical outlier removal + merged_i = agg_i if have_inten else [] + dx, di = collapse(agg_xyz, merged_i, LCM_VOXEL) + print( + f"aggregating .pc2.lcm: {len(dx):,} pts after voxel, removing outliers...", flush=True + ) + pc = o3d.geometry.PointCloud() + pc.points = o3d.utility.Vector3dVector(dx.astype(np.float64)) + if di is not None: + pc.colors = o3d.utility.Vector3dVector( + np.repeat(di.astype(np.float64)[:, None], 3, axis=1) + ) + pc, _keep = pc.remove_statistical_outlier(LCM_OUTLIER_NN, LCM_OUTLIER_STD) + merged_xyz = np.asarray(pc.points, np.float32) + merged_inten = np.asarray(pc.colors, np.float32)[:, 0] if di is not None else None + merged = PointCloud2.from_numpy(merged_xyz, frame_id="odom", intensities=merged_inten) + merged.ts = float(fo_ts[0]) + lcm_path = REC / f"{name}.pc2.lcm" + lcm_path.write_bytes(merged.lcm_encode()) + print( + f"wrote {lcm_path}: 1 aggregated cloud, {len(merged_xyz):,} pts " + f"(voxel {LCM_VOXEL} m, outlier nn={LCM_OUTLIER_NN}/std={LCM_OUTLIER_STD})", + flush=True, + ) + +# build + open the comparison rrd +if OPEN_RRD and WHAT in ("lidar", "both"): + import subprocess + + from dimos.navigation.jnav.components.loop_closure.gsc_pgo import make_rrd + + print("building comparison rrd...", flush=True) + rrd_path = make_rrd.build( + REC, lidar_stream=LIDAR_STREAM, odom_stream=ODOM_STREAM, tag_stream=RAW_STREAM + ) + rerun_bin = Path(sys.executable).parent / "rerun" + if rerun_bin.exists(): + subprocess.Popen([str(rerun_bin), str(rrd_path)]) + print(f"opened {rrd_path}", flush=True) + else: + print(f"rerun binary not found at {rerun_bin}; open manually: rerun {rrd_path}", flush=True) From 898fdef86d13b3a361cd65db4573e7386e426367 Mon Sep 17 00:00:00 2001 From: Jeff Hykin Date: Wed, 24 Jun 2026 14:53:05 +0800 Subject: [PATCH 11/21] add jnav map post-processing and PGO migration docs --- experimental/docs/jnav/map_postprocessing.md | 75 ++++++++++ experimental/docs/jnav/pgo_migration_plan.md | 144 +++++++++++++++++++ 2 files changed, 219 insertions(+) create mode 100644 experimental/docs/jnav/map_postprocessing.md create mode 100644 experimental/docs/jnav/pgo_migration_plan.md diff --git a/experimental/docs/jnav/map_postprocessing.md b/experimental/docs/jnav/map_postprocessing.md new file mode 100644 index 0000000000..9311066e6c --- /dev/null +++ b/experimental/docs/jnav/map_postprocessing.md @@ -0,0 +1,75 @@ +# Map Postprocessing + +You recorded a run. The lidar map drifted. You want a clean one to compare against — ground truth, basically, without a motion-capture rig. + +This is the offline fix. Point it at a recording, it bends the trajectory back into shape using AprilTags it saw along the way, then snaps the local geometry together with ICP. Out comes a corrected map written back into the same recording. + +It runs on a `.db` after the fact. It is not part of the live nav stack and never touches the robot. + +## What you need in the recording + +A recording dir with `mem2.db` plus `camera_intrinsics.json`, and these streams inside the db: + +- a camera stream (`color_image`) +- odometry (`pointlio_odometry`) +- world-registered lidar scans (`pointlio_lidar`) +- AprilTags physically in the scene, sized and known + +The tags are the whole trick. Drift accumulates, but a tag you saw at minute 1 and again at minute 9 is the *same tag* — so the two sightings have to land in the same spot. That constraint is what pulls the map straight. + +## The three steps + +The scripts live in `dimos/navigation/jnav/components/loop_closure/gsc_pgo/`. + +**1. Detect the tags.** Run the camera frames through detection and write both the raw and the gated tag streams in one step: + +``` +python dimos/navigation/jnav/components/loop_closure/gsc_pgo/add_april.py --rec=PATH +``` + +`add_april.py` writes `raw_april_tags` (every detection, unfiltered, with its quality numbers attached — this is what postprocessing reads), the gated/clustered `april_tags` stream, and an `april_tags` section in the recording's `summary.json` (see below). Leaving `raw_april_tags` unfiltered matters: you tune the quality gates later without re-running detection, which is the slow part. (`detect_tags.py` writes just the raw stream if that's all you want; `--dynamic 17` keeps a moving tag in raw but drops it from the gated stream.) + +Inspect what was found without rebuilding anything — per tag, raw count and revisit count, flagging any tag never revisited (your fast check for whether a recording even has loop constraints): + +``` +python dimos/navigation/jnav/components/loop_closure/gsc_pgo/add_april.py --rec=PATH --summary +``` + +**2. Solve.** Two stages, one command: + +``` +python dimos/navigation/jnav/components/loop_closure/gsc_pgo/post_process.py both --rec=PATH +``` + +- **Tag PGO (GTSAM).** Odometry between-poses are stiff on roll/pitch and z (gravity isn't drifting) and loose on yaw, where the real error lives. The tag sightings are landmark factors, weighted by how good each detection was. This fixes the big-picture drift. +- **ICP refinement.** Lidar submaps that are close in space but far apart in time get aligned to each other. This cleans up the local geometry the tags don't directly constrain. + +It writes `gt_pointlio_odometry` and `gt_pointlio_lidar` back into the db, optionally a `.pc2.lcm` of the corrected cloud, and opens a comparison view. Run `odom`, `lidar`, or `both`. + +**3. Look at it.** `post_process.py` opens the rrd for you, but you can rebuild it anytime: + +``` +python dimos/navigation/jnav/components/loop_closure/gsc_pgo/make_rrd.py --rec=PATH +``` + +Raw cloud in red, every `gt_*` version in its own color, tag landmarks marked. Add another correction method and it shows up automatically — good for comparing approaches side by side. + +## What `add_april.py` records in `summary.json` + +It merges an `april_tags` section into the recording's `summary.json` (other keys preserved; it never touches `camera_intrinsics.json`): + +- `filter_parameters` — the exact gate thresholds used, marker size, dictionary, and any dynamic tags excluded. +- `result` — `all_unfiltered_tag_ids`, and per tag the raw detection count, filtered visits, and revisit count, plus the list of tags never revisited. + +So the gates and the outcome are auditable after the fact. `--summary` recomputes just the `result` from the existing streams (read-only). + +## Knobs worth knowing + +- `--no-icp` — tag PGO only, skip the ICP stage. +- `--no-lcm` / `--no-rrd` — skip the cloud export / the viewer. +- `--out=NAME` — output prefix, if you want to keep several corrections in one db. +- Tag quality gates (sharpness, reprojection error, distance, view angle, motion blur) are single-sourced in `dimos/navigation/jnav/utils/apriltags.py` (the `DEFAULT_*` constants); `post_process.py` and the eval both import them. They're relaxed by default to keep more sightings. Tighten them there if a bad tag pose is yanking the map around. + +## When it won't help + +No tags in the scene, or tags seen only once, means no loop constraints — you get ICP cleanup and not much else. Same story if the camera never got a clean look at a tag. Garbage detections in, garbage map out; that's what the gates are for. diff --git a/experimental/docs/jnav/pgo_migration_plan.md b/experimental/docs/jnav/pgo_migration_plan.md new file mode 100644 index 0000000000..a35efdc31f --- /dev/null +++ b/experimental/docs/jnav/pgo_migration_plan.md @@ -0,0 +1,144 @@ +# PGO → jnav Loop-Closure Migration Plan + +Goal: land the pose-graph-optimization (PGO) / loop-closure work on +`jeff/feat/jnav_pgo` in the new `jnav` layout, extract the C++ + nix flake into a +standalone `github:jeff-hykin/gsc_pgo` repo, and merge in the offline AprilTag +map-postprocessing tooling. + +## Source material + +Two branches hold the pieces; neither alone is the target. + +| Source | Has | Layout | +|---|---|---| +| `jeff/feat/jnav` | `jnav/{msgs,utils}/`, `loop_closure/{gsc_pgo,ivan_pgo,ivan_pgo_transformer,unrefined_pgo}`, `eval.py`, `eval_all.py`, Scan-Context + Landmark C++ | `dimos/navigation/jnav/modules/...` (inline C++) | +| `jeff/feat/better_pgo` | postprocessing scripts (`add_april.py`, `detect_tags.py`, `post_process.py`, `make_rrd.py`), `map_postprocessing.md` doc | `dimos/navigation/nav_stack/modules/pgo/scripts/...` | +| `jeff/feat/jnav_pgo` (current) | nothing yet — only `nav_stack→cmu_nav` rename on top of `main` | — | + +`jeff/feat/jnav` is the more evolved navigation reorg; `better_pgo` is the source +for the AprilTag postprocessing scripts/doc. The migration is the union of both. + +## Target layout (on `jeff/feat/jnav_pgo`) + +``` +dimos/navigation/jnav/ + msgs/ # Graph3D/GraphDelta3D/Landmark/Marker (.py + .hpp + tests) + utils/ # ALL pgo utils (apriltags, trajectory_metrics, recording_db, ...) + components/ # renamed from jnav's "modules/" + loop_closure/ + eval.py + eval_all.py + spec.py # LoopClosure Protocol + gsc_pgo/ + module.py # NativeModule -> builds external gsc_pgo flake + post_process.py # ported from better_pgo scripts/post_process.py + (add_april.py, detect_tags.py, make_rrd.py) # the other postprocess scripts + ivan_pgo/ ivan_pgo_transformer/ unrefined_pgo/ # comparison baselines (eval_all needs them) +experimental/docs/jnav/ + map_postprocessing.md # adapted from docs/capabilities/navigation/map_postprocessing.md +``` + +External repo: +``` +github:jeff-hykin/gsc_pgo # all C++ + flake.nix/flake.lock + flake.nix, flake.lock, CMakeLists.txt + main.cpp, simple_pgo.{cpp,h}, scan_context.{cpp,h}, + commons.{cpp,h}, point_cloud_utils.hpp, dimos_native_module.hpp, + pgo_landmark_test.cpp + msgs/ (Graph3D.hpp, GraphDelta3D.hpp, Landmark.hpp) # C++ wire helpers +``` + +## Decisions (confirmed by Jeff, 2026-06-22) + +1. **Base branch.** ✅ Base `jnav_pgo` on `jeff/feat/jnav`. BUT `post_process.py` + must come from `better_pgo` (not jnav, which lacks it). +2. **`components/` scope.** ⏳ pending (whole `jnav/modules/`→`components/` vs only + `loop_closure`). +3. **`gsc_pgo` repo.** ✅ **public**, created at + `github.com/jeff-hykin/gsc_pgo`, initial rev `494e7a1d657c3702ec805c9e3d251a2fe8bc9529`. + Flake input will pin to that rev: `github:jeff-hykin/gsc_pgo/494e7a1...#default`. + +## Work breakdown + +### Phase 0 — branch setup +- Confirm `jnav_pgo` base (decision 1). If basing on jnav: merge/cherry-pick the + `dimos/navigation/jnav/` tree onto current `jnav_pgo` (which already has the + `cmu_nav` rename). Resolve any overlap with cmu_nav's own `pgo` module. + +### Phase 1 — external `gsc_pgo` repo +- `gh repo create jeff-hykin/gsc_pgo` (visibility per decision 3). *(outward-facing — confirm first)* +- Move `gsc_pgo/cpp/*` (C++, CMakeLists, flake.nix, flake.lock, `msgs/*.hpp`, + `pgo_landmark_test.cpp`) into the new repo. Keep the flake's + `lcm-extended` / `dimos-lcm` / `gtsam-extended` inputs as-is. +- `flake.nix`: `src = ./.;` already self-contained — switching to a tracked git + repo *fixes* the untracked-files gotcha that forced `path:$PWD#default` + (see memory: nix untracked-files gotcha). Verify `nix build .#default` from a + clean checkout. +- Push, tag/record the rev for the flake pin. + +### Phase 2 — `gsc_pgo` module.py +- Update `PGOConfig`: + - `build_command = 'nix build "github:jeff-hykin/gsc_pgo/#default" --no-write-lock-file'` + - drop the `path:$PWD` workaround comment; `cwd` no longer needs a local `cpp/`. + - `executable` resolves from the nix `result/bin/pgo` (confirm NativeModule + out-of-tree build path handling). +- Keep `In/Out` stream wiring and all loop-closure hyperparameters unchanged. +- Confirm C++ `msgs/*.hpp` wire format still matches `jnav/msgs/*.py` decoders + (they're hand-synced; the `.hpp` headers travel with the C++ repo). + +### Phase 3 — msgs/ + utils/ +- `jnav/msgs/`: already present on jnav (Graph3D, GraphDelta3D, Landmark, Marker + + `.hpp` + tests). Bring over as-is. The `.hpp` files are duplicated into the + external repo (C++ side); decide whether the canonical `.hpp` lives in + `gsc_pgo` and `jnav/msgs/*.hpp` is a mirror, or vice-versa. Recommend: canonical + in `gsc_pgo`, keep a note in `jnav/msgs` pointing at it. +- `jnav/utils/`: bring all utils. `better_pgo`'s `eval_utils/` (apriltags, + apriltag_agreement, trajectory_metrics, recording_db, voxel_map, module_loading) + are already absorbed into jnav `utils/` — verify no newer logic in + `better_pgo` was lost (diff the 6 overlapping files). + +### Phase 4 — eval.py / eval_all.py +- Copy from jnav `loop_closure/`. Fix imports to `dimos.navigation.jnav.utils.*` + and `...jnav.msgs.*` (already correct on jnav). +- `eval_all.py` enumerates the comparison modules (`gsc_pgo`, `ivan_pgo`, + `ivan_pgo_transformer`, `unrefined_pgo`) → port all four component dirs so eval + doesn't break. (If baselines are unwanted, prune eval_all's list instead.) + +### Phase 5 — postprocessing scripts (from better_pgo) +- Port `scripts/post_process.py` → `components/loop_closure/gsc_pgo/post_process.py`. +- Port `add_april.py`, `detect_tags.py`, `make_rrd.py` alongside it (the doc's + 3-step flow references all of them). +- Rewrite their internal imports: `eval_utils.apriltags` → `dimos.navigation.jnav.utils.apriltags`, etc. +- Tag quality gates are single-sourced in `utils/apriltags.py` (`DEFAULT_*`); keep + post_process importing from there (don't duplicate constants). + +### Phase 6 — docs +- Adapt `docs/capabilities/navigation/map_postprocessing.md` → + `experimental/docs/jnav/map_postprocessing.md`. +- Rewrite every script path in the doc: + `dimos/navigation/nav_stack/modules/pgo/scripts/X.py` + → `dimos/navigation/jnav/components/loop_closure/gsc_pgo/X.py`. +- Update the `eval_utils/apriltags.py` reference → `jnav/utils/apriltags.py`. +- Add a back-link from the navigation readme if appropriate. + +### Phase 7 — wiring + tests +- `spec.py` `LoopClosure` Protocol: bring over, update any module paths. +- Blueprints: if any blueprint references the old `nav_stack`/`pgo` module path, + repoint to `jnav.components.loop_closure.gsc_pgo`. Regenerate + `all_blueprints.py` via `pytest dimos/robot/test_all_blueprints_generation.py`. +- Port tests (`test_pgo_synthetic_drift.py`, msgs tests, utils tests). Run + `uv run pytest` for the loop_closure + msgs + utils dirs. +- `ruff check --fix && ruff format`. + +## Risks / cut-corners to flag +- **C++ wire-format drift**: `.hpp` (C++) and `.py` (decode) are hand-synced. Once + the `.hpp` lives in an external repo, a change there can silently desync the + Python decoder. Mitigate: keep a `test_Graph3D` round-trip test in `jnav/msgs` + that builds against the pinned flake rev, or at least a schema-comment check. +- **Private flake in CI**: if `gsc_pgo` is private, dimos CI nix builds will fail + without a token — do not add a token to CI without asking (secrets rule). +- **eval_results/**: jnav carries committed `eval_results/*/summary.json` snapshots. + Decide whether to bring those (history/benchmarks) or regenerate. +- **Baseline PGO impls**: `ivan_pgo*` / `unrefined_pgo` carry their own inline C++ + (`unrefined_pgo/cpp`). If only `gsc_pgo` goes external, the layout is asymmetric + — acceptable (they're experimental baselines) but worth noting. From ab31c4b72a3005082f122bd5b0f5939418165556 Mon Sep 17 00:00:00 2001 From: Jeff Hykin Date: Wed, 24 Jun 2026 00:27:07 -0700 Subject: [PATCH 12/21] recorder: count + warn on frames dropped by LATEST coalescing process_observable gains an optional on_drop callback fired once per message dropped by the dispatcher's single-slot LATEST mailbox. The Recorder uses it to count dropped frames per stream and log a throttled warning, so a slow sink no longer loses data silently. --- dimos/core/module.py | 18 ++++++++++++++---- dimos/memory2/module.py | 22 +++++++++++++++++++++- 2 files changed, 35 insertions(+), 5 deletions(-) diff --git a/dimos/core/module.py b/dimos/core/module.py index 26a2b6f893..fdf6d23061 100644 --- a/dimos/core/module.py +++ b/dimos/core/module.py @@ -477,14 +477,16 @@ def process_observable( self, observable: "Observable[Any]", async_cb: Callable[[Any], Any], + on_drop: Callable[[], None] | None = None, ) -> "DisposableBase": """Subscribe `async_cb` (an async function) to `observable`, dispatching each emitted value onto self._loop. Invocations are serialized through a - per-subscription dispatcher task with LATEST coalescing. The subscription + per-subscription dispatcher task with LATEST coalescing. `on_drop`, if + given, fires once per message dropped by that coalescing. The subscription is registered for cleanup on stop().""" if not inspect.iscoroutinefunction(async_cb): raise TypeError("process_observable requires an `async def` callback") - on_msg, dispatcher_disp = self._make_async_dispatch(async_cb) + on_msg, dispatcher_disp = self._make_async_dispatch(async_cb, on_drop) sub = observable.subscribe(on_msg) return self.register_disposable(CompositeDisposable(sub, dispatcher_disp)) @@ -635,7 +637,9 @@ def _auto_bind_handlers(self) -> None: self.process_observable(in_stream.pure_observable(), handler) def _make_async_dispatch( - self, async_handler: Callable[[Any], Any] + self, + async_handler: Callable[[Any], Any], + on_drop: Callable[[], None] | None = None, ) -> tuple[Callable[[Any], None], "DisposableBase"]: """Build a sync callback that delivers `msg` into a single-slot LATEST mailbox drained by a dedicated dispatcher task on `self._loop`. @@ -645,7 +649,9 @@ def _make_async_dispatch( awaits). - If messages arrive faster than the handler can process them, intermediate messages are dropped and only the most recent unprocessed - message is kept (LATEST policy). + message is kept (LATEST policy). `on_drop`, if given, is called once + per dropped message (on the loop thread) so callers that need every + message can surface the loss. - The returned Disposable cancels the dispatcher task. """ loop = self._loop @@ -685,6 +691,10 @@ def on_msg(msg: Any) -> None: return def _set() -> None: + # A slot that still holds an unconsumed value is about to be + # overwritten — that queued message is being dropped (LATEST). + if slot["has_value"] and on_drop is not None: + on_drop() slot["value"] = msg slot["has_value"] = True event.set() diff --git a/dimos/memory2/module.py b/dimos/memory2/module.py index e16c1527e7..cc3aebc67d 100644 --- a/dimos/memory2/module.py +++ b/dimos/memory2/module.py @@ -318,6 +318,9 @@ async def _lidar_pose(self, msg): config: RecorderConfig _pose_setters: dict[str, Any] = {} + # Per-stream count of frames lost to the dispatcher's LATEST coalescing + # (sink slower than input). Populated lazily as drops happen. + _dropped_frames: dict[str, int] = {} @rpc def start(self) -> None: @@ -330,6 +333,7 @@ def start(self) -> None: return self._pose_setters = self._collect_pose_setters() + self._dropped_frames = {} # TODO: store reset API/logic is not implemented yet. This module # shouldn't need to know about files (SqliteStore specific), and @@ -391,7 +395,23 @@ async def on_msg(msg: Any) -> None: ) stream.append(msg, ts=ts, pose=pose) - self.process_observable(input_topic.pure_observable(), on_msg) + self.process_observable( + input_topic.pure_observable(), on_msg, on_drop=lambda: self._on_frame_dropped(name) + ) + + def _on_frame_dropped(self, name: str) -> None: + """A frame for *name* was dropped because the sink couldn't keep up with + the input rate (dispatcher LATEST coalescing). Count it and warn — once, + then on each power-of-ten — so silent data loss is visible without + flooding the log.""" + count = self._dropped_frames.get(name, 0) + 1 + self._dropped_frames[name] = count + if count == 1 or count % 1000 == 0: + logger.warning( + "[%s] Recorder dropped %d frame(s) — sink slower than input; recording is lossy", + name, + count, + ) def _prepare_streams(self) -> None: """On APPEND, drop the streams this recorder is about to (re)write — the From d6c0c664bc8a77def35a5b1d02c8d06be476eff7 Mon Sep 17 00:00:00 2001 From: Jeff Hykin Date: Wed, 24 Jun 2026 15:32:38 +0800 Subject: [PATCH 13/21] remove comparision modules --- .../loop_closure/ivan_pgo/module.py | 637 ------------------ .../ivan_pgo_transformer/module.py | 263 -------- .../unrefined_pgo/cpp/CMakeLists.txt | 49 -- .../unrefined_pgo/cpp/commons.cpp | 8 - .../loop_closure/unrefined_pgo/cpp/commons.h | 29 - .../unrefined_pgo/cpp/dimos_native_module.hpp | 97 --- .../loop_closure/unrefined_pgo/cpp/flake.lock | 144 ---- .../loop_closure/unrefined_pgo/cpp/flake.nix | 70 -- .../loop_closure/unrefined_pgo/cpp/main.cpp | 299 -------- .../unrefined_pgo/cpp/point_cloud_utils.hpp | 170 ----- .../unrefined_pgo/cpp/simple_pgo.cpp | 211 ------ .../unrefined_pgo/cpp/simple_pgo.h | 78 --- .../loop_closure/unrefined_pgo/module.py | 286 -------- 13 files changed, 2341 deletions(-) delete mode 100644 dimos/navigation/jnav/components/loop_closure/ivan_pgo/module.py delete mode 100644 dimos/navigation/jnav/components/loop_closure/ivan_pgo_transformer/module.py delete mode 100644 dimos/navigation/jnav/components/loop_closure/unrefined_pgo/cpp/CMakeLists.txt delete mode 100644 dimos/navigation/jnav/components/loop_closure/unrefined_pgo/cpp/commons.cpp delete mode 100644 dimos/navigation/jnav/components/loop_closure/unrefined_pgo/cpp/commons.h delete mode 100644 dimos/navigation/jnav/components/loop_closure/unrefined_pgo/cpp/dimos_native_module.hpp delete mode 100644 dimos/navigation/jnav/components/loop_closure/unrefined_pgo/cpp/flake.lock delete mode 100644 dimos/navigation/jnav/components/loop_closure/unrefined_pgo/cpp/flake.nix delete mode 100644 dimos/navigation/jnav/components/loop_closure/unrefined_pgo/cpp/main.cpp delete mode 100644 dimos/navigation/jnav/components/loop_closure/unrefined_pgo/cpp/point_cloud_utils.hpp delete mode 100644 dimos/navigation/jnav/components/loop_closure/unrefined_pgo/cpp/simple_pgo.cpp delete mode 100644 dimos/navigation/jnav/components/loop_closure/unrefined_pgo/cpp/simple_pgo.h delete mode 100644 dimos/navigation/jnav/components/loop_closure/unrefined_pgo/module.py diff --git a/dimos/navigation/jnav/components/loop_closure/ivan_pgo/module.py b/dimos/navigation/jnav/components/loop_closure/ivan_pgo/module.py deleted file mode 100644 index 7c86f6dfb5..0000000000 --- a/dimos/navigation/jnav/components/loop_closure/ivan_pgo/module.py +++ /dev/null @@ -1,637 +0,0 @@ -# Copyright 2026 Dimensional Inc. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Pure-Python PGO ported from Ivan's `ivan/feat/go2loopclosure` branch -(`dimos/mapping/pgo.py`), adapted to the jnav LoopClosure spec. - -GTSAM iSAM2 pose graph + Open3D point-to-plane ICP loop verification + -KD-tree loop candidate search. Differences from the original: - * input renamed `registered_scan` -> `lidar` (LoopClosure spec) - * publishes `pose_graph: Out[Graph3D]` (optimized keyframes, odometry + - loop edges) and `loop_closure_event: Out[GraphDelta3D]` so the eval - harness can capture the corrected trajectory and count closures - * offline experiment helpers (transformers, two-pass voxel pipelines) - were left behind — this is just the runtime module -""" - -from __future__ import annotations - -from dataclasses import dataclass -import threading -import time -from typing import Any - -import gtsam # type: ignore[import-untyped] -import numpy as np -import open3d as o3d # type: ignore[import-untyped] -import open3d.core as o3c # type: ignore[import-untyped] -from reactivex.disposable import Disposable -from scipy.spatial import KDTree -from scipy.spatial.transform import Rotation - -from dimos.constants import DEFAULT_THREAD_JOIN_TIMEOUT -from dimos.core.core import rpc -from dimos.core.module import Module, ModuleConfig -from dimos.core.stream import In, Out -from dimos.msgs.geometry_msgs.Pose import Pose -from dimos.msgs.geometry_msgs.PoseStamped import PoseStamped -from dimos.msgs.geometry_msgs.Quaternion import Quaternion -from dimos.msgs.geometry_msgs.Transform import Transform -from dimos.msgs.geometry_msgs.Vector3 import Vector3 -from dimos.msgs.nav_msgs.Odometry import Odometry -from dimos.msgs.sensor_msgs.PointCloud2 import PointCloud2 -from dimos.navigation.jnav.components.loop_closure.spec import LoopClosure -from dimos.navigation.jnav.msgs.Graph3D import Graph3D -from dimos.navigation.jnav.msgs.GraphDelta3D import GraphDelta3D -from dimos.utils.logging_config import setup_logger - -FRAME_MAP = "map" -FRAME_ODOM = "odom" -FRAME_BODY = "base_link" - -logger = setup_logger() - - -class PGOConfig(ModuleConfig): - world_frame: str = FRAME_MAP - - # Keyframe detection - key_pose_delta_trans: float = 0.5 - key_pose_delta_deg: float = 10.0 - - # Loop closure - loop_search_radius: float = 2.0 - loop_time_thresh: float = 20.0 - loop_score_thresh: float = 0.3 - loop_submap_half_range: int = 10 - min_icp_inliers: int = 10 - min_keyframes_for_loop_search: int = 10 - loop_closure_extra_iterations: int = 4 - submap_resolution: float = 0.2 - min_loop_detect_duration: float = 5.0 - - # Input mode - unregister_input: bool = True # Transform world-frame scans to body-frame using odom - - # Global map - publish_global_map: bool = True - global_map_publish_rate: float = 0.5 - global_map_voxel_size: float = 0.15 - - # ICP - max_icp_iterations: int = 50 - max_icp_correspondence_dist: float = 1.0 - - -@dataclass -class _KeyPose: - r_local: np.ndarray # 3x3 rotation in local/odom frame - t_local: np.ndarray # 3-vec translation in local/odom frame - r_global: np.ndarray # 3x3 corrected rotation - t_global: np.ndarray # 3-vec corrected translation - timestamp: float - body_cloud: np.ndarray # Nx3 points in body frame - - -def _icp( - source: np.ndarray, - target: np.ndarray, - max_iter: int = 50, - max_dist: float = 1.0, - tol: float = 1e-6, - min_inliers: int = 10, - init: np.ndarray | None = None, -) -> tuple[np.ndarray, float]: - """Point-to-plane ICP using Open3D's tensor pipeline. - - Returns ``(T, fitness)`` where ``fitness`` is mean squared inlier - distance (m²).""" - if len(source) < min_inliers or len(target) < min_inliers: - return np.eye(4), float("inf") - - cpu = o3c.Device("CPU:0") - src_pcd = o3d.t.geometry.PointCloud(o3c.Tensor(source.astype(np.float32), device=cpu)) - tgt_pcd = o3d.t.geometry.PointCloud(o3c.Tensor(target.astype(np.float32), device=cpu)) - - # Normals on the target enable point-to-plane ICP, which converges - # tighter than point-to-point on indoor scenes (walls give unambiguous - # normals that resolve the slide-along-wall ambiguity). - tgt_pcd.estimate_normals(max_nn=30, radius=0.3) - - init_T = ( - o3c.Tensor(init.astype(np.float64), dtype=o3c.float64, device=cpu) - if init is not None - else o3c.Tensor.eye(4, dtype=o3c.float64, device=cpu) - ) - - # Silence Open3D's "0 correspondence" warning — we deliberately use a - # tight max_correspondence_distance and reject loops with poor fitness. - with o3d.utility.VerbosityContextManager(o3d.utility.VerbosityLevel.Error): - result = o3d.t.pipelines.registration.icp( - source=src_pcd, - target=tgt_pcd, - max_correspondence_distance=max_dist, - init_source_to_target=init_T, - estimation_method=o3d.t.pipelines.registration.TransformationEstimationPointToPlane(), - criteria=o3d.t.pipelines.registration.ICPConvergenceCriteria( - relative_fitness=tol, - relative_rmse=tol, - max_iteration=max_iter, - ), - ) - - fitness_inlier_frac = float(result.fitness) - if fitness_inlier_frac == 0.0: - return np.eye(4), float("inf") - - rmse = float(result.inlier_rmse) - T = result.transformation.numpy() - return T, rmse * rmse - - -def _voxel_downsample(pts: np.ndarray, voxel_size: float) -> np.ndarray: - if len(pts) == 0 or voxel_size <= 0: - return pts - keys = np.floor(pts / voxel_size).astype(np.int32) - _, idx = np.unique(keys, axis=0, return_index=True) - return pts[idx] - - -class _SimplePGO: - def __init__(self, config: PGOConfig) -> None: - self._cfg = config - self._key_poses: list[_KeyPose] = [] - self._history_pairs: list[tuple[int, int]] = [] - self._cache_pairs: list[dict[str, Any]] = [] - self._r_offset = np.eye(3) - self._t_offset = np.zeros(3) - - params = gtsam.ISAM2Params() - params.setRelinearizeThreshold(0.01) - params.relinearizeSkip = 1 - self._isam2 = gtsam.ISAM2(params) - self._graph = gtsam.NonlinearFactorGraph() - self._values = gtsam.Values() - - def is_key_pose(self, r: np.ndarray, t: np.ndarray) -> bool: - if not self._key_poses: - return True - last = self._key_poses[-1] - delta_trans = np.linalg.norm(t - last.t_local) - # Angular distance via quaternion dot product - q_cur = Rotation.from_matrix(r).as_quat() # [x,y,z,w] - q_last = Rotation.from_matrix(last.r_local).as_quat() - dot = abs(np.dot(q_cur, q_last)) - delta_deg = np.degrees(2.0 * np.arccos(min(dot, 1.0))) - return bool( - delta_trans > self._cfg.key_pose_delta_trans or delta_deg > self._cfg.key_pose_delta_deg - ) - - def add_key_pose( - self, r_local: np.ndarray, t_local: np.ndarray, timestamp: float, body_cloud: np.ndarray - ) -> bool: - if not self.is_key_pose(r_local, t_local): - return False - - idx = len(self._key_poses) - init_r = self._r_offset @ r_local - init_t = self._r_offset @ t_local + self._t_offset - - pose = gtsam.Pose3(gtsam.Rot3(init_r), gtsam.Point3(init_t)) - self._values.insert(idx, pose) - - if idx == 0: - noise = gtsam.noiseModel.Diagonal.Variances(np.full(6, 1e-12)) - self._graph.add(gtsam.PriorFactorPose3(idx, pose, noise)) - else: - last = self._key_poses[-1] - r_between = last.r_local.T @ r_local - t_between = last.r_local.T @ (t_local - last.t_local) - noise = gtsam.noiseModel.Diagonal.Variances( - np.array([1e-6, 1e-6, 1e-6, 1e-4, 1e-4, 1e-6]) - ) - self._graph.add( - gtsam.BetweenFactorPose3( - idx - 1, idx, gtsam.Pose3(gtsam.Rot3(r_between), gtsam.Point3(t_between)), noise - ) - ) - - kp = _KeyPose( - r_local=r_local.copy(), - t_local=t_local.copy(), - r_global=init_r.copy(), - t_global=init_t.copy(), - timestamp=timestamp, - body_cloud=_voxel_downsample(body_cloud, self._cfg.submap_resolution), - ) - self._key_poses.append(kp) - return True - - def _get_submap(self, idx: int, half_range: int) -> np.ndarray: - lo = max(0, idx - half_range) - hi = min(len(self._key_poses) - 1, idx + half_range) - parts = [] - for i in range(lo, hi + 1): - kp = self._key_poses[i] - world = (kp.r_global @ kp.body_cloud.T).T + kp.t_global - parts.append(world) - if not parts: - return np.empty((0, 3)) - cloud = np.vstack(parts) - return _voxel_downsample(cloud, self._cfg.submap_resolution) - - def search_for_loops(self) -> None: - if len(self._key_poses) < self._cfg.min_keyframes_for_loop_search: - return - - # Rate limit - if self._history_pairs: - cur_time = self._key_poses[-1].timestamp - last_time = self._key_poses[self._history_pairs[-1][1]].timestamp - if cur_time - last_time < self._cfg.min_loop_detect_duration: - return - - cur_idx = len(self._key_poses) - 1 - cur_kp = self._key_poses[-1] - - # Build KD-tree of previous keyframe positions - positions = np.array([kp.t_global for kp in self._key_poses[:-1]]) - tree = KDTree(positions) - - idxs = tree.query_ball_point(cur_kp.t_global, self._cfg.loop_search_radius) - if not idxs: - return - - # Pick the spatially closest keyframe that's also old enough in time. - # query_ball_point doesn't sort, so we sort by distance ourselves. - candidates = [ - (float(np.linalg.norm(self._key_poses[i].t_global - cur_kp.t_global)), i) - for i in idxs - if abs(cur_kp.timestamp - self._key_poses[i].timestamp) > self._cfg.loop_time_thresh - ] - if not candidates: - return - candidates.sort() - loop_idx = candidates[0][1] - - # ICP verification - target = self._get_submap(loop_idx, self._cfg.loop_submap_half_range) - source = self._get_submap(cur_idx, 0) - - transform, fitness = _icp( - source, - target, - max_iter=self._cfg.max_icp_iterations, - max_dist=self._cfg.max_icp_correspondence_dist, - min_inliers=self._cfg.min_icp_inliers, - ) - if fitness > self._cfg.loop_score_thresh: - return - - # Compute relative pose - R_icp = transform[:3, :3] - t_icp = transform[:3, 3] - r_refined = R_icp @ cur_kp.r_global - t_refined = R_icp @ cur_kp.t_global + t_icp - r_offset = self._key_poses[loop_idx].r_global.T @ r_refined - t_offset = self._key_poses[loop_idx].r_global.T @ ( - t_refined - self._key_poses[loop_idx].t_global - ) - - self._cache_pairs.append( - { - "source": cur_idx, - "target": loop_idx, - "r_offset": r_offset, - "t_offset": t_offset, - "score": fitness, - } - ) - self._history_pairs.append((loop_idx, cur_idx)) - logger.info( - "Loop closure detected", - source=cur_idx, - target=loop_idx, - score=round(fitness, 4), - ) - - def smooth_and_update(self) -> None: - has_loop = bool(self._cache_pairs) - - for pair in self._cache_pairs: - # Pose3 noise model is [rx, ry, rz, x, y, z]. Use ICP fitness as - # the translation variance and a generous fixed rotation variance — - # loops shouldn't be trusted to fix rotation tightly. - trans_var = max(0.01, float(pair["score"])) # >= sigma_trans = 10 cm - rot_var = 0.05 # sigma_rot ~= 13 deg - noise = gtsam.noiseModel.Diagonal.Variances( - np.array([rot_var, rot_var, rot_var, trans_var, trans_var, trans_var]) - ) - self._graph.add( - gtsam.BetweenFactorPose3( - pair["target"], - pair["source"], - gtsam.Pose3(gtsam.Rot3(pair["r_offset"]), gtsam.Point3(pair["t_offset"])), - noise, - ) - ) - self._cache_pairs.clear() - - self._isam2.update(self._graph, self._values) - self._isam2.update() - if has_loop: - for _ in range(self._cfg.loop_closure_extra_iterations): - self._isam2.update() - self._graph = gtsam.NonlinearFactorGraph() - self._values = gtsam.Values() - - estimates = self._isam2.calculateBestEstimate() - for i in range(len(self._key_poses)): - pose = estimates.atPose3(i) - self._key_poses[i].r_global = pose.rotation().matrix() - self._key_poses[i].t_global = pose.translation() - - last = self._key_poses[-1] - self._r_offset = last.r_global @ last.r_local.T - self._t_offset = last.t_global - self._r_offset @ last.t_local - - def get_corrected_pose( - self, r_local: np.ndarray, t_local: np.ndarray - ) -> tuple[np.ndarray, np.ndarray]: - return self._r_offset @ r_local, self._r_offset @ t_local + self._t_offset - - def build_global_map(self, voxel_size: float) -> np.ndarray: - if not self._key_poses: - return np.empty((0, 3), dtype=np.float32) - parts = [] - for kp in self._key_poses: - world = (kp.r_global @ kp.body_cloud.T).T + kp.t_global - parts.append(world) - cloud = np.vstack(parts).astype(np.float32) - return _voxel_downsample(cloud, voxel_size) - - @property - def num_key_poses(self) -> int: - return len(self._key_poses) - - -def process_scan( - pgo: _SimplePGO, - cloud: PointCloud2, - r_local: np.ndarray, - t_local: np.ndarray, - ts: float, - unregister_input: bool, -) -> tuple[Odometry, Transform, bool] | None: - """Add a keyframe, run loop closure, return (corrected odom, map->odom tf, - keyframe_added) — or None on empty cloud. - - Caller must hold ``pgo``'s lock during this call.""" - points, _ = cloud.as_numpy() - if len(points) == 0: - return None - - if unregister_input: - # registered_scan is world-frame; transform back to body-frame. - body_pts = (r_local.T @ (points[:, :3].T - t_local[:, None])).T - else: - body_pts = points[:, :3] - - added = pgo.add_key_pose(r_local, t_local, ts, body_pts) - if added: - pgo.search_for_loops() - pgo.smooth_and_update() - - r_corr, t_corr = pgo.get_corrected_pose(r_local, t_local) - return ( - build_corrected_odometry(r_corr, t_corr, ts), - build_map_odom_tf(pgo._r_offset.copy(), pgo._t_offset.copy(), ts), - added, - ) - - -def build_corrected_odometry( - r: np.ndarray, - t: np.ndarray, - ts: float, - world_frame: str = FRAME_MAP, -) -> Odometry: - q = Rotation.from_matrix(r).as_quat() # [x,y,z,w] - return Odometry( - ts=ts, - frame_id=world_frame, - child_frame_id=FRAME_BODY, - pose=Pose( - position=[float(t[0]), float(t[1]), float(t[2])], - orientation=[float(q[0]), float(q[1]), float(q[2]), float(q[3])], - ), - ) - - -def build_map_odom_tf( - r_offset: np.ndarray, - t_offset: np.ndarray, - ts: float, - world_frame: str = FRAME_MAP, - odom_frame: str = FRAME_ODOM, -) -> Transform: - q = Rotation.from_matrix(r_offset).as_quat() # [x,y,z,w] - return Transform( - frame_id=world_frame, - child_frame_id=odom_frame, - translation=Vector3(float(t_offset[0]), float(t_offset[1]), float(t_offset[2])), - rotation=Quaternion(float(q[0]), float(q[1]), float(q[2]), float(q[3])), - ts=ts, - ) - - -def _keyframe_node(index: int, key_pose: _KeyPose, world_frame: str) -> Graph3D.Node3D: - q = Rotation.from_matrix(key_pose.r_global).as_quat() # [x,y,z,w] - return Graph3D.Node3D( - pose=PoseStamped( - ts=key_pose.timestamp, - frame_id=world_frame, - position=[float(v) for v in key_pose.t_global], - orientation=[float(q[0]), float(q[1]), float(q[2]), float(q[3])], - ), - id=index, - ) - - -class PGO(Module, LoopClosure): - """Pose graph optimization with loop closure (pure Python). - - Detects keyframes, performs loop closure via ICP + KD-tree search, and - optimizes the pose graph with GTSAM iSAM2. Publishes corrected odometry, - the optimized pose graph, loop-closure events, and an accumulated - global map.""" - - config: PGOConfig - - lidar: In[PointCloud2] - odometry: In[Odometry] - corrected_odometry: Out[Odometry] - pose_graph: Out[Graph3D] - loop_closure_event: Out[GraphDelta3D] - global_map: Out[PointCloud2] - - def __init__(self, **kwargs: Any) -> None: - super().__init__(**kwargs) - self._running = False - self._thread: threading.Thread | None = None - self._pgo: _SimplePGO | None = None - self._latest_r = np.eye(3) - self._latest_t = np.zeros(3) - self._latest_time = 0.0 - self._has_odom = False - self._last_global_map_time = 0.0 - self._published_loops = 0 - self._lock = threading.Lock() - # Protects _pgo mutations (add_key_pose, search_for_loops, - # smooth_and_update, build_global_map) against concurrent access - # from _on_scan and _publish_loop threads. - self._pgo_lock = threading.Lock() - - @rpc - def start(self) -> None: - super().start() - self._pgo = _SimplePGO(self.config) - # Identity map -> odom so consumers querying map -> body get a result - # before any loop-closure correction exists. - self.tf.publish(build_map_odom_tf(np.eye(3), np.zeros(3), time.time())) - self.register_disposable(Disposable(self.odometry.subscribe(self._on_odom))) - self.register_disposable(Disposable(self.lidar.subscribe(self._on_scan))) - self._running = True - if self.config.publish_global_map: - self._thread = threading.Thread(target=self._publish_global_map_loop, daemon=True) - self._thread.start() - logger.info( - "PGO module started (gtsam iSAM2, pure python)", - publish_global_map=self.config.publish_global_map, - ) - - @rpc - def stop(self) -> None: - self._running = False - if self._thread: - self._thread.join(timeout=DEFAULT_THREAD_JOIN_TIMEOUT) - super().stop() - - def _on_odom(self, msg: Odometry) -> None: - q = [ - msg.pose.orientation.x, - msg.pose.orientation.y, - msg.pose.orientation.z, - msg.pose.orientation.w, - ] - r = Rotation.from_quat(q).as_matrix() - t = np.array([msg.pose.position.x, msg.pose.position.y, msg.pose.position.z]) - with self._lock: - self._latest_r = r - self._latest_t = t - self._latest_time = msg.ts if msg.ts else time.time() - self._has_odom = True - - def _on_scan(self, cloud: PointCloud2) -> None: - with self._lock: - if not self._has_odom: - return - r_local = self._latest_r.copy() - t_local = self._latest_t.copy() - ts = self._latest_time - - pgo = self._pgo - assert pgo is not None - - with self._pgo_lock: - result = process_scan(pgo, cloud, r_local, t_local, ts, self.config.unregister_input) - if result is None: - return - corrected_odom, tf_msg, keyframe_added = result - if keyframe_added: - graph_msg, loop_events = self._snapshot_graph(pgo, ts) - else: - graph_msg, loop_events = None, [] - - self.corrected_odometry.publish(corrected_odom) - self.tf.publish(tf_msg) - if graph_msg is not None: - self.pose_graph.publish(graph_msg) - for event in loop_events: - self.loop_closure_event.publish(event) - - def _snapshot_graph(self, pgo: _SimplePGO, ts: float) -> tuple[Graph3D, list[GraphDelta3D]]: - """The optimized graph (odometry chain + loop edges) and one - GraphDelta3D per loop pair not yet published. - - Caller must hold ``_pgo_lock``.""" - world_frame = self.config.world_frame - nodes = [ - _keyframe_node(index, key_pose, world_frame) - for index, key_pose in enumerate(pgo._key_poses) - ] - edges = [ - Graph3D.Edge( - start_id=index - 1, end_id=index, timestamp=pgo._key_poses[index].timestamp - ) - for index in range(1, len(pgo._key_poses)) - ] - edges += [ - Graph3D.Edge(start_id=target, end_id=source, metadata_id=1) - for target, source in pgo._history_pairs - ] - graph_msg = Graph3D(ts=ts, nodes=nodes, edges=edges) - - loop_events: list[GraphDelta3D] = [] - identity = GraphDelta3D.Transform( - translation=Vector3(0.0, 0.0, 0.0), rotation=Quaternion(0.0, 0.0, 0.0, 1.0) - ) - for target, source in pgo._history_pairs[self._published_loops :]: - loop_events.append( - GraphDelta3D( - ts=ts, - nodes=[ - _keyframe_node(target, pgo._key_poses[target], world_frame), - _keyframe_node(source, pgo._key_poses[source], world_frame), - ], - transforms=[identity, identity], - ) - ) - self._published_loops = len(pgo._history_pairs) - return graph_msg, loop_events - - def _publish_global_map_loop(self) -> None: - pgo = self._pgo - assert pgo is not None - rate = self.config.global_map_publish_rate - interval = 1.0 / rate if rate > 0 else 2.0 - - while self._running: - t0 = time.monotonic() - - if t0 - self._last_global_map_time > interval and pgo.num_key_poses > 0: - with self._pgo_lock: - cloud_np = pgo.build_global_map(self.config.global_map_voxel_size) - if len(cloud_np) > 0: - now = time.time() - self.global_map.publish( - PointCloud2.from_numpy( - cloud_np, frame_id=self.config.world_frame, timestamp=now - ) - ) - self._last_global_map_time = t0 - - elapsed = time.monotonic() - t0 - sleep_time = max(DEFAULT_THREAD_JOIN_TIMEOUT, interval - elapsed) - time.sleep(sleep_time) diff --git a/dimos/navigation/jnav/components/loop_closure/ivan_pgo_transformer/module.py b/dimos/navigation/jnav/components/loop_closure/ivan_pgo_transformer/module.py deleted file mode 100644 index 17e7c84511..0000000000 --- a/dimos/navigation/jnav/components/loop_closure/ivan_pgo_transformer/module.py +++ /dev/null @@ -1,263 +0,0 @@ -# Copyright 2026 Dimensional Inc. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Ivan's CURRENT PGO (dimos/mapping/loop_closure/pgo.py, main, June 2026) -forced into an online LoopClosure module. - -That code ships as an offline memory2 Transformer, but its `_PGOState` core is -already incremental — one `process(pose, ts, world_cloud)` call per frame. This -wrapper brute-forces the online shape: every arriving scan is paired with the -latest odometry pose and pushed straight into `_PGOState`, then the corrected -odometry / pose graph / loop events are read back out of its (private) state. -No changes to the mapping code itself; this module owns the coupling. - -`_PGOState.process` expects world-frame registered scans (it unregisters them -internally via the odom pose) — set `input_is_registered: false` for raw -body-frame scans and they'll be pre-registered here first.""" - -from __future__ import annotations - -import threading -import time -from typing import Any - -import numpy as np -from scipy.spatial.transform import Rotation - -from dimos.core.core import rpc -from dimos.core.module import Module, ModuleConfig -from dimos.core.stream import In, Out -from dimos.mapping.loop_closure.pgo import ( - PGOConfig as MappingPGOConfig, - _PGOState, - _pose3_to_transform, -) -from dimos.msgs.geometry_msgs.Pose import Pose -from dimos.msgs.geometry_msgs.PoseStamped import PoseStamped -from dimos.msgs.geometry_msgs.Quaternion import Quaternion -from dimos.msgs.geometry_msgs.Transform import Transform -from dimos.msgs.geometry_msgs.Vector3 import Vector3 -from dimos.msgs.nav_msgs.Odometry import Odometry -from dimos.msgs.sensor_msgs.PointCloud2 import PointCloud2 -from dimos.navigation.jnav.components.loop_closure.spec import LoopClosure -from dimos.navigation.jnav.msgs.Graph3D import Graph3D -from dimos.navigation.jnav.msgs.GraphDelta3D import GraphDelta3D -from dimos.utils.logging_config import setup_logger - -logger = setup_logger() - - -class PGOConfig(ModuleConfig): - world_frame: str = "map" - odom_frame: str = "odom" - body_frame: str = "base_link" - # World-frame registered input (fastlio); false = raw body-frame scans. - input_is_registered: bool = True - - # Mirrors mapping/loop_closure PGOConfig — forwarded verbatim. - key_pose_delta_trans: float = 0.5 - key_pose_delta_deg: float = 10.0 - loop_search_radius: float = 2.0 - loop_time_thresh: float = 20.0 - loop_score_thresh: float = 0.3 - loop_submap_half_range: int = 10 - min_icp_inliers: int = 10 - min_keyframes_for_loop_search: int = 10 - loop_closure_extra_iterations: int = 4 - submap_resolution: float = 0.2 - min_loop_detect_duration: float = 5.0 - max_icp_iterations: int = 50 - max_icp_correspondence_dist: float = 1.0 - odom_rot_var: float = 1e-6 - odom_trans_var_xy: float = 1e-4 - odom_trans_var_z: float = 1e-6 - loop_rot_var: float = 0.05 - - -def _mapping_config(config: PGOConfig) -> MappingPGOConfig: - fields = {name: getattr(config, name) for name in MappingPGOConfig.model_fields} - return MappingPGOConfig(**fields) - - -def _pose3_node(index: int, pose: Any, ts: float, world_frame: str) -> Graph3D.Node3D: - translation = np.asarray(pose.translation()) - quaternion = Rotation.from_matrix(pose.rotation().matrix()).as_quat() # [x,y,z,w] - return Graph3D.Node3D( - pose=PoseStamped( - ts=ts, - frame_id=world_frame, - position=[float(v) for v in translation], - orientation=[float(v) for v in quaternion], - ), - id=index, - ) - - -class PGO(Module, LoopClosure): - """Online wrapper over Ivan's current incremental PGO core (`_PGOState`).""" - - config: PGOConfig - - lidar: In[PointCloud2] - odometry: In[Odometry] - corrected_odometry: Out[Odometry] - pose_graph: Out[Graph3D] - loop_closure_event: Out[GraphDelta3D] - - def __init__(self, **kwargs: Any) -> None: - super().__init__(**kwargs) - self._state: _PGOState | None = None - self._latest_odom: Odometry | None = None - self._published_loops = 0 - self._odom_lock = threading.Lock() - self._state_lock = threading.Lock() - self._unsub_odom: Any = None - self._unsub_lidar: Any = None - - @rpc - def start(self) -> None: - super().start() - self._state = _PGOState(_mapping_config(self.config)) - # Identity map -> odom so consumers querying map -> body get a result - # before any correction exists. - self.tf.publish(self._correction_tf(np.eye(4), time.time())) - self._unsub_odom = self.odometry.subscribe(self._on_odom) - self._unsub_lidar = self.lidar.subscribe(self._on_scan) - logger.info("PGO (ivan transformer core) started") - - @rpc - def stop(self) -> None: - if self._unsub_odom is not None: - self._unsub_odom.dispose() - if self._unsub_lidar is not None: - self._unsub_lidar.dispose() - super().stop() - - def _on_odom(self, msg: Odometry) -> None: - with self._odom_lock: - self._latest_odom = msg - - def _on_scan(self, cloud: PointCloud2) -> None: - import gtsam # type: ignore[import-untyped] - - with self._odom_lock: - odom = self._latest_odom - if odom is None or len(cloud) == 0: - return - state = self._state - assert state is not None - - position = odom.pose.position - orientation = odom.pose.orientation - local_pose = gtsam.Pose3( - gtsam.Rot3.Quaternion(orientation.w, orientation.x, orientation.y, orientation.z), - gtsam.Point3(position.x, position.y, position.z), - ) - ts = odom.ts if odom.ts else time.time() - - if not self.config.input_is_registered: - cloud = cloud.transform( - _pose3_to_transform( - local_pose, - ts=ts, - frame_id=self.config.world_frame, - child_frame_id=self.config.body_frame, - ) - ) - - with self._state_lock: - state.process(local_pose, ts, cloud) - keyframe_count = len(state._key_poses) - correction = state._world_correction - corrected = correction.compose(local_pose) - graph_msg = self._snapshot_graph(state, ts) if keyframe_count else None - loop_events = self._new_loop_events(state, ts) - - self._publish_corrected_odometry(corrected, ts) - self.tf.publish(self._correction_tf(correction.matrix(), ts)) - if graph_msg is not None: - self.pose_graph.publish(graph_msg) - for event in loop_events: - self.loop_closure_event.publish(event) - - def _publish_corrected_odometry(self, pose: Any, ts: float) -> None: - translation = np.asarray(pose.translation()) - quaternion = Rotation.from_matrix(pose.rotation().matrix()).as_quat() - self.corrected_odometry.publish( - Odometry( - ts=ts, - frame_id=self.config.world_frame, - child_frame_id=self.config.body_frame, - pose=Pose( - position=[float(v) for v in translation], - orientation=[float(v) for v in quaternion], - ), - ) - ) - - def _correction_tf(self, matrix: np.ndarray, ts: float) -> Transform: - quaternion = Rotation.from_matrix(matrix[:3, :3]).as_quat() - return Transform( - frame_id=self.config.world_frame, - child_frame_id=self.config.odom_frame, - translation=Vector3(float(matrix[0, 3]), float(matrix[1, 3]), float(matrix[2, 3])), - rotation=Quaternion(*[float(v) for v in quaternion]), - ts=ts, - ) - - def _snapshot_graph(self, state: _PGOState, ts: float) -> Graph3D: - """Optimized keyframes + odometry-chain and loop edges. - - Caller must hold ``_state_lock``.""" - world_frame = self.config.world_frame - nodes = [ - _pose3_node(index, key_pose.optimized, key_pose.timestamp, world_frame) - for index, key_pose in enumerate(state._key_poses) - ] - edges = [ - Graph3D.Edge( - start_id=index - 1, end_id=index, timestamp=state._key_poses[index].timestamp - ) - for index in range(1, len(state._key_poses)) - ] - edges += [ - Graph3D.Edge(start_id=pair.target, end_id=pair.source, metadata_id=1) - for pair in state._accepted_loops - ] - return Graph3D(ts=ts, nodes=nodes, edges=edges) - - def _new_loop_events(self, state: _PGOState, ts: float) -> list[GraphDelta3D]: - """One GraphDelta3D per accepted loop not yet published. - - Caller must hold ``_state_lock``.""" - world_frame = self.config.world_frame - identity = GraphDelta3D.Transform( - translation=Vector3(0.0, 0.0, 0.0), rotation=Quaternion(0.0, 0.0, 0.0, 1.0) - ) - events: list[GraphDelta3D] = [] - for pair in state._accepted_loops[self._published_loops :]: - source = state._key_poses[pair.source] - target = state._key_poses[pair.target] - events.append( - GraphDelta3D( - ts=ts, - nodes=[ - _pose3_node(pair.target, target.optimized, target.timestamp, world_frame), - _pose3_node(pair.source, source.optimized, source.timestamp, world_frame), - ], - transforms=[identity, identity], - ) - ) - self._published_loops = len(state._accepted_loops) - return events diff --git a/dimos/navigation/jnav/components/loop_closure/unrefined_pgo/cpp/CMakeLists.txt b/dimos/navigation/jnav/components/loop_closure/unrefined_pgo/cpp/CMakeLists.txt deleted file mode 100644 index 8c7b6d5b94..0000000000 --- a/dimos/navigation/jnav/components/loop_closure/unrefined_pgo/cpp/CMakeLists.txt +++ /dev/null @@ -1,49 +0,0 @@ -cmake_minimum_required(VERSION 3.14) -project(pgo CXX) - -set(CMAKE_CXX_STANDARD 17) -set(CMAKE_CXX_STANDARD_REQUIRED ON) -set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O3") - -include(FetchContent) -FetchContent_Declare(dimos_lcm - GIT_REPOSITORY https://github.com/dimensionalOS/dimos-lcm.git - GIT_TAG main - GIT_SHALLOW TRUE -) -FetchContent_MakeAvailable(dimos_lcm) - -find_package(PkgConfig REQUIRED) -pkg_check_modules(LCM REQUIRED lcm) -find_package(Eigen3 REQUIRED) -find_package(PCL 1.8 REQUIRED COMPONENTS common filters kdtree registration io) -find_package(GTSAM REQUIRED) - -add_definitions(-DUSE_PCL) - -add_executable(pgo - main.cpp - simple_pgo.cpp - commons.cpp -) - -target_include_directories(pgo PRIVATE - ${CMAKE_CURRENT_SOURCE_DIR} - ${dimos_lcm_SOURCE_DIR}/generated/cpp_lcm_msgs - ${LCM_INCLUDE_DIRS} - ${EIGEN3_INCLUDE_DIR} - ${PCL_INCLUDE_DIRS} - ${GTSAM_INCLUDE_DIR} -) - -target_link_libraries(pgo PRIVATE - ${LCM_LIBRARIES} - ${PCL_LIBRARIES} - gtsam -) - -target_link_directories(pgo PRIVATE - ${LCM_LIBRARY_DIRS} -) - -install(TARGETS pgo DESTINATION bin) diff --git a/dimos/navigation/jnav/components/loop_closure/unrefined_pgo/cpp/commons.cpp b/dimos/navigation/jnav/components/loop_closure/unrefined_pgo/cpp/commons.cpp deleted file mode 100644 index f309a97a85..0000000000 --- a/dimos/navigation/jnav/components/loop_closure/unrefined_pgo/cpp/commons.cpp +++ /dev/null @@ -1,8 +0,0 @@ -#include "commons.h" - -void PoseWithTime::setTime(int32_t _sec, uint32_t _nsec) -{ - sec = _sec; - nsec = _nsec; - second = static_cast(sec) + static_cast(nsec) / 1e9; -} diff --git a/dimos/navigation/jnav/components/loop_closure/unrefined_pgo/cpp/commons.h b/dimos/navigation/jnav/components/loop_closure/unrefined_pgo/cpp/commons.h deleted file mode 100644 index 2f9244ede6..0000000000 --- a/dimos/navigation/jnav/components/loop_closure/unrefined_pgo/cpp/commons.h +++ /dev/null @@ -1,29 +0,0 @@ -#pragma once -#include -#include -#include - -using PointType = pcl::PointXYZI; -using CloudType = pcl::PointCloud; -using PointVec = std::vector>; - -using M3D = Eigen::Matrix3d; -using V3D = Eigen::Vector3d; -using M3F = Eigen::Matrix3f; -using V3F = Eigen::Vector3f; -using M4F = Eigen::Matrix4f; -using V4F = Eigen::Vector4f; - -struct PoseWithTime { - V3D t; - M3D r; - int32_t sec; - uint32_t nsec; - double second; - void setTime(int32_t sec, uint32_t nsec); -}; - -struct CloudWithPose { - CloudType::Ptr cloud; - PoseWithTime pose; -}; diff --git a/dimos/navigation/jnav/components/loop_closure/unrefined_pgo/cpp/dimos_native_module.hpp b/dimos/navigation/jnav/components/loop_closure/unrefined_pgo/cpp/dimos_native_module.hpp deleted file mode 100644 index c22cca4326..0000000000 --- a/dimos/navigation/jnav/components/loop_closure/unrefined_pgo/cpp/dimos_native_module.hpp +++ /dev/null @@ -1,97 +0,0 @@ -// SmartNav Native Module helpers. -// Re-exports dimos NativeModule patterns for CLI arg parsing and LCM helpers. -// Based on dimos/hardware/sensors/lidar/common/dimos_native_module.hpp - -#pragma once - -#include -#include -#include -#include - -#include "std_msgs/Header.hpp" -#include "std_msgs/Time.hpp" - -namespace dimos { - -class NativeModule { -public: - NativeModule(int argc, char** argv) { - for (int i = 1; i < argc; ++i) { - std::string arg(argv[i]); - if (arg.size() > 2 && arg[0] == '-' && arg[1] == '-' && i + 1 < argc) { - args_[arg.substr(2)] = argv[++i]; - } - } - } - - /// Get the full LCM channel string for a declared port. - const std::string& topic(const std::string& port) const { - auto it = args_.find(port); - if (it == args_.end()) { - throw std::runtime_error("NativeModule: no topic for port '" + port + "'"); - } - return it->second; - } - - /// Get a string arg value, or a default if not present. - std::string arg(const std::string& key, const std::string& default_val = "") const { - auto it = args_.find(key); - return it != args_.end() ? it->second : default_val; - } - - /// Get a float arg value, or a default if not present. - float arg_float(const std::string& key, float default_val = 0.0f) const { - auto it = args_.find(key); - return it != args_.end() ? std::stof(it->second) : default_val; - } - - /// Get an int arg value, or a default if not present. - int arg_int(const std::string& key, int default_val = 0) const { - auto it = args_.find(key); - return it != args_.end() ? std::stoi(it->second) : default_val; - } - - /// Get a bool arg value, or a default if not present. - /// Present-but-unparseable values throw, matching arg_int/arg_float's - /// std::stoi/std::stof behaviour — a typo'd value or empty string is a - /// misconfiguration we want to surface immediately, not silently coerce - /// to false. - bool arg_bool(const std::string& key, bool default_val = false) const { - auto it = args_.find(key); - if (it == args_.end()) return default_val; - if (it->second == "true" || it->second == "1") return true; - if (it->second == "false" || it->second == "0") return false; - throw std::runtime_error( - "NativeModule: arg '--" + key + "' has unparseable bool value '" - + it->second + "' (expected true/false or 1/0)"); - } - - /// Check if a port/arg was provided. - bool has(const std::string& key) const { - return args_.count(key) > 0; - } - -private: - std::map args_; -}; - -/// Convert seconds (double) to a ROS-style Time message. -inline std_msgs::Time time_from_seconds(double t) { - std_msgs::Time ts; - ts.sec = static_cast(t); - ts.nsec = static_cast((t - ts.sec) * 1e9); - return ts; -} - -/// Build a stamped Header with auto-incrementing sequence number. -inline std_msgs::Header make_header(const std::string& frame_id, double ts) { - static std::atomic seq{0}; - std_msgs::Header h; - h.seq = seq.fetch_add(1, std::memory_order_relaxed); - h.stamp = time_from_seconds(ts); - h.frame_id = frame_id; - return h; -} - -} // namespace dimos diff --git a/dimos/navigation/jnav/components/loop_closure/unrefined_pgo/cpp/flake.lock b/dimos/navigation/jnav/components/loop_closure/unrefined_pgo/cpp/flake.lock deleted file mode 100644 index 16898a2c2d..0000000000 --- a/dimos/navigation/jnav/components/loop_closure/unrefined_pgo/cpp/flake.lock +++ /dev/null @@ -1,144 +0,0 @@ -{ - "nodes": { - "dimos-lcm": { - "flake": false, - "locked": { - "lastModified": 1769774949, - "narHash": "sha256-icRK7jerqNlwK1WZBrnIP04I2WozzFqTD7qsmnPxQuo=", - "owner": "dimensionalOS", - "repo": "dimos-lcm", - "rev": "0aa72b7b1bd3a65f50f5c03485ee9b728df56afe", - "type": "github" - }, - "original": { - "owner": "dimensionalOS", - "ref": "main", - "repo": "dimos-lcm", - "type": "github" - } - }, - "flake-utils": { - "inputs": { - "systems": "systems" - }, - "locked": { - "lastModified": 1731533236, - "narHash": "sha256-l0KFg5HjrsfsO/JpG+r7fRrqm12kzFHyUHqHCVpMMbI=", - "owner": "numtide", - "repo": "flake-utils", - "rev": "11707dc2f618dd54ca8739b309ec4fc024de578b", - "type": "github" - }, - "original": { - "owner": "numtide", - "repo": "flake-utils", - "type": "github" - } - }, - "gtsam-extended": { - "inputs": { - "flake-utils": [ - "flake-utils" - ], - "nixpkgs": [ - "nixpkgs" - ], - "nixpkgs-stable": "nixpkgs-stable" - }, - "locked": { - "lastModified": 1775168452, - "narHash": "sha256-fsUWPQIw+lk4VEQUOniiPLwiPoldWh8ZdnIdos202+I=", - "owner": "jeff-hykin", - "repo": "gtsam-extended", - "rev": "f4572a80b6339181693aee6029ca28153e59a993", - "type": "github" - }, - "original": { - "owner": "jeff-hykin", - "repo": "gtsam-extended", - "type": "github" - } - }, - "lcm-extended": { - "inputs": { - "flake-utils": [ - "flake-utils" - ], - "nixpkgs": [ - "nixpkgs" - ] - }, - "locked": { - "lastModified": 1774902379, - "narHash": "sha256-gRFvEkbXCEoG4jEmsT+i0bMZ5kDHOtAaPsrbStXjdu4=", - "owner": "jeff-hykin", - "repo": "lcm_extended", - "rev": "7d12ad8546d3daae30528a6c28f2c9ff5b10baf7", - "type": "github" - }, - "original": { - "owner": "jeff-hykin", - "repo": "lcm_extended", - "type": "github" - } - }, - "nixpkgs": { - "locked": { - "lastModified": 1777954456, - "narHash": "sha256-hGdgeU2Nk87RAuZyYjyDjFL6LK7dAZN5RE9+hrDTkDU=", - "owner": "NixOS", - "repo": "nixpkgs", - "rev": "549bd84d6279f9852cae6225e372cc67fb91a4c1", - "type": "github" - }, - "original": { - "owner": "NixOS", - "ref": "nixos-unstable", - "repo": "nixpkgs", - "type": "github" - } - }, - "nixpkgs-stable": { - "locked": { - "lastModified": 1751274312, - "narHash": "sha256-/bVBlRpECLVzjV19t5KMdMFWSwKLtb5RyXdjz3LJT+g=", - "owner": "NixOS", - "repo": "nixpkgs", - "rev": "50ab793786d9de88ee30ec4e4c24fb4236fc2674", - "type": "github" - }, - "original": { - "owner": "NixOS", - "ref": "nixos-24.11", - "repo": "nixpkgs", - "type": "github" - } - }, - "root": { - "inputs": { - "dimos-lcm": "dimos-lcm", - "flake-utils": "flake-utils", - "gtsam-extended": "gtsam-extended", - "lcm-extended": "lcm-extended", - "nixpkgs": "nixpkgs" - } - }, - "systems": { - "locked": { - "lastModified": 1681028828, - "narHash": "sha256-Vy1rq5AaRuLzOxct8nz4T6wlgyUR7zLU309k9mBC768=", - "owner": "nix-systems", - "repo": "default", - "rev": "da67096a3b9bf56a91d16901293e51ba5b49a27e", - "type": "github" - }, - "original": { - "owner": "nix-systems", - "repo": "default", - "type": "github" - } - } - }, - "root": "root", - "version": 7 -} diff --git a/dimos/navigation/jnav/components/loop_closure/unrefined_pgo/cpp/flake.nix b/dimos/navigation/jnav/components/loop_closure/unrefined_pgo/cpp/flake.nix deleted file mode 100644 index ea8eae1327..0000000000 --- a/dimos/navigation/jnav/components/loop_closure/unrefined_pgo/cpp/flake.nix +++ /dev/null @@ -1,70 +0,0 @@ -{ - description = "SmartNav PGO native module (pose graph optimization with iSAM2 + PCL ICP)"; - - inputs = { - nixpkgs.url = "github:NixOS/nixpkgs/nixos-unstable"; - flake-utils.url = "github:numtide/flake-utils"; - lcm-extended = { - url = "github:jeff-hykin/lcm_extended"; - inputs.nixpkgs.follows = "nixpkgs"; - inputs.flake-utils.follows = "flake-utils"; - }; - dimos-lcm = { - url = "github:dimensionalOS/dimos-lcm/main"; - flake = false; - }; - gtsam-extended = { - url = "github:jeff-hykin/gtsam-extended"; - inputs.nixpkgs.follows = "nixpkgs"; - inputs.flake-utils.follows = "flake-utils"; - }; - }; - - outputs = { self, nixpkgs, flake-utils, lcm-extended, dimos-lcm, gtsam-extended, ... }: - flake-utils.lib.eachDefaultSystem (system: - let - pkgs = import nixpkgs { inherit system; }; - lcm = lcm-extended.packages.${system}.lcm; - - gtsam-base = gtsam-extended.packages.${system}.gtsam-cpp; - gtsam = gtsam-base.overrideAttrs (_old: { - src = pkgs.fetchFromGitHub { - owner = "borglab"; - repo = "gtsam"; - rev = "1a9792a7ede244850a413739557635b606f295c0"; - sha256 = "sha256-zxm5TGVPW1vipFVpw01zcvKRw4mkh+5ZBCR1n6G466o="; - }; - env.NIX_CFLAGS_COMPILE = "-Wno-error=array-bounds"; - }); - in { - packages.default = pkgs.stdenv.mkDerivation { - pname = "smartnav-pgo"; - version = "0.1.0"; - src = ./.; - - nativeBuildInputs = [ pkgs.cmake pkgs.pkg-config ]; - buildInputs = [ - lcm - pkgs.glib - pkgs.eigen - pkgs.boost - pkgs.pcl - gtsam - ]; - - env.NIX_CFLAGS_COMPILE = "-Wno-error=array-bounds"; - - cmakeFlags = [ - "-DCMAKE_POLICY_VERSION_MINIMUM=3.5" - "-DFETCHCONTENT_SOURCE_DIR_DIMOS_LCM=${dimos-lcm}" - ]; - - # On macOS, libgtsam.4.dylib is referenced via @rpath but the binary - # has no LC_RPATH entries, so it fails to load at runtime. Add one - # pointing at the gtsam lib dir. - postInstall = pkgs.lib.optionalString pkgs.stdenv.isDarwin '' - ${pkgs.darwin.cctools}/bin/install_name_tool -add_rpath ${gtsam}/lib $out/bin/pgo - ''; - }; - }); -} diff --git a/dimos/navigation/jnav/components/loop_closure/unrefined_pgo/cpp/main.cpp b/dimos/navigation/jnav/components/loop_closure/unrefined_pgo/cpp/main.cpp deleted file mode 100644 index 38314d4390..0000000000 --- a/dimos/navigation/jnav/components/loop_closure/unrefined_pgo/cpp/main.cpp +++ /dev/null @@ -1,299 +0,0 @@ -// PGO NativeModule — faithful port of pgo_node.cpp from ROS2 to LCM. -// Subscribes to registered_scan + odometry, runs SimplePGO (iSAM2 + PCL ICP), -// publishes corrected_odometry, global_map, and TF correction offset. - -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include - -#include "commons.h" -#include "simple_pgo.h" -#include "dimos_native_module.hpp" -#include "point_cloud_utils.hpp" - -#include "nav_msgs/Odometry.hpp" -#include "sensor_msgs/PointCloud2.hpp" -#include "geometry_msgs/Pose.hpp" -#include "geometry_msgs/Quaternion.hpp" -#include "geometry_msgs/Point.hpp" - -static std::atomic g_running{true}; -static void signal_handler(int) { g_running.store(false); } - -// Shared state between LCM callbacks and main loop -static std::mutex g_buffer_mutex; -static std::queue g_cloud_buffer; -static double g_last_message_time = 0.0; - -// Latest odometry for non-keyframe TF broadcasting -static std::mutex g_odom_mutex; -static M3D g_latest_r = M3D::Identity(); -static V3D g_latest_t = V3D::Zero(); -static double g_latest_time = 0.0; -static bool g_has_odom = false; - -class Handlers { -public: - void on_odometry(const lcm::ReceiveBuffer*, const std::string&, - const nav_msgs::Odometry* msg) { - M3D r = Eigen::Quaterniond( - msg->pose.pose.orientation.w, - msg->pose.pose.orientation.x, - msg->pose.pose.orientation.y, - msg->pose.pose.orientation.z - ).toRotationMatrix(); - - V3D t(msg->pose.pose.position.x, - msg->pose.pose.position.y, - msg->pose.pose.position.z); - - double ts = msg->header.stamp.sec + msg->header.stamp.nsec / 1e9; - - std::lock_guard lock(g_odom_mutex); - g_latest_r = r; - g_latest_t = t; - g_latest_time = ts; - g_has_odom = true; - } - - void on_registered_scan(const lcm::ReceiveBuffer*, const std::string&, - const sensor_msgs::PointCloud2* msg) { - std::lock_guard odom_lock(g_odom_mutex); - if (!g_has_odom) - return; - - double ts = g_latest_time; - - // Reject out-of-order messages - if (ts < g_last_message_time) - return; - g_last_message_time = ts; - - CloudWithPose cp; - cp.pose.r = g_latest_r; - cp.pose.t = g_latest_t; - cp.pose.setTime(static_cast(ts), - static_cast((ts - static_cast(ts)) * 1e9)); - - // Parse PointCloud2 to PCL - cp.cloud = CloudType::Ptr(new CloudType); - smartnav::to_pcl(*msg, *cp.cloud); - - std::lock_guard buf_lock(g_buffer_mutex); - g_cloud_buffer.push(cp); - } -}; - -static nav_msgs::Odometry build_odometry(const M3D& r, const V3D& t, double ts, - const std::string& frame_id, - const std::string& child_frame_id) { - nav_msgs::Odometry odom; - odom.header = dimos::make_header(frame_id, ts); - odom.child_frame_id = child_frame_id; - - Eigen::Quaterniond q(r); - odom.pose.pose.position.x = t.x(); - odom.pose.pose.position.y = t.y(); - odom.pose.pose.position.z = t.z(); - odom.pose.pose.orientation.x = q.x(); - odom.pose.pose.orientation.y = q.y(); - odom.pose.pose.orientation.z = q.z(); - odom.pose.pose.orientation.w = q.w(); - - return odom; -} - -int main(int argc, char** argv) -{ - signal(SIGTERM, signal_handler); - signal(SIGINT, signal_handler); - - dimos::NativeModule mod(argc, argv); - - // Port topics - std::string scan_topic = mod.topic("registered_scan"); - std::string odom_topic = mod.topic("odometry"); - std::string corrected_odom_topic = mod.topic("corrected_odometry"); - std::string global_map_topic = mod.topic("global_map"); - std::string tf_topic = mod.topic("pgo_tf"); - - // Config parameters - Config config; - config.key_pose_delta_deg = mod.arg_float("key_pose_delta_deg", 10.0f); - config.key_pose_delta_trans = mod.arg_float("key_pose_delta_trans", 0.5f); - config.loop_search_radius = mod.arg_float("loop_search_radius", 1.0f); - config.loop_time_tresh = mod.arg_float("loop_time_thresh", 60.0f); - config.loop_score_tresh = mod.arg_float("loop_score_thresh", 0.15f); - config.loop_submap_half_range = mod.arg_int("loop_submap_half_range", 5); - config.submap_resolution = mod.arg_float("submap_resolution", 0.1f); - config.min_loop_detect_duration = mod.arg_float("min_loop_detect_duration", 5.0f); - - // Node-level config - std::string world_frame = mod.arg("world_frame", "map"); - std::string local_frame = mod.arg("local_frame", "odom"); - float global_map_voxel_size = mod.arg_float("global_map_voxel_size", 0.1f); - float global_map_publish_rate = mod.arg_float("global_map_publish_rate", 1.0f); - // rate <= 0 disables global_map entirely (it's a big cloud that can overflow - // LCM's single-message limit and congest the bus). Consumers that only need - // corrected_odometry / pose_graph should turn it off. - bool publish_global_map = global_map_publish_rate > 0; - double global_map_interval = publish_global_map ? 1.0 / global_map_publish_rate : 0.0; - - // Unregister mode: transform world-frame scans to body-frame - bool unregister_input = mod.arg_bool("unregister_input", true); - - bool debug = mod.arg_bool("debug", false); - - pcl::console::setVerbosityLevel( - debug ? pcl::console::L_INFO : pcl::console::L_ERROR); - - SimplePGO pgo(config); - - lcm::LCM lcm; - if (!lcm.good()) { - fprintf(stderr, "PGO: LCM init failed\n"); - return 1; - } - - Handlers handlers; - lcm.subscribe(odom_topic, &Handlers::on_odometry, &handlers); - lcm.subscribe(scan_topic, &Handlers::on_registered_scan, &handlers); - - if (debug) { - fprintf(stderr, "PGO native module started\n"); - fprintf(stderr, " registered_scan: %s\n", scan_topic.c_str()); - fprintf(stderr, " odometry: %s\n", odom_topic.c_str()); - fprintf(stderr, " corrected_odometry: %s\n", corrected_odom_topic.c_str()); - fprintf(stderr, " global_map: %s\n", global_map_topic.c_str()); - fprintf(stderr, " pgo_tf: %s\n", tf_topic.c_str()); - } - - double last_global_map_time = 0.0; - int timer_period_ms = 50; // 20 Hz, matching original - - while (g_running.load()) { - // Drain all pending LCM messages - while (lcm.handleTimeout(0) > 0) {} - - // Check buffer - CloudWithPose cp; - bool has_data = false; - { - std::lock_guard lock(g_buffer_mutex); - if (!g_cloud_buffer.empty()) { - cp = g_cloud_buffer.front(); - // Drain entire queue (matching original: process oldest, discard rest) - while (!g_cloud_buffer.empty()) { - g_cloud_buffer.pop(); - } - has_data = true; - } - } - - if (!has_data) { - std::this_thread::sleep_for(std::chrono::milliseconds(timer_period_ms)); - continue; - } - - // Optionally transform world-frame scan to body-frame - if (unregister_input && cp.cloud && cp.cloud->size() > 0) { - CloudType::Ptr body_cloud(new CloudType); - // body = R_odom^T * (world_pts - t_odom) - M3D r_inv = cp.pose.r.transpose(); - for (const auto& pt : *cp.cloud) { - V3D world_pt(pt.x, pt.y, pt.z); - V3D body_pt = r_inv * (world_pt - cp.pose.t); - PointType bp; - bp.x = static_cast(body_pt.x()); - bp.y = static_cast(body_pt.y()); - bp.z = static_cast(body_pt.z()); - bp.intensity = pt.intensity; - body_cloud->push_back(bp); - } - cp.cloud = body_cloud; - } - - double cur_time = cp.pose.second; - - if (!pgo.addKeyPose(cp)) { - // Not a keyframe — still broadcast TF and corrected odom - M3D corr_r = pgo.offsetR() * cp.pose.r; - V3D corr_t = pgo.offsetR() * cp.pose.t + pgo.offsetT(); - - nav_msgs::Odometry corrected = build_odometry( - corr_r, corr_t, cur_time, world_frame, "base_link"); - lcm.publish(corrected_odom_topic, &corrected); - - nav_msgs::Odometry tf_msg = build_odometry( - pgo.offsetR(), pgo.offsetT(), cur_time, world_frame, local_frame); - lcm.publish(tf_topic, &tf_msg); - - std::this_thread::sleep_for(std::chrono::milliseconds(timer_period_ms)); - continue; - } - - // Keyframe added - pgo.searchForLoopPairs(); - pgo.smoothAndUpdate(); - - if (debug) { - fprintf(stderr, "PGO: keyframe %zu at (%.1f, %.1f, %.1f)\n", - pgo.keyPoses().size(), - cp.pose.t.x(), cp.pose.t.y(), cp.pose.t.z()); - } - - // Publish corrected odometry - M3D corr_r = pgo.offsetR() * cp.pose.r; - V3D corr_t = pgo.offsetR() * cp.pose.t + pgo.offsetT(); - nav_msgs::Odometry corrected = build_odometry( - corr_r, corr_t, cur_time, world_frame, "base_link"); - lcm.publish(corrected_odom_topic, &corrected); - - // Publish TF correction (map -> odom offset) - nav_msgs::Odometry tf_msg = build_odometry( - pgo.offsetR(), pgo.offsetT(), cur_time, world_frame, local_frame); - lcm.publish(tf_topic, &tf_msg); - - // Publish global map (throttled) - double now = cur_time; - if (publish_global_map && now - last_global_map_time >= global_map_interval) { - last_global_map_time = now; - - if (!pgo.keyPoses().empty()) { - CloudType::Ptr global_cloud(new CloudType); - for (size_t i = 0; i < pgo.keyPoses().size(); i++) { - CloudType::Ptr world_cloud(new CloudType); - pcl::transformPointCloud( - *pgo.keyPoses()[i].body_cloud, - *world_cloud, - pgo.keyPoses()[i].t_global, - Eigen::Quaterniond(pgo.keyPoses()[i].r_global)); - *global_cloud += *world_cloud; - } - - // Voxel downsample - CloudType::Ptr filtered(new CloudType); - pcl::VoxelGrid voxel; - voxel.setInputCloud(global_cloud); - voxel.setLeafSize(global_map_voxel_size, global_map_voxel_size, global_map_voxel_size); - voxel.filter(*filtered); - - sensor_msgs::PointCloud2 map_msg = smartnav::from_pcl(*filtered, world_frame, now); - lcm.publish(global_map_topic, &map_msg); - } - } - - std::this_thread::sleep_for(std::chrono::milliseconds(timer_period_ms)); - } - - if (debug) fprintf(stderr, "PGO native module shutting down\n"); - return 0; -} diff --git a/dimos/navigation/jnav/components/loop_closure/unrefined_pgo/cpp/point_cloud_utils.hpp b/dimos/navigation/jnav/components/loop_closure/unrefined_pgo/cpp/point_cloud_utils.hpp deleted file mode 100644 index 0970e1f8de..0000000000 --- a/dimos/navigation/jnav/components/loop_closure/unrefined_pgo/cpp/point_cloud_utils.hpp +++ /dev/null @@ -1,170 +0,0 @@ -// Point cloud utility functions for SmartNav native modules. -// Provides PointCloud2 building/parsing helpers that work with dimos-lcm types. -// When USE_PCL is defined, also provides PCL interop utilities. - -#pragma once - -#include -#include -#include - -#include "sensor_msgs/PointCloud2.hpp" -#include "sensor_msgs/PointField.hpp" -#include "std_msgs/Header.hpp" - -#include "dimos_native_module.hpp" - -#ifdef USE_PCL -#include -#include -#include -#endif - -namespace smartnav { - -// Simple XYZI point structure (no PCL dependency) -struct PointXYZI { - float x, y, z, intensity; -}; - -// Build PointCloud2 from vector of XYZI points -inline sensor_msgs::PointCloud2 build_pointcloud2( - const std::vector& points, - const std::string& frame_id, - double timestamp -) { - sensor_msgs::PointCloud2 pc; - pc.header = dimos::make_header(frame_id, timestamp); - pc.height = 1; - pc.width = static_cast(points.size()); - pc.is_bigendian = 0; - pc.is_dense = 1; - - // Fields: x, y, z, intensity (all float32) - pc.fields_length = 4; - pc.fields.resize(4); - auto make_field = [](const std::string& name, int32_t offset) { - sensor_msgs::PointField f; - f.name = name; - f.offset = offset; - f.datatype = sensor_msgs::PointField::FLOAT32; - f.count = 1; - return f; - }; - pc.fields[0] = make_field("x", 0); - pc.fields[1] = make_field("y", 4); - pc.fields[2] = make_field("z", 8); - pc.fields[3] = make_field("intensity", 12); - - pc.point_step = 16; - pc.row_step = pc.point_step * pc.width; - pc.data_length = pc.row_step; - pc.data.resize(pc.data_length); - - for (size_t i = 0; i < points.size(); ++i) { - float* dst = reinterpret_cast(pc.data.data() + i * 16); - dst[0] = points[i].x; - dst[1] = points[i].y; - dst[2] = points[i].z; - dst[3] = points[i].intensity; - } - - return pc; -} - -// Parse PointCloud2 into vector of XYZI points -inline std::vector parse_pointcloud2(const sensor_msgs::PointCloud2& pc) { - std::vector points; - if (pc.width == 0 || pc.height == 0) return points; - - int num_points = pc.width * pc.height; - points.reserve(num_points); - - // Find field offsets - int x_off = -1, y_off = -1, z_off = -1, i_off = -1; - for (const auto& f : pc.fields) { - if (f.name == "x") x_off = f.offset; - else if (f.name == "y") y_off = f.offset; - else if (f.name == "z") z_off = f.offset; - else if (f.name == "intensity") i_off = f.offset; - } - - if (x_off < 0 || y_off < 0 || z_off < 0) return points; - - for (int n = 0; n < num_points; ++n) { - if (static_cast((n + 1) * pc.point_step) > pc.data.size()) break; - const uint8_t* base = pc.data.data() + n * pc.point_step; - PointXYZI p; - std::memcpy(&p.x, base + x_off, sizeof(float)); - std::memcpy(&p.y, base + y_off, sizeof(float)); - std::memcpy(&p.z, base + z_off, sizeof(float)); - if (i_off >= 0) std::memcpy(&p.intensity, base + i_off, sizeof(float)); - else p.intensity = 0.0f; - points.push_back(p); - } - - return points; -} - -// Get timestamp from PointCloud2 header -inline double get_timestamp(const sensor_msgs::PointCloud2& pc) { - return pc.header.stamp.sec + pc.header.stamp.nsec / 1e9; -} - -#ifdef USE_PCL -// Convert dimos-lcm PointCloud2 to PCL point cloud -inline void to_pcl(const sensor_msgs::PointCloud2& pc, - pcl::PointCloud& cloud) { - auto points = parse_pointcloud2(pc); - cloud.clear(); - cloud.reserve(points.size()); - for (const auto& p : points) { - pcl::PointXYZI pt; - pt.x = p.x; - pt.y = p.y; - pt.z = p.z; - pt.intensity = p.intensity; - cloud.push_back(pt); - } - cloud.width = cloud.size(); - cloud.height = 1; - cloud.is_dense = true; -} - -// Convert PCL point cloud to dimos-lcm PointCloud2 -inline sensor_msgs::PointCloud2 from_pcl( - const pcl::PointCloud& cloud, - const std::string& frame_id, - double timestamp -) { - std::vector points; - points.reserve(cloud.size()); - for (const auto& pt : cloud) { - points.push_back({pt.x, pt.y, pt.z, pt.intensity}); - } - return build_pointcloud2(points, frame_id, timestamp); -} -#endif - -// Quaternion to RPY conversion -inline void quat_to_rpy(double qx, double qy, double qz, double qw, - double& roll, double& pitch, double& yaw) { - // Roll (x-axis rotation) - double sinr_cosp = 2.0 * (qw * qx + qy * qz); - double cosr_cosp = 1.0 - 2.0 * (qx * qx + qy * qy); - roll = std::atan2(sinr_cosp, cosr_cosp); - - // Pitch (y-axis rotation) - double sinp = 2.0 * (qw * qy - qz * qx); - if (std::abs(sinp) >= 1.0) - pitch = std::copysign(M_PI / 2, sinp); - else - pitch = std::asin(sinp); - - // Yaw (z-axis rotation) - double siny_cosp = 2.0 * (qw * qz + qx * qy); - double cosy_cosp = 1.0 - 2.0 * (qy * qy + qz * qz); - yaw = std::atan2(siny_cosp, cosy_cosp); -} - -} // namespace smartnav diff --git a/dimos/navigation/jnav/components/loop_closure/unrefined_pgo/cpp/simple_pgo.cpp b/dimos/navigation/jnav/components/loop_closure/unrefined_pgo/cpp/simple_pgo.cpp deleted file mode 100644 index 5fc18bf0e7..0000000000 --- a/dimos/navigation/jnav/components/loop_closure/unrefined_pgo/cpp/simple_pgo.cpp +++ /dev/null @@ -1,211 +0,0 @@ -#include "simple_pgo.h" - -SimplePGO::SimplePGO(const Config &config) : m_config(config) -{ - gtsam::ISAM2Params isam2_params; - isam2_params.relinearizeThreshold = 0.01; - isam2_params.relinearizeSkip = 1; - m_isam2 = std::make_shared(isam2_params); - m_initial_values.clear(); - m_graph.resize(0); - m_r_offset.setIdentity(); - m_t_offset.setZero(); - - m_icp.setMaximumIterations(50); - m_icp.setMaxCorrespondenceDistance(10); - m_icp.setTransformationEpsilon(1e-6); - m_icp.setEuclideanFitnessEpsilon(1e-6); - m_icp.setRANSACIterations(0); -} - -bool SimplePGO::isKeyPose(const PoseWithTime &pose) -{ - if (m_key_poses.size() == 0) - return true; - const KeyPoseWithCloud &last_item = m_key_poses.back(); - double delta_trans = (pose.t - last_item.t_local).norm(); - double delta_deg = Eigen::Quaterniond(pose.r).angularDistance(Eigen::Quaterniond(last_item.r_local)) * 57.324; - if (delta_trans > m_config.key_pose_delta_trans || delta_deg > m_config.key_pose_delta_deg) - return true; - return false; -} -bool SimplePGO::addKeyPose(const CloudWithPose &cloud_with_pose) -{ - bool is_key_pose = isKeyPose(cloud_with_pose.pose); - if (!is_key_pose) - return false; - size_t idx = m_key_poses.size(); - M3D init_r = m_r_offset * cloud_with_pose.pose.r; - V3D init_t = m_r_offset * cloud_with_pose.pose.t + m_t_offset; - // 添加初始值 - m_initial_values.insert(idx, gtsam::Pose3(gtsam::Rot3(init_r), gtsam::Point3(init_t))); - if (idx == 0) - { - // 添加先验约束 - gtsam::noiseModel::Diagonal::shared_ptr noise = gtsam::noiseModel::Diagonal::Variances(gtsam::Vector6::Ones() * 1e-12); - m_graph.add(gtsam::PriorFactor(idx, gtsam::Pose3(gtsam::Rot3(init_r), gtsam::Point3(init_t)), noise)); - } - else - { - // 添加里程计约束 - const KeyPoseWithCloud &last_item = m_key_poses.back(); - M3D r_between = last_item.r_local.transpose() * cloud_with_pose.pose.r; - V3D t_between = last_item.r_local.transpose() * (cloud_with_pose.pose.t - last_item.t_local); - gtsam::noiseModel::Diagonal::shared_ptr noise = gtsam::noiseModel::Diagonal::Variances((gtsam::Vector(6) << 1e-6, 1e-6, 1e-6, 1e-4, 1e-4, 1e-6).finished()); - m_graph.add(gtsam::BetweenFactor(idx - 1, idx, gtsam::Pose3(gtsam::Rot3(r_between), gtsam::Point3(t_between)), noise)); - } - KeyPoseWithCloud item; - item.time = cloud_with_pose.pose.second; - item.r_local = cloud_with_pose.pose.r; - item.t_local = cloud_with_pose.pose.t; - item.body_cloud = cloud_with_pose.cloud; - item.r_global = init_r; - item.t_global = init_t; - m_key_poses.push_back(item); - return true; -} - -CloudType::Ptr SimplePGO::getSubMap(int idx, int half_range, double resolution) -{ - assert(idx >= 0 && idx < static_cast(m_key_poses.size())); - int min_idx = std::max(0, idx - half_range); - int max_idx = std::min(static_cast(m_key_poses.size()) - 1, idx + half_range); - - CloudType::Ptr ret(new CloudType); - for (int i = min_idx; i <= max_idx; i++) - { - - CloudType::Ptr body_cloud = m_key_poses[i].body_cloud; - CloudType::Ptr global_cloud(new CloudType); - pcl::transformPointCloud(*body_cloud, *global_cloud, m_key_poses[i].t_global, Eigen::Quaterniond(m_key_poses[i].r_global)); - *ret += *global_cloud; - } - if (resolution > 0) - { - pcl::VoxelGrid voxel_grid; - voxel_grid.setLeafSize(resolution, resolution, resolution); - voxel_grid.setInputCloud(ret); - voxel_grid.filter(*ret); - } - return ret; -} - -void SimplePGO::searchForLoopPairs() -{ - if (m_key_poses.size() < 10) - return; - if (m_config.min_loop_detect_duration > 0.0) - { - if (m_history_pairs.size() > 0) - { - double current_time = m_key_poses.back().time; - double last_time = m_key_poses[m_history_pairs.back().second].time; - if (current_time - last_time < m_config.min_loop_detect_duration) - return; - } - } - - size_t cur_idx = m_key_poses.size() - 1; - const KeyPoseWithCloud &last_item = m_key_poses.back(); - pcl::PointXYZ last_pose_pt; - last_pose_pt.x = last_item.t_global(0); - last_pose_pt.y = last_item.t_global(1); - last_pose_pt.z = last_item.t_global(2); - - pcl::PointCloud::Ptr key_poses_cloud(new pcl::PointCloud); - for (size_t i = 0; i < m_key_poses.size() - 1; i++) - { - pcl::PointXYZ pt; - pt.x = m_key_poses[i].t_global(0); - pt.y = m_key_poses[i].t_global(1); - pt.z = m_key_poses[i].t_global(2); - key_poses_cloud->push_back(pt); - } - pcl::KdTreeFLANN kdtree; - kdtree.setInputCloud(key_poses_cloud); - std::vector ids; - std::vector sqdists; - int neighbors = kdtree.radiusSearch(last_pose_pt, m_config.loop_search_radius, ids, sqdists); - if (neighbors == 0) - return; - - int loop_idx = -1; - for (size_t i = 0; i < ids.size(); i++) - { - int idx = ids[i]; - if (std::abs(last_item.time - m_key_poses[idx].time) > m_config.loop_time_tresh) - { - loop_idx = idx; - break; - } - } - - if (loop_idx == -1) - return; - - CloudType::Ptr target_cloud = getSubMap(loop_idx, m_config.loop_submap_half_range, m_config.submap_resolution); - CloudType::Ptr source_cloud = getSubMap(m_key_poses.size() - 1, 0, m_config.submap_resolution); - CloudType::Ptr align_cloud(new CloudType); - - m_icp.setInputSource(source_cloud); - m_icp.setInputTarget(target_cloud); - m_icp.align(*align_cloud); - - if (!m_icp.hasConverged() || m_icp.getFitnessScore() > m_config.loop_score_tresh) - return; - - M4F loop_transform = m_icp.getFinalTransformation(); - - LoopPair one_pair; - one_pair.source_id = cur_idx; - one_pair.target_id = loop_idx; - one_pair.score = m_icp.getFitnessScore(); - M3D r_refined = loop_transform.block<3, 3>(0, 0).cast() * m_key_poses[cur_idx].r_global; - V3D t_refined = loop_transform.block<3, 3>(0, 0).cast() * m_key_poses[cur_idx].t_global + loop_transform.block<3, 1>(0, 3).cast(); - one_pair.r_offset = m_key_poses[loop_idx].r_global.transpose() * r_refined; - one_pair.t_offset = m_key_poses[loop_idx].r_global.transpose() * (t_refined - m_key_poses[loop_idx].t_global); - m_cache_pairs.push_back(one_pair); - m_history_pairs.emplace_back(one_pair.target_id, one_pair.source_id); -} - -void SimplePGO::smoothAndUpdate() -{ - bool has_loop = !m_cache_pairs.empty(); - // 添加回环因子 - if (has_loop) - { - for (LoopPair &pair : m_cache_pairs) - { - m_graph.add(gtsam::BetweenFactor(pair.target_id, pair.source_id, - gtsam::Pose3(gtsam::Rot3(pair.r_offset), - gtsam::Point3(pair.t_offset)), - gtsam::noiseModel::Diagonal::Variances(gtsam::Vector6::Ones() * pair.score))); - } - std::vector().swap(m_cache_pairs); - } - // smooth and mapping - m_isam2->update(m_graph, m_initial_values); - m_isam2->update(); - if (has_loop) - { - m_isam2->update(); - m_isam2->update(); - m_isam2->update(); - m_isam2->update(); - } - m_graph.resize(0); - m_initial_values.clear(); - - // update key poses - gtsam::Values estimate_values = m_isam2->calculateBestEstimate(); - for (size_t i = 0; i < m_key_poses.size(); i++) - { - gtsam::Pose3 pose = estimate_values.at(i); - m_key_poses[i].r_global = pose.rotation().matrix().cast(); - m_key_poses[i].t_global = pose.translation().matrix().cast(); - } - // update offset - const KeyPoseWithCloud &last_item = m_key_poses.back(); - m_r_offset = last_item.r_global * last_item.r_local.transpose(); - m_t_offset = last_item.t_global - m_r_offset * last_item.t_local; -} diff --git a/dimos/navigation/jnav/components/loop_closure/unrefined_pgo/cpp/simple_pgo.h b/dimos/navigation/jnav/components/loop_closure/unrefined_pgo/cpp/simple_pgo.h deleted file mode 100644 index 7f80c5b09a..0000000000 --- a/dimos/navigation/jnav/components/loop_closure/unrefined_pgo/cpp/simple_pgo.h +++ /dev/null @@ -1,78 +0,0 @@ -#pragma once -#include "commons.h" -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -struct KeyPoseWithCloud -{ - M3D r_local; - V3D t_local; - M3D r_global; - V3D t_global; - double time; - CloudType::Ptr body_cloud; -}; -struct LoopPair -{ - size_t source_id; - size_t target_id; - M3D r_offset; - V3D t_offset; - double score; -}; - -struct Config -{ - double key_pose_delta_deg = 10; - double key_pose_delta_trans = 1.0; - double loop_search_radius = 1.0; - double loop_time_tresh = 60.0; - double loop_score_tresh = 0.15; - int loop_submap_half_range = 5; - double submap_resolution = 0.1; - double min_loop_detect_duration = 10.0; -}; - -class SimplePGO -{ -public: - SimplePGO(const Config &config); - - bool isKeyPose(const PoseWithTime &pose); - - bool addKeyPose(const CloudWithPose &cloud_with_pose); - - bool hasLoop(){return m_cache_pairs.size() > 0;} - - void searchForLoopPairs(); - - void smoothAndUpdate(); - - CloudType::Ptr getSubMap(int idx, int half_range, double resolution); - std::vector> &historyPairs() { return m_history_pairs; } - std::vector &keyPoses() { return m_key_poses; } - - M3D offsetR() { return m_r_offset; } - V3D offsetT() { return m_t_offset; } - -private: - Config m_config; - std::vector m_key_poses; - std::vector> m_history_pairs; - std::vector m_cache_pairs; - M3D m_r_offset; - V3D m_t_offset; - std::shared_ptr m_isam2; - gtsam::Values m_initial_values; - gtsam::NonlinearFactorGraph m_graph; - pcl::IterativeClosestPoint m_icp; -}; diff --git a/dimos/navigation/jnav/components/loop_closure/unrefined_pgo/module.py b/dimos/navigation/jnav/components/loop_closure/unrefined_pgo/module.py deleted file mode 100644 index 73a5fc59e4..0000000000 --- a/dimos/navigation/jnav/components/loop_closure/unrefined_pgo/module.py +++ /dev/null @@ -1,286 +0,0 @@ -# Copyright 2026 Dimensional Inc. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""The unrefined PGO — a frozen standalone snapshot of main's cmu_nav PGO -(GTSAM iSAM2 + PCL ICP C++ binary, the original gsc_pgo was refined from), -adapted to the LoopClosure spec without touching the binary. - -The cpp/ directory here is a copy of `cmu_nav/modules/pgo/cpp` at the time -of the snapshot, so later cmu_nav changes don't silently move this baseline. - -Spec adaptations, all Python-side: - * `lidar` — the binary expects a `registered_scan` topic; `_collect_topics` - aliases the lidar port's topic onto that arg name. - * `pose_graph` — the binary doesn't expose its internal graph, only the - current map->odom offset (`pgo_tf`) and corrected odometry. The wrapper - keyframes the RAW odometry stream (same delta gates as the binary) and - re-applies the LATEST offset to every keyframe on each correction update. - A single global offset can't reproduce iSAM2's per-keyframe smoothing — - but that offset is exactly what this PGO exposes to consumers, so the - synthesized graph is an honest picture of its output. - * `loop_closure_event` — emitted when the offset jumps by more than the - `_LOOP_EVENT_*` thresholds (the offset only moves materially when a loop - closure lands).""" - -from __future__ import annotations - -from dataclasses import dataclass -import math -from pathlib import Path -import threading -import time - -import numpy as np -from reactivex.disposable import Disposable -from scipy.spatial.transform import Rotation - -from dimos.core.core import rpc -from dimos.core.native_module import NativeModule, NativeModuleConfig -from dimos.core.stream import In, Out -from dimos.msgs.geometry_msgs.PoseStamped import PoseStamped -from dimos.msgs.geometry_msgs.Quaternion import Quaternion -from dimos.msgs.geometry_msgs.Transform import Transform -from dimos.msgs.geometry_msgs.Vector3 import Vector3 -from dimos.msgs.nav_msgs.Odometry import Odometry -from dimos.msgs.sensor_msgs.PointCloud2 import PointCloud2 -from dimos.navigation.jnav.components.loop_closure.spec import LoopClosure -from dimos.navigation.jnav.msgs.Graph3D import Graph3D -from dimos.navigation.jnav.msgs.GraphDelta3D import GraphDelta3D -from dimos.utils.logging_config import setup_logger - -logger = setup_logger() - -# Offset jumps below these are smoothing noise, not loop closures. -_LOOP_EVENT_MIN_TRANS_M = 0.05 -_LOOP_EVENT_MIN_ROT_DEG = 1.0 - - -class PGOConfig(NativeModuleConfig): - cwd: str | None = str(Path(__file__).resolve().parent / "cpp") - # Absolute so the exists() check works from any worker cwd (skips rebuild). - executable: str = str(Path(__file__).resolve().parent / "cpp/result/bin/pgo") - # path:$PWD makes nix see this (git-untracked) copied directory. - build_command: str | None = 'nix build "path:$PWD#default" --no-write-lock-file' - - # Frame names - world_frame: str = "map" - local_frame: str = "odom" - - # Keyframe detection - key_pose_delta_deg: float = 10.0 - key_pose_delta_trans: float = 0.5 - - # Loop closure - loop_search_radius: float = 1.0 - loop_time_thresh: float = 60.0 - loop_score_thresh: float = 0.15 - loop_submap_half_range: int = 5 - submap_resolution: float = 0.1 - min_loop_detect_duration: float = 5.0 - - # Input mode: transform world-frame scans to body-frame using odom - unregister_input: bool = True - - # Global map publishing - global_map_voxel_size: float = 0.1 - global_map_publish_rate: float = 1.0 - - debug: bool = False - - -@dataclass -class _RawKeyframe: - ts: float - translation: np.ndarray # (3,) - rotation: np.ndarray # 3x3 - - -class PGO(NativeModule, LoopClosure): - """Pose graph optimization with loop closure using GTSAM iSAM2 + PCL ICP.""" - - config: PGOConfig - - lidar: In[PointCloud2] - odometry: In[Odometry] - corrected_odometry: Out[Odometry] - pose_graph: Out[Graph3D] - loop_closure_event: Out[GraphDelta3D] - global_map: Out[PointCloud2] - pgo_tf: Out[Odometry] - - def __init__(self, **kwargs: object) -> None: - super().__init__(**kwargs) - self._keyframes: list[_RawKeyframe] = [] - self._offset_rotation = np.eye(3) - self._offset_translation = np.zeros(3) - self._graph_lock = threading.Lock() - - def _collect_topics(self) -> dict[str, str]: - topics = super()._collect_topics() - # The binary asks for --registered_scan; feed it the lidar topic. - if "lidar" in topics: - topics["registered_scan"] = topics["lidar"] - return topics - - @rpc - def start(self) -> None: - super().start() - self.register_disposable( - Disposable(self.pgo_tf.transport.subscribe(self._on_tf_correction, self.pgo_tf)) - ) - self.register_disposable( - Disposable(self.odometry.transport.subscribe(self._on_raw_odometry, self.odometry)) - ) - # Seed identity TF so consumers can query map->body immediately. - self._publish_tf( - translation=(0.0, 0.0, 0.0), - rotation=(0.0, 0.0, 0.0, 1.0), - ts=time.time(), - ) - if self.config.debug: - logger.info("unrefined PGO native module started (C++ iSAM2 + PCL ICP)") - - @rpc - def stop(self) -> None: - super().stop() - - # --- TF passthrough (same as the cmu_nav wrapper) ----------------------- - - def _publish_tf( - self, - translation: tuple[float, float, float], - rotation: tuple[float, float, float, float], - ts: float, - ) -> None: - self.tf.publish( - Transform( - frame_id=self.config.world_frame, - child_frame_id=self.config.local_frame, - translation=Vector3(*translation), - rotation=Quaternion(*rotation), - ts=ts, - ) - ) - - def _on_tf_correction(self, msg: Odometry) -> None: - self._publish_tf( - translation=(msg.pose.position.x, msg.pose.position.y, msg.pose.position.z), - rotation=( - msg.pose.orientation.x, - msg.pose.orientation.y, - msg.pose.orientation.z, - msg.pose.orientation.w, - ), - ts=msg.ts or time.time(), - ) - - new_rotation = Rotation.from_quat( - [ - msg.pose.orientation.x, - msg.pose.orientation.y, - msg.pose.orientation.z, - msg.pose.orientation.w, - ] - ).as_matrix() - new_translation = np.array([msg.pose.position.x, msg.pose.position.y, msg.pose.position.z]) - - with self._graph_lock: - delta_trans = float(np.linalg.norm(new_translation - self._offset_translation)) - cos_theta = float( - np.clip((np.trace(self._offset_rotation.T @ new_rotation) - 1.0) / 2.0, -1.0, 1.0) - ) - delta_deg = math.degrees(math.acos(cos_theta)) - self._offset_rotation = new_rotation - self._offset_translation = new_translation - is_loop = ( - delta_trans > _LOOP_EVENT_MIN_TRANS_M or delta_deg > _LOOP_EVENT_MIN_ROT_DEG - ) and bool(self._keyframes) - graph_msg = self._build_graph(msg.ts) if self._keyframes else None - event = self._build_loop_event(msg.ts) if is_loop else None - - if graph_msg is not None: - self.pose_graph.publish(graph_msg) - if event is not None: - self.loop_closure_event.publish(event) - - # --- synthesized pose graph (the binary doesn't expose its own) ---------- - - def _on_raw_odometry(self, msg: Odometry) -> None: - rotation = Rotation.from_quat( - [ - msg.pose.orientation.x, - msg.pose.orientation.y, - msg.pose.orientation.z, - msg.pose.orientation.w, - ] - ).as_matrix() - translation = np.array([msg.pose.position.x, msg.pose.position.y, msg.pose.position.z]) - - with self._graph_lock: - if not self._is_keyframe(rotation, translation): - return - self._keyframes.append( - _RawKeyframe(ts=msg.ts, translation=translation, rotation=rotation) - ) - graph_msg = self._build_graph(msg.ts) - self.pose_graph.publish(graph_msg) - - def _is_keyframe(self, rotation: np.ndarray, translation: np.ndarray) -> bool: - if not self._keyframes: - return True - last = self._keyframes[-1] - delta_trans = float(np.linalg.norm(translation - last.translation)) - cos_theta = float(np.clip((np.trace(last.rotation.T @ rotation) - 1.0) / 2.0, -1.0, 1.0)) - delta_deg = math.degrees(math.acos(cos_theta)) - return ( - delta_trans > self.config.key_pose_delta_trans - or delta_deg > self.config.key_pose_delta_deg - ) - - def _corrected_node(self, index: int, keyframe: _RawKeyframe) -> Graph3D.Node3D: - rotation = self._offset_rotation @ keyframe.rotation - translation = self._offset_rotation @ keyframe.translation + self._offset_translation - quaternion = Rotation.from_matrix(rotation).as_quat() - return Graph3D.Node3D( - pose=PoseStamped( - ts=keyframe.ts, - frame_id=self.config.world_frame, - position=[float(v) for v in translation], - orientation=[float(v) for v in quaternion], - ), - id=index, - ) - - def _build_graph(self, ts: float) -> Graph3D: - """Caller must hold ``_graph_lock``.""" - nodes = [ - self._corrected_node(index, keyframe) for index, keyframe in enumerate(self._keyframes) - ] - edges = [ - Graph3D.Edge(start_id=index - 1, end_id=index, timestamp=self._keyframes[index].ts) - for index in range(1, len(self._keyframes)) - ] - return Graph3D(ts=ts, nodes=nodes, edges=edges) - - def _build_loop_event(self, ts: float) -> GraphDelta3D: - """Caller must hold ``_graph_lock``.""" - latest_index = len(self._keyframes) - 1 - identity = GraphDelta3D.Transform( - translation=Vector3(0.0, 0.0, 0.0), rotation=Quaternion(0.0, 0.0, 0.0, 1.0) - ) - return GraphDelta3D( - ts=ts, - nodes=[self._corrected_node(latest_index, self._keyframes[latest_index])], - transforms=[identity], - ) From 844f8f82e0a3b001f4e22eec3fae36ee47011079 Mon Sep 17 00:00:00 2001 From: Jeff Hykin Date: Wed, 24 Jun 2026 15:38:40 +0800 Subject: [PATCH 14/21] address greptile review: guard SQL table names, lock tf lazy-load, close cached stores --- dimos/memory2/db_tf.py | 48 ++++++++++++------- .../loop_closure/gsc_pgo/post_process.py | 5 ++ dimos/navigation/jnav/utils/recording_db.py | 11 +++++ 3 files changed, 48 insertions(+), 16 deletions(-) diff --git a/dimos/memory2/db_tf.py b/dimos/memory2/db_tf.py index 60b40242d4..7d498d1abe 100644 --- a/dimos/memory2/db_tf.py +++ b/dimos/memory2/db_tf.py @@ -26,6 +26,9 @@ from __future__ import annotations +import re +import sqlite3 +import threading from typing import TYPE_CHECKING import numpy as np @@ -43,6 +46,15 @@ # Larger than any single recording's span so the buffer never prunes loaded # transforms (MultiTBuffer drops samples older than ts - buffer_size). _NO_PRUNE = 1.0e15 +# SQLite can't parameterize table names, so caller-supplied stream names are +# interpolated; allow only safe identifiers to keep that injection-free. +_SAFE_TABLE = re.compile(r"^[A-Za-z_][A-Za-z0-9_]*$") + + +def _safe_table(name: str) -> str: + if not _SAFE_TABLE.match(name): + raise ValueError(f"unsafe stream/table name: {name!r}") + return name class DbTf: @@ -52,23 +64,27 @@ def __init__(self, store: Store, stream_names: tuple[str, ...] = TF_STREAMS) -> self._store = store self._stream_names = stream_names self._buffer: MultiTBuffer | None = None + self._load_lock = threading.Lock() def _ensure_loaded(self) -> MultiTBuffer: if self._buffer is not None: return self._buffer - buffer = MultiTBuffer(buffer_size=_NO_PRUNE) - available = set(self._store.list_streams()) - for name in self._stream_names: - if name not in available: - continue - for observation in self._store.stream(name, TFMessage): - message = observation.data - transforms = getattr(message, "transforms", None) - if transforms is None: - transforms = [message] - buffer.receive_transform(*transforms) - self._buffer = buffer - return buffer + with self._load_lock: + if self._buffer is not None: # another thread loaded while we waited + return self._buffer + buffer = MultiTBuffer(buffer_size=_NO_PRUNE) + available = set(self._store.list_streams()) + for name in self._stream_names: + if name not in available: + continue + for observation in self._store.stream(name, TFMessage): + message = observation.data + transforms = getattr(message, "transforms", None) + if transforms is None: + transforms = [message] + buffer.receive_transform(*transforms) + self._buffer = buffer + return buffer def has_transforms(self) -> bool: return bool(self._ensure_loaded().buffers) @@ -86,6 +102,8 @@ def get( non-warning lookup so per-scan misses don't spam the log. """ buffer = self._ensure_loaded() + # _get is the non-warning lookup; public get() logs on every miss, which + # spams the log for per-scan registration where misses are expected. return buffer._get(target_frame, source_frame, time_point, time_tolerance) @@ -121,15 +139,13 @@ def write_tf_tree( Returns the number of tf observations written. """ - import sqlite3 - db_path = store.config.path connection = sqlite3.connect(f"file:{db_path}?mode=ro&immutable=1", uri=True) odom = np.array( list( connection.execute( "select ts,pose_x,pose_y,pose_z,pose_qx,pose_qy,pose_qz,pose_qw " - f"from {odom_stream} order by ts" + f"from {_safe_table(odom_stream)} order by ts" ) ), float, diff --git a/dimos/navigation/jnav/components/loop_closure/gsc_pgo/post_process.py b/dimos/navigation/jnav/components/loop_closure/gsc_pgo/post_process.py index 609b80f93b..9f52ae8dfe 100644 --- a/dimos/navigation/jnav/components/loop_closure/gsc_pgo/post_process.py +++ b/dimos/navigation/jnav/components/loop_closure/gsc_pgo/post_process.py @@ -34,6 +34,7 @@ import json from pathlib import Path +import re import sqlite3 import sys import time @@ -79,6 +80,10 @@ def arg(flag, default=""): SUFFIX = arg("--suffix") LIDAR_STREAM = arg("--lidar", "pointlio_lidar") # input lidar stream (world-registered scans) ODOM_STREAM = arg("--odom", "pointlio_odometry") # input odometry stream (keyframe source) +# ODOM_STREAM is interpolated as a table name (SQLite can't parameterize those); +# reject anything that isn't a plain identifier to keep that injection-free. +if not re.match(r"^[A-Za-z_][A-Za-z0-9_]*$", ODOM_STREAM): + raise ValueError(f"unsafe --odom stream name: {ODOM_STREAM!r}") RAW_STREAM = arg("--tags", "raw_april_tags") # input unfiltered AprilTag stream IGNORE_TAGS = { int(x) for x in arg("--ignore-tags").replace(",", " ").split() diff --git a/dimos/navigation/jnav/utils/recording_db.py b/dimos/navigation/jnav/utils/recording_db.py index c71ea06a4d..88f1aab647 100644 --- a/dimos/navigation/jnav/utils/recording_db.py +++ b/dimos/navigation/jnav/utils/recording_db.py @@ -21,6 +21,7 @@ from __future__ import annotations +import atexit from collections.abc import Iterator from pathlib import Path from typing import Any @@ -55,6 +56,16 @@ def store(db_path: Path) -> SqliteStore: return cached +def close_all() -> None: + """Stop every cached store, releasing their file handles.""" + for cached in _stores.values(): + cached.stop() + _stores.clear() + + +atexit.register(close_all) + + def list_streams(db_path: Path) -> list[str]: return store(db_path).list_streams() From 12d2b6a7d263fff15feef41bff952779155f12d5 Mon Sep 17 00:00:00 2001 From: Jeff Hykin Date: Wed, 24 Jun 2026 16:07:33 +0800 Subject: [PATCH 15/21] port missing apriltag gate functions (ensure_april_streams, gate_params) so add_april imports resolve --- dimos/navigation/jnav/utils/apriltags.py | 253 ++++++++++++++++++++++- 1 file changed, 251 insertions(+), 2 deletions(-) diff --git a/dimos/navigation/jnav/utils/apriltags.py b/dimos/navigation/jnav/utils/apriltags.py index 9bf22cdf82..347065af38 100644 --- a/dimos/navigation/jnav/utils/apriltags.py +++ b/dimos/navigation/jnav/utils/apriltags.py @@ -421,9 +421,7 @@ def detect_apriltags( return detections -# --------------------------------------------------------------------------- # Streaming / incremental API -# --------------------------------------------------------------------------- _DEFAULT_MARKER_LENGTH = 0.10 _DEFAULT_DICTIONARY = "DICT_APRILTAG_36h11" @@ -663,3 +661,254 @@ def load_or_detect_sightings( if sightings: return sightings, "stream" return sightings_from_observations(detect()), "detected" + + +def detect_raw_detections( + store: Any, + intrinsics: np.ndarray, + distortion: np.ndarray, + *, + image_stream: str = "color_image", + marker_length: float = 0.10, + dictionary: str = "DICT_APRILTAG_36h11", +) -> tuple[list[Detection], bool, int]: + """Every valid-PnP tag detection over `image_stream`, unfiltered, with gate diagnostics.""" + detector = make_detector(dictionary) + raw_detections: list[Detection] = [] + images = store.stream(image_stream, Image).to_list() + speed_by_ts, speed_available = _camera_speeds(images) + for image_obs in images: + image = image_obs.data + bgr = image.numpy() if hasattr(image, "numpy") else np.asarray(image.data) + gray = cv2.cvtColor(bgr, cv2.COLOR_BGR2GRAY) if bgr.ndim == 3 else bgr + all_corners, marker_ids, _ = detector.detectMarkers(bgr) + if marker_ids is None: + continue + for corners, marker_id in zip(all_corners, marker_ids.flatten(), strict=False): + pose = estimate_marker_pose(corners, marker_length, intrinsics, distortion) + if pose is None: + continue + rotation_vector, translation_vector = pose + quaternion = Rotation.from_rotvec(rotation_vector.reshape(3)).as_quat() # x,y,z,w + translation = translation_vector.reshape(3) + tag_in_camera = [ + float(translation[0]), + float(translation[1]), + float(translation[2]), + float(quaternion[0]), + float(quaternion[1]), + float(quaternion[2]), + float(quaternion[3]), + ] + raw_detections.append( + { + "ts": float(image_obs.ts), + "marker_id": int(marker_id), + "t_cam_marker": tag_in_camera, + "sharpness": tag_sharpness(gray, corners), + "reproj_px": reprojection_error_px( + corners, + rotation_vector, + translation_vector, + marker_length, + intrinsics, + distortion, + ), + "tag_px": tag_pixel_size(corners), + "speed": speed_by_ts.get(float(image_obs.ts)), + } + ) + return raw_detections, speed_available, len(images) + + +def gate_detections( + raw_detections: list[Detection], + *, + max_distance_m: float = DEFAULT_MAX_DISTANCE_M, + max_view_angle_deg: float = DEFAULT_MAX_VIEW_ANGLE_DEG, + cluster_gap_sec: float = DEFAULT_CLUSTER_GAP_SEC, + rotation_weight_m_per_rad: float = DEFAULT_ROTATION_WEIGHT_M_PER_RAD, + min_sharpness: float = DEFAULT_MIN_SHARPNESS, + max_reproj_px: float = DEFAULT_MAX_REPROJ_PX, + min_tag_px: float = DEFAULT_MIN_TAG_PX, + max_linear_speed_mps: float = DEFAULT_MAX_LINEAR_SPEED_MPS, + max_angular_speed_dps: float = DEFAULT_MAX_ANGULAR_SPEED_DPS, + min_observations: int = DEFAULT_MIN_OBSERVATIONS, + huber_delta_m: float = DEFAULT_HUBER_DELTA_M, +) -> tuple[list[Detection], dict[str, int], int]: + """Gate + time-cluster raw detections into one Huber-refined medoid per cluster.""" + rejected: dict[str, int] = defaultdict(int) + kept: list[Detection] = [] + for detection in raw_detections: + if detection["sharpness"] < min_sharpness: + rejected["blur"] += 1 + continue + if detection["reproj_px"] > max_reproj_px: + rejected["reproj"] += 1 + continue + if detection["tag_px"] < min_tag_px: + rejected["small"] += 1 + continue + distance, view_angle = view_quality(detection["t_cam_marker"]) + if distance > max_distance_m: + rejected["far"] += 1 + continue + if view_angle > max_view_angle_deg: + rejected["oblique"] += 1 + continue + speed = detection["speed"] + if speed is not None and ( + speed[0] > max_linear_speed_mps or speed[1] > max_angular_speed_dps + ): + rejected["motion"] += 1 + continue + kept.append(detection) + + detections: list[Detection] = [] + thin_clusters = 0 + for cluster in cluster_by_time(kept, cluster_gap_sec): + if len(cluster) < min_observations: + thin_clusters += 1 + continue + detections.append( + { + **robust_cluster_pose(cluster, rotation_weight_m_per_rad, huber_delta_m), + "n_observations": len(cluster), + } + ) + detections.sort(key=lambda detection: detection["ts"]) + return detections, dict(rejected), thin_clusters + + +def _write_tag_stream( + store: Any, stream_name: str, detections: list[Detection], *, diagnostics: bool +) -> None: + """(Re)write a PoseStamped tag stream, with gate diagnostics when `diagnostics`.""" + if stream_name in store.list_streams(): + store.delete_stream(stream_name) + stream = store.stream(stream_name, PoseStamped) + for detection in detections: + pose = detection["t_cam_marker"] + tags: dict[str, Any] = {"marker_id": detection["marker_id"]} + if diagnostics: + speed = detection.get("speed") + distance, view_angle = view_quality(pose) + tags.update( + { + "sharpness": float(detection["sharpness"]), + "reproj_px": float(detection["reproj_px"]), + "tag_px": float(detection["tag_px"]), + "distance_m": float(distance), + "view_angle_deg": float(view_angle), + "lin_speed": float(speed[0]) if speed else -1.0, + "ang_speed": float(speed[1]) if speed else -1.0, + } + ) + stream.append( + PoseStamped(ts=detection["ts"], position=pose[:3], orientation=pose[3:]), + ts=detection["ts"], + pose=tuple(pose), + tags=tags, + ) + + +def write_april_streams( + store: Any, + raw_detections: list[Detection], + filtered_detections: list[Detection], + *, + raw_stream: str = "raw_april_tags", + filtered_stream: str = "april_tags", +) -> None: + """Persist the unfiltered `raw_april_tags` and the gated `april_tags` streams.""" + _write_tag_stream(store, raw_stream, raw_detections, diagnostics=True) + _write_tag_stream(store, filtered_stream, filtered_detections, diagnostics=False) + + +def _print_tag_summary( + stream_name: str, + n_raw: int, + detections: list[Detection], + rejected: dict[str, int], + thin_clusters: int, + speed_available: bool, + n_images: int, +) -> None: + in_spec = n_raw - sum(rejected.values()) + found_ids = sorted({detection["marker_id"] for detection in detections}) + gate_summary = ", ".join(f"{reason}={count}" for reason, count in sorted(rejected.items())) + if not speed_available: + gate_summary += (", " if gate_summary else "") + "motion-gate-off(no poses)" + print( + f" {stream_name}: {n_raw} raw -> {in_spec} in-spec " + f"-> {len(detections)} clusters (dropped {thin_clusters} thin), " + f"markers {found_ids} (over {n_images} images)" + ) + if gate_summary: + print(f" {stream_name} rejected: {gate_summary}") + + +def gate_params() -> dict[str, Any]: + """The current AprilTag gate thresholds, for recording in a sidecar.""" + return { + "max_distance_m": DEFAULT_MAX_DISTANCE_M, + "max_view_angle_deg": DEFAULT_MAX_VIEW_ANGLE_DEG, + "min_sharpness": DEFAULT_MIN_SHARPNESS, + "max_reproj_px": DEFAULT_MAX_REPROJ_PX, + "min_tag_px": DEFAULT_MIN_TAG_PX, + "max_linear_speed_mps": DEFAULT_MAX_LINEAR_SPEED_MPS, + "max_angular_speed_dps": DEFAULT_MAX_ANGULAR_SPEED_DPS, + "min_observations": DEFAULT_MIN_OBSERVATIONS, + "cluster_gap_sec": DEFAULT_CLUSTER_GAP_SEC, + "huber_delta_m": DEFAULT_HUBER_DELTA_M, + "rotation_weight_m_per_rad": DEFAULT_ROTATION_WEIGHT_M_PER_RAD, + } + + +def ensure_april_streams( + store: Any, + intrinsics: np.ndarray, + distortion: np.ndarray, + *, + image_stream: str = "color_image", + marker_length: float = 0.10, + dictionary: str = "DICT_APRILTAG_36h11", + raw_stream: str = "raw_april_tags", + filtered_stream: str = "april_tags", + exclude_tags: Iterable[int] = (), + force: bool = False, +) -> list[Detection]: + """Ensure both tag streams exist (`raw_april_tags`, `april_tags`); force=True rewrites. + + `exclude_tags` are kept in raw but dropped from filtered. Returns the filtered reps.""" + existing = set(store.list_streams()) + if not force and raw_stream in existing and filtered_stream in existing: + return [] + raw_detections, speed_available, n_images = detect_raw_detections( + store, + intrinsics, + distortion, + image_stream=image_stream, + marker_length=marker_length, + dictionary=dictionary, + ) + detections, rejected, thin_clusters = gate_detections(raw_detections) + excluded = set(exclude_tags) + if excluded: + detections = [d for d in detections if d["marker_id"] not in excluded] + write_april_streams( + store, raw_detections, detections, raw_stream=raw_stream, filtered_stream=filtered_stream + ) + print(f" {raw_stream}: {len(raw_detections)} detections (unfiltered)") + if excluded: + print(f" excluded dynamic tags from {filtered_stream}: {sorted(excluded)}") + _print_tag_summary( + filtered_stream, + len(raw_detections), + detections, + rejected, + thin_clusters, + speed_available, + n_images, + ) + return detections From 49bd14267eaa1a59d9e0a6e25d58b8667b084927 Mon Sep 17 00:00:00 2001 From: Jeff Hykin Date: Wed, 24 Jun 2026 16:07:34 +0800 Subject: [PATCH 16/21] fix mypy strict errors in jnav (type annotations, SqliteStoreConfig.path, loop var) --- dimos/memory2/db_tf.py | 10 +++++++--- dimos/navigation/jnav/components/loop_closure/eval.py | 2 +- .../components/loop_closure/eval_ground_truth_tag.py | 5 +++-- .../jnav/components/loop_closure/eval_kitti.py | 2 +- 4 files changed, 12 insertions(+), 7 deletions(-) diff --git a/dimos/memory2/db_tf.py b/dimos/memory2/db_tf.py index 7d498d1abe..ae4247aad0 100644 --- a/dimos/memory2/db_tf.py +++ b/dimos/memory2/db_tf.py @@ -33,6 +33,7 @@ import numpy as np +from dimos.memory2.store.sqlite import SqliteStoreConfig from dimos.msgs.geometry_msgs.Quaternion import Quaternion from dimos.msgs.geometry_msgs.Transform import Transform from dimos.msgs.geometry_msgs.Vector3 import Vector3 @@ -139,7 +140,10 @@ def write_tf_tree( Returns the number of tf observations written. """ - db_path = store.config.path + config = store.config + if not isinstance(config, SqliteStoreConfig): + raise TypeError("write_tf_tree reads the db directly and needs a SqliteStore") + db_path = config.path connection = sqlite3.connect(f"file:{db_path}?mode=ro&immutable=1", uri=True) odom = np.array( list( @@ -189,8 +193,8 @@ def statics_at(ts: float) -> list[Transform]: ) return links - for ts in np.arange(t0, t1 + static_period, static_period): - tf_stream.append(TFMessage(*statics_at(float(ts))), ts=float(ts)) + for static_ts in np.arange(t0, t1 + static_period, static_period): + tf_stream.append(TFMessage(*statics_at(float(static_ts))), ts=float(static_ts)) written += 1 return written diff --git a/dimos/navigation/jnav/components/loop_closure/eval.py b/dimos/navigation/jnav/components/loop_closure/eval.py index 02afb44749..27902331b0 100644 --- a/dimos/navigation/jnav/components/loop_closure/eval.py +++ b/dimos/navigation/jnav/components/loop_closure/eval.py @@ -640,7 +640,7 @@ def evaluate( camera = camera_stream intrinsics_config = load_intrinsics_json(intrinsics_json) db_store = store(db_path) - stored_stream = ( + stored_stream: Any = ( db_store.stream(APRIL_TAGS_STREAM) if APRIL_TAGS_STREAM in db_store.list_streams() else [] diff --git a/dimos/navigation/jnav/components/loop_closure/eval_ground_truth_tag.py b/dimos/navigation/jnav/components/loop_closure/eval_ground_truth_tag.py index 5644fb8add..3cb34b9018 100644 --- a/dimos/navigation/jnav/components/loop_closure/eval_ground_truth_tag.py +++ b/dimos/navigation/jnav/components/loop_closure/eval_ground_truth_tag.py @@ -87,7 +87,7 @@ def tag_world_position( @ pose7_to_matrix(optical_in_base7) @ pose7_to_matrix(tag_in_optical7) ) - return transform[:3, 3] + return np.asarray(transform[:3, 3]) def read_optical_in_base(intrinsics_json: Path) -> list[float]: @@ -102,7 +102,8 @@ def tag_in_camera_sightings(db_path: Path) -> list[tuple[float, int, list[float] """(ts, marker_id, tag-in-optical pose7) per April-tag observation, read straight from the recorded april_tags PoseStamped stream (no re-detection).""" sightings: list[tuple[float, int, list[float]]] = [] - for observation in store(db_path).stream("april_tags"): + tag_stream: Any = store(db_path).stream("april_tags") + for observation in tag_stream: pose_tuple = observation.pose_tuple if pose_tuple is None: continue diff --git a/dimos/navigation/jnav/components/loop_closure/eval_kitti.py b/dimos/navigation/jnav/components/loop_closure/eval_kitti.py index cf7e53fe32..b7dfac55a1 100644 --- a/dimos/navigation/jnav/components/loop_closure/eval_kitti.py +++ b/dimos/navigation/jnav/components/loop_closure/eval_kitti.py @@ -85,7 +85,7 @@ def corrected_trajectory( *, lidar_stream: str, odom_stream: str, -): +) -> tuple[list[Any], list[Any], Any]: module_class = load_module_class(module_path, module_name) config = filter_config_for_module(module_class, config) graph, closures, _ = run_module_graph( From bf56b945f48772ade9b68d05f8657350c959627a Mon Sep 17 00:00:00 2001 From: Jeff Hykin Date: Wed, 24 Jun 2026 16:07:36 +0800 Subject: [PATCH 17/21] drop section-marker comments to satisfy codebase check --- .../jnav/components/loop_closure/gsc_pgo/module.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/dimos/navigation/jnav/components/loop_closure/gsc_pgo/module.py b/dimos/navigation/jnav/components/loop_closure/gsc_pgo/module.py index a0d0c22099..f2d412ecb3 100644 --- a/dimos/navigation/jnav/components/loop_closure/gsc_pgo/module.py +++ b/dimos/navigation/jnav/components/loop_closure/gsc_pgo/module.py @@ -106,7 +106,7 @@ class PGOConfig(NativeModuleConfig): # Skip ICP on candidates farther than this (m). 0 disables. loop_candidate_max_distance_m: float = 30.0 - # --- Tag (AprilTag/ArUco) loop closure -------------------------------- + # Tag (AprilTag/ArUco) loop closure use_tag_loop_closure: bool = False # LCM channel of the static TF tree (dimos "pattern#msg_name" convention). tf_static_channel: str = "/tf_static#tf2_msgs.TFMessage" @@ -125,7 +125,7 @@ class PGOConfig(NativeModuleConfig): loop_robust_kernel: bool = False loop_robust_huber_k: float = 1.345 - # --- Landmark events (decoupled perceiver -> PGO factor-graph manager) ---- + # Landmark events (decoupled perceiver -> PGO factor-graph manager) # When set, the PGO ingests Landmark events on the `landmarks` In and # attaches each as a graph landmark variable + a BetweenFactor(keyframe, # landmark). Two sightings of the same landmark id share the variable, so a @@ -142,7 +142,7 @@ class PGOConfig(NativeModuleConfig): landmark_assoc_max_dt: float = 0.2 landmark_buffer_window: float = 3.0 - # --- Gravity anchor ------------------------------------------------------ + # Gravity anchor # Pin keyframe 0 (whose orientation is gravity-aligned by the LIO front end) # so landmark/loop closures cannot rotate the initial roll/pitch off gravity. # The full pose is pinned (also the gauge reference); roll/pitch stiffness is From 9d5e332a3a090bbc5c879f4f7b70f621a36bccd8 Mon Sep 17 00:00:00 2001 From: Jeff Hykin Date: Wed, 24 Jun 2026 16:21:49 +0800 Subject: [PATCH 18/21] regenerate all_blueprints.py for jnav loop-closure modules --- dimos/robot/all_blueprints.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/dimos/robot/all_blueprints.py b/dimos/robot/all_blueprints.py index 22bf7d27e1..c79a3b75f4 100644 --- a/dimos/robot/all_blueprints.py +++ b/dimos/robot/all_blueprints.py @@ -172,6 +172,7 @@ "go2-teleop-module": "dimos.teleop.quest.quest_extensions.Go2TeleopModule", "google-maps-skill-container": "dimos.agents.skills.google_maps_skill_container.GoogleMapsSkillContainer", "gps-nav-skill-container": "dimos.agents.skills.gps_nav_skill.GpsNavSkillContainer", + "graph-capture": "dimos.navigation.jnav.components.loop_closure.eval.GraphCapture", "grasping-module": "dimos.manipulation.grasping.grasping.GraspingModule", "gstreamer-camera-module": "dimos.hardware.sensors.camera.gstreamer.gstreamer_camera.GstreamerCameraModule", "hosted-arm-teleop-module": "dimos.teleop.quest_hosted.hosted_extensions.HostedArmTeleopModule", @@ -183,6 +184,7 @@ "keyboard-teleop": "dimos.robot.unitree.keyboard_teleop.KeyboardTeleop", "keyboard-teleop-module": "dimos.teleop.keyboard.keyboard_teleop_module.KeyboardTeleopModule", "local-planner": "dimos.navigation.cmu_nav.modules.local_planner.local_planner.LocalPlanner", + "lockstep-replay": "dimos.navigation.jnav.components.loop_closure.eval.LockstepReplay", "manipulation-module": "dimos.manipulation.manipulation_module.ManipulationModule", "map": "dimos.robot.unitree.type.map.Map", "marker-detection-stream-module": "dimos.perception.fiducial.marker_detection_stream_module.MarkerDetectionStreamModule", @@ -210,12 +212,13 @@ "perceive-loop-skill": "dimos.perception.perceive_loop_skill.PerceiveLoopSkill", "person-follow-skill-container": "dimos.agents.skills.person_follow.PersonFollowSkillContainer", "person-tracker": "dimos.perception.detection.person_tracker.PersonTracker", - "pgo": "dimos.navigation.cmu_nav.modules.pgo.pgo.PGO", + "pgo": "dimos.navigation.jnav.components.loop_closure.gsc_pgo.module.PGO", "phone-teleop-module": "dimos.teleop.phone.phone_teleop_module.PhoneTeleopModule", "pick-and-place-module": "dimos.manipulation.pick_and_place_module.PickAndPlaceModule", "point-lio": "dimos.hardware.sensors.lidar.pointlio.module.PointLio", "pointlio-recorder": "dimos.hardware.sensors.lidar.pointlio.recorder.PointlioRecorder", "quest-teleop-module": "dimos.teleop.quest.quest_teleop_module.QuestTeleopModule", + "rate-replay": "dimos.navigation.jnav.components.loop_closure.eval.RateReplay", "ray-tracing-voxel-map": "dimos.mapping.ray_tracing.module.RayTracingVoxelMap", "real-sense-camera": "dimos.hardware.sensors.camera.realsense.camera.RealSenseCamera", "receiver-module": "dimos.utils.demo_image_encoding.ReceiverModule", From 60d114cbf0f4809d9331ab348457199c8bdb81e4 Mon Sep 17 00:00:00 2001 From: Jeff Hykin Date: Wed, 24 Jun 2026 16:39:08 +0800 Subject: [PATCH 19/21] move jnav .gitignore up to jnav/ root --- .../jnav/{components/loop_closure/gsc_pgo => }/.gitignore | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename dimos/navigation/jnav/{components/loop_closure/gsc_pgo => }/.gitignore (100%) diff --git a/dimos/navigation/jnav/components/loop_closure/gsc_pgo/.gitignore b/dimos/navigation/jnav/.gitignore similarity index 100% rename from dimos/navigation/jnav/components/loop_closure/gsc_pgo/.gitignore rename to dimos/navigation/jnav/.gitignore From aae43194611760d1fa1186b78a8fbb2633d1b03f Mon Sep 17 00:00:00 2001 From: Jeff Hykin Date: Wed, 24 Jun 2026 16:42:51 +0800 Subject: [PATCH 20/21] add public MultiTBuffer.lookup (non-warning) and use it in DbTf instead of private _get --- dimos/memory2/db_tf.py | 4 +--- dimos/protocol/tf/tf.py | 13 +++++++++++++ 2 files changed, 14 insertions(+), 3 deletions(-) diff --git a/dimos/memory2/db_tf.py b/dimos/memory2/db_tf.py index ae4247aad0..7f52de854b 100644 --- a/dimos/memory2/db_tf.py +++ b/dimos/memory2/db_tf.py @@ -103,9 +103,7 @@ def get( non-warning lookup so per-scan misses don't spam the log. """ buffer = self._ensure_loaded() - # _get is the non-warning lookup; public get() logs on every miss, which - # spams the log for per-scan registration where misses are expected. - return buffer._get(target_frame, source_frame, time_point, time_tolerance) + return buffer.lookup(target_frame, source_frame, time_point, time_tolerance) def transform_matrix(transform: Transform) -> tuple[np.ndarray, np.ndarray]: diff --git a/dimos/protocol/tf/tf.py b/dimos/protocol/tf/tf.py index eb3d72b470..4dd345a530 100644 --- a/dimos/protocol/tf/tf.py +++ b/dimos/protocol/tf/tf.py @@ -214,6 +214,19 @@ def _wait_get( return None self._cv.wait(timeout=remaining) + def lookup( + self, + parent_frame: str, + child_frame: str, + time_point: float | None = None, + time_tolerance: float | None = None, + ) -> Transform | None: + """Composed transform lookup that does NOT log on a miss (unlike `get`). + + For high-frequency / best-effort lookups where misses are expected and a + per-call warning would spam the log (e.g. per-scan world-registration).""" + return self._get(parent_frame, child_frame, time_point, time_tolerance) + def get( self, parent_frame: str, From 5f3adfc58fa62e853ca02dca964b4c0cac4f827e Mon Sep 17 00:00:00 2001 From: Jeff Hykin Date: Wed, 24 Jun 2026 16:42:52 +0800 Subject: [PATCH 21/21] rename DEFAULT_MARKER_LENGTH_M -> DEFAULT_MARKER_LENGTH_METERS --- .../jnav/components/loop_closure/gsc_pgo/add_april.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dimos/navigation/jnav/components/loop_closure/gsc_pgo/add_april.py b/dimos/navigation/jnav/components/loop_closure/gsc_pgo/add_april.py index 010a02fc08..818a71e8f3 100644 --- a/dimos/navigation/jnav/components/loop_closure/gsc_pgo/add_april.py +++ b/dimos/navigation/jnav/components/loop_closure/gsc_pgo/add_april.py @@ -41,7 +41,7 @@ FILTERED_STREAM = "april_tags" SUMMARY_NAME = "summary.json" MIN_REVISITS = 2 # a tag seen on fewer visits than this carries no agreement signal -DEFAULT_MARKER_LENGTH_M = 0.10 +DEFAULT_MARKER_LENGTH_METERS = 0.10 DEFAULT_DICTIONARY = "DICT_APRILTAG_36h11" @@ -181,7 +181,7 @@ def main() -> None: marker_length = ( args.tag_size if args.tag_size is not None - else config.get("marker_length", DEFAULT_MARKER_LENGTH_M) + else config.get("marker_length", DEFAULT_MARKER_LENGTH_METERS) ) dictionary = args.dictionary or config.get("dictionary", DEFAULT_DICTIONARY) ensure_april_streams(