diff --git a/dimos/hardware/sensors/camera/realsense/camera.py b/dimos/hardware/sensors/camera/realsense/camera.py index 6257720608..5371672862 100644 --- a/dimos/hardware/sensors/camera/realsense/camera.py +++ b/dimos/hardware/sensors/camera/realsense/camera.py @@ -14,7 +14,6 @@ from __future__ import annotations -import atexit import threading import time from typing import TYPE_CHECKING @@ -26,11 +25,11 @@ from scipy.spatial.transform import Rotation from dimos.constants import DEFAULT_THREAD_JOIN_TIMEOUT +from dimos.core.coordination.blueprints import autoconnect from dimos.core.coordination.module_coordinator import ModuleCoordinator from dimos.core.core import rpc from dimos.core.module import Module, ModuleConfig from dimos.core.stream import Out -from dimos.core.transport import LCMTransport from dimos.hardware.sensors.camera.spec import ( OPTICAL_ROTATION, DepthCameraConfig, @@ -41,9 +40,11 @@ from dimos.msgs.geometry_msgs.Vector3 import Vector3 from dimos.msgs.sensor_msgs.CameraInfo import CameraInfo from dimos.msgs.sensor_msgs.Image import Image, ImageFormat +from dimos.msgs.sensor_msgs.Imu import Imu from dimos.msgs.sensor_msgs.PointCloud2 import PointCloud2 from dimos.spec import perception from dimos.utils.reactive import backpressure +from dimos.visualization.vis_module import vis_module if TYPE_CHECKING: import pyrealsense2 as rs # type: ignore[import-not-found,import-untyped] @@ -67,6 +68,7 @@ class RealSenseCameraConfig(ModuleConfig, DepthCameraConfig): align_depth_to_color: bool = True enable_depth: bool = True enable_pointcloud: bool = False + enable_imu: bool = True pointcloud_fps: float = 5.0 camera_info_fps: float = 1.0 serial_number: str | None = None @@ -79,6 +81,7 @@ class RealSenseCamera(DepthCameraHardware, Module, perception.DepthCamera): pointcloud: Out[PointCloud2] camera_info: Out[CameraInfo] depth_camera_info: Out[CameraInfo] + imu: Out[Imu] @property def _camera_link(self) -> str: @@ -100,6 +103,10 @@ def _depth_frame(self) -> str: def _depth_optical_frame(self) -> str: return f"{self.config.camera_name}_depth_optical_frame" + @property + def _imu_frame(self) -> str: + return f"{self.config.camera_name}_imu_optical_frame" + def __init__(self, *args, **kwargs) -> None: # type: ignore[no-untyped-def] super().__init__(*args, **kwargs) self._pipeline: rs.pipeline | None = None @@ -115,6 +122,10 @@ def __init__(self, *args, **kwargs) -> None: # type: ignore[no-untyped-def] self._latest_color_img: Image | None = None self._latest_depth_img: Image | None = None self._pointcloud_lock = threading.Lock() + # IMU state + self._imu_pipeline: rs.pipeline | None = None + self._depth_to_imu_extrinsics: rs.extrinsics | None = None + self._latest_accel: Vector3 | None = None @rpc def start(self) -> None: @@ -176,6 +187,71 @@ def start(self) -> None: ) ) + if self.config.enable_imu: + self._start_imu() + + def _start_imu(self) -> None: + import pyrealsense2 as rs + + imu_pipeline = rs.pipeline() + imu_config = rs.config() + if self.config.serial_number: + imu_config.enable_device(self.config.serial_number) + + try: + # Explicit IMU rates required: without an fps librealsense falls back to + # accel @ 63 Hz, which D4xx firmware doesn't offer -> "Couldn't resolve requests". + imu_config.enable_stream(rs.stream.accel, rs.format.motion_xyz32f, 200) + imu_config.enable_stream(rs.stream.gyro, rs.format.motion_xyz32f, 200) + imu_pipeline.start(imu_config, self._on_imu_frame) + except RuntimeError as error: + print(f"RealSense IMU unavailable, disabling IMU stream: {error}") + return + + self._imu_pipeline = imu_pipeline + + if self._profile is not None and self.config.enable_depth: + depth_stream = self._profile.get_stream(rs.stream.depth) + # The accel profile must come from the device's extrinsics graph, not the + # separately-started IMU pipeline -- cross-pipeline profiles aren't linked and + # get_extrinsics_to() raises "Requested extrinsics are not available!". + accel_stream = next( + profile + for sensor in self._profile.get_device().query_sensors() + for profile in sensor.get_stream_profiles() + if profile.stream_type() == rs.stream.accel + ) + self._depth_to_imu_extrinsics = depth_stream.get_extrinsics_to(accel_stream) + + def _on_imu_frame(self, frame: rs.frame) -> None: + if frame.is_frameset(): + for sub_frame in frame.as_frameset(): + self._handle_motion_frame(sub_frame) + else: + self._handle_motion_frame(frame) + + def _handle_motion_frame(self, frame: rs.frame) -> None: + import pyrealsense2 as rs + + motion = frame.as_motion_frame() + if not motion: + return + + motion_data = motion.get_motion_data() + stream_type = motion.get_profile().stream_type() + + if stream_type == rs.stream.accel: + self._latest_accel = Vector3(motion_data.x, motion_data.y, motion_data.z) + elif stream_type == rs.stream.gyro and self._latest_accel is not None: + # Gyro drives publishing, paired with the most recent accel sample. + self.imu.publish( + Imu( + angular_velocity=Vector3(motion_data.x, motion_data.y, motion_data.z), + linear_acceleration=self._latest_accel, + frame_id=self._imu_frame, + ) + ) + def _publish_camera_info(self) -> None: ts = time.time() if self._color_camera_info: @@ -381,6 +457,16 @@ def _publish_tf(self, ts: float) -> None: ) transforms.append(color_to_color_optical) + # camera_link -> imu (physical motion-sensor extrinsics) + if self._depth_to_imu_extrinsics is not None: + imu_tf = self._extrinsics_to_transform( + self._depth_to_imu_extrinsics, + self._camera_link, + self._imu_frame, + ts, + ) + transforms.append(imu_tf) + self.tf.publish(*transforms) def _generate_pointcloud(self) -> None: @@ -408,6 +494,13 @@ def _generate_pointcloud(self) -> None: def stop(self) -> None: self._running = False + if self._imu_pipeline: + try: + self._imu_pipeline.stop() + except Exception: + pass # Pipeline might already be stopped + self._imu_pipeline = None + # Stop pipeline first to unblock wait_for_frames() if self._pipeline: try: @@ -426,6 +519,8 @@ def stop(self) -> None: self._profile = None self._align = None self._color_to_depth_extrinsics = None + self._depth_to_imu_extrinsics = None + self._latest_accel = None self._latest_color_img = None self._latest_depth_img = None super().stop() @@ -443,34 +538,25 @@ def get_depth_scale(self) -> float: return self._depth_scale -def main() -> None: - dimos = ModuleCoordinator() - dimos.start() +def _color_camera_info_to_rerun(camera_info: CameraInfo) -> object: + # Re-parent the pinhole onto the color image entity + its optical frame so + # the color image renders inside the camera frustum in the 3D view. + return camera_info.to_rerun( + image_topic="world/color_image", + optical_frame="camera_color_optical_frame", + ) - camera = dimos.deploy(RealSenseCamera, enable_pointcloud=True, pointcloud_fps=5.0) - camera.color_image.transport = LCMTransport("/camera/color", Image) - camera.depth_image.transport = LCMTransport("/camera/depth", Image) - camera.pointcloud.transport = LCMTransport("/camera/pointcloud", PointCloud2) - camera.camera_info.transport = LCMTransport("/camera/color_info", CameraInfo) - camera.depth_camera_info.transport = LCMTransport("/camera/depth_info", CameraInfo) - def cleanup() -> None: - try: - dimos.stop() - except Exception: - pass - - atexit.register(cleanup) - dimos.start_all_modules() - - try: - while True: - time.sleep(0.1) - except (KeyboardInterrupt, SystemExit): - pass - finally: - atexit.unregister(cleanup) - cleanup() +def main() -> None: + blueprint = autoconnect( + RealSenseCamera.blueprint(enable_pointcloud=True, pointcloud_fps=5.0), + vis_module( + "rerun", + rerun_config={"visual_override": {"world/camera_info": _color_camera_info_to_rerun}}, + ), + ) + + ModuleCoordinator.build(blueprint).loop() if __name__ == "__main__": diff --git a/dimos/hardware/sensors/lidar/pointlio/config/default.yaml b/dimos/hardware/sensors/lidar/pointlio/config/default.yaml deleted file mode 100644 index 1fd09ec9fa..0000000000 --- a/dimos/hardware/sensors/lidar/pointlio/config/default.yaml +++ /dev/null @@ -1,69 +0,0 @@ -# Point-LIO config. Mid-360-specific values to retune for a different sensor: -# preprocess.lidar_type (Livox), blind/scan_line, mapping.extrinsic_T/R (Mid-360 -# IMU->lidar mount), det_range, fov_degree. -common: - con_frame: false - con_frame_num: 1 - cut_frame: false - cut_frame_time_interval: 0.1 - time_lag_imu_to_lidar: 0.0 - -preprocess: - # LID_TYPE enum (Point-LIO src/preprocess.h): - # 1 = AVIA (Livox), 2 = VELO16, 3 = OUST64, 4 = HESAIxt32, 5 = UNILIDAR - # 1 selects the Livox branch (preprocess.cpp avia_handler), which expects the - # Livox CustomMsg point layout the Mid-360 emits: - # https://github.com/Livox-SDK/livox_ros_driver2/blob/master/msg/CustomMsg.msg - lidar_type: 1 - scan_line: 4 - scan_rate: 10 - timestamp_unit: 3 # 3 = nanosecond - blind: 0.5 - # Pre-KF input decimation: keep every Nth raw point. 1 = keep all (disable). - point_filter_num: 3 - -mapping: - use_imu_as_input: false # false = IMU-as-output model (Point-LIO's robust path) - prop_at_freq_of_imu: true - check_satu: true - init_map_size: 10 - # Pre-KF voxel downsample of each scan before the filter. false = feed the - # full scan (disable). Leaf size is filter_size_surf below. - space_down_sample: true - satu_acc: 3.0 # g; accel >= this is treated as saturated (residual zeroed) to bound velocity - satu_gyro: 35.0 - acc_norm: 1.0 # IMU accel unit: g - plane_thr: 0.1 - filter_size_surf: 0.2 # pre-KF scan downsample leaf size (m), used iff space_down_sample - filter_size_map: 0.5 - ivox_grid_resolution: 2.0 # iVox local-map grid (m) - ivox_nearby_type: 6 # NEARBY6 - cube_side_length: 1000.0 - det_range: 100.0 - fov_degree: 360.0 - imu_en: true - start_in_aggressive_motion: false - extrinsic_est_en: false - imu_time_inte: 0.005 - lidar_meas_cov: 0.01 - acc_cov_input: 0.1 - vel_cov: 20.0 - gyr_cov_input: 0.01 - gyr_cov_output: 1000.0 - acc_cov_output: 500.0 - b_gyr_cov: 0.0001 - b_acc_cov: 0.0001 - imu_meas_acc_cov: 0.01 - imu_meas_omg_cov: 0.01 - match_s: 81.0 - gravity_align: true - gravity: [0.0, 0.0, -9.810] - gravity_init: [0.0, 0.0, -9.810] - extrinsic_T: [-0.011, -0.02329, 0.04412] # Mid-360 IMU->lidar offset (m) - extrinsic_R: [1.0, 0.0, 0.0, - 0.0, 1.0, 0.0, - 0.0, 0.0, 1.0] - -odometry: - publish_odometry_without_downsample: false - odom_only: false diff --git a/dimos/hardware/sensors/lidar/pointlio/cpp/CMakeLists.txt b/dimos/hardware/sensors/lidar/pointlio/cpp/CMakeLists.txt index 27ad294a3b..9bce7246f2 100644 --- a/dimos/hardware/sensors/lidar/pointlio/cpp/CMakeLists.txt +++ b/dimos/hardware/sensors/lidar/pointlio/cpp/CMakeLists.txt @@ -53,8 +53,6 @@ find_package(Eigen3 REQUIRED) # PCL (only components we need — avoid full PCL which drags in VTK via io) find_package(PCL 1.8 REQUIRED COMPONENTS common filters) -find_package(yaml-cpp REQUIRED) - # glog (iVox map backend — include/ivox/ivox3d.h needs glog) find_package(glog REQUIRED) @@ -94,7 +92,6 @@ target_link_libraries(pointlio_native PRIVATE ${LCM_LIBRARIES} ${LIVOX_SDK} ${PCL_LIBRARIES} - yaml-cpp::yaml-cpp glog::glog ) diff --git a/dimos/hardware/sensors/lidar/pointlio/cpp/flake.lock b/dimos/hardware/sensors/lidar/pointlio/cpp/flake.lock index 531b9d7a15..3cb06c2284 100644 --- a/dimos/hardware/sensors/lidar/pointlio/cpp/flake.lock +++ b/dimos/hardware/sensors/lidar/pointlio/cpp/flake.lock @@ -37,11 +37,11 @@ "fast-lio": { "flake": false, "locked": { - "lastModified": 1781514593, - "narHash": "sha256-85zc03F2Ztw/pO1nUtVHnC+4yGzVi5z9pKk9lZcFnE8=", + "lastModified": 1781782101, + "narHash": "sha256-2phOAdagFal8BTBEKxEbl3LDSx/7SNGVTFu0zYEXB1g=", "owner": "dimensionalOS", "repo": "dimos-module-fastlio2", - "rev": "286c530db7661ca6874cd8c1381357adf83cd19f", + "rev": "288e357e5457723c1cce4d4060f76ed7f85b10d4", "type": "github" }, "original": { diff --git a/dimos/hardware/sensors/lidar/pointlio/cpp/flake.nix b/dimos/hardware/sensors/lidar/pointlio/cpp/flake.nix index 303a5aa093..0ef30ba768 100644 --- a/dimos/hardware/sensors/lidar/pointlio/cpp/flake.nix +++ b/dimos/hardware/sensors/lidar/pointlio/cpp/flake.nix @@ -89,7 +89,6 @@ pkgs.glib pkgs.eigen pkgs.pcl - pkgs.yaml-cpp pkgs.glog pkgs.boost pkgs.llvmPackages.openmp diff --git a/dimos/hardware/sensors/lidar/pointlio/cpp/main.cpp b/dimos/hardware/sensors/lidar/pointlio/cpp/main.cpp index 72e7a25094..e3d53482ef 100644 --- a/dimos/hardware/sensors/lidar/pointlio/cpp/main.cpp +++ b/dimos/hardware/sensors/lidar/pointlio/cpp/main.cpp @@ -5,13 +5,13 @@ // // Binds Livox SDK2 directly into the Point-LIO core: SDK callbacks feed // CustomMsg/Imu to the IESKF estimator, which performs LiDAR-inertial SLAM. -// Sensor-frame (mid360_link) point clouds and odometry are published on LCM. +// Sensor-frame point clouds and odometry are published on LCM. // // Usage: // ./pointlio_native \ // --lidar '/lidar#sensor_msgs.PointCloud2' \ // --odometry '/odometry#nav_msgs.Odometry' \ -// --config_path /path/to/default.yaml \ +// --filter_size_surf 0.2 --ivox_grid_resolution 2.0 ... \ # tuning as plain CLI args // --host_ip 192.168.1.5 --lidar_ip 192.168.1.155 #include @@ -57,10 +57,28 @@ static double get_publish_ts() { return std::chrono::duration( std::chrono::system_clock::now().time_since_epoch()).count(); } +// Parse a comma-separated list of doubles (CLI vector args); empty on bad input. +static std::vector parse_doubles(const std::string& csv) { + std::vector out; + size_t i = 0; + while (i < csv.size()) { + size_t j = csv.find(',', i); + if (j == std::string::npos) { j = csv.size(); } + try { + out.push_back(std::stod(csv.substr(i, j - i))); + } catch (...) { + return {}; + } + i = j + 1; + } + return out; +} + static std::string g_lidar_topic; static std::string g_odometry_topic; static std::string g_frame_id; // required via --frame_id static std::string g_child_frame_id; // required via --child_frame_id +static std::string g_sensor_frame_id; // required via --sensor_frame_id static float g_frequency = 10.0f; // Frame accumulator (Livox SDK raw → CustomMsg) @@ -85,7 +103,7 @@ static uint64_t get_timestamp_ns(const LivoxLidarEthernetPacket* pkt) { using dimos::time_from_seconds; using dimos::make_header; -// Publish the lidar point cloud in the sensor body frame (g_frame_id). +// Publish the lidar point cloud in the sensor frame (g_sensor_frame_id). // `cloud` is Point-LIO's undistorted scan in the sensor's own frame // (get_body_cloud), so points are published as-is with no world registration. static void publish_lidar(PointCloudXYZI::Ptr cloud, double timestamp, const std::string& topic = "") { @@ -95,7 +113,7 @@ static void publish_lidar(PointCloudXYZI::Ptr cloud, double timestamp, const std int num_points = static_cast(cloud->size()); sensor_msgs::PointCloud2 pc; - pc.header = make_header(g_frame_id, timestamp); + pc.header = make_header(g_sensor_frame_id, timestamp); pc.height = 1; pc.width = num_points; pc.is_bigendian = 0; @@ -321,11 +339,68 @@ int main(int argc, char** argv) { return 1; } - std::string config_path = mod.arg("config_path", ""); - if (config_path.empty()) { - fprintf(stderr, "Error: --config_path is required\n"); - return 1; - } + // Point-LIO tuning, passed as CLI args by the dimos module (no YAML). + PointLioParams params; + // common + params.con_frame = mod.arg_bool("con_frame", params.con_frame); + params.con_frame_num = mod.arg_int("con_frame_num", params.con_frame_num); + params.cut_frame = mod.arg_bool("cut_frame", params.cut_frame); + params.cut_frame_time_interval = mod.arg_float("cut_frame_time_interval", params.cut_frame_time_interval); + params.time_lag_imu_to_lidar = mod.arg_float("time_lag_imu_to_lidar", params.time_lag_imu_to_lidar); + // preprocess + params.scan_line = mod.arg_int("scan_line", params.scan_line); + params.scan_rate = mod.arg_int("scan_rate", params.scan_rate); + params.blind = mod.arg_float("blind", params.blind); + params.point_filter_num = mod.arg_int("point_filter_num", params.point_filter_num); + std::string lidar_type = mod.arg("lidar_type", "avia"); + params.lidar_type = lidar_type == "velodyne" ? 2 : lidar_type == "ouster" ? 3 : + lidar_type == "hesai" ? 4 : lidar_type == "unilidar" ? 5 : 1; + std::string ts_unit = mod.arg("timestamp_unit", "nanosecond"); + params.timestamp_unit = ts_unit == "second" ? 0 : ts_unit == "millisecond" ? 1 : + ts_unit == "microsecond" ? 2 : 3; + // mapping + params.use_imu_as_input = mod.arg_bool("use_imu_as_input", params.use_imu_as_input); + params.prop_at_freq_of_imu = mod.arg_bool("prop_at_freq_of_imu", params.prop_at_freq_of_imu); + params.check_satu = mod.arg_bool("check_satu", params.check_satu); + params.init_map_size = mod.arg_int("init_map_size", params.init_map_size); + params.space_down_sample = mod.arg_bool("space_down_sample", params.space_down_sample); + params.satu_acc = mod.arg_float("satu_acc", params.satu_acc); + params.satu_gyro = mod.arg_float("satu_gyro", params.satu_gyro); + params.acc_norm = mod.arg_float("acc_norm", params.acc_norm); + params.plane_thr = mod.arg_float("plane_thr", params.plane_thr); + params.filter_size_surf = mod.arg_float("filter_size_surf", params.filter_size_surf); + params.filter_size_map = mod.arg_float("filter_size_map", params.filter_size_map); + params.ivox_grid_resolution = mod.arg_float("ivox_grid_resolution", params.ivox_grid_resolution); + std::string ivox_nearby = mod.arg("ivox_nearby_type", "nearby6"); + params.ivox_nearby_type = ivox_nearby == "center" ? 0 : ivox_nearby == "nearby18" ? 18 : + ivox_nearby == "nearby26" ? 26 : 6; + params.cube_side_length = mod.arg_float("cube_side_length", params.cube_side_length); + params.det_range = mod.arg_float("det_range", params.det_range); + params.fov_degree = mod.arg_float("fov_degree", params.fov_degree); + params.imu_en = mod.arg_bool("imu_en", params.imu_en); + params.start_in_aggressive_motion = mod.arg_bool("start_in_aggressive_motion", params.start_in_aggressive_motion); + params.extrinsic_est_en = mod.arg_bool("extrinsic_est_en", params.extrinsic_est_en); + params.imu_time_inte = mod.arg_float("imu_time_inte", params.imu_time_inte); + params.lidar_meas_cov = mod.arg_float("lidar_meas_cov", params.lidar_meas_cov); + params.acc_cov_input = mod.arg_float("acc_cov_input", params.acc_cov_input); + params.vel_cov = mod.arg_float("vel_cov", params.vel_cov); + params.gyr_cov_input = mod.arg_float("gyr_cov_input", params.gyr_cov_input); + params.gyr_cov_output = mod.arg_float("gyr_cov_output", params.gyr_cov_output); + params.acc_cov_output = mod.arg_float("acc_cov_output", params.acc_cov_output); + params.b_gyr_cov = mod.arg_float("b_gyr_cov", params.b_gyr_cov); + params.b_acc_cov = mod.arg_float("b_acc_cov", params.b_acc_cov); + params.imu_meas_acc_cov = mod.arg_float("imu_meas_acc_cov", params.imu_meas_acc_cov); + params.imu_meas_omg_cov = mod.arg_float("imu_meas_omg_cov", params.imu_meas_omg_cov); + params.match_s = mod.arg_float("match_s", params.match_s); + params.gravity_align = mod.arg_bool("gravity_align", params.gravity_align); + if (auto g = parse_doubles(mod.arg("gravity", "")); !g.empty()) params.gravity = g; + if (auto gi = parse_doubles(mod.arg("gravity_init", "")); !gi.empty()) params.gravity_init = gi; + if (auto et = parse_doubles(mod.arg("extrinsic_t", "")); !et.empty()) params.extrinsic_T = et; + if (auto er = parse_doubles(mod.arg("extrinsic_r", "")); !er.empty()) params.extrinsic_R = er; + // odometry + params.publish_odometry_without_downsample = + mod.arg_bool("publish_odometry_without_downsample", params.publish_odometry_without_downsample); + params.odom_only = mod.arg_bool("odom_only", params.odom_only); // Point-LIO internal processing rates double msr_freq = mod.arg_float("msr_freq", 50.0f); @@ -336,7 +411,8 @@ int main(int argc, char** argv) { std::string lidar_ip = mod.arg("lidar_ip", "192.168.1.155"); g_frequency = mod.arg_float("frequency", 10.0f); g_frame_id = mod.arg_required("frame_id"); - g_child_frame_id = mod.arg_required("body_frame_id"); + g_child_frame_id = mod.arg_required("child_frame_id"); + g_sensor_frame_id = mod.arg_required("sensor_frame_id"); float pointcloud_freq = mod.arg_float("pointcloud_freq", 5.0f); float odom_freq = mod.arg_float("odom_freq", 50.0f); @@ -362,7 +438,7 @@ int main(int argc, char** argv) { printf("[pointlio] Starting Point-LIO + Livox Mid-360 native module\n"); printf("[pointlio] lidar topic: %s\n", g_lidar_topic.empty() ? "(disabled)" : g_lidar_topic.c_str()); printf("[pointlio] odometry topic: %s\n", g_odometry_topic.empty() ? "(disabled)" : g_odometry_topic.c_str()); - printf("[pointlio] config: %s\n", config_path.c_str()); + printf("[pointlio] tuning: filter_size_surf=%.3f ivox_res=%.3f lidar_type=%d\n", params.filter_size_surf, params.ivox_grid_resolution, params.lidar_type); printf("[pointlio] host_ip: %s lidar_ip: %s frequency: %.1f Hz\n", host_ip.c_str(), lidar_ip.c_str(), g_frequency); printf("[pointlio] pointcloud_freq: %.1f Hz odom_freq: %.1f Hz\n", pointcloud_freq, odom_freq); } @@ -378,7 +454,7 @@ int main(int argc, char** argv) { g_lcm = &lcm; if (debug) { printf("[pointlio] Initializing Point-LIO...\n"); } - PointLio point_lio(config_path, msr_freq, main_freq); + PointLio point_lio(params, msr_freq, main_freq); g_point_lio = &point_lio; if (debug) { printf("[pointlio] Point-LIO initialized.\n"); } diff --git a/dimos/hardware/sensors/lidar/pointlio/module.py b/dimos/hardware/sensors/lidar/pointlio/module.py index 86cac95413..679d678b18 100644 --- a/dimos/hardware/sensors/lidar/pointlio/module.py +++ b/dimos/hardware/sensors/lidar/pointlio/module.py @@ -24,16 +24,17 @@ PointLio.blueprint(host_ip="192.168.1.5", lidar_ip="192.168.1.155"), SomeConsumer.blueprint(), )).loop() + +Point-LIO tuning lives directly on ``PointLioConfig`` and is passed to the C++ +binary as plain CLI args (no YAML). """ from __future__ import annotations import os -from pathlib import Path -from typing import TYPE_CHECKING, Annotated +from typing import TYPE_CHECKING, Literal from pydantic import Field -from pydantic.experimental.pipeline import validate_as from reactivex.disposable import Disposable from dimos.core.core import rpc @@ -57,10 +58,17 @@ from dimos.msgs.geometry_msgs.Vector3 import Vector3 from dimos.msgs.nav_msgs.Odometry import Odometry from dimos.msgs.sensor_msgs.PointCloud2 import PointCloud2 -from dimos.navigation.nav_stack.frames import FRAME_ODOM +from dimos.navigation.nav_stack.frames import FRAME_BODY, FRAME_ODOM from dimos.spec import perception -_CONFIG_DIR = Path(__file__).parent / "config" +# Human-readable enums; the C++ binary (main.cpp) maps these strings to +# Point-LIO's int codes. +# LID_TYPE enum (Point-LIO src/preprocess.h). avia = 1 selects the Livox branch +# the Mid-360 emits. +LidarType = Literal["avia", "velodyne", "ouster", "hesai", "unilidar"] +TimestampUnit = Literal["second", "millisecond", "microsecond", "nanosecond"] +# iVox local-map neighbour stencil. +IvoxNearbyType = Literal["center", "nearby6", "nearby18", "nearby26"] class PointLioConfig(NativeModuleConfig): @@ -73,11 +81,13 @@ class PointLioConfig(NativeModuleConfig): lidar_ip: str | None = Field(default_factory=lambda: os.environ.get("DIMOS_POINTLIO_LIDAR_IP")) frequency: float = 10.0 - # Sensor frame for the cloud + odometry headers. - frame_id: str = "mid360_link" - # Published TF: body_start_frame_id -> body_frame_id. - body_start_frame_id: str = FRAME_ODOM - body_frame_id: str = "base_link" + # Odometry is published as frame_id (fixed) -> child_frame_id (moving body), + # and also broadcast on TF. The point cloud is stamped with sensor_frame_id + # (the lidar's own frame — get_body_cloud is the undistorted scan, not yet + # transformed into the body frame). + frame_id: str = FRAME_ODOM + child_frame_id: str = FRAME_BODY + sensor_frame_id: str = "mid360_link" # Point-LIO internal processing rates (Hz) msr_freq: float = 50.0 @@ -86,14 +96,65 @@ class PointLioConfig(NativeModuleConfig): pointcloud_freq: float = 10.0 odom_freq: float = 30.0 - # Point-LIO YAML config (relative to config/ dir, or absolute path). - config: Annotated[ - Path, - validate_as(...).transform(lambda path: path if path.is_absolute() else _CONFIG_DIR / path), - ] = Path("default.yaml") - debug: bool = False + # Point-LIO tuning, passed to the binary as plain CLI args (read in main.cpp). + # common + con_frame: bool = False + con_frame_num: int = 1 + cut_frame: bool = False + cut_frame_time_interval: float = 0.1 + time_lag_imu_to_lidar: float = 0.0 + # preprocess + lidar_type: LidarType = "avia" # 1 = AVIA (Livox) branch the Mid-360 emits + scan_line: int = 4 + scan_rate: int = 10 + timestamp_unit: TimestampUnit = "nanosecond" + blind: float = 0.5 # spherical min range (m) + point_filter_num: int = 3 # pre-KF decimation: keep every Nth raw point (1 = all) + # mapping + use_imu_as_input: bool = False # false = IMU-as-output model (robust path) + prop_at_freq_of_imu: bool = True + check_satu: bool = True + init_map_size: int = 10 + space_down_sample: bool = True # pre-KF voxel downsample (leaf = filter_size_surf) + satu_acc: float = 3.0 # g; accel >= this is treated as saturated, bounding velocity + satu_gyro: float = 35.0 + acc_norm: float = 1.0 # IMU accel unit: g + plane_thr: float = 0.1 + filter_size_surf: float = 0.2 # pre-KF scan downsample leaf (m), iff space_down_sample + filter_size_map: float = 0.5 + ivox_grid_resolution: float = 2.0 # iVox local-map grid (m) + ivox_nearby_type: IvoxNearbyType = "nearby6" + cube_side_length: float = 1000.0 + det_range: float = 100.0 + fov_degree: float = 360.0 + imu_en: bool = True + start_in_aggressive_motion: bool = False + extrinsic_est_en: bool = False + imu_time_inte: float = 0.005 + lidar_meas_cov: float = 0.01 + acc_cov_input: float = 0.1 + vel_cov: float = 20.0 + gyr_cov_input: float = 0.01 + gyr_cov_output: float = 1000.0 + acc_cov_output: float = 500.0 + b_gyr_cov: float = 0.0001 + b_acc_cov: float = 0.0001 + imu_meas_acc_cov: float = 0.01 + imu_meas_omg_cov: float = 0.01 + match_s: float = 81.0 + gravity_align: bool = True + gravity: list[float] = Field(default_factory=lambda: [0.0, 0.0, -9.81]) + gravity_init: list[float] = Field(default_factory=lambda: [0.0, 0.0, -9.81]) + extrinsic_t: list[float] = Field(default_factory=lambda: [-0.011, -0.02329, 0.04412]) + extrinsic_r: list[float] = Field( + default_factory=lambda: [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0] + ) + # odometry + publish_odometry_without_downsample: bool = False + odom_only: bool = False + # SDK port configuration (see livox/ports.py for defaults) cmd_data_port: int = SDK_CMD_DATA_PORT push_msg_port: int = SDK_PUSH_MSG_PORT @@ -106,18 +167,6 @@ class PointLioConfig(NativeModuleConfig): host_imu_data_port: int = SDK_HOST_IMU_DATA_PORT host_log_data_port: int = SDK_HOST_LOG_DATA_PORT - # Resolved in __post_init__, passed as --config_path to the binary - config_path: str | None = None - - cli_exclude: frozenset[str] = frozenset({"config", "body_start_frame_id"}) - - def model_post_init(self, __context: object) -> None: - super().model_post_init(__context) - cfg = self.config - if not cfg.is_absolute(): - cfg = _CONFIG_DIR / cfg - self.config_path = str(cfg.resolve()) - class PointLio(NativeModule, perception.Lidar, perception.Odometry): config: PointLioConfig @@ -136,8 +185,8 @@ def start(self) -> None: def _on_odom_for_tf(self, msg: Odometry) -> None: self.tf.publish( Transform( - frame_id=self.config.body_start_frame_id, - child_frame_id=self.config.body_frame_id, + frame_id=self.frame_id, + child_frame_id=self.config.child_frame_id, translation=Vector3( msg.pose.position.x, msg.pose.position.y, diff --git a/dimos/hardware/sensors/lidar/pointlio/recorder.py b/dimos/hardware/sensors/lidar/pointlio/recorder.py index e1afbd502c..ba9a3230f6 100644 --- a/dimos/hardware/sensors/lidar/pointlio/recorder.py +++ b/dimos/hardware/sensors/lidar/pointlio/recorder.py @@ -14,169 +14,44 @@ """Record Point-LIO odometry + lidar into a memory2 SQLite db. -Subscribes to a PointLio's ``odometry`` / ``lidar`` outputs (auto-connected by -matching stream name + type — no remappings needed) and appends them to a -memory2 store. Timestamps are converted onto the db's existing clock so a run -can be appended to an existing db (e.g. a fastlio replay) and compared on one -timeline. Owns the db lifecycle: refuses to clobber existing streams unless -``force``, and derives the alignment reference from whatever the db already holds. - -Each lidar frame is stamped with the latest odometry pose, so ``pointlio_lidar`` -carries the trajectory and ``dimos map global`` can register it directly (it -transforms the body-frame cloud by that pose) — no ``dimos map pose-fill`` pass. +A ``Recorder`` that records its In ports under their own names +(``pointlio_odometry`` / ``pointlio_lidar``) — wire them to PointLio's +``odometry`` / ``lidar`` outputs with ``.remappings()``. Poses come straight +from the odometry stream (``@pose_setter_for``): each lidar frame is stamped with +the latest odometry pose so ``pointlio_lidar`` carries the trajectory and ``dimos +map global`` can register the body-frame cloud directly (no ``pose-fill`` pass). """ from __future__ import annotations -from collections.abc import AsyncIterator -import math -from pathlib import Path -import sqlite3 -import time - -from dimos.core.module import Module, ModuleConfig from dimos.core.stream import In +from dimos.memory2.module import OnExisting, Recorder, RecorderConfig, pose_setter_for from dimos.msgs.geometry_msgs.Pose import Pose from dimos.msgs.nav_msgs.Odometry import Odometry from dimos.msgs.sensor_msgs.PointCloud2 import PointCloud2 -# Below this the db's existing timestamps are sensor-boot seconds, not unix time. -_SENSOR_CLOCK_MAX = 1e8 -# Strictly-increasing tie-breaker so two samples never collide on ts. -_EPS = 1e-9 -# Max sensor-ts gap to attach the latest odometry pose to a lidar frame, so -# pointlio_lidar carries the trajectory and `dimos map global` can register it -# (it transforms by the per-frame pose). Matches pose_fill's nearest-match window. -_POSE_MATCH_TOL = 0.1 - - -def _existing_min_ts(db_path: Path) -> float: - """Min ts across the db's existing streams, or -1.0 if none/absent.""" - if not db_path.exists(): - return -1.0 - con = sqlite3.connect(f"file:{db_path}?mode=ro", uri=True, timeout=2.0) - try: - tables = [ - row[0] - for row in con.execute("SELECT name FROM sqlite_master WHERE type='table'").fetchall() - ] - best: float | None = None - for table in tables: - if table.startswith("_") or table.startswith("sqlite_"): - continue - try: - cols = [col[1] for col in con.execute(f"PRAGMA table_info('{table}')").fetchall()] - if "ts" not in cols: - continue - row = con.execute(f"SELECT MIN(ts) FROM '{table}'").fetchone() - except sqlite3.OperationalError: - continue - if row and row[0] is not None: - best = row[0] if best is None else min(best, row[0]) - return best if best is not None else -1.0 - finally: - con.close() - - -class PointlioRecorderConfig(ModuleConfig): - """Configures the recorder with the target db and timing conversion.""" - db_path: str = "" - # db stream/table names the Point-LIO outputs are recorded under. - odom_stream_name: str = "pointlio_odometry" - lidar_stream_name: str = "pointlio_lidar" - # Explicit offset override; NaN means auto-derive from the db's earliest ts. - time_offset: float = float("nan") - # Drop pre-existing odom/lidar streams instead of refusing to overwrite. - force: bool = False +class PointlioRecorderConfig(RecorderConfig): + # Append into a populated db (keep other streams); replace only our own. + on_existing: OnExisting = OnExisting.APPEND -class PointlioRecorder(Module): +class PointlioRecorder(Recorder): config: PointlioRecorderConfig - lidar: In[PointCloud2] - odometry: In[Odometry] - _offset: float | None = None - _ref_start_ts: float = -1.0 - _last_odom_ts: float = 0.0 - _last_lidar_ts: float = 0.0 - _odom_count: int = 0 - _lidar_count: int = 0 - # Latest odometry pose + its raw sensor ts, stamped onto each lidar frame so - # pointlio_lidar carries the trajectory (no separate pose-fill pass). - _last_odom_pose: Pose | None = None - _last_odom_raw_ts: float = 0.0 - - async def main(self) -> AsyncIterator[None]: - # Deferred: the store is opened in the worker process that runs main(), - # not at module-scan/import time on the host. - from dimos.memory2.store.sqlite import SqliteStore - - cfg = self.config - self._store = SqliteStore(path=cfg.db_path) - names = (cfg.odom_stream_name, cfg.lidar_stream_name) - existing = sorted(set(self._store.list_streams()) & set(names)) - if existing and not cfg.force: - raise RuntimeError( - f"PointlioRecorder: {Path(cfg.db_path).name} already has {existing}; " - "set force=True to overwrite" - ) - for name in existing: - self._store.delete_stream(name) - # Reference is the db's earliest ts *after* dropping our own old streams, - # so only the data we're aligning against (e.g. a fastlio replay) counts. - self._ref_start_ts = _existing_min_ts(Path(cfg.db_path)) - self._os = self._store.stream(cfg.odom_stream_name, Odometry) - self._ls = self._store.stream(cfg.lidar_stream_name, PointCloud2) - yield - self._store.stop() + pointlio_odometry: In[Odometry] + pointlio_lidar: In[PointCloud2] - def _resolve_offset(self, first_ts: float) -> float: - override = self.config.time_offset - if not math.isnan(override): - return override - ref = self._ref_start_ts - if ref < 0.0 or ref < _SENSOR_CLOCK_MAX: - # Empty db, or db already on the sensor clock -> exact alignment. - return 0.0 - # db on wall-clock time -> start-align Point-LIO onto the db's earliest ts. - return ref - first_ts - - def _aligned_ts(self, raw_ts: float, last_ts: float) -> float: - """Convert a sensor ts onto the db clock, kept strictly above last_ts.""" - if self._offset is None: - self._offset = self._resolve_offset(raw_ts) - return max(raw_ts + self._offset, last_ts + _EPS) + _last_odom_pose: Pose | None = None - async def handle_odometry(self, msg: Odometry) -> None: - # `is not None`, not `or`: a real sensor ts of 0.0 must not fall back to - # wall time (would misclassify the stream's clock in _resolve_offset). - raw_ts_raw = getattr(msg, "ts", None) - raw_ts = raw_ts_raw if raw_ts_raw is not None else time.time() - ts = self._aligned_ts(raw_ts, self._last_odom_ts) - self._last_odom_ts = ts + @pose_setter_for("pointlio_odometry") + def _odom_pose(self, msg: Odometry) -> Pose | None: pose = getattr(msg, "pose", None) - pose_inner = getattr(pose, "pose", None) if pose is not None else None - self._os.append(msg, ts=ts, pose=pose_inner) - self._last_odom_pose = pose_inner - self._last_odom_raw_ts = raw_ts - self._odom_count += 1 - - async def handle_lidar(self, msg: PointCloud2) -> None: - raw_ts_raw = getattr(msg, "ts", None) - raw_ts = raw_ts_raw if raw_ts_raw is not None else time.time() - ts = self._aligned_ts(raw_ts, self._last_lidar_ts) - self._last_lidar_ts = ts - # Stamp the latest odometry pose (within tolerance) onto the frame so - # pointlio_lidar carries the trajectory; map global transforms the - # body-frame cloud by it. Both Point-LIO outputs share a publish ts, so - # the nearest odometry is at most ~one odom period stale. Frames with no - # match (e.g. before the first odometry) get None and are map-skipped. - pose = ( - self._last_odom_pose - if self._last_odom_pose is not None - and abs(raw_ts - self._last_odom_raw_ts) <= _POSE_MATCH_TOL - else None - ) - self._ls.append(msg, ts=ts, pose=pose) - self._lidar_count += 1 + self._last_odom_pose = getattr(pose, "pose", None) if pose is not None else None + return self._last_odom_pose + + @pose_setter_for("pointlio_lidar") + def _lidar_pose(self, msg: PointCloud2) -> Pose | None: + # Most-recent odometry pose, stamped directly (no tf). None before the + # first odometry -> frame stored unposed, map-skipped. + return self._last_odom_pose diff --git a/dimos/hardware/sensors/lidar/pointlio/scripts/pcap_to_db.py b/dimos/hardware/sensors/lidar/pointlio/scripts/pcap_to_db.py index 6584646fd6..123d78bf90 100644 --- a/dimos/hardware/sensors/lidar/pointlio/scripts/pcap_to_db.py +++ b/dimos/hardware/sensors/lidar/pointlio/scripts/pcap_to_db.py @@ -18,10 +18,12 @@ # LFS archive, NOT the archive name) PCAP_PATH="$(python -c "from dimos.utils.data import get_data; print(get_data('mid360_shake_stairs/mid360_shake_stairs.pcap'))")" - # gen .db from pcap with your config (defaults to .db next to the pcap) + # gen .db from pcap (defaults to .db next to the pcap) + python -m dimos.hardware.sensors.lidar.pointlio.scripts.pcap_to_db --pcap "$PCAP_PATH" + + # override any PointLioConfig field via a small YAML/JSON doc, e.g. {acc_cov_input: 0.3} python -m dimos.hardware.sensors.lidar.pointlio.scripts.pcap_to_db \ - --config dimos/hardware/sensors/lidar/pointlio/config/default.yaml \ - --pcap "$PCAP_PATH" + --pcap "$PCAP_PATH" --config overrides.yaml # add to existing .db DB="mem2.db" @@ -61,6 +63,9 @@ _STARTUP_TIMEOUT_SEC = 60.0 # Max |Δts| to match a lidar frame to an odometry pose when aggregating the .rrd. _POSE_MATCH_TOL = 0.1 +# db stream/table names (= the recorder's In-port names). +_ODOM_STREAM = "pointlio_odometry" +_LIDAR_STREAM = "pointlio_lidar" # Extra seconds past the pcap's own duration before auto-stopping, when no # explicit --max-sensor-sec is given. _DRAIN_MARGIN_SEC = 4.0 @@ -199,7 +204,9 @@ def _write_rrd(db_path: Path, odom_stream: str, lidar_stream: str, voxel: float) store.stop() -def _build_blueprint(args: argparse.Namespace, db_path: Path, config_path: str) -> Blueprint: +def _build_blueprint( + args: argparse.Namespace, db_path: Path, overrides: dict[str, Any] +) -> Blueprint: """autoconnect(VirtualMid360 + PointLio + PointlioRecorder). PointLio's ``odometry``/``lidar`` outputs auto-wire to the recorder's @@ -211,36 +218,35 @@ def _build_blueprint(args: argparse.Namespace, db_path: Path, config_path: str) from dimos.hardware.sensors.lidar.pointlio.recorder import PointlioRecorder from dimos.hardware.sensors.lidar.virtual_mid360.module import VirtualMid360 - # `config` (not `config_path`, which PointLioConfig derives itself); already - # an absolute path so it bypasses the config-dir-relative resolution. Omit - # when empty to keep the default.yaml. - pointlio_kwargs: dict[str, object] = dict( + pointlio_kwargs: dict[str, Any] = dict( host_ip=args.host_ip, lidar_ip=args.lidar_ip, odom_freq=args.odom_freq, debug=False ) - if config_path: - pointlio_kwargs["config"] = config_path - - return autoconnect( - VirtualMid360.blueprint( - pcap=str(args.pcap_path), - rate=args.rate, - delay=args.warmup_sec, # hold streaming until PointLio's SDK is up - host_ip=args.host_ip, - lidar_ip=args.lidar_ip, - alias_iface=args.alias_iface, - # When the NIC is provisioned by hand, skip the module's own sudo - # (it runs in a tty-less worker where a password prompt can't appear). - setup_network=not args.no_network_setup, - ), - PointLio.blueprint(**pointlio_kwargs), - PointlioRecorder.blueprint( - db_path=str(db_path), - odom_stream_name=args.odom_stream_name, - lidar_stream_name=args.lidar_stream_name, - time_offset=float("nan") if args.time_offset is None else args.time_offset, - force=args.force, - ), - ).global_config(n_workers=4, robot_model="mid360_pointlio_pcap_to_db") + pointlio_kwargs.update(overrides) + + return ( + autoconnect( + VirtualMid360.blueprint( + pcap=str(args.pcap_path), + rate=args.rate, + delay=args.warmup_sec, # hold streaming until PointLio's SDK is up + host_ip=args.host_ip, + lidar_ip=args.lidar_ip, + alias_iface=args.alias_iface, + # When the NIC is provisioned by hand, skip the module's own sudo + # (it runs in a tty-less worker where a password prompt can't appear). + setup_network=not args.no_network_setup, + ), + PointLio.blueprint(**pointlio_kwargs), + PointlioRecorder.blueprint(db_path=str(db_path)), + ) + .remappings( + [ + (PointlioRecorder, "pointlio_odometry", "odometry"), + (PointlioRecorder, "pointlio_lidar", "lidar"), + ] + ) + .global_config(n_workers=4, robot_model="mid360_pointlio_pcap_to_db") + ) def _poll_until_drained( @@ -291,6 +297,21 @@ def _poll_until_drained( stagnant_since = None +def _load_overrides(config: str) -> dict[str, Any]: + """Load a YAML/JSON doc of PointLioConfig field overrides, e.g. {acc_cov_input: 0.3}.""" + if not config: + return {} + import yaml + + path = Path(config).expanduser().resolve() + if not path.exists(): + raise FileNotFoundError(f"--config not found: {path}") + data = yaml.safe_load(path.read_text()) or {} + if not isinstance(data, dict): + raise ValueError(f"--config must be a mapping of PointLioConfig fields, got {type(data)}") + return data + + def _run(args: argparse.Namespace) -> int: from dimos.core.coordination.module_coordinator import ModuleCoordinator @@ -301,13 +322,7 @@ def _run(args: argparse.Namespace) -> int: args.pcap_path = pcap_path db_path = Path(args.db).expanduser().resolve() if args.db else pcap_path.with_suffix(".db") db_path.parent.mkdir(parents=True, exist_ok=True) - # Resolve --config against the *invoking* cwd (pwd-relative) up front; the - # PointLio config field otherwise resolves a relative path against its own - # config/ dir, never the pwd. Absolute path passes through unchanged. - config_path = str(Path(args.config).expanduser().resolve()) if args.config else "" - if config_path and not Path(config_path).exists(): - print(f"[pcap_to_db] missing --config: {config_path}", file=sys.stderr) - return 2 + overrides = _load_overrides(args.config) # Default the stop bound to the pcap's own duration: Point-LIO keeps # dead-reckoning (publishing at full rate) after the pcap drains, so the @@ -328,15 +343,13 @@ def _run(args: argparse.Namespace) -> int: coord = None try: - coord = ModuleCoordinator.build(_build_blueprint(args, db_path, config_path)) - drained = _poll_until_drained( - db_path, args.odom_stream_name, args.lidar_stream_name, max_sensor_sec - ) + coord = ModuleCoordinator.build(_build_blueprint(args, db_path, overrides)) + drained = _poll_until_drained(db_path, _ODOM_STREAM, _LIDAR_STREAM, max_sensor_sec) finally: if coord is not None: coord.stop() - o_cnt, o_min, o_max = _odom_stats(db_path, args.odom_stream_name) + o_cnt, o_min, o_max = _odom_stats(db_path, _ODOM_STREAM) if o_cnt == 0 or not drained: print("[pcap_to_db] no odometry recorded — check the run above", file=sys.stderr) return 1 @@ -346,7 +359,7 @@ def _run(args: argparse.Namespace) -> int: ) if not args.no_rrd: try: - rrd = _write_rrd(db_path, args.odom_stream_name, args.lidar_stream_name, args.voxel) + rrd = _write_rrd(db_path, _ODOM_STREAM, _LIDAR_STREAM, args.voxel) if rrd is not None: print(f"[pcap_to_db] wrote {rrd.name} (aggregated lidar + pose path)", flush=True) except Exception as exc: # viz is a non-fatal bonus @@ -375,13 +388,6 @@ def main(argv: list[str]) -> int: default=0.0, help="stop after N sensor seconds (0 = whole pcap)", ) - parser.add_argument( - "--time-offset", - type=float, - default=None, - help="seconds added to every output ts (auto if omitted)", - ) - parser.add_argument("--force", action="store_true", help="overwrite existing pointlio streams") parser.add_argument( "--no-rrd", action="store_true", @@ -399,17 +405,7 @@ def main(argv: list[str]) -> int: parser.add_argument( "--config", default="", - help="Point-LIO YAML config (pwd-relative or absolute; default: module's default.yaml)", - ) - parser.add_argument( - "--odom-stream-name", - default="pointlio_odometry", - help="db stream/table name for the recorded odometry", - ) - parser.add_argument( - "--lidar-stream-name", - default="pointlio_lidar", - help="db stream/table name for the recorded point cloud", + help="YAML/JSON doc of PointLioConfig field overrides (e.g. {acc_cov_input: 0.3})", ) # Addressing knobs (override to run two replays at once). parser.add_argument("--host-ip", default="192.168.1.5") diff --git a/dimos/mapping/recording/go2_mid360/post_process.py b/dimos/mapping/recording/go2_mid360/post_process.py new file mode 100755 index 0000000000..19ac13e476 --- /dev/null +++ b/dimos/mapping/recording/go2_mid360/post_process.py @@ -0,0 +1,52 @@ +#!/usr/bin/env python +# Copyright 2026 Dimensional Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Post-process Go2 + Livox recordings: AprilTag-corrected groundtruth + .rrd. + +Thin wrapper over dimos/mapping/recording/utils/post_process.py with the Go2 +front-camera calibration and the lidar/odom pairs present in these recordings. + + uv run --no-sync python \ + dimos/mapping/recording/go2_mid360/post_process.py [TARGET] [--force] + +TARGET may be a `mem2.db`, a recording dir containing one, or a dir to scan for +recordings. With no TARGET it processes the most recently created recording +under --recordings-dir. +""" + +from __future__ import annotations + +from pathlib import Path + +from dimos.mapping.recording.utils.post_process import CameraParams, run +from dimos.robot.unitree.go2.config import ( + GO2_FRONT_CAMERA_DISTORTION, + GO2_FRONT_CAMERA_INTRINSICS, + GO2_FRONT_CAMERA_OPTICAL_IN_BASE, + GO2_FRONT_CAMERA_RESOLUTION, +) + + +def load_camera(db: Path) -> CameraParams: + return ( + GO2_FRONT_CAMERA_INTRINSICS, + GO2_FRONT_CAMERA_DISTORTION, + GO2_FRONT_CAMERA_OPTICAL_IN_BASE, + GO2_FRONT_CAMERA_RESOLUTION, + ) + + +if __name__ == "__main__": + run(description=__doc__, load_camera=load_camera) diff --git a/dimos/mapping/recording/go2_mid360/record.py b/dimos/mapping/recording/go2_mid360/record.py new file mode 100644 index 0000000000..c3da28b2fb --- /dev/null +++ b/dimos/mapping/recording/go2_mid360/record.py @@ -0,0 +1,114 @@ +#!/usr/bin/env python3 +# Copyright 2025-2026 Dimensional Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from datetime import datetime +import os +from pathlib import Path + +from dimos.core.coordination.blueprints import autoconnect +from dimos.core.coordination.module_coordinator import ModuleCoordinator +from dimos.core.global_config import global_config +from dimos.core.stream import In +from dimos.hardware.sensors.lidar.livox.module import Mid360 +from dimos.hardware.sensors.lidar.pointlio.module import PointLio +from dimos.hardware.sensors.lidar.pointlio.recorder import PointlioRecorder +from dimos.msgs.geometry_msgs.PoseStamped import PoseStamped +from dimos.msgs.nav_msgs.Odometry import Odometry +from dimos.msgs.sensor_msgs.Image import Image +from dimos.msgs.sensor_msgs.Imu import Imu +from dimos.msgs.sensor_msgs.PointCloud2 import PointCloud2 +from dimos.navigation.movement_manager.movement_manager import MovementManager +from dimos.robot.unitree.go2.connection import GO2Connection +from dimos.robot.unitree.keyboard_teleop import KeyboardTeleop +from dimos.utils.logging_config import set_run_log_dir, setup_logger + +logger = setup_logger() + +_LIDAR_IP = os.getenv("LIDAR_IP", "192.168.1.171") +_LIDAR_HOST_IP = os.getenv("LIDAR_HOST_IP", "192.168.1.100") + + +def _default_recording_dir() -> Path: + now = datetime.now() + stamp = now.strftime("%Y-%m-%d") + "_" + now.strftime("%I-%M%p").lower() + "-PST" + return Path("recordings") / stamp + + +class Go2Recorder(PointlioRecorder): + """Records Point-LIO odom + lidar plus the Go2's companion streams. + + Point-LIO stamps each ``pointlio_lidar`` frame with the latest odometry pose + (inherited ``@pose_setter_for``), so the trajectory is baked into the cloud at + record time — no static-transform pose-fill needed here. The companion streams + (camera, raw livox, go2 odom/lidar) are recorded as-is; the offline + post-process aligns them. + """ + + pointlio_odometry: In[Odometry] + pointlio_lidar: In[PointCloud2] + go2_lidar: In[PointCloud2] + go2_odom: In[PoseStamped] + color_image: In[Image] + livox_lidar: In[PointCloud2] + livox_imu: In[Imu] + + +unitree_go2_record = autoconnect( + MovementManager.blueprint(), + GO2Connection.blueprint().remappings( + [ + (GO2Connection, "lidar", "go2_lidar"), + (GO2Connection, "odom", "go2_odom"), + ] + ), + Mid360.blueprint( + lidar_ip=_LIDAR_IP, + host_ip=_LIDAR_HOST_IP, + ).remappings( + [ + (Mid360, "lidar", "livox_lidar"), + (Mid360, "imu", "livox_imu"), + ] + ), + PointLio.blueprint( + frame_id="world", + lidar_ip=_LIDAR_IP, + ).remappings( + [ + (PointLio, "lidar", "pointlio_lidar"), + (PointLio, "odometry", "pointlio_odometry"), + ] + ), + Go2Recorder.blueprint(), + # Pygame keyboard teleop (WASD drive + Q/E strafe). Its cmd_vel feeds + # MovementManager's tele_cmd_vel. + KeyboardTeleop.blueprint(linear_speed=0.3, angular_speed=0.6).remappings( + [ + (KeyboardTeleop, "cmd_vel", "tele_cmd_vel"), + ] + ), +).global_config(n_workers=10, robot_model="unitree_go2") + + +if __name__ == "__main__": + recording_dir = _default_recording_dir().resolve() + recording_dir.mkdir(parents=True, exist_ok=True) + set_run_log_dir(recording_dir) + global_config.obstacle_avoidance = False + coordinator = ModuleCoordinator.build( + unitree_go2_record, + {Go2Recorder.name: {"db_path": str(recording_dir / "mem2.db")}}, + ) + coordinator.loop() diff --git a/dimos/mapping/recording/go2_mid360/static_transforms.py b/dimos/mapping/recording/go2_mid360/static_transforms.py new file mode 100644 index 0000000000..6cc315c80e --- /dev/null +++ b/dimos/mapping/recording/go2_mid360/static_transforms.py @@ -0,0 +1,187 @@ +# Copyright 2026 Dimensional Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Static transforms between the Go2 body, its front camera, and the Mid-360 odom frame. + +Computes the rigid mounts used while recording (Mid-360 IMU -> base_link, base_link -> +camera optical) and renders every frame in Rerun as XYZ basis arrows plus a simple box +for the robot body (self-contained: no mesh/URDF files needed). + +Mount geometry (measured on the physical rig) +--------------------------------------------- +- base_link -> front_camera: 32.7cm forward, ~4.3cm up (URDF front_camera mount). +- front_camera -> mid360_link: lidar is 3.2cm back, 12cm up, pitched 44 deg down. +- front_camera -> camera_optical: the standard ROS optical rotation (x-right, y-down, + z-forward). + +FAST-LIO odometry is the Mid-360 IMU ("body") frame +--------------------------------------------------- +hku-mars/FAST_LIO runs its EKF on the IMU state and publishes child_frame_id="body" (the +IMU pose in the gravity-aligned world frame), so the odom FAST-LIO reports is the Mid-360 +frame tracked here as `mid360_link`. +- https://github.com/hku-mars/FAST_LIO/blob/main/src/laserMapping.cpp +""" + +import argparse +import math +from pathlib import Path + +import numpy as np + +from dimos.msgs.geometry_msgs.Quaternion import Quaternion +from dimos.msgs.geometry_msgs.Transform import Transform +from dimos.msgs.geometry_msgs.Vector3 import Vector3 + +ROOT = "go2" + +MID360_PITCH_DOWN = math.radians(44.0) + +# rpy that maps a sensor frame to its optical frame (z-forward, x-right, y-down) +OPTICAL_RPY = (-math.pi / 2, 0.0, -math.pi / 2) + +AXIS_COLORS = [[255, 0, 0], [0, 255, 0], [0, 0, 255]] # X red, Y green, Z blue + +# Approximate Go2 trunk box (~65 x 31 x 15 cm), centered on base_link. The front face lands +# at the front_camera mount (x ~= 0.327). Adjust if you want the full leg-span bounding box. +GO2_BODY_HALF_SIZES = (0.325, 0.155, 0.075) +GO2_BODY_CENTER = (0.0, 0.0, 0.0) +GO2_BODY_COLOR = [120, 120, 130] + +# (name, parent_name, translation_xyz, rpy) — parent None means attached to ROOT +FRAMES: list[tuple[str, str | None, tuple[float, float, float], tuple[float, float, float]]] = [ + ("base_link", None, (0.0, 0.0, 0.0), (0.0, 0.0, 0.0)), + ("front_camera", "base_link", (0.32715, -0.00003, 0.04297), (0.0, 0.0, 0.0)), + ("mid360_link", "front_camera", (-0.032, 0.0, 0.12), (0.0, MID360_PITCH_DOWN, 0.0)), + ("camera_optical", "front_camera", (0.0, 0.0, 0.0), OPTICAL_RPY), +] + +PARENT_OF: dict[str, str | None] = {name: parent for name, parent, _, _ in FRAMES} +EDGE_OF: dict[str, tuple[tuple[float, float, float], tuple[float, float, float]]] = { + name: (translation, rpy) for name, _parent, translation, rpy in FRAMES +} + + +def rpy_to_matrix(roll: float, pitch: float, yaw: float) -> np.ndarray: + """URDF fixed-axis rpy -> rotation matrix (Rz @ Ry @ Rx).""" + cos_r, sin_r = math.cos(roll), math.sin(roll) + cos_p, sin_p = math.cos(pitch), math.sin(pitch) + cos_y, sin_y = math.cos(yaw), math.sin(yaw) + rot_x = np.array([[1, 0, 0], [0, cos_r, -sin_r], [0, sin_r, cos_r]]) + rot_y = np.array([[cos_p, 0, sin_p], [0, 1, 0], [-sin_p, 0, cos_p]]) + rot_z = np.array([[cos_y, -sin_y, 0], [sin_y, cos_y, 0], [0, 0, 1]]) + return rot_z @ rot_y @ rot_x + + +def pose_relative_to_root(name: str) -> Transform: + """Compose the frame edges from ROOT down to ``name`` (root -> name).""" + chain: list[str] = [] + cursor: str | None = name + while cursor is not None: + chain.append(cursor) + cursor = PARENT_OF[cursor] + pose: Transform | None = None + for frame_name in reversed(chain): + translation, rpy = EDGE_OF[frame_name] + edge = Transform( + translation=Vector3(*translation), rotation=Quaternion.from_euler(Vector3(*rpy)) + ) + pose = edge if pose is None else pose + edge + assert pose is not None + return pose + + +def transform_between(source: str, target: str, frame_id: str, child_frame_id: str) -> Transform: + """Static transform source -> target (frame_id=source, child_frame_id=target).""" + result = pose_relative_to_root(source).inverse() + pose_relative_to_root(target) + result.frame_id = frame_id + result.child_frame_id = child_frame_id + return result + + +BASE_TO_FRONT_CAMERA = transform_between("base_link", "front_camera", "base_link", "front_camera") +BASE_TO_MID360 = transform_between("base_link", "mid360_link", "base_link", "mid360_link") +# Mid-360 IMU (FAST-LIO odom) frame -> robot base. Compose with world->mid360 from odom to +# anchor recorded observations to base_link. +MID360_TO_BASE = transform_between("mid360_link", "base_link", "mid360_link", "base_link") +# base_link -> camera optical frame (x-right, y-down, z-forward) for anchoring images. +BASE_TO_CAMERA_OPTICAL = transform_between( + "base_link", "camera_optical", "base_link", "camera_optical" +) + + +def entity_path(name: str) -> str: + chain = [name] + cursor = PARENT_OF[name] + while cursor is not None: + chain.append(cursor) + cursor = PARENT_OF[cursor] + chain.append(ROOT) + return "/".join(reversed(chain)) + + +def render(axis_length: float, rrd_path: Path | None) -> None: + import rerun as rr + + rr.init("go2_mid360_frames", spawn=rrd_path is None) + if rrd_path is not None: + rr.save(str(rrd_path)) + + rr.log(ROOT, rr.ViewCoordinates.RIGHT_HAND_Z_UP, static=True) + + paths = {name: entity_path(name) for name in PARENT_OF} + + axes = np.eye(3) * axis_length + for name, _parent, translation, rpy in FRAMES: + rr.log( + paths[name], + rr.Transform3D(translation=list(translation), mat3x3=rpy_to_matrix(*rpy)), + rr.Arrows3D( + vectors=axes, origins=np.zeros((3, 3)), colors=AXIS_COLORS, labels=["X", "Y", "Z"] + ), + ) + rr.log( + f"{paths[name]}/label", + rr.Points3D([[0.0, 0.0, 0.0]], labels=[name], show_labels=True, radii=axis_length / 15), + ) + + rr.log( + f"{paths['base_link']}/body", + rr.Boxes3D( + centers=[list(GO2_BODY_CENTER)], + half_sizes=[list(GO2_BODY_HALF_SIZES)], + colors=[GO2_BODY_COLOR], + ), + ) + + if rrd_path is not None: + print(f"wrote {rrd_path}") + + +def main() -> None: + parser = argparse.ArgumentParser(description=__doc__) + parser.add_argument( + "--axis-length", type=float, default=0.1, help="basis-vector length in meters" + ) + parser.add_argument( + "--rrd", type=Path, default=None, help="save to this .rrd instead of spawning the viewer" + ) + args = parser.parse_args() + + for transform in (BASE_TO_FRONT_CAMERA, BASE_TO_MID360, MID360_TO_BASE, BASE_TO_CAMERA_OPTICAL): + print(transform) + render(args.axis_length, args.rrd) + + +if __name__ == "__main__": + main() diff --git a/dimos/mapping/recording/mid360_realsense/post_process.py b/dimos/mapping/recording/mid360_realsense/post_process.py new file mode 100644 index 0000000000..78b2fc788d --- /dev/null +++ b/dimos/mapping/recording/mid360_realsense/post_process.py @@ -0,0 +1,61 @@ +#!/usr/bin/env python +# Copyright 2026 Dimensional Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Post-process RealSense + Mid-360 recordings: AprilTag-corrected groundtruth + .rrd. + +Thin wrapper over dimos/mapping/recording/utils/post_process.py. Intrinsics come +from the recorded RealSense camera_info; the camera_optical mount is fixed by the +record blueprint (1cm forward + 1cm below the lidar). + + uv run --no-sync python \ + dimos/mapping/recording/mid360_realsense/post_process.py [TARGET] [--force] + +TARGET may be a `mem2.db`, a recording dir containing one, or a dir to scan for +recordings. With no TARGET it processes the most recently created recording +under --recordings-dir. +""" + +from __future__ import annotations + +from pathlib import Path + +import numpy as np + +from dimos.mapping.recording.utils.post_process import CameraParams, run +from dimos.memory2.store.sqlite import SqliteStore +from dimos.msgs.sensor_msgs.CameraInfo import CameraInfo + +CAMERA_INFO_STREAM = "realsense_camera_info" + +# camera_optical pose in mid360_link (the frame fastlio odom is anchored to), +# matching the camera mount in record.py: 1cm forward + 1cm below the lidar, +# then the REP-103 optical rotation. +REALSENSE_OPTICAL_IN_BASE = [0.01, 0.0, -0.01, -0.5, 0.5, -0.5, 0.5] + + +def load_camera(db: Path) -> CameraParams: + """Read intrinsics/distortion/resolution from the recorded RealSense + camera_info stream; the optical mount is fixed by the record blueprint.""" + with SqliteStore(path=str(db)) as store: + if CAMERA_INFO_STREAM not in store.list_streams(): + raise SystemExit(f"no '{CAMERA_INFO_STREAM}' stream in {db} — cannot get intrinsics") + info = next(iter(store.stream(CAMERA_INFO_STREAM, CameraInfo))).data + intrinsics = np.array(info.K, dtype=np.float64).reshape(3, 3) + distortion = np.array(info.D, dtype=np.float64) + return intrinsics, distortion, REALSENSE_OPTICAL_IN_BASE, (info.width, info.height) + + +if __name__ == "__main__": + run(description=__doc__, load_camera=load_camera) diff --git a/dimos/mapping/recording/mid360_realsense/record.py b/dimos/mapping/recording/mid360_realsense/record.py new file mode 100644 index 0000000000..4668cb6cd7 --- /dev/null +++ b/dimos/mapping/recording/mid360_realsense/record.py @@ -0,0 +1,107 @@ +#!/usr/bin/env python3 +# Copyright 2025-2026 Dimensional Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from datetime import datetime +import os +from pathlib import Path + +from dimos.core.coordination.blueprints import autoconnect +from dimos.core.coordination.module_coordinator import ModuleCoordinator +from dimos.core.global_config import global_config +from dimos.core.stream import In +from dimos.hardware.sensors.camera.realsense.camera import RealSenseCamera +from dimos.hardware.sensors.lidar.livox.module import Mid360 +from dimos.hardware.sensors.lidar.pointlio.module import PointLio +from dimos.hardware.sensors.lidar.pointlio.recorder import PointlioRecorder +from dimos.msgs.nav_msgs.Odometry import Odometry +from dimos.msgs.sensor_msgs.CameraInfo import CameraInfo +from dimos.msgs.sensor_msgs.Image import Image +from dimos.msgs.sensor_msgs.Imu import Imu +from dimos.msgs.sensor_msgs.PointCloud2 import PointCloud2 +from dimos.utils.logging_config import set_run_log_dir, setup_logger + +logger = setup_logger() + +_LIDAR_IP = os.getenv("LIDAR_IP", "192.168.1.107") + + +def _default_recording_dir() -> Path: + now = datetime.now() + stamp = now.strftime("%Y-%m-%d") + "_" + now.strftime("%I-%M%p").lower() + "-PST" + return Path("recordings") / stamp + + +class RealsenseRecorder(PointlioRecorder): + """Records Point-LIO odom + lidar plus the RealSense + raw Livox streams. + + Point-LIO stamps each ``pointlio_lidar`` frame with the latest odometry pose + (inherited ``@pose_setter_for``), so the trajectory is baked into the cloud at + record time. The camera and raw-livox streams are recorded as-is; the offline + post-process aligns them. + """ + + pointlio_odometry: In[Odometry] + pointlio_lidar: In[PointCloud2] + color_image: In[Image] + realsense_depth_image: In[Image] + realsense_pointcloud: In[PointCloud2] + realsense_camera_info: In[CameraInfo] + realsense_depth_camera_info: In[CameraInfo] + realsense_imu: In[Imu] + livox_lidar: In[PointCloud2] + livox_imu: In[Imu] + + +realsense_mid360_record = autoconnect( + RealSenseCamera.blueprint().remappings( + [ + (RealSenseCamera, "depth_image", "realsense_depth_image"), + (RealSenseCamera, "pointcloud", "realsense_pointcloud"), + (RealSenseCamera, "camera_info", "realsense_camera_info"), + (RealSenseCamera, "depth_camera_info", "realsense_depth_camera_info"), + (RealSenseCamera, "imu", "realsense_imu"), + ] + ), + Mid360.blueprint( + lidar_ip=_LIDAR_IP, + ).remappings( + [ + (Mid360, "lidar", "livox_lidar"), + (Mid360, "imu", "livox_imu"), + ] + ), + PointLio.blueprint( + frame_id="world", + lidar_ip=_LIDAR_IP, + ).remappings( + [ + (PointLio, "lidar", "pointlio_lidar"), + (PointLio, "odometry", "pointlio_odometry"), + ] + ), + RealsenseRecorder.blueprint(), +).global_config(n_workers=6) + + +if __name__ == "__main__": + recording_dir = _default_recording_dir().resolve() + recording_dir.mkdir(parents=True, exist_ok=True) + set_run_log_dir(recording_dir) + global_config.obstacle_avoidance = False + coordinator = ModuleCoordinator.build( + realsense_mid360_record, + {RealsenseRecorder.name: {"db_path": str(recording_dir / "mem2.db")}}, + ) + coordinator.loop() diff --git a/dimos/mapping/recording/mid360_realsense/static_transforms.py b/dimos/mapping/recording/mid360_realsense/static_transforms.py new file mode 100644 index 0000000000..b93eb8edd9 --- /dev/null +++ b/dimos/mapping/recording/mid360_realsense/static_transforms.py @@ -0,0 +1,270 @@ +# Copyright 2026 Dimensional Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Static transforms between the RealSense D435i frames and the Mid-360 odom frame. + +Computes rigid transforms (RealSense color/gyro/depth -> Mid-360 IMU frame) for use when +recording, and renders every frame in Rerun as XYZ basis arrows plus simple boxes for the +camera and lidar bodies (self-contained: no mesh files needed). + +Frame sources +------------- +RealSense D435i frame transforms are transcribed from the official +realsense2_description xacro (urdf/_d435.urdf.xacro + urdf/_d435i_imu_modules.urdf.xacro, +use_nominal_extrinsics=true). + +Mid-360 geometry (manual): body is 65 x 65 x 60 mm; the point-cloud origin O lies on the +central vertical axis, ~47 mm above the base. The IMU chip is *not* on that axis. +- Livox Mid-360 User Manual (Dimensions + Coordinates): + https://www.livoxtech.com/mid-360/downloads + +FAST-LIO odometry is the IMU ("body") frame, NOT the lidar frame +--------------------------------------------------------------- +hku-mars/FAST_LIO runs its EKF on the IMU state: publish_odometry() sets +child_frame_id = "body" and fills the pose from state_point (the IMU pose in the +gravity-aligned "camera_init" world frame). RGBpointBodyLidarToIMU() transforms lidar +points into the IMU frame via offset_R_L_I / offset_T_L_I, confirming "body" == IMU. +- https://github.com/hku-mars/FAST_LIO/blob/main/src/laserMapping.cpp + +The lidar-to-IMU extrinsic comes from the official Mid-360 config; it matches the local +dimos config (dimos/hardware/sensors/lidar/fastlio2/config/mid360.yaml): + extrinsic_T: [ -0.011, -0.02329, 0.04412 ] # lidar origin expressed in IMU frame +Flipping the sign gives the IMU position in lidar coords = (11.0, 23.29, -44.12) mm, +the value Livox publishes for the IMU in the point-cloud coordinate system. The large +y = 23.29 mm term is why the IMU sits near a corner of the body, not the center. +- https://github.com/hku-mars/FAST_LIO/blob/main/config/mid360.yaml +- https://github.com/Livox-SDK/livox_ros_driver2/issues/139 + +So the odom frame FAST-LIO reports is `imu_frame` below, not `lidar_frame`. +""" + +import argparse +import math +from pathlib import Path + +import numpy as np + +from dimos.msgs.geometry_msgs.Quaternion import Quaternion +from dimos.msgs.geometry_msgs.Transform import Transform +from dimos.msgs.geometry_msgs.Vector3 import Vector3 + +CAMERA_ANGLE_UP = math.radians(10) + +# Mid-360 box: pitched down from bottom_screw_frame, then offset back/up in that frame +BOX_PITCH_DOWN = math.radians(26) + CAMERA_ANGLE_UP +BOX_BACK = 0.085 +BOX_UP = 0.037 # ~4cm up + +ROOT = "d435i" + +# Physical constants from _d435.urdf.xacro (meters) +CAM_HEIGHT = 0.025 +DEPTH_PY = 0.0175 +DEPTH_PZ = CAM_HEIGHT / 2 +MOUNT_FROM_CENTER_OFFSET = 0.0149 +GLASS_TO_FRONT = 0.1e-3 +ZERO_DEPTH_TO_GLASS = 4.2e-3 +MESH_X_OFFSET = MOUNT_FROM_CENTER_OFFSET - GLASS_TO_FRONT - ZERO_DEPTH_TO_GLASS + +DEPTH_TO_INFRA1_OFFSET = 0.0 +DEPTH_TO_INFRA2_OFFSET = -0.050 +DEPTH_TO_COLOR_OFFSET = 0.015 +IMU_XYZ = (-0.01174, -0.00552, 0.0051) + +# rpy that maps a sensor frame to its optical frame (z-forward, x-right, y-down) +OPTICAL_RPY = (-math.pi / 2, 0.0, -math.pi / 2) + +AXIS_COLORS = [[255, 0, 0], [0, 255, 0], [0, 0, 255]] # X red, Y green, Z blue + +# D435i body box (90 x 25 x 25 mm), centered on bottom_screw_frame in x/y and lifted in +# z so its bottom face sits on the screw plane (x fwd, y left, z up). +CAMERA_BOX_HALF_SIZES = (0.0125, 0.045, 0.0125) +CAMERA_BOX_CENTER = (0.0, 0.0, CAMERA_BOX_HALF_SIZES[2]) +CAMERA_BOX_COLOR = [80, 160, 230] + +BOX_HALF_SIZES = (0.0325, 0.0325, 0.030) # Mid-360 body: 65 x 65 x 60 mm +BOX_COLOR = [230, 160, 40] + +# Mid-360 internal frames (manual: point-cloud origin O ~47mm above base, on central axis). +# Box center is 30mm above base, so O sits +17mm along box +z. +LIDAR_ABOVE_BOX_CENTER = 0.017 +# IMU position in point-cloud (lidar) coordinates, from Livox Mid-360 extrinsics. +IMU_IN_LIDAR = (0.011, 0.02329, -0.04412) + +# (name, parent_name, translation_xyz, rpy) — parent None means attached to ROOT +FRAMES: list[tuple[str, str | None, tuple[float, float, float], tuple[float, float, float]]] = [ + ("bottom_screw_frame", None, (0.0, 0.0, 0.0), (0.0, 0.0, 0.0)), + # Flat world: the screw frame leveled out. The camera points up CAMERA_ANGLE_UP, so + # pitching the screw frame down by that angle gives a gravity-flat frame at the screw. + ("world", "bottom_screw_frame", (0.0, 0.0, 0.0), (0.0, CAMERA_ANGLE_UP, 0.0)), + ("link", "bottom_screw_frame", (MESH_X_OFFSET, DEPTH_PY, DEPTH_PZ), (0.0, 0.0, 0.0)), + ("depth_frame", "link", (0.0, 0.0, 0.0), (0.0, 0.0, 0.0)), + ("depth_optical_frame", "depth_frame", (0.0, 0.0, 0.0), OPTICAL_RPY), + ("infra1_frame", "link", (0.0, DEPTH_TO_INFRA1_OFFSET, 0.0), (0.0, 0.0, 0.0)), + ("infra1_optical_frame", "infra1_frame", (0.0, 0.0, 0.0), OPTICAL_RPY), + ("infra2_frame", "link", (0.0, DEPTH_TO_INFRA2_OFFSET, 0.0), (0.0, 0.0, 0.0)), + ("infra2_optical_frame", "infra2_frame", (0.0, 0.0, 0.0), OPTICAL_RPY), + ("color_frame", "link", (0.0, DEPTH_TO_COLOR_OFFSET, 0.0), (0.0, 0.0, 0.0)), + ("color_optical_frame", "color_frame", (0.0, 0.0, 0.0), OPTICAL_RPY), + ("accel_frame", "link", IMU_XYZ, (0.0, 0.0, 0.0)), + ("accel_optical_frame", "accel_frame", (0.0, 0.0, 0.0), OPTICAL_RPY), + ("gyro_frame", "link", IMU_XYZ, (0.0, 0.0, 0.0)), + ("gyro_optical_frame", "gyro_frame", (0.0, 0.0, 0.0), OPTICAL_RPY), + ("box_pitch_frame", "bottom_screw_frame", (0.0, 0.0, 0.0), (0.0, BOX_PITCH_DOWN, 0.0)), + ("box_center", "box_pitch_frame", (-BOX_BACK, 0.0, BOX_UP), (0.0, 0.0, 0.0)), + ("lidar_frame", "box_center", (0.0, 0.0, LIDAR_ABOVE_BOX_CENTER), (0.0, 0.0, 0.0)), + ("imu_frame", "lidar_frame", IMU_IN_LIDAR, (0.0, 0.0, 0.0)), +] + +PARENT_OF: dict[str, str | None] = {name: parent for name, parent, _, _ in FRAMES} +EDGE_OF: dict[str, tuple[tuple[float, float, float], tuple[float, float, float]]] = { + name: (translation, rpy) for name, _parent, translation, rpy in FRAMES +} + + +def rpy_to_matrix(roll: float, pitch: float, yaw: float) -> np.ndarray: + """URDF fixed-axis rpy -> rotation matrix (Rz @ Ry @ Rx).""" + cos_r, sin_r = math.cos(roll), math.sin(roll) + cos_p, sin_p = math.cos(pitch), math.sin(pitch) + cos_y, sin_y = math.cos(yaw), math.sin(yaw) + rot_x = np.array([[1, 0, 0], [0, cos_r, -sin_r], [0, sin_r, cos_r]]) + rot_y = np.array([[cos_p, 0, sin_p], [0, 1, 0], [-sin_p, 0, cos_p]]) + rot_z = np.array([[cos_y, -sin_y, 0], [sin_y, cos_y, 0], [0, 0, 1]]) + return rot_z @ rot_y @ rot_x + + +def pose_relative_to_root(name: str) -> Transform: + """Compose the frame edges from ROOT down to ``name`` (root -> name).""" + chain: list[str] = [] + cursor: str | None = name + while cursor is not None: + chain.append(cursor) + cursor = PARENT_OF[cursor] + pose: Transform | None = None + for frame_name in reversed(chain): + translation, rpy = EDGE_OF[frame_name] + edge = Transform( + translation=Vector3(*translation), rotation=Quaternion.from_euler(Vector3(*rpy)) + ) + pose = edge if pose is None else pose + edge + assert pose is not None + return pose + + +def transform_between(source: str, target: str, frame_id: str, child_frame_id: str) -> Transform: + """Static transform source -> target (frame_id=source, child_frame_id=target).""" + result = pose_relative_to_root(source).inverse() + pose_relative_to_root(target) + result.frame_id = frame_id + result.child_frame_id = child_frame_id + return result + + +# RealSense frame -> Mid-360 IMU (odom) frame. The IMU frame is what FAST-LIO reports as +# odom. Use `.inverse()` for the Mid-360 -> RealSense direction, or swap to the +# *_optical_frame source if you need the image-optical convention. +REALSENSE_COLOR_FRAME_TO_MID360_IMU_FRAME = transform_between( + "color_frame", "imu_frame", "realsense_color_frame", "mid360_imu_frame" +) +REALSENSE_GYRO_FRAME_TO_MID360_IMU_FRAME = transform_between( + "gyro_frame", "imu_frame", "realsense_gyro_frame", "mid360_imu_frame" +) +REALSENSE_DEPTH_FRAME_TO_MID360_IMU_FRAME = transform_between( + "depth_frame", "imu_frame", "realsense_depth_frame", "mid360_imu_frame" +) +# Optical convention (x-right, y-down, z-forward) — what image/pointcloud data uses. +REALSENSE_COLOR_OPTICAL_FRAME_TO_MID360_IMU_FRAME = transform_between( + "color_optical_frame", "imu_frame", "realsense_color_optical_frame", "mid360_imu_frame" +) +# Mid-360 IMU (FAST-LIO odom) frame -> flat world frame at the camera screw. Invert for the +# world -> mid360 direction used to re-anchor FAST-LIO odometry while recording. +MID360_TO_WORLD = transform_between("imu_frame", "world", "mid360_imu_frame", "world") + + +def entity_path(name: str) -> str: + chain = [name] + cursor = PARENT_OF[name] + while cursor is not None: + chain.append(cursor) + cursor = PARENT_OF[cursor] + chain.append(ROOT) + return "/".join(reversed(chain)) + + +def render(axis_length: float, rrd_path: Path | None) -> None: + import rerun as rr + + rr.init("mid360_realsense_frames", spawn=rrd_path is None) + if rrd_path is not None: + rr.save(str(rrd_path)) + + rr.log(ROOT, rr.ViewCoordinates.RIGHT_HAND_Z_UP, static=True) + + paths = {name: entity_path(name) for name in PARENT_OF} + + axes = np.eye(3) * axis_length + for name, _parent, translation, rpy in FRAMES: + rr.log( + paths[name], + rr.Transform3D(translation=list(translation), mat3x3=rpy_to_matrix(*rpy)), + rr.Arrows3D( + vectors=axes, origins=np.zeros((3, 3)), colors=AXIS_COLORS, labels=["X", "Y", "Z"] + ), + ) + rr.log( + f"{paths[name]}/label", + rr.Points3D([[0.0, 0.0, 0.0]], labels=[name], show_labels=True, radii=axis_length / 15), + ) + + rr.log( + f"{paths['bottom_screw_frame']}/box", + rr.Boxes3D( + centers=[list(CAMERA_BOX_CENTER)], + half_sizes=[list(CAMERA_BOX_HALF_SIZES)], + colors=[CAMERA_BOX_COLOR], + ), + ) + rr.log( + f"{paths['box_center']}/box", + rr.Boxes3D( + centers=[[0.0, 0.0, 0.0]], half_sizes=[list(BOX_HALF_SIZES)], colors=[BOX_COLOR] + ), + ) + + if rrd_path is not None: + print(f"wrote {rrd_path}") + + +def main() -> None: + parser = argparse.ArgumentParser(description=__doc__) + parser.add_argument( + "--axis-length", type=float, default=0.015, help="basis-vector length in meters" + ) + parser.add_argument( + "--rrd", type=Path, default=None, help="save to this .rrd instead of spawning the viewer" + ) + args = parser.parse_args() + + for transform in ( + REALSENSE_COLOR_FRAME_TO_MID360_IMU_FRAME, + REALSENSE_GYRO_FRAME_TO_MID360_IMU_FRAME, + REALSENSE_DEPTH_FRAME_TO_MID360_IMU_FRAME, + REALSENSE_COLOR_OPTICAL_FRAME_TO_MID360_IMU_FRAME, + MID360_TO_WORLD, + ): + print(transform) + render(args.axis_length, args.rrd) + + +if __name__ == "__main__": + main() diff --git a/dimos/mapping/recording/multi_map_anchor.py b/dimos/mapping/recording/multi_map_anchor.py new file mode 100644 index 0000000000..0d54212696 --- /dev/null +++ b/dimos/mapping/recording/multi_map_anchor.py @@ -0,0 +1,294 @@ +#!/usr/bin/env python +# Copyright 2026 Dimensional Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Anchor one recording's trajectory onto another via shared AprilTags. + +Takes two recording dirs of the same route -- a stable mid360_realsense map (the +anchor) and a shakier go2_mid360 map (the target). It: + + 1. solves the anchor's drift-corrected trajectory (AprilTag landmark SLAM), + ignoring the dog-mounted tag, and keeps the optimized world poses of the + static environment tags as a tag map (this defines the world frame), + 2. solves the target the same way but pins the *shared* tags to the anchor's + tag map, so the target lands in the anchor's world frame, drift-corrected, + 3. writes one .rrd overlaying the original FAST-LIO odom and the corrected + odom from both recordings (and, by default, the re-anchored lidar maps so + you can see whether the two corrected maps line up). + + uv run --no-sync python \ + dimos/mapping/recording/multi_map_anchor.py DIR_A DIR_B [--out PATH] + +DIR_A/DIR_B may be in any order; the mid360_realsense recording (detected by its +`realsense_camera_info` stream) is always used as the anchor. +""" + +from __future__ import annotations + +import argparse +from pathlib import Path +from typing import cast + +import numpy as np +import rerun as rr +import rerun.blueprint as rrb +from scipy.spatial.transform import Rotation + +from dimos.mapping.recording.go2_mid360.post_process import load_camera as load_go2_camera +from dimos.mapping.recording.mid360_realsense.post_process import ( + load_camera as load_realsense_camera, +) +from dimos.mapping.recording.utils.apriltags import detect_apriltags +from dimos.mapping.recording.utils.build_rrd import _log_map, _log_path_gradient +from dimos.mapping.recording.utils.gtsam_gt import build_gtsam_gt, write_gtsam_odom +from dimos.mapping.recording.utils.post_process import CameraParams +from dimos.memory2.store.sqlite import SqliteStore + +TagMap = dict[int, list[float]] +Trajectory = list[tuple[float, list[float]]] +MIN_SHARED_TAGS = 3 # rigid SE3 needs >=3 non-collinear correspondences + +DB_NAME = "mem2.db" +DOG_TAG_ID = 17 # mounted on the robot dog -> not a static landmark, ignored +GTSAM_STREAM = "gtsam_odom" +POINTLIO_LIDAR = "pointlio_lidar" +POINTLIO_ODOM = "pointlio_odometry" +LOOP_LIDAR = "livox_lidar" # raw sensor-frame cloud (loop closure needs sensor, not world, frame) +REALSENSE_INFO_STREAM = "realsense_camera_info" + + +def _resolve_db(arg: str) -> Path: + path = Path(arg) + if path.name == DB_NAME: + return path + if (path / DB_NAME).exists(): + return path / DB_NAME + raise SystemExit(f"no {DB_NAME} found at {arg}") + + +def _is_realsense(db: Path) -> bool: + # recordings drop a `type.` marker file; prefer it over opening the + # (multi-GB) db, falling back to a stream check if absent. + if (db.parent / "type.mid360_realsense").exists(): + return True + if (db.parent / "type.go2_mid360").exists(): + return False + with SqliteStore(path=str(db)) as store: + return REALSENSE_INFO_STREAM in store.list_streams() + + +def _load_camera(db: Path) -> CameraParams: + return load_realsense_camera(db) if _is_realsense(db) else load_go2_camera(db) + + +def _mat_from_pose7(pose7: list[float]) -> np.ndarray: + matrix = np.eye(4) + matrix[:3, :3] = Rotation.from_quat(pose7[3:7]).as_matrix() + matrix[:3, 3] = pose7[:3] + return matrix + + +def _pose7_from_mat(matrix: np.ndarray) -> list[float]: + quaternion = Rotation.from_matrix(matrix[:3, :3]).as_quat() + return [*matrix[:3, 3].tolist(), *quaternion.tolist()] + + +def _rigid_align(source_points: np.ndarray, target_points: np.ndarray) -> np.ndarray: + """Kabsch: the rigid SE3 (4x4) that best maps `source_points` onto + `target_points` (no scale). Position-only — tag orientation is untrusted.""" + source_centroid = source_points.mean(axis=0) + target_centroid = target_points.mean(axis=0) + covariance = (source_points - source_centroid).T @ (target_points - target_centroid) + u_matrix, _singular, vt_matrix = np.linalg.svd(covariance) + reflection = np.diag([1.0, 1.0, np.sign(np.linalg.det(vt_matrix.T @ u_matrix.T))]) + rotation = vt_matrix.T @ reflection @ u_matrix.T + transform = np.eye(4) + transform[:3, :3] = rotation + transform[:3, 3] = target_centroid - rotation @ source_centroid + return transform + + +def _align_to_anchor(target_map: TagMap, anchor_map: TagMap) -> np.ndarray: + """Rigid SE3 placing the target's frame into the anchor's frame, fit from the + tags both maps share.""" + shared = sorted(set(target_map) & set(anchor_map)) + if len(shared) < MIN_SHARED_TAGS: + raise SystemExit( + f"only {len(shared)} shared tags {shared} (need >={MIN_SHARED_TAGS}) — cannot anchor" + ) + source_points = np.array([target_map[marker_id][:3] for marker_id in shared]) + target_points = np.array([anchor_map[marker_id][:3] for marker_id in shared]) + transform = _rigid_align(source_points, target_points) + residuals = np.linalg.norm( + (source_points @ transform[:3, :3].T + transform[:3, 3]) - target_points, axis=1 + ) + print( + f" align: {len(shared)} shared tags {shared} | residual " + f"mean {residuals.mean():.3f} m, max {residuals.max():.3f} m" + ) + return transform + + +def _solve( + db: Path, + *, + image_stream: str, + marker_length: float, + dictionary: str, + add_loop_closures: bool, +) -> tuple[Trajectory, TagMap]: + """Detect tags and run the GTSAM solve (ignoring the dog tag), each map in its + own frame. Returns its corrected trajectory and optimized static tag map.""" + intrinsics, distortion, optical_in_base, _resolution = _load_camera(db) + with SqliteStore(path=str(db)) as store: + detections = detect_apriltags( + store, intrinsics, distortion, image_stream, "april_tags", marker_length, dictionary + ) + if not detections: + raise SystemExit(f"no AprilTags detected in {db} -- cannot anchor") + + return cast( + "tuple[Trajectory, TagMap]", + build_gtsam_gt( + str(db), + detections, + optical_in_base, + exclude_marker_ids=(DOG_TAG_ID,), + pose_stream=POINTLIO_ODOM, + loop_lidar_stream=LOOP_LIDAR, + add_loop_closures=add_loop_closures, + return_landmarks=True, + ), + ) + + +def _write_corrected(db: Path, trajectory: Trajectory) -> None: + """Write `gtsam_odom` (+ .tum) — the drift-corrected groundtruth trajectory. + + Point-LIO already stamps ``pointlio_lidar`` with its odom pose at record time, + so there's no separate lidar re-anchor pass (dropped in the post-process + refactor); the map viz below uses that recorder-stamped cloud directly. + """ + with SqliteStore(path=str(db)) as store: + write_gtsam_odom(store, trajectory, GTSAM_STREAM, db.parent / "gtsam_odom.tum") + + +def build_combined_rrd( + out_path: str, anchor_db: Path, target_db: Path, tag_map: TagMap, *, with_maps: bool +) -> None: + """One Rerun recording overlaying both recordings in the shared world frame: + raw FAST-LIO odom + corrected odom for each, the anchor tag map, and + (optionally) the re-anchored lidar maps.""" + rr.init("multi_map_anchor", recording_id=str(out_path)) + rr.save(str(out_path)) + + map_entities = ["anchor/map", "target/map"] + hide = {f"/world/{entity}": rrb.EntityBehavior(visible=False) for entity in map_entities} + rr.send_blueprint( + rrb.Blueprint( + rrb.Spatial3DView(origin="/world", name="3D", overrides=hide), + rrb.BlueprintPanel(state=rrb.PanelState.Expanded), + rrb.TimePanel(state=rrb.PanelState.Collapsed), + ) + ) + + # raw odom dim, corrected odom bright; anchor in greens/cyan, target in warms. + _log_path_gradient(str(anchor_db), POINTLIO_ODOM, "world/anchor/pointlio_raw", (0, 110, 150)) + _log_path_gradient(str(anchor_db), GTSAM_STREAM, "world/anchor/corrected", (0, 220, 120)) + _log_path_gradient(str(target_db), POINTLIO_ODOM, "world/target/pointlio_raw", (150, 110, 0)) + _log_path_gradient(str(target_db), GTSAM_STREAM, "world/target/corrected", (255, 150, 60)) + + for marker_id, pose7 in sorted(tag_map.items()): + rr.log( + f"world/tags/marker_{marker_id}", + rr.Points3D( + [pose7[:3]], + colors=(255, 220, 60), + radii=0.25, + labels=[f"tag {marker_id}"], + show_labels=True, + ), + static=True, + ) + + if with_maps: + with SqliteStore(path=str(anchor_db)) as store: + _log_map(store, POINTLIO_LIDAR, "world/anchor/map", 0.1, (0, 180, 170)) + with SqliteStore(path=str(target_db)) as store: + _log_map(store, POINTLIO_LIDAR, "world/target/map", 0.1, (240, 160, 40)) + + print(f" wrote {out_path}") + + +def main() -> None: + parser = argparse.ArgumentParser( + description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter + ) + parser.add_argument("dir_a", help="a recording dir or mem2.db") + parser.add_argument("dir_b", help="a recording dir or mem2.db") + parser.add_argument("--out", default=None, help="output .rrd (default: next to the anchor)") + parser.add_argument("--image-stream", default="color_image") + parser.add_argument("--marker-length", type=float, default=0.10) + parser.add_argument("--dictionary", default="DICT_APRILTAG_36h11") + parser.add_argument( + "--loop", + action="store_true", + help="add lidar loop-closure constraints (off by default: with pointlio_odometry as the " + "pose chain the livox_lidar revisits are unreliable and currently degrade the solve)", + ) + parser.add_argument("--no-maps", action="store_true", help="skip the re-anchored lidar maps") + args = parser.parse_args() + + db_a, db_b = _resolve_db(args.dir_a), _resolve_db(args.dir_b) + if _is_realsense(db_a): + anchor_db, target_db = db_a, db_b + elif _is_realsense(db_b): + anchor_db, target_db = db_b, db_a + else: + raise SystemExit("neither recording is a mid360_realsense (no realsense_camera_info)") + print(f"anchor (realsense): {anchor_db.parent}") + print(f"target (go2): {target_db.parent}") + + common = { + "image_stream": args.image_stream, + "marker_length": args.marker_length, + "dictionary": args.dictionary, + "add_loop_closures": args.loop, + } + + # The anchor (realsense) defines the world frame: solve it, write it as-is. + print(">> solving anchor (defines the world frame)") + anchor_trajectory, anchor_map = _solve(anchor_db, **common) + _write_corrected(anchor_db, anchor_trajectory) + + # The target (go2) is solved in its own frame, then rigidly placed into the + # anchor frame via the tags both maps share. + print(">> solving target") + target_trajectory, target_map = _solve(target_db, **common) + print(">> aligning target onto the anchor frame") + transform = _align_to_anchor(target_map, anchor_map) + target_trajectory = [ + (timestamp, _pose7_from_mat(transform @ _mat_from_pose7(pose7))) + for timestamp, pose7 in target_trajectory + ] + _write_corrected(target_db, target_trajectory) + + out_path = args.out or str(anchor_db.parent / "multi_map_anchor.rrd") + print(">> building combined .rrd") + build_combined_rrd(out_path, anchor_db, target_db, anchor_map, with_maps=not args.no_maps) + print("done") + + +if __name__ == "__main__": + main() diff --git a/dimos/mapping/recording/utils/apriltags.py b/dimos/mapping/recording/utils/apriltags.py new file mode 100644 index 0000000000..723148c3ee --- /dev/null +++ b/dimos/mapping/recording/utils/apriltags.py @@ -0,0 +1,203 @@ +# Copyright 2026 Dimensional Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""AprilTag detection over a mem2.db color stream. + +Detects tags in the `color_image` stream, keeps only close head-on views, and +clusters same-id detections in time into a single medoid representative each. +(Re)writes the `april_tags` PoseStamped stream of tag-in-camera relative poses +(solvePnP, marker_id in each observation's tags) and returns those +representatives for the GTSAM solve. +""" + +from __future__ import annotations + +from collections import defaultdict +import math + +import cv2 +import numpy as np +from scipy.spatial.transform import Rotation + +from dimos.msgs.geometry_msgs.PoseStamped import PoseStamped +from dimos.msgs.sensor_msgs.Image import Image + + +def make_detector(dictionary_name: str): + d = cv2.aruco.getPredefinedDictionary(getattr(cv2.aruco, dictionary_name)) + return cv2.aruco.ArucoDetector(d, cv2.aruco.DetectorParameters()) + + +def _object_points(marker_length_m: float) -> np.ndarray: + h = marker_length_m / 2.0 + return np.array([[-h, h, 0.0], [h, h, 0.0], [h, -h, 0.0], [-h, -h, 0.0]], dtype=np.float32) + + +def estimate_marker_pose(corners_pixels, marker_length_m, intrinsics, distortion): + """solvePnP a single tag -> (rotation_vector, translation_vector) in the + camera_optical frame, or None if it failed.""" + image_corners = corners_pixels.reshape(4, 1, 2).astype(np.float32) + found, rotation_vector, translation_vector = cv2.solvePnP( + _object_points(marker_length_m), + image_corners, + intrinsics, + distortion, + flags=cv2.SOLVEPNP_IPPE_SQUARE, + ) + return (rotation_vector, translation_vector) if found else None + + +def view_quality(t_cam_marker: list[float]) -> tuple[float, float]: + """(distance_m, view_angle_deg) for a tag pose in the camera optical frame. + + distance is the camera->tag range; view_angle is the angle between the line + of sight and the tag's surface normal (0 = perfectly head-on).""" + translation = np.array(t_cam_marker[:3], dtype=np.float64) + distance = float(np.linalg.norm(translation)) + normal = Rotation.from_quat(t_cam_marker[3:7]).as_matrix()[:, 2] + line_of_sight = translation / (distance + 1e-9) + cos_angle = abs(float(np.dot(line_of_sight, normal))) + view_angle = math.degrees(math.acos(min(1.0, cos_angle))) + return distance, view_angle + + +def cluster_by_time(detections: list[dict], gap_sec: float) -> list[list[dict]]: + """Group same-marker detections into clusters. A new cluster begins whenever + the time gap to the previous same-marker detection exceeds gap_sec.""" + clusters: list[list[dict]] = [] + by_marker: dict[int, list[dict]] = defaultdict(list) + for detection in detections: + by_marker[detection["marker_id"]].append(detection) + for marker_detections in by_marker.values(): + marker_detections.sort(key=lambda detection: detection["ts"]) + current = [marker_detections[0]] + for detection in marker_detections[1:]: + if detection["ts"] - current[-1]["ts"] > gap_sec: + clusters.append(current) + current = [detection] + else: + current.append(detection) + clusters.append(current) + return clusters + + +def _pose_distance(a: list[float], b: list[float], rotation_weight_m_per_rad: float) -> float: + translation = float(np.linalg.norm(np.array(a[:3]) - np.array(b[:3]))) + rotation = 2.0 * math.acos(min(1.0, abs(float(np.dot(a[3:7], b[3:7]))))) + return translation + rotation_weight_m_per_rad * rotation + + +def cluster_medoid(cluster: list[dict], rotation_weight_m_per_rad: float) -> dict: + """The detection whose pose is most central (min total spatial+rotational + distance to the rest) — a robust representative of the cluster.""" + poses = [detection["t_cam_marker"] for detection in cluster] + best_index, best_cost = 0, float("inf") + for i in range(len(poses)): + cost = sum( + _pose_distance(poses[i], poses[j], rotation_weight_m_per_rad) + for j in range(len(poses)) + if j != i + ) + if cost < best_cost: + best_cost, best_index = cost, i + return cluster[best_index] + + +def detect_apriltags( + store, + intrinsics, + distortion, + image_stream="color_image", + stream_name="april_tags", + marker_length=0.10, + dictionary="DICT_APRILTAG_36h11", + *, + max_distance_m=1.0, + max_view_angle_deg=45.0, + cluster_gap_sec=5.0, + rotation_weight_m_per_rad=0.5, +): + """Detect tags in `image_stream`, keep only close, head-on views, cluster + same-id detections by time, and (re)write the `april_tags` stream from one + medoid representative per cluster. Returns that list of representatives.""" + detector = make_detector(dictionary) + raw_detections: list[dict] = [] + image_count = 0 + for image_obs in store.stream(image_stream, Image): + image_count += 1 + image = image_obs.data + bgr = image.numpy() if hasattr(image, "numpy") else np.asarray(image.data) + all_corners, marker_ids, _ = detector.detectMarkers(bgr) + if marker_ids is None: + continue + for corners, marker_id in zip(all_corners, marker_ids.flatten(), strict=False): + pose = estimate_marker_pose(corners, marker_length, intrinsics, distortion) + if pose is None: + continue + rotation_vector, translation_vector = pose + quaternion = Rotation.from_rotvec(rotation_vector.reshape(3)).as_quat() # x,y,z,w + translation = translation_vector.reshape(3) + tag_in_camera = [ + float(translation[0]), + float(translation[1]), + float(translation[2]), + float(quaternion[0]), + float(quaternion[1]), + float(quaternion[2]), + float(quaternion[3]), + ] + raw_detections.append( + { + "ts": float(image_obs.ts), + "marker_id": int(marker_id), + "t_cam_marker": tag_in_camera, + } + ) + + # Quality gate: drop distant or oblique glimpses, keep close head-on views. + kept = [] + for detection in raw_detections: + distance, view_angle = view_quality(detection["t_cam_marker"]) + if distance <= max_distance_m and view_angle <= max_view_angle_deg: + kept.append(detection) + + # One representative (medoid) per time-clustered group of same-id detections. + detections = [] + for cluster in cluster_by_time(kept, cluster_gap_sec): + detections.append( + {**cluster_medoid(cluster, rotation_weight_m_per_rad), "n_observations": len(cluster)} + ) + detections.sort(key=lambda detection: detection["ts"]) + + if stream_name in store.list_streams(): + store.delete_stream(stream_name) + april_tag_stream = store.stream(stream_name, PoseStamped) + for detection in detections: + tag_in_camera = detection["t_cam_marker"] + pose_stamped = PoseStamped( + ts=detection["ts"], position=tag_in_camera[:3], orientation=tag_in_camera[3:] + ) + april_tag_stream.append( + pose_stamped, + ts=detection["ts"], + pose=tuple(tag_in_camera), + tags={"marker_id": detection["marker_id"]}, + ) + + found_ids = sorted({detection["marker_id"] for detection in detections}) + print( + f" april_tags: {len(raw_detections)} raw -> {len(kept)} in-spec " + f"-> {len(detections)} clusters, markers {found_ids} (over {image_count} images)" + ) + return detections diff --git a/dimos/mapping/recording/utils/build_rrd.py b/dimos/mapping/recording/utils/build_rrd.py new file mode 100644 index 0000000000..cb60cfefaa --- /dev/null +++ b/dimos/mapping/recording/utils/build_rrd.py @@ -0,0 +1,514 @@ +# Copyright 2026 Dimensional Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Dump a go2 mem2.db to a Rerun .rrd (adapted from dimos +mapping/loop_closure/utils/map_rrd.py), focused on the post-processed result. + +For each lidar stream it logs both the per-frame clouds and a single aggregated, +voxel-downsampled "map" (each cloud in its own world frame). Clouds get a slight +height-color gradient; trajectories get a start->end gradient. Each AprilTag is +placed in 3D (via the gtsam-corrected trajectory) with its detections, a labeled +marker, basis-vector axes at the perceived pose, and the robot's-eye camera image +at the recognition moment. +""" + +from __future__ import annotations + +from datetime import datetime +import json +import math +from pathlib import Path +import sqlite3 + +import numpy as np +import rerun as rr +import rerun.blueprint as rrb +from scipy.spatial.transform import Rotation + +from dimos.memory2.store.sqlite import SqliteStore +from dimos.msgs.geometry_msgs.PoseStamped import PoseStamped +from dimos.msgs.sensor_msgs.Image import Image +from dimos.msgs.sensor_msgs.PointCloud2 import PointCloud2 + +TIMELINE = "ts" +# one distinct base color per point cloud (height modulates brightness within each) +_CLOUD_PALETTE = [(0, 180, 170), (240, 160, 40), (220, 90, 180), (150, 200, 60), (90, 150, 235)] +_TAG_PALETTE = [ + (255, 80, 80), + (80, 200, 255), + (255, 220, 60), + (160, 100, 255), + (90, 230, 130), + (255, 150, 60), +] + + +def _mat(trans, quat) -> np.ndarray: + M = np.eye(4) + M[:3, :3] = Rotation.from_quat(quat).as_matrix() + M[:3, 3] = trans + return M + + +def _mat7(p) -> np.ndarray: + return _mat(p[:3], p[3:7]) + + +# fastlio reports the mid360 (lidar) frame, which is mounted pitched 44 deg down +# relative to the front camera; place tags/camera by chaining mid360 -> camera. +# (base_link cancels out, so we only need the camera<->mid360 relationship.) +_PITCH_HALF = math.radians(44.0) / 2.0 +_M_FC_MID360 = _mat([-0.032, 0.0, 0.12], [0.0, math.sin(_PITCH_HALF), 0.0, math.cos(_PITCH_HALF)]) +_M_FC_OPTICAL = _mat([0.0, 0.0, 0.0], [-0.5, 0.5, -0.5, 0.5]) # REP-103 optical in camera body +MID360_TO_OPTICAL = np.linalg.inv(_M_FC_MID360) @ _M_FC_OPTICAL # legacy: optical pts -> mid360 pts + + +def _pose7(p): + if hasattr(p, "orientation"): + o = p.orientation + return [p.x, p.y, p.z, o.x, o.y, o.z, o.w] + return list(p) + + +def _down(pts: np.ndarray, voxel: float) -> np.ndarray: + if voxel <= 0 or len(pts) == 0: + return pts + k = np.floor(pts / voxel).astype(np.int64) + (1 << 20) + key = k[:, 0] | (k[:, 1] << 21) | (k[:, 2] << 42) + _, idx = np.unique(key, return_index=True) + return pts[idx] + + +def _shaded(pts: np.ndarray, base) -> np.ndarray: + """Per-cloud color: the cloud's base color, brightness modulated by height + (darker low -> brighter high). Distinct hue per cloud, subtle within-cloud.""" + z = pts[:, 2] + lo, hi = np.percentile(z, 5), np.percentile(z, 95) + t = np.clip((z - lo) / (hi - lo + 1e-9), 0, 1)[:, None] + return (np.array(base, np.float32) * (0.5 + 0.5 * t)).astype(np.uint8) + + +def _log_frames(store, stream, entity, stride, voxel, base): + if stream not in store.list_streams(): + return + print(f" rrd: logging {stream} frames -> {entity} (stride {stride}) ...", flush=True) + n = 0 + for k, obs in enumerate(store.stream(stream, PointCloud2)): + if k % stride: + continue + pts = _down(obs.data.points_f32(), voxel) + if len(pts) == 0: + continue + rr.set_time(TIMELINE, timestamp=obs.ts) + rr.log(entity, rr.Points3D(pts, colors=_shaded(pts, base))) + n += 1 + print(f" rrd: {entity} <- {stream} ({n} frames, stride {stride}, voxel {voxel}m)") + + +# Compact the running map once this many uncompacted points accumulate. Bounds +# peak memory to ~(final map + this) instead of every frame's full-res points. +_MAP_COMPACT_THRESHOLD = 5_000_000 + + +def _log_map(store, stream, entity, voxel, base): + if stream not in store.list_streams(): + return + running: np.ndarray | None = None + pending: list[np.ndarray] = [] + pending_count = 0 + + def compact(parts: list[np.ndarray]) -> np.ndarray | None: + if running is not None: + parts = [running, *parts] + if not parts: + return running + merged = np.concatenate(parts) + return _down(merged, voxel) if voxel > 0 else merged + + for obs in store.stream(stream, PointCloud2): + cloud = obs.data.points_f32() + if len(cloud) == 0: + continue + # Per-frame downsample collapses each dense cloud to its voxel footprint, + # so we never hold all frames' raw points at once. + pending.append(_down(cloud, voxel) if voxel > 0 else cloud) + pending_count += len(pending[-1]) + if pending_count >= _MAP_COMPACT_THRESHOLD: + running = compact(pending) + pending, pending_count = [], 0 + + pts = compact(pending) + if pts is None or len(pts) == 0: + return + rr.log(entity, rr.Points3D(pts, colors=_shaded(pts, base)), static=True) + print(f" rrd: {entity} <- {stream} ({len(pts):,} pts, voxel {voxel}m)") + + +def _path(db_path, stream): + try: + conn = sqlite3.connect(db_path) + rows = conn.execute( + f'SELECT pose_x,pose_y,pose_z FROM "{stream}" WHERE pose_x IS NOT NULL ORDER BY ts' + ).fetchall() + conn.close() + except sqlite3.OperationalError: + return None + return np.array(rows, float) if rows else None + + +def _log_odom_frames(db_path, stride=5): + """A moving XYZ basis-vector triad per odom stream (Transform3D over time + + a static axis triad). Doubles as the eye's tracking target.""" + for stream, name in [ + ("gtsam_odom", "gtsam"), + ("go2_odom", "go2"), + ("pointlio_odometry", "pointlio"), + ("odom", "odom"), + ]: + try: + conn = sqlite3.connect(db_path) + rows = conn.execute( + f"SELECT ts,pose_x,pose_y,pose_z,pose_qx,pose_qy,pose_qz,pose_qw " + f'FROM "{stream}" WHERE pose_qw IS NOT NULL ORDER BY ts' + ).fetchall() + conn.close() + except sqlite3.OperationalError: + continue + if not rows: + continue + ent = f"world/{name}_frame" + rr.log( + ent, + rr.Arrows3D( + vectors=[[0.5, 0, 0], [0, 0.5, 0], [0, 0, 0.5]], + colors=[[255, 0, 0], [0, 255, 0], [0, 0, 255]], + ), + static=True, + ) + # green box marking the odom source (child -> inherits the moving transform) + rr.log( + f"{ent}/box", + rr.Boxes3D(half_sizes=[[0.35, 0.2, 0.15]], colors=[[0, 220, 0]]), + static=True, + ) + for r in rows[::stride]: + rr.set_time(TIMELINE, timestamp=r[0]) + rr.log( + ent, rr.Transform3D(translation=r[1:4], quaternion=rr.Quaternion(xyzw=list(r[4:8]))) + ) + print(f" rrd: {ent} <- {stream} ({len(rows[::stride])} frames, basis axes)") + + +def _log_path_gradient(db_path, stream, entity, base): + """Log a trajectory as per-segment line strips with a start->end brightness + gradient (dim at start, full color at end) so direction/time is visible.""" + pts = _path(db_path, stream) + if pts is None or len(pts) < 2: + return + segs = [pts[i : i + 2] for i in range(len(pts) - 1)] + t = np.linspace(0.0, 1.0, len(segs))[:, None] + base = np.array(base, np.float32) + colors = ((0.25 + 0.75 * t) * base).astype(np.uint8) + rr.log(entity, rr.LineStrips3D(segs, colors=colors), static=True) + print(f" rrd: {entity} <- {stream} ({len(pts)} poses, gradient)") + + +def _has_rows(conn, stream): + try: + return ( + conn.execute(f'SELECT 1 FROM "{stream}" WHERE pose_qw IS NOT NULL LIMIT 1').fetchone() + is not None + ) + except sqlite3.OperationalError: + return False + + +def _log_apriltags(store, db_path, cam_xform, intrinsics, resolution, max_views_per_tag=40): + """Place every AprilTag recognition in 3D via the corrected trajectory: + T_world_tag = T_world_base(t) . T_base_optical . T_cam_tag. Per marker logs + the detection cloud + a labeled marker, and for a sample of recognitions: + XYZ basis axes at the perceived tag pose and the robot's-eye camera image on + a pinhole frustum at the camera pose (3D only — see the blueprint).""" + if "april_tags" not in store.list_streams(): + return + print(" rrd: placing april_tags in 3D ...", flush=True) + connection = sqlite3.connect(db_path) + traj_stream = "gtsam_odom" if _has_rows(connection, "gtsam_odom") else "pointlio_odometry" + pose_rows = connection.execute( + f"SELECT ts,pose_x,pose_y,pose_z,pose_qx,pose_qy,pose_qz,pose_qw " + f'FROM "{traj_stream}" WHERE pose_qw IS NOT NULL ORDER BY ts' + ).fetchall() + connection.close() + if not pose_rows: + return + traj_timestamps = np.array([row[0] for row in pose_rows]) + traj_poses = np.array([row[1:8] for row in pose_rows], float) + + def optical_in_world(timestamp): + """T_world_optical = T_world_traj . (traj-frame -> optical).""" + index = int(np.searchsorted(traj_timestamps, timestamp)) + return _mat7(traj_poses[min(max(index, 0), len(traj_timestamps) - 1)]) @ cam_xform + + detections_by_marker: dict[int, dict] = {} + for detection_obs in store.stream("april_tags", PoseStamped): + marker_id = (detection_obs.tags or {}).get("marker_id") + if marker_id is None or detection_obs.pose is None: + continue + tag_world = optical_in_world(detection_obs.ts) @ _mat7(_pose7(detection_obs.pose)) + entry = detections_by_marker.setdefault(int(marker_id), {"poses": [], "ts": []}) + entry["poses"].append(tag_world) + entry["ts"].append(detection_obs.ts) + + camera_targets = [] # (entity, T_world_optical, ts) for image logging + for palette_index, (marker_id, entry) in enumerate(sorted(detections_by_marker.items())): + tag_poses = np.array(entry["poses"]) + timestamps = np.array(entry["ts"]) + positions = tag_poses[:, :3, 3] + color = _TAG_PALETTE[palette_index % len(_TAG_PALETTE)] + center = np.median(positions, 0) + rr.log( + f"world/april_tags/marker_{marker_id}/detections", + rr.Points3D(positions, colors=color, radii=0.1), + static=True, + ) + rr.log( + f"world/april_tags/marker_{marker_id}", + rr.Points3D( + [center], colors=color, radii=0.5, labels=[f"tag {marker_id}"], show_labels=True + ), + static=True, + ) + # sample recognitions: axes at the perceived tag pose + a camera frustum + sample_stride = max(1, len(tag_poses) // max_views_per_tag) + sampled = range(0, len(tag_poses), sample_stride) + for sample_index in sampled: + tag_pose = tag_poses[sample_index] + tag_entity = f"world/april_tags/marker_{marker_id}/recognitions/{sample_index:04d}/tag" + rr.log( + tag_entity, + rr.Transform3D(translation=tag_pose[:3, 3], mat3x3=tag_pose[:3, :3]), + static=True, + ) + rr.log( + tag_entity, + rr.Arrows3D( + vectors=[[0.3, 0, 0], [0, 0.3, 0], [0, 0, 0.3]], + colors=[[255, 0, 0], [0, 255, 0], [0, 0, 255]], + ), + static=True, + ) + cam_entity = f"world/april_tags/marker_{marker_id}/recognitions/{sample_index:04d}/cam" + camera_targets.append( + (cam_entity, optical_in_world(timestamps[sample_index]), timestamps[sample_index]) + ) + print( + f" rrd: world/april_tags/marker_{marker_id} @ {center.round(1).tolist()} " + f"({len(positions)} detections, {len(sampled)} views, via {traj_stream})" + ) + + _log_cam_frustums(store, camera_targets, intrinsics, resolution) + + +def _log_cam_frustums(store, camera_targets, intrinsics, resolution): + """Place the robot's-eye color image on a pinhole frustum at the camera pose + for each (entity, T_world_optical, ts) target — rendered only in 3D.""" + if not camera_targets or "color_image" not in store.list_streams(): + return + nearest = [(1e18, None) for _ in camera_targets] # (time delta, image obs) per target + for image_obs in store.stream("color_image", Image): + for target_index, (_entity, _pose, target_ts) in enumerate(camera_targets): + delta = abs(image_obs.ts - target_ts) + if delta < nearest[target_index][0]: + nearest[target_index] = (delta, image_obs) + logged = 0 + for (entity, optical_pose, _target_ts), (_delta, image_obs) in zip( + camera_targets, nearest, strict=False + ): + if image_obs is None: + continue + rr.log( + entity, + rr.Transform3D(translation=optical_pose[:3, 3], mat3x3=optical_pose[:3, :3]), + static=True, + ) + rr.log( + entity, + rr.Pinhole( + image_from_camera=intrinsics, + resolution=list(resolution), + camera_xyz=rr.ViewCoordinates.RDF, + image_plane_distance=0.6, + ), + static=True, + ) + try: + rr.log(f"{entity}/rgb", image_obs.data.to_rerun(), static=True) + logged += 1 + except Exception: + pass + print(f" rrd: {logged} recognition camera frustums (robot's-eye images, 3D only)") + + +# dimos jsonl level -> rerun TextLog level +_LOG_LEVELS = { + "debug": "DEBUG", + "info": "INFO", + "warning": "WARN", + "error": "ERROR", + "critical": "CRITICAL", +} +# keys rendered structurally; everything else is appended as key=value context +_LOG_STD_KEYS = {"event", "level", "logger", "timestamp", "func_name", "lineno"} + + +def _find_jsonl(db_path: str) -> Path | None: + """A dimos `main.jsonl` for this recording — next to the db.""" + candidate = Path(db_path).parent / "main.jsonl" + return candidate if candidate.exists() else None + + +def _iso_to_epoch(value: str) -> float: + return datetime.fromisoformat(value.replace("Z", "+00:00")).timestamp() + + +def _log_jsonl(jsonl_path: Path) -> None: + """Replay a dimos `main.jsonl` as rerun TextLog entries on the `ts` timeline.""" + n = 0 + for line in jsonl_path.read_text().splitlines(): + line = line.strip() + if not line: + continue + try: + entry = json.loads(line) + except json.JSONDecodeError: + continue + timestamp = entry.get("timestamp") + if not timestamp: + continue + body = str(entry.get("event", "")) + extra = " ".join(f"{k}={entry[k]}" for k in entry if k not in _LOG_STD_KEYS) + if extra: + body = f"{body} ({extra})" + if entry.get("logger"): + body = f"[{entry['logger']}] {body}" + rr.set_time(TIMELINE, timestamp=_iso_to_epoch(timestamp)) + rr.log( + "logs", rr.TextLog(body, level=_LOG_LEVELS.get(str(entry.get("level")).lower(), "INFO")) + ) + n += 1 + print(f" rrd: logs <- {jsonl_path.name} ({n} entries)") + + +def build_rrd( + db_path: str, + out_path: str, + intrinsics, + optical_in_base, + resolution, + *, + map_voxel: float = 0.1, + cloud_stride: int = 3, + camera_stride: int = 30, + mid360_pitch: bool = False, +): + rr.init("recording_post_process", recording_id=str(out_path)) + rr.save(str(out_path)) + cam_xform = MID360_TO_OPTICAL if mid360_pitch else _mat7(optical_in_base) + jsonl_path = _find_jsonl(db_path) + + with SqliteStore(path=db_path) as store: + streams = store.list_streams() + # Explicit blueprint: a 3D view (incl. the recognition camera frustums) + + # a 2D panel for the live camera, so rerun doesn't auto-make a panel per + # pinhole. Heavy aggregated maps default to hidden (toggle in the entity + # panel); the eye tracks the primary odom frame; left/bottom panels open. + track = ( + "/world/go2_frame" + if "go2_odom" in streams + else "/world/pointlio_frame" + if "pointlio_odometry" in streams + else "/world/gtsam_frame" + ) + hide = { + f"/world/{m}": rrb.EntityBehavior(visible=False) + for m in ( + "go2_map", + "pointlio_map", + "onboard_map", + ) + } + views = rrb.Horizontal( + rrb.Spatial3DView( + origin="/world", + name="3D", + overrides=hide, + eye_controls=rrb.EyeControls3D(kind=rrb.Eye3DKind.Orbital, tracking_entity=track), + ), + rrb.Spatial2DView(origin="/world/camera", name="camera"), + column_shares=[3, 1], + ) + # When a dimos main.jsonl is present, dock its log replay below the views. + layout = ( + rrb.Vertical(views, rrb.TextLogView(origin="/logs", name="logs"), row_shares=[4, 1]) + if jsonl_path is not None + else views + ) + rr.send_blueprint( + rrb.Blueprint( + layout, + rrb.BlueprintPanel(state=rrb.PanelState.Expanded), + rrb.TimePanel(state=rrb.PanelState.Expanded), + rrb.SelectionPanel(state=rrb.PanelState.Collapsed), + ) + ) + + ci = 0 # rotate a distinct base color through each point cloud + for name, stream in [ + ("go2", "go2_lidar"), + ("pointlio", "pointlio_lidar"), + ("onboard", "lidar"), # legacy Go2 onboard L1, own frame + ]: + if stream in streams: + base = _CLOUD_PALETTE[ci % len(_CLOUD_PALETTE)] + ci += 1 + _log_frames(store, stream, f"world/{name}_lidar", cloud_stride, map_voxel, base) + _log_map(store, stream, f"world/{name}_map", map_voxel, base) + + _log_apriltags(store, db_path, cam_xform, intrinsics, resolution) + + for stream, entity, base in [ + ("gtsam_odom", "world/gtsam_path", (0, 220, 0)), # corrected GT -> green + ("go2_odom", "world/go2_path", (220, 200, 0)), # go2 odom -> yellow + ("pointlio_odometry", "world/pointlio_path", (0, 200, 220)), # cyan + ("odom", "world/odom_path", (255, 165, 0)), # Go2 onboard odom -> orange + ]: + _log_path_gradient(db_path, stream, entity, base) + _log_odom_frames(db_path) # moving XYZ basis triads (+ eye tracking target) + + if "color_image" in streams: + n = 0 + for k, obs in enumerate(store.stream("color_image", Image)): + if k % camera_stride: + continue + rr.set_time(TIMELINE, timestamp=obs.ts) + try: + rr.log("world/camera/image", obs.data.to_rerun()) + n += 1 + except Exception: + break + print(f" rrd: world/camera/image <- color_image ({n} frames, stride {camera_stride})") + + if jsonl_path is not None: + _log_jsonl(jsonl_path) + print(f" wrote {out_path}") diff --git a/dimos/mapping/recording/utils/camera.py b/dimos/mapping/recording/utils/camera.py new file mode 100644 index 0000000000..255164eacf --- /dev/null +++ b/dimos/mapping/recording/utils/camera.py @@ -0,0 +1,32 @@ +# Copyright 2026 Dimensional Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Go2 front RGB camera rig (1280x720) — shared intrinsics/extrinsic. + +Same values as front_camera_720.yaml. Edit here for a different rig. +""" + +from __future__ import annotations + +import numpy as np + +# Pinhole intrinsic matrix K (fx, fy, cx, cy). +CAMERA_INTRINSICS = np.array( + [[797.47561649, 0.0, 643.53521678], [0.0, 796.48721128, 349.27836053], [0.0, 0.0, 1.0]], + dtype=np.float64, +) +# Lens distortion coefficients (k1, k2, p1, p2). +CAMERA_DISTORTION = np.array([-0.07309429, -0.02341141, -0.00693059, 0.00923868], dtype=np.float64) +# Pose of the camera_optical frame expressed in base_link: [x, y, z, qx, qy, qz, qw]. +CAMERA_OPTICAL_IN_BASE = [0.3, 0.0, 0.0, -0.5, 0.5, -0.5, 0.5] diff --git a/dimos/mapping/recording/utils/gtsam_gt.py b/dimos/mapping/recording/utils/gtsam_gt.py new file mode 100644 index 0000000000..d0a0cbb979 --- /dev/null +++ b/dimos/mapping/recording/utils/gtsam_gt.py @@ -0,0 +1,235 @@ +# Copyright 2026 Dimensional Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""GTSAM landmark-SLAM groundtruth from AprilTag observations. + +Treats the FAST-LIO / odom pose chain as locally correct and the AprilTags as +static landmarks; a tag seen at several times pins the chain and removes +accumulated odometry drift. Trusts tag POSITION (solvePnP is metric) but +distrusts tag ORIENTATION (a small planar tag is yaw/pitch ambiguous), and wraps +tag factors in a Huber kernel so a bad detection can't dominate. +""" + +from __future__ import annotations + +import sqlite3 + +import numpy as np + +from dimos.mapping.recording.utils.lidar_loop_closure import find_loop_closures +from dimos.msgs.geometry_msgs.PoseStamped import PoseStamped + + +def _pose_from7(pose7): + """[x y z qx qy qz qw] -> gtsam.Pose3.""" + import gtsam + + return gtsam.Pose3( + gtsam.Rot3.Quaternion(pose7[6], pose7[3], pose7[4], pose7[5]), + gtsam.Point3(pose7[0], pose7[1], pose7[2]), + ) + + +def _pose_to7(pose3): + """gtsam.Pose3 -> [x y z qx qy qz qw].""" + quaternion = pose3.rotation().toQuaternion() + translation = pose3.translation() + return [ + translation[0], + translation[1], + translation[2], + quaternion.x(), + quaternion.y(), + quaternion.z(), + quaternion.w(), + ] + + +def pick_pose_stream(connection) -> str: + """The odom stream to use as the pose chain (go2_odom / pointlio_odometry preferred).""" + stream_names = [row[0] for row in connection.execute("SELECT name FROM _streams").fetchall()] + candidates = [name for name in ["go2_odom", "pointlio_odometry"] if name in stream_names] + candidates += [ + name for name in stream_names if "odom" in name.lower() and name not in candidates + ] + for name in candidates: + try: + populated = connection.execute( + f'SELECT count(*) FROM "{name}" WHERE pose_qw IS NOT NULL' + ).fetchone()[0] + except sqlite3.OperationalError: + continue + if populated > 0: + return name + raise ValueError(f"no odom stream with populated pose columns among {candidates}") + + +def build_gtsam_gt( + db_path, + markers, + optical_in_base, + *, + node_stride=3, + odom_rot_sig=0.004, + odom_trans_sig=0.02, + tag_rot_sig=1.0, + tag_trans_sig=0.1, + tag_huber=0.5, + add_loop_closures=True, + loop_lidar_stream="livox_lidar", + loop_rot_sig=0.01, + loop_trans_sig=1.0, + loop_huber=1.0, + exclude_marker_ids=(), + pose_stream=None, + return_landmarks=False, +): + """Landmark-SLAM the odom chain + AprilTag landmarks + lidar loop closures. + + Per-DOF weighting plays each source to its strength: AprilTags trust POSITION + (tight tag_trans_sig) and ignore their own ORIENTATION (loose tag_rot_sig); + lidar loop closures trust ORIENTATION (tight loop_rot_sig, fixes accumulated + pitch/yaw drift) and stay out of translation (loose loop_trans_sig). + + `exclude_marker_ids` drops those tags entirely (e.g. a tag mounted on a moving + robot is not a static landmark). `pose_stream` forces which odom stream is the + pose chain (default: auto-pick) — set it to match the stream the lidar is + re-anchored through, so trajectory and clouds share a frame. + + Returns [(ts, pose7), ...], or ([(ts, pose7), ...], {marker_id: pose7_world}) + of the optimized static-tag world poses when `return_landmarks` is set.""" + import gtsam + from gtsam import BetweenFactorPose3, PriorFactorPose3 + from gtsam.symbol_shorthand import L, X + + connection = sqlite3.connect(db_path) + if pose_stream is None: + pose_stream = pick_pose_stream(connection) + pose_rows = connection.execute( + f"SELECT ts,pose_x,pose_y,pose_z,pose_qx,pose_qy,pose_qz,pose_qw " + f'FROM "{pose_stream}" WHERE pose_qw IS NOT NULL ORDER BY ts' + ).fetchall() + connection.close() + pose_rows = pose_rows[::node_stride] + node_timestamps = np.array([row[0] for row in pose_rows]) + node_poses7 = [list(row[1:8]) for row in pose_rows] + node_poses = [_pose_from7(pose7) for pose7 in node_poses7] + num_nodes = len(pose_rows) + print( + f" gtsam: pose stream '{pose_stream}', {num_nodes} nodes (stride {node_stride}), " + f"{len(markers)} tag obs" + ) + + base_to_optical = _pose_from7(optical_in_base) + + def nearest_node(timestamp): + node_index = int(np.searchsorted(node_timestamps, timestamp)) + node_index = min(max(node_index, 0), num_nodes - 1) + if node_index > 0 and abs(node_timestamps[node_index - 1] - timestamp) < abs( + node_timestamps[node_index] - timestamp + ): + node_index -= 1 + return node_index + + exclude = {int(marker_id) for marker_id in exclude_marker_ids} + if exclude: + markers = [marker for marker in markers if int(marker["marker_id"]) not in exclude] + + graph = gtsam.NonlinearFactorGraph() + initial = gtsam.Values() + prior_noise = gtsam.noiseModel.Diagonal.Sigmas(np.full(6, 1e-4)) + odom_noise = gtsam.noiseModel.Diagonal.Sigmas( + np.array([odom_rot_sig] * 3 + [odom_trans_sig] * 3) + ) + tag_noise_base = gtsam.noiseModel.Diagonal.Sigmas( + np.array([tag_rot_sig] * 3 + [tag_trans_sig] * 3) + ) + tag_noise = gtsam.noiseModel.Robust.Create( + gtsam.noiseModel.mEstimator.Huber.Create(tag_huber), tag_noise_base + ) + + for node_index in range(num_nodes): + initial.insert(X(node_index), node_poses[node_index]) + graph.add(PriorFactorPose3(X(0), node_poses[0], prior_noise)) + for node_index in range(num_nodes - 1): + relative = node_poses[node_index].between(node_poses[node_index + 1]) + graph.add(BetweenFactorPose3(X(node_index), X(node_index + 1), relative, odom_noise)) + + landmark_ids = set() + for detection in markers: + marker_id = int(detection["marker_id"]) + node_index = nearest_node(detection["ts"]) + tag_in_body = base_to_optical.compose(_pose_from7(detection["t_cam_marker"])) + if marker_id not in landmark_ids: + initial.insert(L(marker_id), node_poses[node_index].compose(tag_in_body)) + landmark_ids.add(marker_id) + graph.add(BetweenFactorPose3(X(node_index), L(marker_id), tag_in_body, tag_noise)) + + loops = [] + if add_loop_closures: + loops = find_loop_closures( + db_path, node_timestamps, node_poses7, lidar_stream=loop_lidar_stream + ) + loop_noise = gtsam.noiseModel.Robust.Create( + gtsam.noiseModel.mEstimator.Huber.Create(loop_huber), + gtsam.noiseModel.Diagonal.Sigmas(np.array([loop_rot_sig] * 3 + [loop_trans_sig] * 3)), + ) + for loop in loops: + graph.add( + BetweenFactorPose3( + X(loop.i), X(loop.j), _pose_from7(loop.relative_pose7), loop_noise + ) + ) + + params = gtsam.LevenbergMarquardtParams() + params.setMaxIterations(100) + optimizer = gtsam.LevenbergMarquardtOptimizer(graph, initial, params) + result = optimizer.optimize() + corrections = [ + np.linalg.norm( + result.atPose3(X(node_index)).translation() - node_poses[node_index].translation() + ) + for node_index in range(num_nodes) + ] + print( + f" gtsam: landmarks {sorted(landmark_ids)} | {len(loops)} loop closures | " + f"correction max {max(corrections):.2f} m, " + f"mean {np.mean(corrections):.2f} m ({optimizer.iterations()} iters)" + ) + trajectory = [ + (float(node_timestamps[node_index]), _pose_to7(result.atPose3(X(node_index)))) + for node_index in range(num_nodes) + ] + if return_landmarks: + landmarks = { + marker_id: _pose_to7(result.atPose3(L(marker_id))) for marker_id in sorted(landmark_ids) + } + return trajectory, landmarks + return trajectory + + +def write_gtsam_odom(store, trajectory, stream_name, tum_path): + """Write the corrected trajectory as a PoseStamped stream + a .tum file.""" + if stream_name in store.list_streams(): + store.delete_stream(stream_name) + odom_stream = store.stream(stream_name, PoseStamped) + with open(tum_path, "w") as tum_file: + for timestamp, pose in trajectory: + odom_stream.append( + PoseStamped(ts=timestamp, position=pose[:3], orientation=pose[3:7]), + ts=timestamp, + pose=tuple(pose), + ) + tum_file.write(f"{timestamp:.9f} " + " ".join(f"{value:.9f}" for value in pose) + "\n") + print(f" wrote '{stream_name}' stream ({len(trajectory)} poses) + {tum_path}") diff --git a/dimos/mapping/recording/utils/lidar_loop_closure.py b/dimos/mapping/recording/utils/lidar_loop_closure.py new file mode 100644 index 0000000000..97b2eb4d11 --- /dev/null +++ b/dimos/mapping/recording/utils/lidar_loop_closure.py @@ -0,0 +1,299 @@ +# Copyright 2026 Dimensional Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""LiDAR loop closure for the AprilTag groundtruth solve. + +AprilTags only pin drift where a tag is re-seen; between tags the FAST-LIO chain +drifts uncorrected. This module finds *revisits* from the lidar alone and turns +each into a relative-pose constraint the GTSAM solve can add as a loop closure: + + 1. sample keyframes along the odom chain (one per `keyframe_gap_m` of travel), + 2. describe each keyframe cloud with a rotation-invariant Scan Context + descriptor (Kim & Kim, IROS'18) so revisits match by appearance, not by the + drifted odom position, + 3. shortlist candidates by ring-key nearest neighbour, confirm with the + column-aligned Scan Context distance, + 4. verify each candidate with Generalized-ICP and keep only well-aligned pairs. + +Returns `LoopClosure` measurements indexed against the same node array the solve +uses. The keyframe clouds and the odom backbone are both in the lidar (mid360) +frame, so the GICP transform *is* the relative-pose measurement — no extrinsics. +""" + +from __future__ import annotations + +from dataclasses import dataclass + +import numpy as np +import open3d as o3d +from scipy.spatial.transform import Rotation +from sklearn.neighbors import NearestNeighbors + +from dimos.memory2.store.sqlite import SqliteStore +from dimos.msgs.sensor_msgs.PointCloud2 import PointCloud2 + + +@dataclass +class LoopClosure: + """A revisit constraint: node `j` (later) seen again at node `i` (earlier). + + `relative_pose7` is the pose of frame `j` expressed in frame `i` + ([x y z qx qy qz qw]); `fitness`/`rmse` are the GICP alignment quality. + """ + + i: int + j: int + relative_pose7: list[float] + fitness: float + rmse: float + + +def scan_context(points: np.ndarray, n_ring: int, n_sector: int, max_radius: float) -> np.ndarray: + """Bird's-eye Scan Context: an `n_ring` x `n_sector` grid whose cells hold the + max height of the points falling in each (range, azimuth) bin.""" + descriptor = np.zeros((n_ring, n_sector), dtype=np.float64) + xy_range = np.linalg.norm(points[:, :2], axis=1) + in_range = (xy_range > 1e-3) & (xy_range < max_radius) + if not np.any(in_range): + return descriptor + points = points[in_range] + xy_range = xy_range[in_range] + azimuth = np.arctan2(points[:, 1], points[:, 0]) + ring_index = np.clip((xy_range / max_radius * n_ring).astype(int), 0, n_ring - 1) + sector_index = np.clip( + ((azimuth + np.pi) / (2 * np.pi) * n_sector).astype(int), 0, n_sector - 1 + ) + flat_index = ring_index * n_sector + sector_index + np.maximum.at(descriptor.reshape(-1), flat_index, points[:, 2]) + return descriptor + + +def ring_key(descriptor: np.ndarray) -> np.ndarray: + """Rotation-invariant summary of a descriptor (mean height per ring).""" + return descriptor.mean(axis=1) + + +def scan_context_distance(query: np.ndarray, candidate: np.ndarray) -> tuple[float, int]: + """Minimum column-aligned cosine distance over all azimuth shifts. + + Returns (distance in [0, 1], best column shift). The shift recovers the yaw + between the two views and seeds GICP.""" + n_sector = query.shape[1] + query_norm = np.linalg.norm(query, axis=0) + best_distance, best_shift = 2.0, 0 + for shift in range(n_sector): + shifted = np.roll(candidate, shift, axis=1) + shifted_norm = np.linalg.norm(shifted, axis=0) + denominator = query_norm * shifted_norm + valid = denominator > 1e-9 + if not np.any(valid): + continue + cosine = (query * shifted).sum(axis=0)[valid] / denominator[valid] + distance = 1.0 - float(cosine.mean()) + if distance < best_distance: + best_distance, best_shift = distance, shift + return best_distance, best_shift + + +def _yaw_transform(yaw_radians: float) -> np.ndarray: + transform = np.eye(4) + transform[:3, :3] = Rotation.from_euler("z", yaw_radians).as_matrix() + return transform + + +def _world_rotation(pose7: list[float]) -> np.ndarray: + """Rotation-only 4x4 of a [x y z qx qy qz qw] pose (its world orientation).""" + rotation = np.eye(4) + rotation[:3, :3] = Rotation.from_quat(pose7[3:7]).as_matrix() + return rotation + + +def _to_open3d(points: np.ndarray, voxel: float) -> o3d.geometry.PointCloud: + cloud = o3d.geometry.PointCloud() + cloud.points = o3d.utility.Vector3dVector(points.astype(np.float64)) + return cloud.voxel_down_sample(voxel) if voxel > 0 else cloud + + +def _gicp( + source: np.ndarray, + target: np.ndarray, + init: np.ndarray, + max_correspondence: float, + voxel: float, +) -> tuple[np.ndarray, float, float]: + """Generalized-ICP of `source` onto `target`. Returns (target_from_source 4x4, + fitness, inlier_rmse). The transform maps a source-frame point into target.""" + source_cloud = _to_open3d(source, voxel) + target_cloud = _to_open3d(target, voxel) + result = o3d.pipelines.registration.registration_generalized_icp( + source_cloud, + target_cloud, + max_correspondence, + init, + o3d.pipelines.registration.TransformationEstimationForGeneralizedICP(), + o3d.pipelines.registration.ICPConvergenceCriteria(max_iteration=60), + ) + return result.transformation, result.fitness, result.inlier_rmse + + +def _matrix_to_pose7(transform: np.ndarray) -> list[float]: + quaternion = Rotation.from_matrix(transform[:3, :3]).as_quat() # x, y, z, w + translation = transform[:3, 3] + return [float(value) for value in (*translation, *quaternion)] + + +def _load_keyframe_clouds( + db_path: str, lidar_stream: str, keyframe_timestamps: np.ndarray +) -> dict[int, np.ndarray]: + """Stream `lidar_stream` once, keeping the nearest-in-time cloud per keyframe.""" + best_delta = {index: float("inf") for index in range(len(keyframe_timestamps))} + clouds: dict[int, np.ndarray] = {} + with SqliteStore(path=db_path) as store: + if lidar_stream not in store.list_streams(): + return clouds + for observation in store.stream(lidar_stream, PointCloud2): + keyframe_index = int(np.argmin(np.abs(keyframe_timestamps - observation.ts))) + delta = abs(float(observation.ts) - keyframe_timestamps[keyframe_index]) + if delta < best_delta[keyframe_index]: + points = observation.data.points_f32() + if len(points): + best_delta[keyframe_index] = delta + clouds[keyframe_index] = np.asarray(points, dtype=np.float64) + return clouds + + +def _select_keyframes(node_poses7: list[list[float]], keyframe_gap_m: float) -> list[int]: + """Node indices spaced ~`keyframe_gap_m` apart along the travelled path.""" + keyframe_nodes = [0] + last_position = np.array(node_poses7[0][:3]) + for node_index in range(1, len(node_poses7)): + position = np.array(node_poses7[node_index][:3]) + if np.linalg.norm(position - last_position) >= keyframe_gap_m: + keyframe_nodes.append(node_index) + last_position = position + return keyframe_nodes + + +def find_loop_closures( + db_path: str, + node_timestamps: np.ndarray, + node_poses7: list[list[float]], + *, + lidar_stream: str = "livox_lidar", + keyframe_gap_m: float = 1.0, + n_ring: int = 20, + n_sector: int = 60, + max_radius: float = 20.0, + num_candidates: int = 10, + sc_distance_max: float = 0.5, + min_time_gap_s: float = 15.0, + gicp_max_correspondence: float = 2.0, + gicp_voxel: float = 0.3, + gicp_fitness_min: float = 0.7, + gicp_rmse_max: float = 0.55, + max_translation_m: float = 2.5, + max_vertical_m: float = 1.0, +) -> list[LoopClosure]: + """Detect lidar revisits among the solve's odom nodes and verify them with GICP.""" + keyframe_nodes = _select_keyframes(node_poses7, keyframe_gap_m) + if len(keyframe_nodes) < 3: + print(f" loop: too few keyframes ({len(keyframe_nodes)}) — skipping") + return [] + keyframe_timestamps = node_timestamps[keyframe_nodes] + clouds = _load_keyframe_clouds(db_path, lidar_stream, keyframe_timestamps) + usable = [index for index in range(len(keyframe_nodes)) if index in clouds] + if len(usable) < 3: + print(f" loop: no usable '{lidar_stream}' clouds at keyframes — skipping") + return [] + + # The mid360 is mounted pitched, so raw scans aren't a level bird's-eye. Rotate + # each keyframe into its world orientation (gravity up) for Scan Context and + # GICP, so a revisit from any heading differs only by a yaw column-shift. The + # GICP transform is converted back to the sensor frame at the end. + world_rotation = { + index: _world_rotation(node_poses7[keyframe_nodes[index]]) for index in usable + } + aligned = {index: clouds[index] @ world_rotation[index][:3, :3].T for index in usable} + + descriptors = { + index: scan_context(aligned[index], n_ring, n_sector, max_radius) for index in usable + } + ring_keys = np.array([ring_key(descriptors[index]) for index in usable]) + neighbours = NearestNeighbors(n_neighbors=min(num_candidates + 1, len(usable))).fit(ring_keys) + + loops: list[LoopClosure] = [] + for position, query_index in enumerate(usable): + _, candidate_positions = neighbours.kneighbors(ring_keys[position : position + 1]) + best: LoopClosure | None = None + best_distance = sc_distance_max + for candidate_position in candidate_positions[0]: + candidate_index = usable[candidate_position] + if ( + keyframe_timestamps[query_index] - keyframe_timestamps[candidate_index] + < min_time_gap_s + ): + continue # not a revisit: too close in time (or in the future) + distance, shift = scan_context_distance( + descriptors[query_index], descriptors[candidate_index] + ) + if distance >= best_distance: + continue + # Scan Context recovers the yaw magnitude but not its sign — seed GICP + # from both and keep the better-aligned result. + yaw = shift * (2 * np.pi / n_sector) + transform, fitness, rmse = max( + ( + _gicp( + aligned[query_index], + aligned[candidate_index], + _yaw_transform(signed_yaw), + gicp_max_correspondence, + gicp_voxel, + ) + for signed_yaw in (yaw, -yaw) + ), + key=lambda result: result[1], + ) + if fitness < gicp_fitness_min or rmse > gicp_rmse_max: + continue + # A genuine revisit puts the two scans at nearly the same place; a large + # (especially vertical) offset is perceptual aliasing — drop it. GT is + # corrupted far worse by one bad loop than helped by a marginal one. + if ( + abs(transform[2, 3]) > max_vertical_m + or np.linalg.norm(transform[:3, 3]) > max_translation_m + ): + continue + # convert aligned-frame transform back to the sensor (mid360) node frame + sensor_transform = ( + np.linalg.inv(world_rotation[candidate_index]) + @ transform + @ world_rotation[query_index] + ) + best_distance = distance + best = LoopClosure( + i=keyframe_nodes[candidate_index], + j=keyframe_nodes[query_index], + relative_pose7=_matrix_to_pose7(sensor_transform), + fitness=float(fitness), + rmse=float(rmse), + ) + if best is not None: + loops.append(best) + + print( + f" loop: {len(keyframe_nodes)} keyframes ({len(usable)} with clouds) " + f"-> {len(loops)} lidar loop closures (stream '{lidar_stream}')" + ) + return loops diff --git a/dimos/mapping/recording/utils/post_process.py b/dimos/mapping/recording/utils/post_process.py new file mode 100644 index 0000000000..d51588a3d4 --- /dev/null +++ b/dimos/mapping/recording/utils/post_process.py @@ -0,0 +1,288 @@ +# Copyright 2026 Dimensional Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Shared post-process runner for recording rigs (robot-agnostic). + +For every `mem2.db` under a recordings directory it: + 1. prints a recording sanity check (rec_check), + 2. detects AprilTags -> `april_tags` stream (apriltags), + 3. solves a drift-corrected trajectory -> `gtsam_odom` (gtsam_gt), + 4. writes a Rerun `.rrd` visualization (build_rrd). + +A tag seen at several times pins the odometry chain and removes accumulated +drift. Also writes `gtsam_odom.tum` next to each db (relocalization groundtruth). + +Each rig calls `run()` with a `load_camera(db)` returning +`(intrinsics, distortion, optical_in_base, resolution)`. +""" + +from __future__ import annotations + +import argparse +from collections.abc import Callable +from pathlib import Path + +import numpy as np + +from dimos.mapping.recording.utils import rec_check +from dimos.mapping.recording.utils.apriltags import detect_apriltags +from dimos.mapping.recording.utils.build_rrd import build_rrd +from dimos.mapping.recording.utils.gtsam_gt import build_gtsam_gt, write_gtsam_odom +from dimos.memory2.store.sqlite import SqliteStore + +DB_NAME = "mem2.db" + +# (intrinsics 3x3, distortion, optical_in_base [x,y,z,qx,qy,qz,qw], (width, height)) +CameraParams = tuple[np.ndarray, np.ndarray, list[float], tuple[int, int]] + + +def _created_time(path: Path) -> float: + """File creation time (st_birthtime on macOS/BSD; falls back to mtime).""" + stat = path.stat() + return getattr(stat, "st_birthtime", stat.st_mtime) + + +def _scan(root: Path) -> list[Path]: + return sorted(path for path in root.rglob(DB_NAME) if "-wal" not in path.name) + + +def resolve_databases(target: str | None, recordings_dir: str) -> list[Path]: + """Pick which mem2.db(s) to process. + + TARGET wins when given: a `mem2.db` file, a dir holding one (process just + that recording), or any other dir (scan it recursively). With no TARGET, + process only the most recently created recording under recordings_dir. + """ + if target: + path = Path(target) + if path.name == DB_NAME: + return [path] + if (path / DB_NAME).exists(): + return [path / DB_NAME] + databases = _scan(path) + if not databases: + raise SystemExit(f"no {DB_NAME} found under {path}") + return databases + + databases = _scan(Path(recordings_dir)) + if not databases: + raise SystemExit(f"no {DB_NAME} found under {recordings_dir}") + most_recent = max(databases, key=_created_time) + print(f"no target given — using most recent recording: {most_recent.parent}") + return [most_recent] + + +def correct_db( + db: Path, + *, + intrinsics, + distortion, + optical_in_base, + image_stream, + apriltag_stream, + gtsam_stream, + marker_length, + dictionary, + add_loop_closures=True, +): + """AprilTag detection -> GTSAM trajectory. Returns True if a corrected + trajectory was written.""" + with SqliteStore(path=str(db)) as store: + detections = detect_apriltags( + store, intrinsics, distortion, image_stream, apriltag_stream, marker_length, dictionary + ) + if not detections: + print(" no AprilTags detected — skipping gtsam_odom (no landmark constraints)") + return False + trajectory = build_gtsam_gt( + str(db), detections, optical_in_base, add_loop_closures=add_loop_closures + ) + with SqliteStore(path=str(db)) as store: + write_gtsam_odom(store, trajectory, gtsam_stream, db.parent / "gtsam_odom.tum") + return True + + +def process_db( + db: Path, + *, + intrinsics, + distortion, + optical_in_base, + resolution, + image_stream, + apriltag_stream, + gtsam_stream, + marker_length, + dictionary, + force, + no_gtsam=False, + no_loop=False, + make_rrd=True, + camera_freq=30, + map_voxel=0.1, + cloud_stride=3, + mid360_pitch=False, + check_only=False, +): + print(f">> {db}") + try: + rec_check.report(db.parent) + except Exception as error: + print(f" rec_check skipped: {error}") + + try: + print(f" wrote {rec_check.write_summary(db.parent)}") + except Exception as error: + print(f" summary failed: {error}") + + if check_only: + return + + with SqliteStore(path=str(db)) as store: + already_corrected = gtsam_stream in store.list_streams() + + if no_gtsam: + print(" --no-gtsam: skipping AprilTag/GTSAM") + elif already_corrected and not force: + print(f" already has '{gtsam_stream}' — skipping AprilTag/GTSAM (use --force)") + else: + correct_db( + db, + intrinsics=intrinsics, + distortion=distortion, + optical_in_base=optical_in_base, + image_stream=image_stream, + apriltag_stream=apriltag_stream, + gtsam_stream=gtsam_stream, + marker_length=marker_length, + dictionary=dictionary, + add_loop_closures=not no_loop, + ) + + if make_rrd: + try: + build_rrd( + str(db), + str(db.parent / f"{db.parent.name}.rrd"), + intrinsics, + optical_in_base, + resolution, + camera_stride=camera_freq, + map_voxel=map_voxel, + cloud_stride=cloud_stride, + mid360_pitch=mid360_pitch, + ) + except Exception as error: + print(f" rrd failed: {error}") + + +def run( + *, + description: str | None, + load_camera: Callable[[Path], CameraParams], +) -> None: + """Parse CLI args and post-process each resolved recording. `load_camera` + supplies the rig's `(intrinsics, distortion, optical_in_base, resolution)`.""" + parser = argparse.ArgumentParser( + description=description, formatter_class=argparse.RawDescriptionHelpFormatter + ) + parser.add_argument( + "target", + nargs="?", + default=None, + help="a mem2.db, a recording dir containing one, or a dir to scan. " + "Omit to use the most recently created recording under --recordings-dir.", + ) + parser.add_argument( + "--recordings-dir", + default="./recordings", + help="root searched when no target is given", + ) + parser.add_argument("--image-stream", default="color_image") + parser.add_argument("--apriltag-stream", default="april_tags") + parser.add_argument("--gtsam-stream", default="gtsam_odom") + parser.add_argument("--marker-length", type=float, default=0.10) + parser.add_argument("--dictionary", default="DICT_APRILTAG_36h11") + parser.add_argument("--force", action="store_true", help="reprocess even if gtsam_odom exists") + parser.add_argument( + "--check", + action="store_true", + help="only sanity-check each recording and write summary.json (no GTSAM/.rrd)", + ) + parser.add_argument( + "--no-gtsam", + action="store_true", + help="skip AprilTag/GTSAM (e.g. rebuild only the .rrd)", + ) + parser.add_argument( + "--no-loop", + action="store_true", + help="skip lidar loop-closure detection (AprilTags-only drift correction)", + ) + parser.add_argument("--no-rrd", action="store_true", help="skip the .rrd visualization step") + parser.add_argument( + "--camera-freq", + type=int, + default=30, + help="keep 1 of every N color frames in the .rrd (usually the biggest part)", + ) + parser.add_argument( + "--map-voxel", + type=float, + default=0.1, + help="voxel size (m) for the .rrd clouds/maps; larger = smaller .rrd", + ) + parser.add_argument( + "--cloud-stride", + type=int, + default=3, + help="keep 1 of every N per-frame lidar clouds in the .rrd", + ) + parser.add_argument( + "--mid360-pitch", + action="store_true", + help="apply the legacy mid360->camera 44deg pitch correction (old fastlio " + "recordings; new data stores correct transforms, leave off)", + ) + args = parser.parse_args() + + databases = resolve_databases(args.target, args.recordings_dir) + print(f"found {len(databases)} recording(s)") + for db in databases: + try: + intrinsics, distortion, optical_in_base, resolution = load_camera(db) + process_db( + db, + intrinsics=intrinsics, + distortion=distortion, + optical_in_base=optical_in_base, + resolution=resolution, + image_stream=args.image_stream, + apriltag_stream=args.apriltag_stream, + gtsam_stream=args.gtsam_stream, + marker_length=args.marker_length, + dictionary=args.dictionary, + force=args.force, + no_gtsam=args.no_gtsam, + no_loop=args.no_loop, + make_rrd=not args.no_rrd, + camera_freq=args.camera_freq, + map_voxel=args.map_voxel, + cloud_stride=args.cloud_stride, + mid360_pitch=args.mid360_pitch, + check_only=args.check, + ) + except Exception as error: + print(f" !! failed: {error}") + print("done") diff --git a/dimos/mapping/recording/utils/rec_check.py b/dimos/mapping/recording/utils/rec_check.py new file mode 100644 index 0000000000..15936d3a84 --- /dev/null +++ b/dimos/mapping/recording/utils/rec_check.py @@ -0,0 +1,301 @@ +#!/usr/bin/env python3 +# Copyright 2026 Dimensional Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Sanity-check a recording dir: pcap + mem2.db sizes, stream rates, pose travel.""" + +from __future__ import annotations + +from datetime import datetime +import json +import math +from pathlib import Path +import sqlite3 +import subprocess +import sys +from typing import Any + +STREAMS = ( + "lidar", + "odom", + "color_image", + "livox_imu", + "livox_lidar", + "pointlio_lidar", + "pointlio_odometry", + "go2_odom", + "go2_color_image", + "realsense_color_image", + "realsense_depth_image", + "realsense_pointcloud", + "realsense_camera_info", + "realsense_depth_camera_info", + "realsense_imu", +) +RECORDINGS_DIR = Path("recordings") +# A pcap with only its global header (no packets) is exactly this many bytes. +PCAP_HEADER_BYTES = 24 + + +def find_dir(argv: list[str]) -> Path: + if len(argv) > 1: + directory = Path(argv[1]) + if not directory.exists(): + sys.exit(f"not found: {directory}") + return directory + candidates = sorted( + (p for p in RECORDINGS_DIR.glob("2*") if p.is_dir()), + key=lambda p: p.stat().st_mtime, + ) + if not candidates: + sys.exit(f"no recordings under {RECORDINGS_DIR}/") + return candidates[-1] + + +def human_size(n: int) -> str: + for unit in ("B", "KB", "MB", "GB", "TB"): + if n < 1024: + return f"{n:.1f}{unit}" + n /= 1024 # type: ignore[assignment] + return f"{n:.1f}PB" + + +def pcap_stats(pcap: Path) -> tuple[int, float, float] | None: + try: + result = subprocess.run( + ["capinfos", "-Mra", str(pcap)], + capture_output=True, + text=True, + check=True, + ) + except (FileNotFoundError, subprocess.CalledProcessError): + return _pcap_stats_via_tcpdump(pcap) + packets = first = last = None + for line in result.stdout.splitlines(): + if "Number of packets" in line: + packets = int(line.split(":", 1)[1].strip().replace(",", "")) + elif "First packet time" in line: + first = _parse_capinfos_time(line.split(":", 1)[1].strip()) + elif "Last packet time" in line: + last = _parse_capinfos_time(line.split(":", 1)[1].strip()) + if packets is None or first is None or last is None: + return None + return packets, first, last + + +def _parse_capinfos_time(value: str) -> float | None: + for fmt in ("%Y-%m-%d %H:%M:%S.%f", "%Y-%m-%d %H:%M:%S"): + try: + return datetime.strptime(value.split(" UTC")[0], fmt).timestamp() + except ValueError: + continue + return None + + +def _pcap_stats_via_tcpdump(pcap: Path) -> tuple[int, float, float] | None: + try: + result = subprocess.run( + ["tcpdump", "-r", str(pcap), "-tt", "-nn"], + capture_output=True, + text=True, + check=True, + ) + except (FileNotFoundError, subprocess.CalledProcessError): + return None + timestamps = [] + for line in result.stdout.splitlines(): + head = line.split(" ", 1)[0] + try: + timestamps.append(float(head)) + except ValueError: + continue + if not timestamps: + return None + return len(timestamps), timestamps[0], timestamps[-1] + + +def stream_rows(cur: sqlite3.Cursor, name: str) -> tuple[int, float | None, float | None, int]: + tables = {row[0] for row in cur.execute("SELECT name FROM sqlite_master WHERE type='table'")} + if name not in tables: + return 0, None, None, 0 + n, t0, t1 = cur.execute(f'SELECT COUNT(*), MIN(ts), MAX(ts) FROM "{name}"').fetchone() + pose_non_null = cur.execute(f'SELECT COUNT(pose_x) FROM "{name}"').fetchone()[0] + return n, t0, t1, pose_non_null + + +def odometry_travel(cur: sqlite3.Cursor) -> dict | None: + rows = cur.execute( + "SELECT pose_x, pose_y, pose_z FROM pointlio_odometry WHERE pose_x IS NOT NULL ORDER BY ts" + ).fetchall() + if not rows: + return None + xs, ys, zs = zip(*rows, strict=False) + path_length = sum(math.dist(rows[i - 1], rows[i]) for i in range(1, len(rows))) + return { + "rows": len(rows), + "start": rows[0], + "end": rows[-1], + "path_length": path_length, + "straight_line": math.dist(rows[0], rows[-1]), + "bbox_x": (min(xs), max(xs)), + "bbox_y": (min(ys), max(ys)), + "bbox_z": (min(zs), max(zs)), + } + + +def format_clock(seconds: float | None) -> str: + if seconds is None: + return "-" + return datetime.fromtimestamp(seconds).strftime("%H:%M:%S") + + +def summarize(directory: Path) -> dict[str, Any]: + """The same stats report() prints, as a JSON-able dict.""" + pcap = directory / "raw_mid360.pcap" + db = directory / "mem2.db" + summary: dict[str, Any] = { + "directory": str(directory), + "files": {}, + "pcap": None, + "streams": {}, + "pointlio_odometry_travel": None, + } + for path in (pcap, db, directory / "mem2.db-wal", directory / "mem2.db-shm"): + summary["files"][path.name] = path.stat().st_size if path.exists() else None + + if pcap.exists() and pcap.stat().st_size > PCAP_HEADER_BYTES: + stats = pcap_stats(pcap) + if stats is not None: + packets, first, last = stats + span = last - first + summary["pcap"] = { + "packets": packets, + "first": first, + "last": last, + "span_s": span, + "rate_pkt_s": packets / span if span > 0 else 0, + } + + if not db.exists(): + summary["error"] = "mem2.db missing" + return summary + + connection = sqlite3.connect(db) + cur = connection.cursor() + for name in STREAMS: + n, t0, t1, pose_n = stream_rows(cur, name) + if n == 0: + summary["streams"][name] = {"rows": 0} + continue + span = (t1 - t0) if (t0 and t1) else 0 + summary["streams"][name] = { + "rows": n, + "span_s": span, + "hz": (n - 1) / span if span > 0 else 0, + "pose_pct": 100 * pose_n / n if n else 0, + } + summary["pointlio_odometry_travel"] = odometry_travel(cur) + connection.close() + return summary + + +def write_summary(directory: Path) -> Path: + """Write summarize() to /summary.json and return its path.""" + path = directory / "summary.json" + path.write_text(json.dumps(summarize(directory), indent=2)) + return path + + +def main() -> int: + return report(find_dir(sys.argv)) + + +def report(directory: Path) -> int: + print(f"=== {directory} ===") + print() + + pcap = directory / "raw_mid360.pcap" + db = directory / "mem2.db" + print("files:") + for path in (pcap, db, directory / "mem2.db-wal", directory / "mem2.db-shm"): + if path.exists(): + print(f" {path.name:<20} {human_size(path.stat().st_size):>10}") + else: + print(f" {path.name:<20} (missing)") + print() + + if pcap.exists() and pcap.stat().st_size > PCAP_HEADER_BYTES: + stats = pcap_stats(pcap) + if stats is None: + print("pcap: present (capinfos/tcpdump unavailable to inspect)") + else: + packets, first, last = stats + span = last - first + rate = packets / span if span > 0 else 0 + print( + f"pcap: {packets:,} packets {format_clock(first)} -> {format_clock(last)} " + f"span={span:.1f}s rate={rate:.0f}pkt/s" + ) + elif pcap.exists(): + print(f"pcap: empty (only {pcap.stat().st_size}B — header only)") + else: + print("pcap: missing") + print() + + if not db.exists(): + print("mem2.db missing — cannot check streams.") + return 1 + + connection = sqlite3.connect(db) + cur = connection.cursor() + header = f"{'stream':<18} {'rows':>9} {'span_s':>8} {'hz':>7} {'pose%':>7} blob" + print(header) + print("-" * len(header)) + for name in STREAMS: + n, t0, t1, pose_n = stream_rows(cur, name) + if n == 0: + print(f" {name:<16} {'-':>9} (no rows)") + continue + span = (t1 - t0) if (t0 and t1) else 0 + rate = (n - 1) / span if span > 0 else 0 + pose_pct = 100 * pose_n / n if n else 0 + blob = cur.execute( + f'SELECT LENGTH(b.data) FROM "{name}" t JOIN "{name}_blob" b ON t.id=b.id LIMIT 1' + ).fetchone() + blob_label = human_size(blob[0]) if blob else "-" + print(f" {name:<16} {n:>9,} {span:>8.1f} {rate:>7.1f} {pose_pct:>6.0f}% {blob_label}") + + travel = odometry_travel(cur) + print() + if travel: + sx, sy, sz = travel["start"] + ex, ey, ez = travel["end"] + bx, by, bz = travel["bbox_x"], travel["bbox_y"], travel["bbox_z"] + print("pointlio_odometry travel:") + print(f" start x={sx:.2f} y={sy:.2f} z={sz:.2f}") + print(f" end x={ex:.2f} y={ey:.2f} z={ez:.2f}") + print(f" path_length {travel['path_length']:.2f} m") + print(f" straight_line {travel['straight_line']:.2f} m") + print( + f" bbox x=[{bx[0]:.1f},{bx[1]:.1f}] " + f"y=[{by[0]:.1f},{by[1]:.1f}] z=[{bz[0]:.1f},{bz[1]:.1f}]" + ) + else: + print("pointlio_odometry travel: no pose-stamped rows") + + return 0 + + +if __name__ == "__main__": + raise SystemExit(main()) diff --git a/dimos/robot/all_blueprints.py b/dimos/robot/all_blueprints.py index b6133933e2..7439d1a7bb 100644 --- a/dimos/robot/all_blueprints.py +++ b/dimos/robot/all_blueprints.py @@ -76,6 +76,7 @@ "openarm-mock-planner-coordinator": "dimos.robot.manipulators.openarm.blueprints.planner:openarm_mock_planner_coordinator", "openarm-planner-coordinator": "dimos.robot.manipulators.openarm.blueprints.planner:openarm_planner_coordinator", "path-planner-eval": "dimos.navigation.nav_3d.evaluator.blueprints:path_planner_eval", + "realsense-mid360-record": "dimos.mapping.recording.mid360_realsense.record:realsense_mid360_record", "teleop-hosted-go2": "dimos.teleop.quest_hosted.blueprints:teleop_hosted_go2", "teleop-hosted-xarm7": "dimos.teleop.quest_hosted.blueprints:teleop_hosted_xarm7", "teleop-phone": "dimos.teleop.phone.blueprints:teleop_phone", @@ -115,6 +116,7 @@ "unitree-go2-keyboard-teleop": "dimos.robot.unitree.go2.blueprints.basic.unitree_go2_keyboard_teleop:unitree_go2_keyboard_teleop", "unitree-go2-markers": "dimos.robot.unitree.go2.blueprints.smart.unitree_go2:unitree_go2_markers", "unitree-go2-memory": "dimos.robot.unitree.go2.blueprints.smart.unitree_go2:unitree_go2_memory", + "unitree-go2-record": "dimos.mapping.recording.go2_mid360.record:unitree_go2_record", "unitree-go2-relocalization": "dimos.robot.unitree.go2.blueprints.smart.unitree_go2:unitree_go2_relocalization", "unitree-go2-ros": "dimos.robot.unitree.go2.blueprints.smart.unitree_go2_ros:unitree_go2_ros", "unitree-go2-security": "dimos.robot.unitree.go2.blueprints.agentic.unitree_go2_security:unitree_go2_security", @@ -169,6 +171,7 @@ "go2-connection": "dimos.robot.unitree.go2.connection.GO2Connection", "go2-fleet-connection": "dimos.robot.unitree.go2.fleet_connection.Go2FleetConnection", "go2-memory": "dimos.robot.unitree.go2.blueprints.smart.unitree_go2.Go2Memory", + "go2-recorder": "dimos.mapping.recording.go2_mid360.record.Go2Recorder", "go2-teleop-module": "dimos.teleop.quest.quest_extensions.Go2TeleopModule", "google-maps-skill-container": "dimos.agents.skills.google_maps_skill_container.GoogleMapsSkillContainer", "gps-nav-skill-container": "dimos.agents.skills.gps_nav_skill.GpsNavSkillContainer", @@ -182,6 +185,7 @@ "joystick-module": "dimos.robot.unitree.b1.joystick_module.JoystickModule", "keyboard-teleop": "dimos.robot.unitree.keyboard_teleop.KeyboardTeleop", "keyboard-teleop-module": "dimos.teleop.keyboard.keyboard_teleop_module.KeyboardTeleopModule", + "keyboard-teleop-tui": "dimos.robot.unitree.keyboard_teleop_tui.KeyboardTeleopTUI", "local-planner": "dimos.navigation.nav_stack.modules.local_planner.local_planner.LocalPlanner", "manipulation-module": "dimos.manipulation.manipulation_module.ManipulationModule", "map": "dimos.robot.unitree.type.map.Map", @@ -218,6 +222,7 @@ "quest-teleop-module": "dimos.teleop.quest.quest_teleop_module.QuestTeleopModule", "ray-tracing-voxel-map": "dimos.mapping.ray_tracing.module.RayTracingVoxelMap", "real-sense-camera": "dimos.hardware.sensors.camera.realsense.camera.RealSenseCamera", + "realsense-recorder": "dimos.mapping.recording.mid360_realsense.record.RealsenseRecorder", "receiver-module": "dimos.utils.demo_image_encoding.ReceiverModule", "recorder": "dimos.memory2.module.Recorder", "reid-module": "dimos.perception.detection.reid.module.ReidModule", diff --git a/dimos/robot/unitree/go2/config.py b/dimos/robot/unitree/go2/config.py new file mode 100644 index 0000000000..daf9ce5517 --- /dev/null +++ b/dimos/robot/unitree/go2/config.py @@ -0,0 +1,32 @@ +# Copyright 2026 Dimensional Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Go2 hardware configuration shared across blueprints and tools.""" + +from __future__ import annotations + +import numpy as np + +# Go2 front RGB camera rig (1280x720). Same values as front_camera_720.yaml. +# GO2_FRONT_CAMERA_OPTICAL_IN_BASE is the camera_optical pose in base_link, +# matching the URDF front_camera mount + REP-103 optical convention. +GO2_FRONT_CAMERA_INTRINSICS = np.array( + [[797.47561649, 0.0, 643.53521678], [0.0, 796.48721128, 349.27836053], [0.0, 0.0, 1.0]], + dtype=np.float64, +) +GO2_FRONT_CAMERA_DISTORTION = np.array( + [-0.07309429, -0.02341141, -0.00693059, 0.00923868], dtype=np.float64 +) +GO2_FRONT_CAMERA_OPTICAL_IN_BASE = [0.32715, 0.0, 0.04297, -0.5, 0.5, -0.5, 0.5] +GO2_FRONT_CAMERA_RESOLUTION = (1280, 720) diff --git a/dimos/robot/unitree/go2/connection_spec.py b/dimos/robot/unitree/go2/connection_spec.py index dd6aab9c40..3ea52f9266 100644 --- a/dimos/robot/unitree/go2/connection_spec.py +++ b/dimos/robot/unitree/go2/connection_spec.py @@ -19,3 +19,5 @@ class GO2ConnectionSpec(Spec, Protocol): def publish_request(self, topic: str, data: dict[str, Any]) -> dict[Any, Any]: ... + def liedown(self) -> bool: ... + def standup(self) -> bool: ... diff --git a/dimos/robot/unitree/keyboard_teleop_tui.py b/dimos/robot/unitree/keyboard_teleop_tui.py new file mode 100644 index 0000000000..4affbe708b --- /dev/null +++ b/dimos/robot/unitree/keyboard_teleop_tui.py @@ -0,0 +1,344 @@ +#!/usr/bin/env python3 +# Copyright 2025-2026 Dimensional Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Terminal (TUI) keyboard teleop. Drop-in alternative to ``KeyboardTeleop``. + +Unlike the pygame version this needs no display/window: it reads keypresses +straight from the controlling terminal and renders a status panel with ANSI +escapes. Same ``tele_cmd_vel`` Twist output and the same constructor knobs, so it +can be swapped into a blueprint wherever ``KeyboardTeleop`` is used. + +Terminals can't report key-release, only key-repeat, so "hold to move" is +emulated: a movement key stays active for ``key_timeout`` seconds after its +last repeat. Holding a key refreshes it continuously (smooth motion); on +release the robot coasts to a stop within ``key_timeout``. + +Controls: + W/S: forward / back + A/D: turn left / right + Q/E: strafe left / right + Shift+key (tap): toggle turbo on/off + 1: sit (lie down) 2: stand (and re-enter walk mode) + SPACE: emergency stop + ESC: quit + +Sit/stand are driven through optional ``on_sit`` / ``on_stand`` callbacks so the +module stays robot-agnostic; the caller wires them to the robot's RPCs. +""" + +from collections.abc import Callable +import os +import select +import sys +import termios +import threading +import time +import tty +from typing import Any + +from dimos.constants import DEFAULT_THREAD_JOIN_TIMEOUT +from dimos.core.core import rpc +from dimos.core.module import Module +from dimos.core.stream import Out +from dimos.msgs.geometry_msgs.Twist import Twist +from dimos.msgs.geometry_msgs.Vector3 import Vector3 +from dimos.utils.logging_config import setup_logger + +logger = setup_logger() + +DEFAULT_LINEAR_SPEED: float = 0.3 # m/s +DEFAULT_ANGULAR_SPEED: float = 0.6 # rad/s +DEFAULT_BOOST_MULTIPLIER: float = 2.0 +DEFAULT_SLOW_MULTIPLIER: float = 0.5 +DEFAULT_KEY_TIMEOUT: float = 0.4 # s a key stays "held" after its last repeat + +_CONTROL_RATE_HZ = 50 +# Min seconds between turbo toggles; debounces terminal key-repeat so holding +# Shift+key flips turbo once, not every repeat. +_TURBO_TOGGLE_COOLDOWN = 0.5 + +# ANSI helpers. +_ENTER_ALT = "\033[?1049h" # switch to alternate screen buffer +_EXIT_ALT = "\033[?1049l" # restore the normal screen (and its scrollback) +_HIDE_CURSOR = "\033[?25l" +_SHOW_CURSOR = "\033[?25h" +_CLEAR_SCREEN = "\033[2J" +_CLEAR_BELOW = "\033[J" # clear from cursor to end of screen +_CURSOR_HOME = "\033[H" +_CLEAR_LINE = "\033[K" +_RESET = "\033[0m" +_CYAN = "\033[36m" +_DIM = "\033[2m" +_GREEN = "\033[32m" +_RED = "\033[31m" + +_BOX_WIDTH = 46 # visible width of the panel's inner content area + +# Movement keys -> (linear_x, linear_y, angular_z) unit contribution. +_KEY_MOTION: dict[str, tuple[float, float, float]] = { + "w": (1.0, 0.0, 0.0), + "s": (-1.0, 0.0, 0.0), + "q": (0.0, 1.0, 0.0), + "e": (0.0, -1.0, 0.0), + "a": (0.0, 0.0, 1.0), + "d": (0.0, 0.0, -1.0), +} + + +class KeyboardTeleopTUI(Module): + """Terminal-based keyboard control. Outputs Twist on tele_cmd_vel.""" + + tele_cmd_vel: Out[Twist] + + _stop_event: threading.Event + _thread: threading.Thread | None = None + + def __init__( + self, + linear_speed: float = DEFAULT_LINEAR_SPEED, + angular_speed: float = DEFAULT_ANGULAR_SPEED, + boost_multiplier: float = DEFAULT_BOOST_MULTIPLIER, + slow_multiplier: float = DEFAULT_SLOW_MULTIPLIER, + key_timeout: float = DEFAULT_KEY_TIMEOUT, + publish_only_when_active: bool = False, + on_sit: Callable[[], Any] | None = None, + on_stand: Callable[[], Any] | None = None, + **kwargs: Any, + ) -> None: + super().__init__(**kwargs) + self._stop_event = threading.Event() + self.linear_speed = linear_speed + self.angular_speed = angular_speed + self.boost_multiplier = boost_multiplier + self.slow_multiplier = slow_multiplier + self.key_timeout = key_timeout + # Discrete robot actions, run on a background thread so the control loop + # stays responsive. on_stand should also re-enter the locomotion mode so + # the robot can walk again (e.g. standup() then balance_stand()). + self.on_sit = on_sit + self.on_stand = on_stand + # When True, only publish while a movement key is held; on + # release publish a single zero Twist (stop) then go silent. + # Lets the teleop coexist with another /tele_cmd_vel publisher + # instead of flooding zeros. + self.publish_only_when_active = publish_only_when_active + self._was_active = False + # key char -> monotonic timestamp of its most recent press. + self._last_press: dict[str, float] = {} + # Turbo (boost) is a toggle now, not press-and-hold. + self._turbo = False + self._last_turbo_toggle = 0.0 + # Discrete-action state: name of the in-flight action ("sitting"/ + # "standing") or None. Guarded so only one runs at a time. + self._action_lock = threading.Lock() + self._action_status: str | None = None + + @rpc + def start(self) -> None: + super().start() + self._stop_event.clear() + self._last_press.clear() + self._turbo = False + self._last_turbo_toggle = 0.0 + self._thread = threading.Thread(target=self._tui_loop, daemon=True) + self._thread.start() + + @rpc + def stop(self) -> None: + self._publish_stop() + self._stop_event.set() + if self._thread is None: + raise RuntimeError("Cannot stop: thread was never started") + self._thread.join(DEFAULT_THREAD_JOIN_TIMEOUT) + super().stop() + + def _publish_stop(self) -> None: + stop_twist = Twist() + stop_twist.linear = Vector3(0, 0, 0) + stop_twist.angular = Vector3(0, 0, 0) + self.tele_cmd_vel.publish(stop_twist) + + def _tui_loop(self) -> None: + if not sys.stdin.isatty(): + logger.warning( + "KeyboardTeleopTUI: stdin is not a TTY; no keyboard input will be " + "read. Run this module in the foreground of an interactive terminal." + ) + self._stop_event.wait() + return + + fd = sys.stdin.fileno() + old_attrs = termios.tcgetattr(fd) + period = 1.0 / _CONTROL_RATE_HZ + # Render on the alternate screen buffer so the panel owns the terminal, + # isolated from the coordinator/worker log spam (which keeps going to the + # log file). The full repaint each frame wipes any stray writes that leak + # through. Leaving the buffer on exit restores the user's normal screen. + sys.stdout.write(_ENTER_ALT + _HIDE_CURSOR + _CLEAR_SCREEN) + sys.stdout.flush() + try: + # cbreak leaves ISIG on, so Ctrl+C still works as usual. + tty.setcbreak(fd) + while not self._stop_event.is_set(): + ready, _, _ = select.select([sys.stdin], [], [], period) + if ready: + data = os.read(fd, 64).decode("utf-8", errors="ignore") + if not self._handle_input(data): + break + + twist = self._compute_twist() + self._maybe_publish(twist) + self._render(twist) + finally: + termios.tcsetattr(fd, termios.TCSADRAIN, old_attrs) + sys.stdout.write(_SHOW_CURSOR + _EXIT_ALT) + sys.stdout.flush() + + def _handle_input(self, data: str) -> bool: + """Process a raw read. Returns False to request quit.""" + now = time.monotonic() + for ch in data: + if ch == " ": + # Emergency stop: drop all held keys and send a zero Twist. + self._last_press.clear() + self._publish_stop() + logger.warning("EMERGENCY STOP!") + elif ch == "\x1b": + # Lone ESC quits; ESC-sequences (arrow keys) are ignored here. + if len(data) == 1: + return False + elif ch == "1": + self._trigger_action("sitting", self.on_sit) + elif ch == "2": + self._trigger_action("standing", self.on_stand) + else: + lower = ch.lower() + if lower in _KEY_MOTION: + self._last_press[lower] = now + # Uppercase (Shift) taps toggle turbo; cooldown debounces + # terminal key-repeat so a held key flips it only once. + if ch.isupper() and now - self._last_turbo_toggle > _TURBO_TOGGLE_COOLDOWN: + self._turbo = not self._turbo + self._last_turbo_toggle = now + return True + + def _trigger_action(self, name: str, fn: Callable[[], Any] | None) -> None: + """Run a discrete robot action (sit/stand) on a background thread. + + Movement is suppressed while it runs; re-triggers are ignored until it + finishes so key-repeat or impatient taps can't stack commands. + """ + if fn is None: + return + with self._action_lock: + if self._action_status is not None: + return + self._action_status = name + self._last_press.clear() # stop driving while the robot changes posture + + def run() -> None: + try: + fn() + except Exception: + logger.exception(f"KeyboardTeleopTUI: {name} action failed") + finally: + with self._action_lock: + self._action_status = None + + threading.Thread(target=run, daemon=True).start() + + def _compute_twist(self) -> Twist: + now = time.monotonic() + twist = Twist() + twist.linear = Vector3(0, 0, 0) + twist.angular = Vector3(0, 0, 0) + + # Hold still while a sit/stand action is in flight. + if self._action_status is not None: + return twist + + for key, (fx, fy, fz) in _KEY_MOTION.items(): + if now - self._last_press.get(key, float("-inf")) <= self.key_timeout: + twist.linear.x += fx * self.linear_speed + twist.linear.y += fy * self.linear_speed + twist.angular.z += fz * self.angular_speed + + multiplier = self.boost_multiplier if self._turbo else 1.0 + twist.linear.x *= multiplier + twist.linear.y *= multiplier + twist.angular.z *= multiplier + return twist + + def _maybe_publish(self, twist: Twist) -> None: + if self.publish_only_when_active: + active = twist.linear.x != 0 or twist.linear.y != 0 or twist.angular.z != 0 + # Publish while active; publish exactly one zero on the + # active->idle transition (clean stop); then stay silent. + if active or self._was_active: + self.tele_cmd_vel.publish(twist) + self._was_active = active + else: + self.tele_cmd_vel.publish(twist) + + def _render(self, twist: Twist) -> None: + now = time.monotonic() + moving = twist.linear.x != 0 or twist.linear.y != 0 or twist.angular.z != 0 + action = self._action_status + held = sorted( + key.upper() + for key in _KEY_MOTION + if now - self._last_press.get(key, float("-inf")) <= self.key_timeout + ) + status_plain = "● MOVING" if moving else "● IDLE" + status_color = _RED if moving else _GREEN + turbo_txt = f" [TURBO {self.boost_multiplier:g}x]" if self._turbo else "" + action_line = f"▶ {action}…" if action else "" + + # (plain_text, color) rows. Padding is computed from the *plain* text so + # invisible ANSI color codes don't throw off the box alignment. + rows: list[tuple[str, str]] = [ + (f"{status_plain}{turbo_txt}", status_color), + (action_line, _CYAN if action else ""), + ("", ""), + (f"Linear X (Fwd/Back) : {twist.linear.x:+.2f} m/s", ""), + (f"Linear Y (Strafe) : {twist.linear.y:+.2f} m/s", ""), + (f"Angular Z (Turn) : {twist.angular.z:+.2f} rad/s", ""), + ("", ""), + (f"Held: {', '.join(held) if held else '-'}", ""), + ("", ""), + ("WS: move AD: turn QE: strafe", _DIM), + ("Shift+key (tap): toggle turbo", _DIM), + ("1: sit 2: stand", _DIM), + ("SPACE: e-stop ESC: quit", _DIM), + ] + + w = _BOX_WIDTH + title = " Keyboard Teleop (TUI) " + top = "┌" + title + "─" * (w + 2 - len(title)) + "┐" + bottom = "└" + "─" * (w + 2) + "┘" + + lines = [f"{_CYAN}{top}{_RESET}"] + for plain, color in rows: + text = plain[:w] + pad = " " * (w - len(text)) + inner = f"{color}{text}{_RESET}{pad}" if color else f"{text}{pad}" + lines.append(f"{_CYAN}│{_RESET} {inner} {_CYAN}│{_RESET}") + lines.append(f"{_CYAN}{bottom}{_RESET}") + + out = ( + _CURSOR_HOME + "\r\n".join(line + _CLEAR_LINE for line in lines) + "\r\n" + _CLEAR_BELOW + ) + sys.stdout.write(out) + sys.stdout.flush() diff --git a/docs/capabilities/navigation/readme.md b/docs/capabilities/navigation/readme.md index 66c59d3540..b5ac009dbd 100644 --- a/docs/capabilities/navigation/readme.md +++ b/docs/capabilities/navigation/readme.md @@ -9,3 +9,7 @@ Note: in the future these will be merged into one system. ## Simple Nav - [Simple Navigation](/docs/capabilities/navigation/native/index.md) — column-carving voxel mapping + slope-based costmap + +## Recording + +- [Recording a Map](/docs/capabilities/navigation/recording_a_map.md) — drive a Go2 + Mid-360 around a space and capture lidar, odometry, and camera for offline mapping diff --git a/docs/capabilities/navigation/recording_a_map.md b/docs/capabilities/navigation/recording_a_map.md new file mode 100644 index 0000000000..d0c8951b72 --- /dev/null +++ b/docs/capabilities/navigation/recording_a_map.md @@ -0,0 +1,109 @@ +# Recording a Map + +This walks you through driving a Go2 around a space and capturing everything you need to build a map afterward: the Mid-360 point cloud, Point-LIO odometry, and the camera. You drive, it records, you post-process. That's the whole loop. + +If you're on the RealSense rig instead of a Go2, the steps are the same — just use the `mid360_realsense` paths in place of `go2_mid360`. + +## What you need + +- A Unitree Go2 with a Livox Mid-360 mounted on it +- A computer to do the recording (it talks to the dog over wifi and to the lidar over a wired link) +- A phone with a hotspot +- The Mid-360's USB-ethernet adapter and cable + +## 1. Mount the Mid-360 + +Bolt the Mid-360 to the top of the dog, pointing forward, as level as you reasonably can. The recorder doesn't need a perfect mount — Point-LIO figures out the lidar's motion on its own and stamps every frame with a pose — but a level, rigid mount gives you cleaner data. Don't let it wobble. A loose lidar is the fastest way to ruin a recording. + +Run the Mid-360's ethernet to your recording computer. The lidar speaks plain ethernet over a USB adapter, so it's a separate wired link, not part of the wifi. + +## 2. Find the lidar's IP and get on its subnet + +The Mid-360 ships with a static IP. Each unit's address is derived from its serial number: the last octet is the last two digits of the serial. So a lidar whose serial ends in `71` is at `192.168.1.171`. A factory-default unit sits at `192.168.1.155`. Check the sticker. + +If the sticker isn't telling you anything, plug it in, power it on, and watch for its packets: + +```bash +sudo tcpdump -ni udp +``` + +The source IP that starts spamming you is the lidar. + +Your computer's wired interface has to live on the same `/24` as the lidar. Set it to `192.168.1.5`: + +```bash +sudo nmcli con add type ethernet ifname con-name livox-mid360 \ + ipv4.addresses 192.168.1.5/24 ipv4.method manual +sudo nmcli con up livox-mid360 +``` + +This sticks across reboots, so you only do it once per machine. + +## 3. Put the dog and your computer on the same hotspot + +The recorder talks to the dog over wifi, so both the dog and your computer need to be on the same network. A phone hotspot is the easy, portable answer. + +Turn on your phone's hotspot, then point the dog at it over Bluetooth: + +```bash +dimos go2tool connect-wifi --ssid --password +``` + +Power the dog on first — it advertises over Bluetooth right away. The command scans, finds the dog, and hands it the wifi credentials. If more than one robot shows up, it'll ask which one. + +Now connect your computer to the same hotspot. Then find the dog's IP on it: + +```bash +dimos go2tool discover +``` + +That prints a row per robot it sees. Grab the dog's IP and export it: + +```bash +export ROBOT_IP= +``` + +At this point your computer has two links going at once: wifi to the dog, wired ethernet to the lidar. That's expected. + +## 4. Record + +Tell the recorder where the lidar is and start it: + +```bash +export LIDAR_IP=192.168.1.171 # whatever you found in step 2 +uv run python dimos/mapping/recording/go2_mid360/record.py +``` + +A keyboard-teleop window opens. Drive with WASD, turn with Q/E, `Z` to lie down, `X` to stand. Drive the dog through the whole space you want mapped. A few tips: + +- Move at a calm walking pace. Whipping it around blurs scans. +- Close the loop — end where you started, and re-cross your own path a couple times. Those revisits are what let the post-process snap out accumulated drift. +- If you've got AprilTags up, get a clean look at each one more than once. They anchor the final groundtruth. + +When you're done, `Ctrl+C` the recorder. It writes everything to a timestamped folder under `recordings/`, e.g. `recordings/2026-06-22_03-15pm-PST/mem2.db`. + +You don't fuss with poses while recording — the Point-LIO recorder stamps each lidar frame with the live odometry pose as it goes, so the trajectory is already baked into the recording. + +## 5. Post-process + +This detects AprilTags, runs the drift-corrected trajectory solve, and writes a `.rrd` you can open in Rerun: + +```bash +uv run --no-sync python dimos/mapping/recording/go2_mid360/post_process.py +``` + +With no argument it grabs the most recent recording. Point it at a specific one if you want: + +```bash +uv run --no-sync python dimos/mapping/recording/go2_mid360/post_process.py recordings/2026-06-22_03-15pm-PST +``` + +## 6. Sanity-check it + +Before you trust a recording, look at it: + +```bash +uv run python dimos/mapping/recording/utils/rec_check.py +``` + +It reports stream rates and how far the odometry traveled. If `pointlio_lidar` or `pointlio_odometry` is empty, or the travel distance is wildly off, something went wrong on the lidar link — recheck the IP and the wired interface and run it again.