diff --git a/pyproject.toml b/pyproject.toml index 9bbc394..8a10c14 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -37,6 +37,7 @@ dependencies = [ "geopandas>=0.14", "pyogrio>=0.7", "rasterio>=1.3", + "scipy>=1.10", "shapely>=2.0", "aioboto3>=15.5.0", "boto3>=1.40.61", diff --git a/standard_e2e/caching/src_datasets/av2_sensor/av2_sensor_dataset_processor.py b/standard_e2e/caching/src_datasets/av2_sensor/av2_sensor_dataset_processor.py index 7feba7f..a904fa6 100644 --- a/standard_e2e/caching/src_datasets/av2_sensor/av2_sensor_dataset_processor.py +++ b/standard_e2e/caching/src_datasets/av2_sensor/av2_sensor_dataset_processor.py @@ -15,7 +15,6 @@ from av2.map.map_api import ArgoverseStaticMap from av2.structures.sweep import Sweep from av2.utils.io import read_img -from scipy.spatial.transform import Rotation from standard_e2e.caching import SourceDatasetProcessor from standard_e2e.caching.adapters import ( @@ -49,7 +48,12 @@ ) from standard_e2e.enums import TrajectoryComponent as TC from standard_e2e.indexing import IndexDataGenerator -from standard_e2e.utils import matrix_to_xyz_heading +from standard_e2e.utils import ( + intrinsics_matrix, + matrix_to_xyz_heading, + quat_wxyz_to_rotmat, + se3, +) # AV2 ring cameras → our canonical CameraDirection. Stereo cameras are # excluded: they run at 5 Hz and do not synchronise 1:1 with the 10 Hz @@ -131,18 +135,14 @@ def _quat_wxyz_to_rotmat(qw: float, qx: float, qy: float, qz: float) -> np.ndarray: - """AV2 stores Hamilton quaternions as (qw, qx, qy, qz); scipy expects (x,y,z,w).""" - rotmat = Rotation.from_quat([qx, qy, qz, qw]).as_matrix().astype(np.float32) - return cast(np.ndarray, rotmat) + """AV2 stores Hamilton quaternions as (qw, qx, qy, qz).""" + return quat_wxyz_to_rotmat((qw, qx, qy, qz)).astype(np.float32) def _se3_from_quat_translation( qw: float, qx: float, qy: float, qz: float, tx: float, ty: float, tz: float ) -> np.ndarray: - T = np.eye(4, dtype=np.float32) - T[:3, :3] = _quat_wxyz_to_rotmat(qw, qx, qy, qz) - T[:3, 3] = (tx, ty, tz) - return T + return se3(_quat_wxyz_to_rotmat(qw, qx, qy, qz), (tx, ty, tz), dtype=np.float32) class Av2SensorDatasetProcessor(SourceDatasetProcessor): @@ -223,10 +223,7 @@ def _refresh_log_cache(self, log_dir: Path) -> None: self._camera_extrinsics.clear() for raw_row in intrinsics_df.itertuples(index=False): row = cast(Any, raw_row) - K = np.array( - [[row.fx_px, 0.0, row.cx_px], [0.0, row.fy_px, row.cy_px], [0, 0, 1]], - dtype=np.float32, - ) + K = intrinsics_matrix(row.fx_px, row.fy_px, row.cx_px, row.cy_px) self._camera_intrinsics[row.sensor_name] = K # AV2 ships 3 radial distortion coefficients (k1, k2, k3); CameraData # expands them into the 5-term Brown-Conrady form with zero tangential. diff --git a/standard_e2e/caching/src_datasets/comma2k19/_comma_geometry.py b/standard_e2e/caching/src_datasets/comma2k19/_comma_geometry.py index d5bc12b..27aca56 100644 --- a/standard_e2e/caching/src_datasets/comma2k19/_comma_geometry.py +++ b/standard_e2e/caching/src_datasets/comma2k19/_comma_geometry.py @@ -37,44 +37,16 @@ import numpy as np +from standard_e2e.utils.geometry import quats_wxyz_to_rotmats + # FRD (x-fwd, y-right, z-down) <-> FLU (x-fwd, y-left, z-up): negate y and z. # The matrix is its own inverse, so it converts vectors in both directions. FRD_TO_FLU = np.diag([1.0, -1.0, -1.0]).astype(np.float64) -def quats_to_rotmats(quats_wxyz: np.ndarray) -> np.ndarray: - """Convert Hamilton quaternions to rotation matrices. - - Args: - quats_wxyz: ``(N, 4)`` array of Hamilton quaternions ordered - ``(w, x, y, z)`` (comma2k19's native order). Need not be - unit-norm; each row is normalised first. - - Returns: - ``(N, 3, 3)`` float64 array of rotation matrices ``ecef_from_device``: - each maps a device-frame (FRD) vector into ECEF, i.e. its columns are - the device axes expressed in ECEF. - """ - q = np.asarray(quats_wxyz, dtype=np.float64) - if q.ndim != 2 or q.shape[1] != 4: - raise ValueError(f"quats must have shape (N, 4); got {q.shape}") - norms = np.linalg.norm(q, axis=1, keepdims=True) - if np.any(norms == 0.0): - raise ValueError("quaternions must be non-zero") - q = q / norms - w, x, y, z = q[:, 0], q[:, 1], q[:, 2], q[:, 3] - n = q.shape[0] - r = np.empty((n, 3, 3), dtype=np.float64) - r[:, 0, 0] = 1.0 - 2.0 * (y * y + z * z) - r[:, 0, 1] = 2.0 * (x * y - z * w) - r[:, 0, 2] = 2.0 * (x * z + y * w) - r[:, 1, 0] = 2.0 * (x * y + z * w) - r[:, 1, 1] = 1.0 - 2.0 * (x * x + z * z) - r[:, 1, 2] = 2.0 * (y * z - x * w) - r[:, 2, 0] = 2.0 * (x * z - y * w) - r[:, 2, 1] = 2.0 * (y * z + x * w) - r[:, 2, 2] = 1.0 - 2.0 * (x * x + y * y) - return r +# comma2k19 stores Hamilton (w, x, y, z) quaternions; the resulting matrix is +# ``ecef_from_device`` -- the device-FRD axes expressed as ECEF columns. +quats_to_rotmats = quats_wxyz_to_rotmats def pose_matrices_world_from_ego( diff --git a/standard_e2e/caching/src_datasets/comma2k19/comma2k19_dataset_processor.py b/standard_e2e/caching/src_datasets/comma2k19/comma2k19_dataset_processor.py index 6a6e181..084ce38 100644 --- a/standard_e2e/caching/src_datasets/comma2k19/comma2k19_dataset_processor.py +++ b/standard_e2e/caching/src_datasets/comma2k19/comma2k19_dataset_processor.py @@ -66,7 +66,7 @@ from standard_e2e.enums import CameraDirection, StandardFrameDataField # noqa: E402 from standard_e2e.enums import TrajectoryComponent as TC # noqa: E402 from standard_e2e.indexing import IndexDataGenerator # noqa: E402 -from standard_e2e.utils import matrix_to_xyz_heading # noqa: E402 +from standard_e2e.utils import intrinsics_matrix, matrix_to_xyz_heading # noqa: E402 # Single decode thread per worker process: comma2k19 parallelism is across # segments/frames in the pool, so letting each VideoCapture spin up its own @@ -124,13 +124,8 @@ def allowed_splits(self) -> list[str]: def camera_intrinsics(self) -> np.ndarray: """EON pinhole intrinsics ``K`` (3x3 float32).""" focal = self.EON_FOCAL_LENGTH - return np.array( - [ - [focal, 0.0, self.CAM_WIDTH / 2.0], - [0.0, focal, self.CAM_HEIGHT / 2.0], - [0.0, 0.0, 1.0], - ], - dtype=np.float32, + return intrinsics_matrix( + focal, focal, self.CAM_WIDTH / 2.0, self.CAM_HEIGHT / 2.0 ) def _get_default_adapters(self) -> list[AbstractAdapter]: diff --git a/standard_e2e/caching/src_datasets/navsim/navsim_dataset_processor.py b/standard_e2e/caching/src_datasets/navsim/navsim_dataset_processor.py index 70aea8a..6bce254 100644 --- a/standard_e2e/caching/src_datasets/navsim/navsim_dataset_processor.py +++ b/standard_e2e/caching/src_datasets/navsim/navsim_dataset_processor.py @@ -21,12 +21,11 @@ import os import pickle from pathlib import Path -from typing import Any, Optional, cast +from typing import Any, Optional import numpy as np import pandas as pd from PIL import Image -from scipy.spatial.transform import Rotation from standard_e2e.caching import SourceDatasetProcessor from standard_e2e.caching.adapters import ( @@ -62,7 +61,7 @@ ) from standard_e2e.enums import TrajectoryComponent as TC from standard_e2e.indexing import IndexDataGenerator -from standard_e2e.utils import matrix_to_xyz_heading +from standard_e2e.utils import matrix_to_xyz_heading, quat_wxyz_to_rotmat, se3 # nuPlan ships maps under a fixed map-version folder name. NAVSIM's official # install docs hardcode this version (see navsim/docs/install.md). @@ -112,17 +111,13 @@ def _quat_to_rotmat_wxyz(qw: float, qx: float, qy: float, qz: float) -> np.ndarray: """NAVSIM (and nuplan) store Hamilton quaternions as (qw, qx, qy, qz).""" - rotmat = Rotation.from_quat([qx, qy, qz, qw]).as_matrix().astype(np.float32) - return cast(np.ndarray, rotmat) + return quat_wxyz_to_rotmat((qw, qx, qy, qz)).astype(np.float32) def _se3_from_rotation_translation( rotation: np.ndarray, translation: np.ndarray ) -> np.ndarray: - T = np.eye(4, dtype=np.float32) - T[:3, :3] = np.asarray(rotation, dtype=np.float32) - T[:3, 3] = np.asarray(translation, dtype=np.float32) - return T + return se3(rotation, translation, dtype=np.float32) def _driving_command_to_intent(driving_command: np.ndarray) -> Intent: diff --git a/standard_e2e/caching/src_datasets/wayve_scenes/_colmap.py b/standard_e2e/caching/src_datasets/wayve_scenes/_colmap.py index e81b6fc..9c6dd94 100644 --- a/standard_e2e/caching/src_datasets/wayve_scenes/_colmap.py +++ b/standard_e2e/caching/src_datasets/wayve_scenes/_colmap.py @@ -140,16 +140,3 @@ def read_points3D_bin( error = np.array(err_list, dtype=np.float64).reshape(-1) track_length = np.array(track_list, dtype=np.int64).reshape(-1) return xyz, rgb, error, track_length - - -def qvec_wxyz_to_rotmat(qvec_wxyz: np.ndarray) -> np.ndarray: - """COLMAP (qw, qx, qy, qz) -> 3x3 rotation matrix (cam_from_world).""" - w, x, y, z = qvec_wxyz - return np.array( - [ - [1 - 2 * (y * y + z * z), 2 * (x * y - z * w), 2 * (x * z + y * w)], - [2 * (x * y + z * w), 1 - 2 * (x * x + z * z), 2 * (y * z - x * w)], - [2 * (x * z - y * w), 2 * (y * z + x * w), 1 - 2 * (x * x + y * y)], - ], - dtype=np.float64, - ) diff --git a/standard_e2e/caching/src_datasets/wayve_scenes/wayve_scenes_dataset_processor.py b/standard_e2e/caching/src_datasets/wayve_scenes/wayve_scenes_dataset_processor.py index 1661159..f4fe9a0 100644 --- a/standard_e2e/caching/src_datasets/wayve_scenes/wayve_scenes_dataset_processor.py +++ b/standard_e2e/caching/src_datasets/wayve_scenes/wayve_scenes_dataset_processor.py @@ -46,7 +46,6 @@ SegmentContextAggregator, ) from standard_e2e.caching.src_datasets.wayve_scenes._colmap import ( - qvec_wxyz_to_rotmat, read_cameras_bin, read_images_bin, read_points3D_bin, @@ -64,7 +63,13 @@ ) from standard_e2e.enums import TrajectoryComponent as TC from standard_e2e.indexing import IndexDataGenerator -from standard_e2e.utils import matrix_to_xyz_heading +from standard_e2e.utils import ( + intrinsics_matrix, + matrix_to_xyz_heading, + quat_wxyz_to_rotmat, + se3, + transform_points, +) def _decode_rgb(path: Path) -> np.ndarray: @@ -108,10 +113,7 @@ def _opencv_points_to_flu(points_opencv: np.ndarray) -> np.ndarray: def _cam_from_world_4x4(qvec_wxyz: np.ndarray, tvec: np.ndarray) -> np.ndarray: - T = np.eye(4, dtype=np.float64) - T[:3, :3] = qvec_wxyz_to_rotmat(qvec_wxyz) - T[:3, 3] = tvec - return T + return se3(quat_wxyz_to_rotmat(qvec_wxyz), tvec) def _world_from_cam_flu(qvec_wxyz: np.ndarray, tvec: np.ndarray) -> np.ndarray: @@ -200,9 +202,7 @@ def _refresh_scene_cache(self, scene_dir: Path) -> None: self._cam_calib = {} for cid, cam in cameras.items(): fx, fy, cx, cy = cam.params[:4] - K = np.array( - [[fx, 0.0, cx], [0.0, fy, cy], [0.0, 0.0, 1.0]], dtype=np.float32 - ) + K = intrinsics_matrix(fx, fy, cx, cy) dist = cam.params[4:8].astype(np.float32) # OPENCV_FISHEYE k1..k4 self._cam_calib[cid] = (K, dist) @@ -279,10 +279,7 @@ def _build_lidar(self, T_ego_world: np.ndarray) -> LidarData: if len(pts) == 0: xyz_ego = np.zeros((0, 3), dtype=np.float32) else: - homog = np.concatenate( - [pts, np.ones((len(pts), 1), dtype=np.float32)], axis=1 - ) - xyz_ego = (homog @ T_ego_world.T)[:, :3] + xyz_ego = transform_points(T_ego_world, pts) within = np.linalg.norm(xyz_ego, axis=1) <= self.LIDAR_MAX_RANGE_M xyz_ego = xyz_ego[within].astype(np.float32) return LidarData( diff --git a/standard_e2e/utils/__init__.py b/standard_e2e/utils/__init__.py index 6a8c9ea..8a12345 100644 --- a/standard_e2e/utils/__init__.py +++ b/standard_e2e/utils/__init__.py @@ -4,10 +4,22 @@ load_yaml_config, matrix_to_xyz_heading, ) +from standard_e2e.utils.geometry import ( + intrinsics_matrix, + quat_wxyz_to_rotmat, + quats_wxyz_to_rotmats, + se3, + transform_points, +) __all__ = [ "load_yaml_config", "_assert_strictly_increasing", "_check_list_of_objects_or_none", "matrix_to_xyz_heading", + "quats_wxyz_to_rotmats", + "quat_wxyz_to_rotmat", + "se3", + "intrinsics_matrix", + "transform_points", ] diff --git a/standard_e2e/utils/geometry.py b/standard_e2e/utils/geometry.py new file mode 100644 index 0000000..ce62075 --- /dev/null +++ b/standard_e2e/utils/geometry.py @@ -0,0 +1,65 @@ +"""Shared SE(3) / quaternion / camera-intrinsics helpers for dataset processors. + +These small operations were otherwise reimplemented per dataset (comma2k19, +WayveScenes/COLMAP, NAVSIM/nuPlan, Argoverse 2). Quaternions follow the +**Hamilton, scalar-first ``(w, x, y, z)``** convention every ingested dataset +uses; the scalar-last reorder that :class:`scipy.spatial.transform.Rotation` +expects lives here once instead of at each call site. The active rotation a +quaternion represents is returned as-is — callers attach the frame semantics +(``ecef_from_device``, ``cam_from_world``, ...). +""" + +from __future__ import annotations + +from typing import cast + +import numpy as np +from numpy.typing import ArrayLike, DTypeLike +from scipy.spatial.transform import Rotation + + +def quats_wxyz_to_rotmats(quats_wxyz: ArrayLike) -> np.ndarray: + """Convert ``(N, 4)`` Hamilton ``(w, x, y, z)`` quaternions to ``(N, 3, 3)`` + rotation matrices (float64). Quaternions are normalised by scipy.""" + q = np.asarray(quats_wxyz, dtype=np.float64) + if q.ndim != 2 or q.shape[1] != 4: + raise ValueError(f"quats must have shape (N, 4); got {q.shape}") + # scipy uses scalar-last (x, y, z, w); reorder from our scalar-first wxyz. + rotmats = Rotation.from_quat(q[:, [1, 2, 3, 0]]).as_matrix() + return cast(np.ndarray, rotmats) + + +def quat_wxyz_to_rotmat(quat_wxyz: ArrayLike) -> np.ndarray: + """Convert a single Hamilton ``(w, x, y, z)`` quaternion to ``(3, 3)`` (float64).""" + q = np.asarray(quat_wxyz, dtype=np.float64).reshape(1, 4) + return cast(np.ndarray, quats_wxyz_to_rotmats(q)[0]) + + +def se3( + rotation: ArrayLike, translation: ArrayLike, dtype: DTypeLike = np.float64 +) -> np.ndarray: + """Assemble a 4x4 homogeneous transform from a 3x3 rotation + 3-vector.""" + transform = np.eye(4, dtype=dtype) + transform[:3, :3] = np.asarray(rotation, dtype=dtype) + transform[:3, 3] = np.asarray(translation, dtype=dtype).reshape(3) + return transform + + +def intrinsics_matrix( + fx: float, fy: float, cx: float, cy: float, dtype: DTypeLike = np.float32 +) -> np.ndarray: + """Build a pinhole camera matrix ``[[fx, 0, cx], [0, fy, cy], [0, 0, 1]]``.""" + return np.array([[fx, 0.0, cx], [0.0, fy, cy], [0.0, 0.0, 1.0]], dtype=dtype) + + +def transform_points(transform: ArrayLike, points: ArrayLike) -> np.ndarray: + """Apply a 4x4 SE(3) ``transform`` to ``(N, 3)`` ``points`` -> ``(N, 3)``. + + Computed in the points' dtype (so float32 in stays float32 out). + """ + pts = np.asarray(points) + if pts.ndim != 2 or pts.shape[1] != 3: + raise ValueError(f"points must have shape (N, 3); got {pts.shape}") + mat = np.asarray(transform, dtype=pts.dtype) + homog = np.concatenate([pts, np.ones((len(pts), 1), dtype=pts.dtype)], axis=1) + return cast(np.ndarray, (homog @ mat.T)[:, :3]) diff --git a/tests/test_colmap_io.py b/tests/test_colmap_io.py index c57f48e..c6e6d4b 100644 --- a/tests/test_colmap_io.py +++ b/tests/test_colmap_io.py @@ -19,11 +19,11 @@ import pytest from standard_e2e.caching.src_datasets.wayve_scenes._colmap import ( - qvec_wxyz_to_rotmat, read_cameras_bin, read_images_bin, read_points3D_bin, ) +from standard_e2e.utils.geometry import quat_wxyz_to_rotmat # Extracted WayveScenes scenes: ``WAYVE_SCENES_ROOT`` env var wins; the raw # mount is a fallback (it normally holds only .zip archives, so tests skip in @@ -135,7 +135,7 @@ def test_qvec_to_rotmat_matches_scipy(): from scipy.spatial.transform import Rotation ref = Rotation.from_quat(q_xyzw).as_matrix() - ours = qvec_wxyz_to_rotmat(np.array([q_xyzw[3], *q_xyzw[:3]])) + ours = quat_wxyz_to_rotmat(np.array([q_xyzw[3], *q_xyzw[:3]])) np.testing.assert_allclose(ours, ref, atol=1e-12) @@ -175,7 +175,7 @@ def test_parity_with_pycolmap_on_real_scene(): cfw = ref.cam_from_world() ref_R = Rotation.from_quat(np.asarray(cfw.rotation.quat)).as_matrix() o = our_by_name[ref.name] - if not np.allclose(qvec_wxyz_to_rotmat(o.qvec_wxyz), ref_R, atol=1e-9): + if not np.allclose(quat_wxyz_to_rotmat(o.qvec_wxyz), ref_R, atol=1e-9): mismatches += 1 if not np.allclose(o.tvec, np.asarray(cfw.translation), atol=1e-9): mismatches += 1 diff --git a/tests/test_geometry.py b/tests/test_geometry.py new file mode 100644 index 0000000..b6df0b4 --- /dev/null +++ b/tests/test_geometry.py @@ -0,0 +1,80 @@ +import numpy as np +import pytest + +from standard_e2e.utils.geometry import ( + intrinsics_matrix, + quat_wxyz_to_rotmat, + quats_wxyz_to_rotmats, + se3, + transform_points, +) + + +def _manual_wxyz(q): + """The hand-rolled Hamilton (w, x, y, z) formula the datasets used to carry.""" + w, x, y, z = q + return np.array( + [ + [1 - 2 * (y * y + z * z), 2 * (x * y - z * w), 2 * (x * z + y * w)], + [2 * (x * y + z * w), 1 - 2 * (x * x + z * z), 2 * (y * z - x * w)], + [2 * (x * z - y * w), 2 * (y * z + x * w), 1 - 2 * (x * x + y * y)], + ] + ) + + +def test_quats_match_hand_rolled_hamilton_formula(): + # Regression guard: the scipy-backed helper must reproduce the manual + # formula comma2k19 / wayve_scenes previously hand-rolled (whose convention + # is empirically locked), to bit-level tolerance. + rng = np.random.default_rng(0) + q = rng.standard_normal((64, 4)) + q /= np.linalg.norm(q, axis=1, keepdims=True) + rotmats = quats_wxyz_to_rotmats(q) + assert rotmats.shape == (64, 3, 3) + for i in range(len(q)): + np.testing.assert_allclose(rotmats[i], _manual_wxyz(q[i]), atol=1e-12) + + +def test_quat_wxyz_identity_and_known_rotation(): + np.testing.assert_allclose(quat_wxyz_to_rotmat([1, 0, 0, 0]), np.eye(3), atol=1e-12) + s = np.sqrt(0.5) # +90 deg about z: (w,x,y,z) = (cos45, 0, 0, sin45) + np.testing.assert_allclose( + quat_wxyz_to_rotmat([s, 0, 0, s]), + [[0, -1, 0], [1, 0, 0], [0, 0, 1]], + atol=1e-12, + ) + + +def test_quats_shape_and_norm_validation(): + with pytest.raises(ValueError): + quats_wxyz_to_rotmats(np.zeros((1, 3))) # wrong shape + with pytest.raises(ValueError): + quats_wxyz_to_rotmats(np.zeros((1, 4))) # zero-norm (scipy rejects) + + +def test_se3_assembles_and_respects_dtype(): + r = quat_wxyz_to_rotmat([np.sqrt(0.5), 0, 0, np.sqrt(0.5)]) + t = [1.0, 2.0, 3.0] + transform = se3(r, t) + assert transform.shape == (4, 4) + np.testing.assert_allclose(transform[:3, :3], r) + np.testing.assert_allclose(transform[:3, 3], t) + np.testing.assert_allclose(transform[3], [0, 0, 0, 1]) + assert se3(np.eye(3), [0, 0, 0], dtype=np.float32).dtype == np.float32 + + +def test_intrinsics_matrix(): + k = intrinsics_matrix(910, 910, 582, 437) + np.testing.assert_allclose(k, [[910, 0, 582], [0, 910, 437], [0, 0, 1]]) + assert k.dtype == np.float32 + + +def test_transform_points(): + # +90 deg about z, then translate by +x. + transform = se3([[0, -1, 0], [1, 0, 0], [0, 0, 1]], [1, 0, 0]) + pts = np.array([[1.0, 0.0, 0.0], [0.0, 1.0, 0.0]], dtype=np.float32) + out = transform_points(transform, pts) + np.testing.assert_allclose(out, [[1, 1, 0], [0, 0, 0]], atol=1e-6) + assert out.dtype == np.float32 # computed in the points' dtype + with pytest.raises(ValueError): + transform_points(transform, np.zeros((4, 2))) diff --git a/uv.lock b/uv.lock index c3a39ef..80fbef3 100644 --- a/uv.lock +++ b/uv.lock @@ -1866,6 +1866,7 @@ dependencies = [ { name = "rasterio" }, { name = "requests" }, { name = "retry" }, + { name = "scipy" }, { name = "shapely" }, { name = "tensorflow-cpu" }, { name = "torch" }, @@ -1942,6 +1943,7 @@ requires-dist = [ { name = "rasterio", specifier = ">=1.3" }, { name = "requests", specifier = ">=2.32.5" }, { name = "retry", specifier = ">=0.9.2" }, + { name = "scipy", specifier = ">=1.10" }, { name = "shapely", specifier = ">=2.0" }, { name = "tensorflow-cpu" }, { name = "torch" },