Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ dependencies = [
"geopandas>=0.14",
"pyogrio>=0.7",
"rasterio>=1.3",
"scipy>=1.10",
"shapely>=2.0",
"aioboto3>=15.5.0",
"boto3>=1.40.61",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,6 @@
from av2.map.map_api import ArgoverseStaticMap
from av2.structures.sweep import Sweep
from av2.utils.io import read_img
from scipy.spatial.transform import Rotation

from standard_e2e.caching import SourceDatasetProcessor
from standard_e2e.caching.adapters import (
Expand Down Expand Up @@ -49,7 +48,12 @@
)
from standard_e2e.enums import TrajectoryComponent as TC
from standard_e2e.indexing import IndexDataGenerator
from standard_e2e.utils import matrix_to_xyz_heading
from standard_e2e.utils import (
intrinsics_matrix,
matrix_to_xyz_heading,
quat_wxyz_to_rotmat,
se3,
)

# AV2 ring cameras → our canonical CameraDirection. Stereo cameras are
# excluded: they run at 5 Hz and do not synchronise 1:1 with the 10 Hz
Expand Down Expand Up @@ -131,18 +135,14 @@


def _quat_wxyz_to_rotmat(qw: float, qx: float, qy: float, qz: float) -> np.ndarray:
"""AV2 stores Hamilton quaternions as (qw, qx, qy, qz); scipy expects (x,y,z,w)."""
rotmat = Rotation.from_quat([qx, qy, qz, qw]).as_matrix().astype(np.float32)
return cast(np.ndarray, rotmat)
"""AV2 stores Hamilton quaternions as (qw, qx, qy, qz)."""
return quat_wxyz_to_rotmat((qw, qx, qy, qz)).astype(np.float32)


def _se3_from_quat_translation(
qw: float, qx: float, qy: float, qz: float, tx: float, ty: float, tz: float
) -> np.ndarray:
T = np.eye(4, dtype=np.float32)
T[:3, :3] = _quat_wxyz_to_rotmat(qw, qx, qy, qz)
T[:3, 3] = (tx, ty, tz)
return T
return se3(_quat_wxyz_to_rotmat(qw, qx, qy, qz), (tx, ty, tz), dtype=np.float32)


class Av2SensorDatasetProcessor(SourceDatasetProcessor):
Expand Down Expand Up @@ -223,10 +223,7 @@ def _refresh_log_cache(self, log_dir: Path) -> None:
self._camera_extrinsics.clear()
for raw_row in intrinsics_df.itertuples(index=False):
row = cast(Any, raw_row)
K = np.array(
[[row.fx_px, 0.0, row.cx_px], [0.0, row.fy_px, row.cy_px], [0, 0, 1]],
dtype=np.float32,
)
K = intrinsics_matrix(row.fx_px, row.fy_px, row.cx_px, row.cy_px)
self._camera_intrinsics[row.sensor_name] = K
# AV2 ships 3 radial distortion coefficients (k1, k2, k3); CameraData
# expands them into the 5-term Brown-Conrady form with zero tangential.
Expand Down
38 changes: 5 additions & 33 deletions standard_e2e/caching/src_datasets/comma2k19/_comma_geometry.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,44 +37,16 @@

import numpy as np

from standard_e2e.utils.geometry import quats_wxyz_to_rotmats

# FRD (x-fwd, y-right, z-down) <-> FLU (x-fwd, y-left, z-up): negate y and z.
# The matrix is its own inverse, so it converts vectors in both directions.
FRD_TO_FLU = np.diag([1.0, -1.0, -1.0]).astype(np.float64)


def quats_to_rotmats(quats_wxyz: np.ndarray) -> np.ndarray:
"""Convert Hamilton quaternions to rotation matrices.

Args:
quats_wxyz: ``(N, 4)`` array of Hamilton quaternions ordered
``(w, x, y, z)`` (comma2k19's native order). Need not be
unit-norm; each row is normalised first.

Returns:
``(N, 3, 3)`` float64 array of rotation matrices ``ecef_from_device``:
each maps a device-frame (FRD) vector into ECEF, i.e. its columns are
the device axes expressed in ECEF.
"""
q = np.asarray(quats_wxyz, dtype=np.float64)
if q.ndim != 2 or q.shape[1] != 4:
raise ValueError(f"quats must have shape (N, 4); got {q.shape}")
norms = np.linalg.norm(q, axis=1, keepdims=True)
if np.any(norms == 0.0):
raise ValueError("quaternions must be non-zero")
q = q / norms
w, x, y, z = q[:, 0], q[:, 1], q[:, 2], q[:, 3]
n = q.shape[0]
r = np.empty((n, 3, 3), dtype=np.float64)
r[:, 0, 0] = 1.0 - 2.0 * (y * y + z * z)
r[:, 0, 1] = 2.0 * (x * y - z * w)
r[:, 0, 2] = 2.0 * (x * z + y * w)
r[:, 1, 0] = 2.0 * (x * y + z * w)
r[:, 1, 1] = 1.0 - 2.0 * (x * x + z * z)
r[:, 1, 2] = 2.0 * (y * z - x * w)
r[:, 2, 0] = 2.0 * (x * z - y * w)
r[:, 2, 1] = 2.0 * (y * z + x * w)
r[:, 2, 2] = 1.0 - 2.0 * (x * x + y * y)
return r
# comma2k19 stores Hamilton (w, x, y, z) quaternions; the resulting matrix is
# ``ecef_from_device`` -- the device-FRD axes expressed as ECEF columns.
quats_to_rotmats = quats_wxyz_to_rotmats


def pose_matrices_world_from_ego(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@
from standard_e2e.enums import CameraDirection, StandardFrameDataField # noqa: E402
from standard_e2e.enums import TrajectoryComponent as TC # noqa: E402
from standard_e2e.indexing import IndexDataGenerator # noqa: E402
from standard_e2e.utils import matrix_to_xyz_heading # noqa: E402
from standard_e2e.utils import intrinsics_matrix, matrix_to_xyz_heading # noqa: E402

# Single decode thread per worker process: comma2k19 parallelism is across
# segments/frames in the pool, so letting each VideoCapture spin up its own
Expand Down Expand Up @@ -124,13 +124,8 @@ def allowed_splits(self) -> list[str]:
def camera_intrinsics(self) -> np.ndarray:
"""EON pinhole intrinsics ``K`` (3x3 float32)."""
focal = self.EON_FOCAL_LENGTH
return np.array(
[
[focal, 0.0, self.CAM_WIDTH / 2.0],
[0.0, focal, self.CAM_HEIGHT / 2.0],
[0.0, 0.0, 1.0],
],
dtype=np.float32,
return intrinsics_matrix(
focal, focal, self.CAM_WIDTH / 2.0, self.CAM_HEIGHT / 2.0
)

def _get_default_adapters(self) -> list[AbstractAdapter]:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,12 +21,11 @@
import os
import pickle
from pathlib import Path
from typing import Any, Optional, cast
from typing import Any, Optional

import numpy as np
import pandas as pd
from PIL import Image
from scipy.spatial.transform import Rotation

from standard_e2e.caching import SourceDatasetProcessor
from standard_e2e.caching.adapters import (
Expand Down Expand Up @@ -62,7 +61,7 @@
)
from standard_e2e.enums import TrajectoryComponent as TC
from standard_e2e.indexing import IndexDataGenerator
from standard_e2e.utils import matrix_to_xyz_heading
from standard_e2e.utils import matrix_to_xyz_heading, quat_wxyz_to_rotmat, se3

# nuPlan ships maps under a fixed map-version folder name. NAVSIM's official
# install docs hardcode this version (see navsim/docs/install.md).
Expand Down Expand Up @@ -112,17 +111,13 @@

def _quat_to_rotmat_wxyz(qw: float, qx: float, qy: float, qz: float) -> np.ndarray:
"""NAVSIM (and nuplan) store Hamilton quaternions as (qw, qx, qy, qz)."""
rotmat = Rotation.from_quat([qx, qy, qz, qw]).as_matrix().astype(np.float32)
return cast(np.ndarray, rotmat)
return quat_wxyz_to_rotmat((qw, qx, qy, qz)).astype(np.float32)


def _se3_from_rotation_translation(
rotation: np.ndarray, translation: np.ndarray
) -> np.ndarray:
T = np.eye(4, dtype=np.float32)
T[:3, :3] = np.asarray(rotation, dtype=np.float32)
T[:3, 3] = np.asarray(translation, dtype=np.float32)
return T
return se3(rotation, translation, dtype=np.float32)


def _driving_command_to_intent(driving_command: np.ndarray) -> Intent:
Expand Down
13 changes: 0 additions & 13 deletions standard_e2e/caching/src_datasets/wayve_scenes/_colmap.py
Original file line number Diff line number Diff line change
Expand Up @@ -140,16 +140,3 @@ def read_points3D_bin(
error = np.array(err_list, dtype=np.float64).reshape(-1)
track_length = np.array(track_list, dtype=np.int64).reshape(-1)
return xyz, rgb, error, track_length


def qvec_wxyz_to_rotmat(qvec_wxyz: np.ndarray) -> np.ndarray:
"""COLMAP (qw, qx, qy, qz) -> 3x3 rotation matrix (cam_from_world)."""
w, x, y, z = qvec_wxyz
return np.array(
[
[1 - 2 * (y * y + z * z), 2 * (x * y - z * w), 2 * (x * z + y * w)],
[2 * (x * y + z * w), 1 - 2 * (x * x + z * z), 2 * (y * z - x * w)],
[2 * (x * z - y * w), 2 * (y * z + x * w), 1 - 2 * (x * x + y * y)],
],
dtype=np.float64,
)
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,6 @@
SegmentContextAggregator,
)
from standard_e2e.caching.src_datasets.wayve_scenes._colmap import (
qvec_wxyz_to_rotmat,
read_cameras_bin,
read_images_bin,
read_points3D_bin,
Expand All @@ -64,7 +63,13 @@
)
from standard_e2e.enums import TrajectoryComponent as TC
from standard_e2e.indexing import IndexDataGenerator
from standard_e2e.utils import matrix_to_xyz_heading
from standard_e2e.utils import (
intrinsics_matrix,
matrix_to_xyz_heading,
quat_wxyz_to_rotmat,
se3,
transform_points,
)


def _decode_rgb(path: Path) -> np.ndarray:
Expand Down Expand Up @@ -108,10 +113,7 @@ def _opencv_points_to_flu(points_opencv: np.ndarray) -> np.ndarray:


def _cam_from_world_4x4(qvec_wxyz: np.ndarray, tvec: np.ndarray) -> np.ndarray:
T = np.eye(4, dtype=np.float64)
T[:3, :3] = qvec_wxyz_to_rotmat(qvec_wxyz)
T[:3, 3] = tvec
return T
return se3(quat_wxyz_to_rotmat(qvec_wxyz), tvec)


def _world_from_cam_flu(qvec_wxyz: np.ndarray, tvec: np.ndarray) -> np.ndarray:
Expand Down Expand Up @@ -200,9 +202,7 @@ def _refresh_scene_cache(self, scene_dir: Path) -> None:
self._cam_calib = {}
for cid, cam in cameras.items():
fx, fy, cx, cy = cam.params[:4]
K = np.array(
[[fx, 0.0, cx], [0.0, fy, cy], [0.0, 0.0, 1.0]], dtype=np.float32
)
K = intrinsics_matrix(fx, fy, cx, cy)
dist = cam.params[4:8].astype(np.float32) # OPENCV_FISHEYE k1..k4
self._cam_calib[cid] = (K, dist)

Expand Down Expand Up @@ -279,10 +279,7 @@ def _build_lidar(self, T_ego_world: np.ndarray) -> LidarData:
if len(pts) == 0:
xyz_ego = np.zeros((0, 3), dtype=np.float32)
else:
homog = np.concatenate(
[pts, np.ones((len(pts), 1), dtype=np.float32)], axis=1
)
xyz_ego = (homog @ T_ego_world.T)[:, :3]
xyz_ego = transform_points(T_ego_world, pts)
within = np.linalg.norm(xyz_ego, axis=1) <= self.LIDAR_MAX_RANGE_M
xyz_ego = xyz_ego[within].astype(np.float32)
return LidarData(
Expand Down
12 changes: 12 additions & 0 deletions standard_e2e/utils/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,22 @@
load_yaml_config,
matrix_to_xyz_heading,
)
from standard_e2e.utils.geometry import (
intrinsics_matrix,
quat_wxyz_to_rotmat,
quats_wxyz_to_rotmats,
se3,
transform_points,
)

__all__ = [
"load_yaml_config",
"_assert_strictly_increasing",
"_check_list_of_objects_or_none",
"matrix_to_xyz_heading",
"quats_wxyz_to_rotmats",
"quat_wxyz_to_rotmat",
"se3",
"intrinsics_matrix",
"transform_points",
]
65 changes: 65 additions & 0 deletions standard_e2e/utils/geometry.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
"""Shared SE(3) / quaternion / camera-intrinsics helpers for dataset processors.

These small operations were otherwise reimplemented per dataset (comma2k19,
WayveScenes/COLMAP, NAVSIM/nuPlan, Argoverse 2). Quaternions follow the
**Hamilton, scalar-first ``(w, x, y, z)``** convention every ingested dataset
uses; the scalar-last reorder that :class:`scipy.spatial.transform.Rotation`
expects lives here once instead of at each call site. The active rotation a
quaternion represents is returned as-is — callers attach the frame semantics
(``ecef_from_device``, ``cam_from_world``, ...).
"""

from __future__ import annotations

from typing import cast

import numpy as np
from numpy.typing import ArrayLike, DTypeLike
from scipy.spatial.transform import Rotation


def quats_wxyz_to_rotmats(quats_wxyz: ArrayLike) -> np.ndarray:
"""Convert ``(N, 4)`` Hamilton ``(w, x, y, z)`` quaternions to ``(N, 3, 3)``
rotation matrices (float64). Quaternions are normalised by scipy."""
q = np.asarray(quats_wxyz, dtype=np.float64)
if q.ndim != 2 or q.shape[1] != 4:
raise ValueError(f"quats must have shape (N, 4); got {q.shape}")
# scipy uses scalar-last (x, y, z, w); reorder from our scalar-first wxyz.
rotmats = Rotation.from_quat(q[:, [1, 2, 3, 0]]).as_matrix()
return cast(np.ndarray, rotmats)


def quat_wxyz_to_rotmat(quat_wxyz: ArrayLike) -> np.ndarray:
"""Convert a single Hamilton ``(w, x, y, z)`` quaternion to ``(3, 3)`` (float64)."""
q = np.asarray(quat_wxyz, dtype=np.float64).reshape(1, 4)
return cast(np.ndarray, quats_wxyz_to_rotmats(q)[0])


def se3(
rotation: ArrayLike, translation: ArrayLike, dtype: DTypeLike = np.float64
) -> np.ndarray:
"""Assemble a 4x4 homogeneous transform from a 3x3 rotation + 3-vector."""
transform = np.eye(4, dtype=dtype)
transform[:3, :3] = np.asarray(rotation, dtype=dtype)
transform[:3, 3] = np.asarray(translation, dtype=dtype).reshape(3)
return transform


def intrinsics_matrix(
fx: float, fy: float, cx: float, cy: float, dtype: DTypeLike = np.float32
) -> np.ndarray:
"""Build a pinhole camera matrix ``[[fx, 0, cx], [0, fy, cy], [0, 0, 1]]``."""
return np.array([[fx, 0.0, cx], [0.0, fy, cy], [0.0, 0.0, 1.0]], dtype=dtype)


def transform_points(transform: ArrayLike, points: ArrayLike) -> np.ndarray:
"""Apply a 4x4 SE(3) ``transform`` to ``(N, 3)`` ``points`` -> ``(N, 3)``.

Computed in the points' dtype (so float32 in stays float32 out).
"""
pts = np.asarray(points)
if pts.ndim != 2 or pts.shape[1] != 3:
raise ValueError(f"points must have shape (N, 3); got {pts.shape}")
mat = np.asarray(transform, dtype=pts.dtype)
homog = np.concatenate([pts, np.ones((len(pts), 1), dtype=pts.dtype)], axis=1)
return cast(np.ndarray, (homog @ mat.T)[:, :3])
6 changes: 3 additions & 3 deletions tests/test_colmap_io.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,11 +19,11 @@
import pytest

from standard_e2e.caching.src_datasets.wayve_scenes._colmap import (
qvec_wxyz_to_rotmat,
read_cameras_bin,
read_images_bin,
read_points3D_bin,
)
from standard_e2e.utils.geometry import quat_wxyz_to_rotmat

# Extracted WayveScenes scenes: ``WAYVE_SCENES_ROOT`` env var wins; the raw
# mount is a fallback (it normally holds only .zip archives, so tests skip in
Expand Down Expand Up @@ -135,7 +135,7 @@ def test_qvec_to_rotmat_matches_scipy():
from scipy.spatial.transform import Rotation

ref = Rotation.from_quat(q_xyzw).as_matrix()
ours = qvec_wxyz_to_rotmat(np.array([q_xyzw[3], *q_xyzw[:3]]))
ours = quat_wxyz_to_rotmat(np.array([q_xyzw[3], *q_xyzw[:3]]))
np.testing.assert_allclose(ours, ref, atol=1e-12)


Expand Down Expand Up @@ -175,7 +175,7 @@ def test_parity_with_pycolmap_on_real_scene():
cfw = ref.cam_from_world()
ref_R = Rotation.from_quat(np.asarray(cfw.rotation.quat)).as_matrix()
o = our_by_name[ref.name]
if not np.allclose(qvec_wxyz_to_rotmat(o.qvec_wxyz), ref_R, atol=1e-9):
if not np.allclose(quat_wxyz_to_rotmat(o.qvec_wxyz), ref_R, atol=1e-9):
mismatches += 1
if not np.allclose(o.tvec, np.asarray(cfw.translation), atol=1e-9):
mismatches += 1
Expand Down
Loading
Loading