Source code for stereocomplex.synthetic.parallel_plate

from __future__ import annotations

from dataclasses import dataclass
from collections.abc import Sequence

import numpy as np


[docs] @dataclass(frozen=True) class ParallelPlateSyntheticParams: """Physical parameters used only by the inclined parallel-plate oracle.""" eta: float = 1.5 thickness: float = 8.0 alpha_deg: float = 12.0 beta_deg: float = 5.0 d1: float = 80.0
[docs] @dataclass(frozen=True) class SyntheticStereoDataset: """ Synthetic stereo observations generated by an oracle. Conventions: - units are millimetres except pixels and K entries; - `board_poses` are 4x4 transforms from board coordinates to world coordinates; - `T_left_world` and `T_right_world` are 4x4 transforms from world to camera coordinates. """ object_points: np.ndarray board_poses: list[np.ndarray] left_pixels: list[np.ndarray] right_pixels: list[np.ndarray] K_left: np.ndarray K_right: np.ndarray T_left_world: np.ndarray T_right_world: np.ndarray image_size: tuple[int, int] oracle_left_params: ParallelPlateSyntheticParams | None = None oracle_right_params: ParallelPlateSyntheticParams | None = None per_frame_object_points: list[np.ndarray] | None = None @property def T_right_left(self) -> np.ndarray: """4x4 transform mapping left-camera coordinates to right-camera coordinates.""" return np.asarray(self.T_right_world, dtype=np.float64) @ np.linalg.inv( np.asarray(self.T_left_world, dtype=np.float64) ) @property def oracle_left_ray_function(self): """Exact ray function for the left channel (no noise).""" if self.oracle_left_params is None: return None def _ray(u: np.ndarray, v: np.ndarray): return parallel_plate_ray_from_pixel(u, v, self.K_left, self.oracle_left_params) return _ray @property def oracle_right_ray_function(self): """Exact ray function for the right channel (no noise).""" if self.oracle_right_params is None: return None def _ray(u: np.ndarray, v: np.ndarray): return parallel_plate_ray_from_pixel(u, v, self.K_right, self.oracle_right_params) return _ray
[docs] def subset(self, indices: Sequence[int]) -> SyntheticStereoDataset: """Return a new dataset restricted to the frames at *indices*.""" idx = list(indices) return SyntheticStereoDataset( object_points=self.object_points, board_poses=[self.board_poses[i] for i in idx], left_pixels=[self.left_pixels[i] for i in idx], right_pixels=[self.right_pixels[i] for i in idx], K_left=self.K_left, K_right=self.K_right, T_left_world=self.T_left_world, T_right_world=self.T_right_world, image_size=self.image_size, oracle_left_params=self.oracle_left_params, oracle_right_params=self.oracle_right_params, per_frame_object_points=( [self.per_frame_object_points[i] for i in idx] if self.per_frame_object_points is not None else None ), )
def _normalize(v: np.ndarray, *, axis: int = -1) -> np.ndarray: v = np.asarray(v, dtype=np.float64) n = np.linalg.norm(v, axis=axis, keepdims=True) if np.any(n <= 0): raise ValueError("cannot normalize a zero vector") return v / n def _as_points(points: np.ndarray) -> np.ndarray: return np.asarray(points, dtype=np.float64).reshape(-1, 3) def transform_points(T: np.ndarray, points: np.ndarray) -> np.ndarray: """Apply a 4x4 rigid transform to points shaped (N,3).""" T = np.asarray(T, dtype=np.float64).reshape(4, 4) pts = _as_points(points) return (T[:3, :3] @ pts.T).T + T[:3, 3].reshape(1, 3)
[docs] def normal_from_tilts(alpha_deg: float, beta_deg: float) -> np.ndarray: """Return the unit normal q of the tilted parallel plate.""" alpha = np.deg2rad(float(alpha_deg)) beta = np.deg2rad(float(beta_deg)) q = np.array([np.tan(alpha), np.tan(beta), 1.0], dtype=np.float64) return _normalize(q)
[docs] def pinhole_ray_from_pixel( u: float | np.ndarray, v: float | np.ndarray, K: np.ndarray, ) -> np.ndarray: """Compute normalised pinhole ray directions from pixel coordinates. Uses the camera matrix K to back-project pixels to unit vectors in camera space. The origin is implicitly at (0, 0, 0) — this function returns directions only. Parameters ---------- u : float or ndarray Pixel x-coordinate(s). v : float or ndarray Pixel y-coordinate(s). K : ndarray, shape (3, 3) Camera matrix. Returns ------- ndarray, shape (..., 3) Unit ray directions in camera coordinates. """ K = np.asarray(K, dtype=np.float64).reshape(3, 3) u_arr = np.asarray(u, dtype=np.float64) v_arr = np.asarray(v, dtype=np.float64) x = (u_arr - K[0, 2]) / K[0, 0] y = (v_arr - K[1, 2]) / K[1, 1] dirs = np.stack([x, y, np.ones_like(x, dtype=np.float64)], axis=-1) return _normalize(dirs)
[docs] def parallel_plate_ray_from_pixel( u: float | np.ndarray, v: float | np.ndarray, K: np.ndarray, params: ParallelPlateSyntheticParams, ) -> tuple[np.ndarray, np.ndarray]: """ Return the physical oracle ray `(O_true, d_true)` for a pixel. The origin is the plate exit point I2. It is not gauge-projected here: the generator keeps the physical ray, and evaluation code compares rays geometrically rather than by raw origin equality. """ s = pinhole_ray_from_pixel(u, v, K) q = normal_from_tilts(params.alpha_deg, params.beta_deg) c = np.sum(s * q, axis=-1, keepdims=True) if np.any(c <= 1e-12): raise ValueError("ray does not intersect the front side of the plate") s_perp = s - c * q sin2_g = np.sum(s_perp * s_perp, axis=-1, keepdims=True) / float(params.eta) ** 2 if np.any(sin2_g > 1.0 + 1e-12): raise ValueError("total internal reflection in synthetic plate model") beta_g = np.sqrt(np.maximum(0.0, 1.0 - sin2_g)) s_g = s_perp / float(params.eta) + beta_g * q I1 = (float(params.d1) / c) * s I2 = I1 + (float(params.thickness) / beta_g) * s_g return I2, s
def _point_line_residual( point: np.ndarray, origin: np.ndarray, direction: np.ndarray, ) -> np.ndarray: return np.cross(point - origin, direction)
[docs] def project_point_with_parallel_plate( P_cam: np.ndarray, K: np.ndarray, params: ParallelPlateSyntheticParams, image_size: tuple[int, int] | None = None, ) -> tuple[float, float]: """Find the pixel whose generated plate-distorted ray passes closest to a 3-D point. Uses iterative least-squares optimisation. The point must be in front of the camera (Z > 0). Parameters ---------- P_cam : ndarray, shape (3,) 3-D point in camera coordinates, in millimetres. K : ndarray, shape (3, 3) Camera matrix. params : ParallelPlateSyntheticParams Plate geometry parameters. image_size : (int, int), optional Sensor dimensions, used to validate the returned pixel. Returns ------- (u, v) : (float, float) Pixel coordinates of the closest ray. """ P = np.asarray(P_cam, dtype=np.float64).reshape(3) if P[2] <= 0: raise ValueError("point must be in front of the camera") K = np.asarray(K, dtype=np.float64).reshape(3, 3) u0 = K[0, 0] * P[0] / P[2] + K[0, 2] v0 = K[1, 1] * P[1] / P[2] + K[1, 2] from scipy.optimize import least_squares # type: ignore def fun(x: np.ndarray) -> np.ndarray: """Residual function for plate parameter optimisation.""" origin, direction = parallel_plate_ray_from_pixel(float(x[0]), float(x[1]), K, params) return _point_line_residual( P, np.asarray(origin).reshape(3), np.asarray(direction).reshape(3) ) if image_size is None: bounds = (-np.inf, np.inf) else: width, height = image_size bounds = ([0.0, 0.0], [float(width - 1), float(height - 1)]) u0 = float(np.clip(u0, 0.0, width - 1)) v0 = float(np.clip(v0, 0.0, height - 1)) sol = least_squares( fun, x0=np.array([u0, v0], dtype=np.float64), bounds=bounds, xtol=1e-12, ftol=1e-12, gtol=1e-12, ) return float(sol.x[0]), float(sol.x[1])
[docs] def generate_parallel_plate_stereo_dataset( object_points: np.ndarray, board_poses: Sequence[np.ndarray], K_left: np.ndarray, K_right: np.ndarray, T_left_world: np.ndarray, T_right_world: np.ndarray, plate_left: ParallelPlateSyntheticParams, plate_right: ParallelPlateSyntheticParams, image_size: tuple[int, int], noise_std_px: float = 0.0, keep_oracle_rayfields: bool = True, ) -> SyntheticStereoDataset: """Generate synthetic stereo pixel observations from inclined-plate oracle cameras. For each board pose, projects the known object points through the left and right parallel-plate camera models and optionally adds Gaussian pixel noise. Parameters ---------- object_points : ndarray, shape (N, 3) Board-plane object points in mm (Z=0). board_poses : sequence of ndarray Board-to-world transforms, each shape (4, 4). K_left, K_right : ndarray, shape (3, 3) Camera matrices for the two channels. T_left_world, T_right_world : ndarray, shape (4, 4) World-to-camera transforms. plate_left, plate_right : ParallelPlateSyntheticParams Plate geometry for each channel. image_size : (int, int) Sensor dimensions in pixels (width, height). noise_std_px : float Standard deviation of additive Gaussian pixel noise (default 0). keep_oracle_rayfields : bool If True, attach exact ray functions to the returned dataset for ground-truth evaluation. Returns ------- SyntheticStereoDataset Dataclass dataset object with ``left_pixels``, ``right_pixels``, ``object_points``, ``board_poses``, and optional oracle rayfields. """ obj = _as_points(object_points) rng = np.random.default_rng(12345) left_pixels: list[np.ndarray] = [] right_pixels: list[np.ndarray] = [] poses = [np.asarray(T, dtype=np.float64).reshape(4, 4) for T in board_poses] for T_board_world in poses: P_world = transform_points(T_board_world, obj) P_left = transform_points(T_left_world, P_world) P_right = transform_points(T_right_world, P_world) uv_left = np.array( [project_point_with_parallel_plate(P, K_left, plate_left, image_size) for P in P_left], dtype=np.float64, ) uv_right = np.array( [ project_point_with_parallel_plate(P, K_right, plate_right, image_size) for P in P_right ], dtype=np.float64, ) if noise_std_px > 0: uv_left = uv_left + rng.normal(scale=float(noise_std_px), size=uv_left.shape) uv_right = uv_right + rng.normal(scale=float(noise_std_px), size=uv_right.shape) left_pixels.append(uv_left) right_pixels.append(uv_right) return SyntheticStereoDataset( object_points=obj, board_poses=poses, left_pixels=left_pixels, right_pixels=right_pixels, K_left=np.asarray(K_left, dtype=np.float64).reshape(3, 3), K_right=np.asarray(K_right, dtype=np.float64).reshape(3, 3), T_left_world=np.asarray(T_left_world, dtype=np.float64).reshape(4, 4), T_right_world=np.asarray(T_right_world, dtype=np.float64).reshape(4, 4), image_size=(int(image_size[0]), int(image_size[1])), oracle_left_params=plate_left if keep_oracle_rayfields else None, oracle_right_params=plate_right if keep_oracle_rayfields else None, )