"""SHOT (Signatures of Histograms of OrienTations) descriptor.
Shape-only implementation translated from the reference C++ code by
Tombari, Salti and Di Stefano:
"Unique Signatures of Histograms for Local Surface Description"
ECCV 2010
Only the shape channel (normal-alignment cosines) is implemented here;
the color channel is omitted since geomfum meshes do not carry per-vertex RGB.
References
----------
.. [1] Tombari, Salti, Di Stefano. "Unique Signatures of Histograms for
Local Surface Description." ECCV 2010.
"""
import gsops.backend as gs
import numpy as np
from scipy.spatial import cKDTree
from ._base import Descriptor
# Angular constants (radians) matching the C++ macros
_DEG_45 = np.pi / 4 # 0.785...
_DEG_90 = np.pi / 2 # 1.570...
_DEG_135 = 3.0 * np.pi / 4 # 2.356...
_DEG_168 = (
2.7488935718910690836548129603696 # 168° in radians (azimuth sector boundary)
)
# Standard SHOT spatial decomposition: 8 azimuth × 2 inclination × 2 radial = 32 volumes
_N_SECTORS = 32
def _compute_lrf(center, neighbors, radius):
"""Compute the SHOT local reference frame at *center*.
Translated from ``getSHOTLocalRF`` in ``shot.cpp``.
Parameters
----------
center : ndarray, shape=[3]
Central point.
neighbors : ndarray, shape=[n, 3]
Neighbour positions (must not contain *center* itself).
radius : float
Support radius used for distance-based weighting.
Returns
-------
x_axis, y_axis, z_axis : ndarray, shape=[3] each
Orthonormal local frame. ``z_axis`` approximates the surface normal
(smallest-eigenvalue direction of the weighted covariance);
``x_axis`` is the primary tangent (largest eigenvalue);
``y_axis = cross(z_axis, x_axis)``.
"""
vij = neighbors - center # (n, 3)
dists = np.linalg.norm(vij, axis=1) # (n,)
weights = radius - dists # (n,) positive since dists < radius
w_sum = weights.sum()
if w_sum < 1e-30:
# Degenerate neighbourhood — return identity frame
return (
np.array([1.0, 0.0, 0.0]),
np.array([0.0, 1.0, 0.0]),
np.array([0.0, 0.0, 1.0]),
)
cov = (weights[:, None] * vij).T @ vij / w_sum # (3, 3)
# eigh returns eigenvalues/vectors in ASCENDING order
_, eigvecs = np.linalg.eigh(cov)
x_axis = eigvecs[:, 2].copy() # largest eigenvalue → primary tangent
z_axis = eigvecs[:, 0].copy() # smallest eigenvalue → surface normal
# Sign disambiguation: orient each axis so that the majority of
# neighbours lie on its positive side (mirroring the C++ logic).
plus_x = int(np.sum(vij @ x_axis >= 0))
if plus_x < len(vij) - plus_x:
x_axis = -x_axis
plus_z = int(np.sum(vij @ z_axis >= 0))
if plus_z < len(vij) - plus_z:
z_axis = -z_axis
y_axis = np.cross(z_axis, x_axis)
return x_axis, y_axis, z_axis
def _shot_single_point(
center,
neighbors,
neighbor_normals,
sq_distances,
x_axis,
y_axis,
z_axis,
radius,
n_bins,
):
"""Compute the SHOT histogram for a single central point (vectorised).
Translated from ``interpolateSingleChannel`` + the main loop in
``computeDesc`` in ``shot.cpp``. The per-neighbour loop is replaced by
bulk numpy operations; ``np.add.at`` handles unbuffered histogram voting.
Parameters
----------
center : ndarray, shape=[3]
neighbors : ndarray, shape=[n, 3]
neighbor_normals : ndarray, shape=[n, 3]
Unit normals at each neighbour.
sq_distances : ndarray, shape=[n]
Squared Euclidean distances from *center* to each neighbour.
x_axis, y_axis, z_axis : ndarray, shape=[3]
Local reference frame at *center*.
radius : float
n_bins : int
Number of shape bins (default 10 → 11 bins including boundary).
Returns
-------
desc : ndarray, shape=[_N_SECTORS * (n_bins + 1)]
L2-normalised descriptor.
"""
desc = np.zeros(_N_SECTORS * (n_bins + 1), dtype=np.float64)
# --- filter degenerate neighbours ---
dists = np.sqrt(sq_distances)
valid = dists > 1e-15
if not np.any(valid):
return desc
dists = dists[valid]
neighbors = neighbors[valid]
neighbor_normals = neighbor_normals[valid]
r_half = radius * 0.5
r_14 = radius * 0.25
r_34 = radius * 0.75
# --- project neighbours into local reference frame ---
deltas = neighbors - center # (n, 3)
x = deltas @ x_axis # (n,)
y = deltas @ y_axis
z = deltas @ z_axis
x = np.where(np.abs(x) < 1e-30, 0.0, x)
y = np.where(np.abs(y) < 1e-30, 0.0, y)
z = np.where(np.abs(z) < 1e-30, 0.0, z)
# --- spatial sector index (desc_idx, 0–31) ---
bit4 = ((y > 0) | ((y == 0) & (x < 0))).astype(np.int32)
bit3_cond = (x > 0) | ((x == 0) & (y > 0))
bit3 = np.where(bit3_cond, 1 - bit4, bit4).astype(np.int32)
desc_idx = ((bit4 << 3) + (bit3 << 2)) << 1 # (n,) in {0, 8, 16, 24}
cond = (x * y > 0) | (x == 0)
desc_idx += np.where(
cond,
np.where(np.abs(x) >= np.abs(y), 0, 4),
np.where(np.abs(x) > np.abs(y), 4, 0),
)
desc_idx += np.where(z > 0, 1, 0) # inclination bit
desc_idx += np.where(dists > r_half, 2, 0) # radial bit
# --- shape feature: cos between LRF z-axis and neighbour normals ---
cos_desc = np.clip(neighbor_normals @ z_axis, -1.0, 1.0)
bin_dist_f = ((1.0 + cos_desc) * n_bins) / 2.0 # (n,), in [0, n_bins]
step_idx = np.floor(bin_dist_f + 0.5).astype(np.int32)
volume_idx = desc_idx * (n_bins + 1)
frac = bin_dist_f - step_idx # fractional part for bin interpolation
int_weight = 1.0 - np.abs(frac)
# Feature-bin interpolation (modular wrapping)
pos_mask = frac > 0
np.add.at(
desc,
volume_idx[pos_mask] + (step_idx[pos_mask] + 1) % (n_bins + 1),
frac[pos_mask],
)
np.add.at(
desc,
volume_idx[~pos_mask] + (step_idx[~pos_mask] + n_bins) % (n_bins + 1),
-frac[~pos_mask],
)
# Radial interpolation (adjacent shell)
outer = dists > r_half
inner = ~outer
rad_d_out = (dists - r_34) / r_half
far_outer = outer & (dists > r_34)
near_outer = outer & ~far_outer
int_weight[far_outer] += 1.0 - rad_d_out[far_outer]
int_weight[near_outer] += 1.0 + rad_d_out[near_outer]
np.add.at(
desc,
(desc_idx[near_outer] - 2) * (n_bins + 1) + step_idx[near_outer],
-rad_d_out[near_outer],
)
rad_d_in = (dists - r_14) / r_half
far_inner = inner & (dists < r_14)
near_inner = inner & ~far_inner
int_weight[far_inner] += 1.0 + rad_d_in[far_inner]
int_weight[near_inner] += 1.0 - rad_d_in[near_inner]
np.add.at(
desc,
(desc_idx[near_inner] + 2) * (n_bins + 1) + step_idx[near_inner],
rad_d_in[near_inner],
)
# Inclination interpolation (adjacent vertical zone)
inc_cos = np.clip(z / dists, -1.0, 1.0)
inclination = np.arccos(inc_cos)
upper = (inclination > _DEG_90) | (
(np.abs(inclination - _DEG_90) < 1e-30) & (z <= 0)
)
lower = ~upper
inc_d_up = (inclination - _DEG_135) / _DEG_90
inc_d_lo = (inclination - _DEG_45) / _DEG_90
far_upper = upper & (inclination > _DEG_135)
near_upper = upper & ~far_upper
far_lower = lower & (inclination < _DEG_45)
near_lower = lower & ~far_lower
int_weight[far_upper] += 1.0 - inc_d_up[far_upper]
int_weight[near_upper] += 1.0 + inc_d_up[near_upper]
np.add.at(
desc,
(desc_idx[near_upper] + 1) * (n_bins + 1) + step_idx[near_upper],
-inc_d_up[near_upper],
)
int_weight[far_lower] += 1.0 + inc_d_lo[far_lower]
int_weight[near_lower] += 1.0 - inc_d_lo[near_lower]
np.add.at(
desc,
(desc_idx[near_lower] - 1) * (n_bins + 1) + step_idx[near_lower],
inc_d_lo[near_lower],
)
# Azimuth interpolation (adjacent angular sector, wrapped mod 32)
has_az = (y != 0.0) | (x != 0.0)
azimuth = np.arctan2(y, x)
sel = desc_idx >> 2
az_dist = (azimuth - (-_DEG_168 + _DEG_45 * sel)) / _DEG_45
az_dist = np.clip(az_dist, -0.5, 0.5)
az_pos = has_az & (az_dist > 0)
az_neg = has_az & ~az_pos
interp_pos = (desc_idx + 4) % _N_SECTORS
interp_neg = (desc_idx - 4 + _N_SECTORS) % _N_SECTORS
int_weight[az_pos] += 1.0 - az_dist[az_pos]
np.add.at(
desc,
interp_pos[az_pos] * (n_bins + 1) + step_idx[az_pos],
az_dist[az_pos],
)
int_weight[az_neg] += 1.0 + az_dist[az_neg]
np.add.at(
desc,
interp_neg[az_neg] * (n_bins + 1) + step_idx[az_neg],
-az_dist[az_neg],
)
# Accumulate main votes
np.add.at(desc, volume_idx + step_idx, int_weight)
# L2 normalise
norm = np.linalg.norm(desc)
if norm > 1e-15:
desc /= norm
return desc
[docs]
class ShotDescriptor(Descriptor):
"""SHOT descriptor for triangle meshes.
Computes a 352-dimensional (by default) local shape signature per vertex
by accumulating normal-alignment statistics of neighbours into a spatially
structured histogram. The algorithm is translated directly from the
reference C++ implementation by Tombari, Salti and Di Stefano (ECCV 2010).
Parameters
----------
radius : float, optional
Support sphere radius. If ``None`` (default), the radius is set
automatically to ``0.05 × bounding-box diagonal`` of the input mesh,
which gives a neighbourhood of roughly 50–200 vertices on typical
normalised human-body meshes (FAUST, SMAL, …).
n_bins : int
Number of histogram bins per spatial volume (default 10).
Descriptor dimensionality = 32 × (n_bins + 1) = 352 for n_bins=10.
min_neighbors : int
Minimum number of neighbours required to compute a descriptor for a
vertex. Vertices with fewer neighbours receive an all-zero row.
References
----------
.. [1] Tombari, Salti, Di Stefano. "Unique Signatures of Histograms for
Local Surface Description." ECCV 2010.
"""
def __init__(self, radius=None, n_bins=10, min_neighbors=5):
super().__init__()
self.radius = radius
self.n_bins = n_bins
self.min_neighbors = min_neighbors
@property
def descriptor_size(self):
"""Dimensionality of the descriptor vector per vertex."""
return _N_SECTORS * (self.n_bins + 1)
def _resolve_radius(self, vertices):
if self.radius is not None:
return float(self.radius)
bbox_diag = float(np.linalg.norm(vertices.max(axis=0) - vertices.min(axis=0)))
return 0.05 * bbox_diag
def __call__(self, shape):
"""Compute the SHOT descriptor for every vertex of *shape*.
Parameters
----------
shape : TriangleMesh
Input shape. Must expose ``shape.vertices`` (n_vertices, 3) and
``shape.vertex_normals`` (n_vertices, 3).
Returns
-------
descriptors : array-like, shape=[descriptor_size, n_vertices]
L2-normalised per-vertex SHOT descriptors.
"""
device = getattr(shape.vertices, "device", None)
vertices = np.asarray(
gs.to_numpy(gs.to_device(shape.vertices, "cpu")), dtype=np.float64
)
normals = np.asarray(
gs.to_numpy(gs.to_device(shape.vertex_normals, "cpu")), dtype=np.float64
)
radius = self._resolve_radius(vertices)
return gs.to_device(
gs.array(self._compute_descriptors(vertices, normals, radius)), device
)
def _compute_descriptors(self, vertices, normals, radius):
"""Compute per-vertex SHOT histograms on CPU numpy arrays.
Parameters
----------
vertices : ndarray, shape=[n_vertices, 3]
normals : ndarray, shape=[n_vertices, 3]
radius : float
Returns
-------
out : ndarray, shape=[descriptor_size, n_vertices]
"""
n_vertices = vertices.shape[0]
n_bins = self.n_bins
desc_size = self.descriptor_size
tree = cKDTree(vertices)
all_neighbors = tree.query_ball_point(vertices, radius)
out = np.zeros((n_vertices, desc_size), dtype=np.float64)
for i in range(n_vertices):
idx = [j for j in all_neighbors[i] if j != i]
if len(idx) < self.min_neighbors:
continue
nb_pts = vertices[idx]
nb_nrms = normals[idx]
sq_dists = np.sum((nb_pts - vertices[i]) ** 2, axis=1)
x_axis, y_axis, z_axis = _compute_lrf(vertices[i], nb_pts, radius)
out[i] = _shot_single_point(
vertices[i],
nb_pts,
nb_nrms,
sq_dists,
x_axis,
y_axis,
z_axis,
radius,
n_bins,
)
return out.T