SIFT/descriptor.py at main · maxcrous/SIFT · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
"""
This file contains functions related to assigning a descriptor
to a keypoint. The central function in this file is
    `assign_descriptor`.
"""

import numpy as np

import const
from keypoints import Keypoint
from octaves import pixel_dist_in_octave
from reference_orientation import gradients, patch_in_frame, weighting_matrix


def hist_centers() -> np.ndarray:
    """ Computes the locations of the descriptor histograms relative to the keypoint.

        The descriptor patch spans [-descriptor_locality, +descriptor_locality]
        along both axes and is divided into a square grid with
        `const.nr_descriptor_histograms` cells per axis. Each cell holds one
        histogram, located at the cell's center.

    Returns:
        centers: One (x, y) pair per histogram, ordered row by row with x
            varying fastest. Shape (16, 2) for the standard 4 x 4 configuration.
    """
    cells_per_axis = const.nr_descriptor_histograms
    cell_width = (2 * const.descriptor_locality) / cells_per_axis
    first_center = -const.descriptor_locality + cell_width / 2

    # Both axes share the same set of center coordinates.
    axis_centers = [first_center + cell_width * idx for idx in range(cells_per_axis)]

    # Outer loop over rows (y), inner loop over columns (x).
    centers = np.array([(x, y) for y in axis_centers for x in axis_centers])
    return centers


# The histogram centers depend only on values in `const`, never on an
# individual keypoint or patch, so they are computed once at import time
# and reused as a constant. Each row is an (x, y) pair.
histogram_centers = hist_centers()


def relative_patch_coordinates(center_offset: list,
                               patch_shape: tuple,
                               pixel_dist: float,
                               sigma: float,
                               keypoint_orientation: float) -> np.ndarray:
    """ Calculates the coordinates of pixels in a descriptor patch,
        relative to the keypoint. Keypoints have an orientation and
        therefore introduce an oriented x and y axis. This is why
        the relative coordinates are the result of a rotation.
        See Lowe section 5 and AOS section 4.2.

    Args:
        center_offset: The keypoint's (y, x) offset from the patch's center.
        patch_shape: The (rows, cols) shape of a descriptor patch including padding.
        pixel_dist: The distance between adjacent pixels.
        sigma: The scale of layer where the keypoint was found.
        keypoint_orientation: The orientation of the keypoint in radians.
    Returns:
        rel_coords: Array of shape (2, rows, cols). `rel_coords[0]` holds the
            x coordinates and `rel_coords[1]` the y coordinates of every pixel,
            relative to the keypoint's location and orientation, and expressed
            in units of `sigma / pixel_dist`.
    """
    y_len, x_len = patch_shape
    center_y, center_x = np.array(patch_shape) / 2 + center_offset

    # With its default 'xy' indexing, meshgrid expects the column (x) vector
    # first and returns grids of shape (y_len, x_len), i.e. the patch's shape.
    # Passing the vectors the other way around only works for square patches.
    xs, ys = np.meshgrid(np.arange(x_len), np.arange(y_len))

    dx = xs - center_x
    dy = ys - center_y
    cos_theta = np.cos(keypoint_orientation)
    sin_theta = np.sin(keypoint_orientation)
    scale = sigma / pixel_dist

    # Coordinates are rotated to align with the keypoint's orientation.
    rel_xs = (dx * cos_theta + dy * sin_theta) / scale
    rel_ys = (-dx * sin_theta + dy * cos_theta) / scale

    rel_coords = np.array([rel_xs, rel_ys])
    return rel_coords


def mask_outliers(magnitude_patch: np.ndarray,
                  rel_patch_coords: np.ndarray,
                  threshold: float,
                  axis: int = 0) -> np.ndarray:
    """ Zeroes the gradient magnitudes of pixels that lie too far away.
        A pixel is kept only if the larger of its absolute x and y
        distances does not exceed the threshold, i.e. if it lies inside
        a square of half-width `threshold` around the reference point.

    Args:
        magnitude_patch: The gradient magnitudes in the patch.
        rel_patch_coords: The x & y coordinates of the patch's pixels,
            relative to the reference point (keypoint or histogram center).
        threshold: Largest distance along x or y that is still kept.
        axis: The axis of `rel_patch_coords` that separates x from y.
            The maximum is taken along this axis.
    Returns:
        magnitude_patch: The gradient magnitudes, with outliers set to 0.
    """
    # Largest absolute distance along either coordinate, per pixel.
    largest_dist = np.abs(rel_patch_coords).max(axis=axis)
    inside = largest_dist <= threshold
    return magnitude_patch * inside


def interpolate_2d_grid_contribution(magnitude_path: np.ndarray,
                                     coords_rel_to_hist: np.ndarray):
    """ Weighs every gradient by its proximity to each histogram.
        A gradient contributes to a histogram with a weight that falls off
        linearly with its pixel's x and y distance to that histogram's
        location; the x and y weights are multiplied (bilinear
        interpolation). See AOS section 4.2 and figure 10 and Lowe
        section 6. All histograms are handled at once via broadcasting.

    Args:
        magnitude_path: The gradient magnitudes in a descriptor patch, one
            copy per histogram. As called from `assign_descriptor` this has
            semantics (hist_idx, patch_row, patch_col).
        coords_rel_to_hist: The coordinates of the patch's pixels relative to
            each histogram's location. As called from `assign_descriptor`
            this has semantics (hist_idx, x_or_y, patch_row, patch_col).
    Returns:
        magnitude_path: The gradient magnitudes after scaling them by their
            interpolation weight for each histogram, with semantics
            (hist_idx, patch_row, patch_col).
    """
    # Move the coordinate axis to the front so it unpacks into x and y.
    x_dist, y_dist = np.abs(np.swapaxes(coords_rel_to_hist, 0, 1))

    # Distance at which a gradient's weight drops to zero.
    falloff_dist = 1/2 * const.descriptor_locality

    weight_y = 1 - (y_dist / falloff_dist)
    weight_x = 1 - (x_dist / falloff_dist)
    return magnitude_path * (weight_y * weight_x)


def interpolate_1d_hist_contribution(magnitude_path: np.ndarray,
                                     orientation_patch: np.ndarray) -> np.ndarray:
    """ Builds orientation histograms with linear interpolation between bins.
        Instead of adding a gradient's whole magnitude to the bin its
        orientation falls into, the magnitude is split between that bin and
        the following one (wrapping around at 2 pi). The further the
        orientation lies past the start of its own bin, the larger the
        share that goes to the following bin.

    Args:
        magnitude_path: The gradient magnitudes in the descriptor gradient
            patch, one weighted copy per histogram along the first axis.
        orientation_patch: The gradient orientations in the descriptor gradient patch.
    Returns:
        interpol_hist: One orientation histogram per entry along the first
            axis of `magnitude_path`, with contributions interpolated
            between neighboring bins.
    """
    nr_hists = magnitude_path.shape[0]
    bin_width = const.descriptor_bin_width
    full_circle = 2 * np.pi

    # Every histogram looks at the same orientations, so replicate them.
    orientations = np.repeat(orientation_patch[None, ...], nr_hists, axis=0)

    # Fraction of a bin width that each orientation lies past its bin's start.
    share_next = (orientations % bin_width) / bin_width
    share_current = 1 - share_next

    # Shifting by one bin width makes an orientation land in the following bin.
    shifted_orientations = (orientations + bin_width) % full_circle

    current_hist = histogram_per_row(orientations.reshape((nr_hists, -1)),
                                     bins=const.nr_descriptor_bins,
                                     range_=(0, full_circle),
                                     weights=share_current * magnitude_path)
    next_hist = histogram_per_row(shifted_orientations.reshape((nr_hists, -1)),
                                  bins=const.nr_descriptor_bins,
                                  range_=(0, full_circle),
                                  weights=share_next * magnitude_path)

    return current_hist + next_hist


def histogram_per_row(data: np.ndarray,
                      bins: int,
                      range_: tuple,
                      weights: np.ndarray) -> np.ndarray:
    """ Calculates histograms for each row of a 2D matrix.
        Has a similar signature to np.histogram(), except np.histogram() only
        supports 1D arrays. This function was created to speed up histogram
        creation for all (16) histograms in the descriptor patch. Borrows from
        https://stackoverflow.com/questions/44152436/calculate-histograms-along-axis

        As in np.histogram(), bins are half-open except for the last one,
        which also includes `range_max`, and values outside of `range_`
        (including NaN) are ignored.

    Args:
        data: A 2 dimensional array. A histogram will be calculated for each row.
        bins: The number of bins in the histograms.
        range_: The (min, max) range of values that the histogram covers.
        weights: Contribution weights for each of the elements in `data`.
            This array must have the same number of elements as `data`,
            in the same order when flattened.
    Returns:
        histograms: The weighted bin counts for each row, in shape (n_rows, bins).
    """
    range_min, range_max = range_
    n_rows, _ = data.shape  # Also enforces that `data` is 2 dimensional.
    bin_edges = np.linspace(range_min, range_max, bins + 1)

    # Index of the bin whose left edge is the closest one at or below the value.
    idx = np.searchsorted(bin_edges, data, 'right') - 1
    # The last bin is closed on the right: range_max belongs to it.
    idx[data == range_max] = bins - 1

    # Out-of-range values would otherwise get index -1 or `bins` and leak
    # into a neighboring row's histogram after the row offset is added.
    in_range = ((data >= range_min) & (data <= range_max)).ravel()

    # Give every row its own block of `bins` consecutive indices, so that
    # a single bincount call produces all histograms at once.
    scaled_idx = (idx + bins * np.arange(n_rows)[:, None]).ravel()
    histograms = np.bincount(scaled_idx[in_range],
                             minlength=bins * n_rows,
                             weights=weights.ravel()[in_range])
    return histograms.reshape(n_rows, bins)


def normalize_sift_feature(hists: np.ndarray) -> np.ndarray:
    """ Normalizes a keypoint's descriptor histograms to a unit length vector.
        The vector is normalized, its entries are clipped to
        `const.descriptor_clip_max` so that a few large gradients cannot
        dominate the descriptor, and it is then normalized again.
        See AOS section 4.2 and Lowe section 6.1

    Args:
        hists: A 1D array of a keypoint's descriptor histograms concatenated.
    Returns:
        hists: The histogram array that has been clipped and normalized to
            unit length. An all-zero input is returned as an all-zero array,
            since it has no direction to normalize.
    """
    norm = np.linalg.norm(hists)
    if norm == 0:
        # Dividing by a zero norm would yield an all-NaN descriptor.
        return np.zeros_like(hists, dtype=float)

    hists = hists / norm
    hists = np.clip(hists, a_min=None, a_max=const.descriptor_clip_max)
    hists = hists / np.linalg.norm(hists)
    return hists


def assign_descriptor(keypoints: list[Keypoint],
                      gauss_octave: np.ndarray,
                      octave_idx: int) -> list[Keypoint]:
    """ Assigns a descriptor to each keypoint.
        A descriptor is a collection of histograms that capture
        the distribution of gradients orientations in an oriented
        keypoint's local neighborhood. See AOS section 4.2 and Lowe
        section 6. Descriptors are created by taking a square
        patch of gradients surrounding the keypoint, assigning
        each gradient in the patch a coordinate relative to the
        oriented keypoint, and accumulating the gradients into a set
        of histograms. A gradient's contribution to a particular
        histogram is determined by its distance from the histogram's
        and keypoint's location.

        Keypoints whose descriptor patch does not pass `patch_in_frame`
        are dropped. The remaining keypoints are modified in place:
        their `descriptor` attribute is set.

    Args:
        keypoints: A list of keypoints that have been assigned an orientation.
        gauss_octave: An octave of Gaussian convolved images.
        octave_idx: The index of an octave.
    Returns:
        described_keypoints: A list of keypoints that have been assigned a descriptor.
    """
    magnitudes, orientations = gradients(gauss_octave)

    # These depend only on the octave, not on the individual keypoint.
    shape = gauss_octave.shape
    pixel_dist = pixel_dist_in_octave(octave_idx)

    described_keypoints = list()
    for keypoint in keypoints:
        coord = keypoint.coordinate
        sigma = keypoint.sigma
        s, y, x = coord.round().astype(int)

        # Half-width of the patch in pixels. The sqrt(2) factor leaves room
        # for the descriptor's square window at any rotation.
        max_width = (np.sqrt(2) * const.descriptor_locality * sigma) / pixel_dist
        max_width = max_width.round().astype(int)

        if not patch_in_frame(coord, max_width, shape):
            continue

        orientation_patch = orientations[s,
                                         y - max_width: y + max_width,
                                         x - max_width: x + max_width]
        magnitude_patch = magnitudes[s,
                                     y - max_width: y + max_width,
                                     x - max_width: x + max_width]
        patch_shape = magnitude_patch.shape

        # Sub-pixel offset of the keypoint from the rounded patch center.
        center_offset = [coord[1] - y, coord[2] - x]
        rel_patch_coords = relative_patch_coordinates(center_offset, patch_shape, pixel_dist, sigma,
                                                      keypoint.orientation)

        # Keep only the gradients inside the oriented descriptor window.
        magnitude_patch = mask_outliers(magnitude_patch, rel_patch_coords, const.descriptor_locality)

        # Express gradient orientations relative to the keypoint's orientation.
        orientation_patch = (orientation_patch - keypoint.orientation) % (2 * np.pi)

        weights = weighting_matrix(center_offset, patch_shape, octave_idx, sigma, const.descriptor_locality)
        magnitude_patch = magnitude_patch * weights

        # Pixel coordinates relative to every histogram center, with
        # semantics (hist_idx, x_or_y, patch_row, patch_col).
        coords_rel_to_hists = rel_patch_coords[None] - histogram_centers[..., None, None]

        # A gradient only contributes to histograms that are close enough.
        hists_magnitude_patch = mask_outliers(magnitude_patch[None], coords_rel_to_hists, const.inter_hist_dist, 1)
        hists_magnitude_patch = interpolate_2d_grid_contribution(hists_magnitude_patch, coords_rel_to_hists)
        hists = interpolate_1d_hist_contribution(hists_magnitude_patch, orientation_patch).ravel()

        keypoint.descriptor = normalize_sift_feature(hists)
        described_keypoints.append(keypoint)

    return described_keypoints