#!/usr/bin/env python3
import argparse
from math import log
from typing import Callable
import unittest

import torch

# Bounding boxes and anchors are expected to be PyTorch tensors,
# where the last dimension has size 4.

# For bounding boxes in pixel coordinates, the 4 values correspond to:
TOP: int = 0
LEFT: int = 1
BOTTOM: int = 2
RIGHT: int = 3


def bboxes_area(bboxes: torch.Tensor) -> torch.Tensor:
    """Compute area of given set of bboxes.

    Each bbox is parametrized as a four-tuple (top, left, bottom, right).

    If the bboxes.shape is [..., 4], the output shape is bboxes.shape[:-1].
    """
    return torch.relu(bboxes[..., BOTTOM] - bboxes[..., TOP]) \
        * torch.relu(bboxes[..., RIGHT] - bboxes[..., LEFT])


def bboxes_iou(xs: torch.Tensor, ys: torch.Tensor) -> torch.Tensor:
    """Compute IoU of corresponding pairs from two sets of bboxes `xs` and `ys`.

    Each bbox is parametrized as a four-tuple (top, left, bottom, right).

    Note that broadcasting is supported, so passing inputs with
    `xs.shape=[num_xs, 1, 4]` and `ys.shape=[1, num_ys, 4]` produces an output with
    shape `[num_xs, num_ys]`, computing IoU for all pairs of bboxes from `xs` and `ys`.
    Formally, the output shape is `torch.broadcast_shapes(xs.shape, ys.shape)[:-1]`.
    """
    intersections = torch.stack([
        torch.maximum(xs[..., TOP], ys[..., TOP]),
        torch.maximum(xs[..., LEFT], ys[..., LEFT]),
        torch.minimum(xs[..., BOTTOM], ys[..., BOTTOM]),
        torch.minimum(xs[..., RIGHT], ys[..., RIGHT]),
    ], dim=-1)

    xs_area, ys_area, intersections_area = bboxes_area(xs), bboxes_area(ys), bboxes_area(intersections)

    return intersections_area / (xs_area + ys_area - intersections_area)
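
# A hypothetical usage example of the broadcasting described above (not part of the
# template): with `anchors` of shape [num_anchors, 4] and `golds` of shape [num_golds, 4],
#   pairwise_iou = bboxes_iou(anchors.unsqueeze(1), golds.unsqueeze(0))
# computes a [num_anchors, num_golds] tensor with the IoU of every anchor-gold pair.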


def bboxes_to_rcnn(anchors: torch.Tensor, bboxes: torch.Tensor) -> torch.Tensor:
    """Convert `bboxes` to a R-CNN-like representation relative to `anchors`.

    The `anchors` and `bboxes` are arrays of four-tuples (top, left, bottom, right);
    you can use the TOP, LEFT, BOTTOM, RIGHT constants as indices of the
    respective coordinates.

    The resulting representation of a single bbox is a four-tuple with:
    - (bbox_y_center - anchor_y_center) / anchor_height
    - (bbox_x_center - anchor_x_center) / anchor_width
    - log(bbox_height / anchor_height)
    - log(bbox_width / anchor_width)

    If the `anchors.shape` is `[anchors_len, 4]` and `bboxes.shape` is `[anchors_len, 4]`,
    the output shape is `[anchors_len, 4]`.
    """
    # TODO: Implement according to the docstring.
    raise NotImplementedError()
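

# One possible realization of the conversion documented above. This is an
# illustrative sketch only, not the required solution; the helper name
# `_bboxes_to_rcnn_sketch` is made up and not part of the template. It assumes
# tensors whose last dimension has size 4, with non-degenerate anchors and bboxes.
def _bboxes_to_rcnn_sketch(anchors: torch.Tensor, bboxes: torch.Tensor) -> torch.Tensor:
    # Anchor centers and sizes.
    anchor_height = anchors[..., BOTTOM] - anchors[..., TOP]
    anchor_width = anchors[..., RIGHT] - anchors[..., LEFT]
    anchor_y = (anchors[..., TOP] + anchors[..., BOTTOM]) / 2
    anchor_x = (anchors[..., LEFT] + anchors[..., RIGHT]) / 2
    # Bbox centers and sizes.
    bbox_height = bboxes[..., BOTTOM] - bboxes[..., TOP]
    bbox_width = bboxes[..., RIGHT] - bboxes[..., LEFT]
    bbox_y = (bboxes[..., TOP] + bboxes[..., BOTTOM]) / 2
    bbox_x = (bboxes[..., LEFT] + bboxes[..., RIGHT]) / 2
    # Relative center offsets and log-scale sizes, exactly as listed in the docstring.
    return torch.stack([
        (bbox_y - anchor_y) / anchor_height,
        (bbox_x - anchor_x) / anchor_width,
        torch.log(bbox_height / anchor_height),
        torch.log(bbox_width / anchor_width),
    ], dim=-1)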


def bboxes_from_rcnn(anchors: torch.Tensor, rcnns: torch.Tensor) -> torch.Tensor:
    """Convert R-CNN-like representations `rcnns` relative to `anchors` back to bboxes.

    If the `anchors.shape` is `[anchors_len, 4]` and `rcnns.shape` is `[anchors_len, 4]`,
    the output shape is `[anchors_len, 4]`.
    """
    # TODO: Implement according to the docstring.
    raise NotImplementedError()
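

# Similarly, an illustrative sketch of the inverse conversion (again, the helper
# name `_bboxes_from_rcnn_sketch` is made up and not part of the template).
def _bboxes_from_rcnn_sketch(anchors: torch.Tensor, rcnns: torch.Tensor) -> torch.Tensor:
    # Anchor centers and sizes.
    anchor_height = anchors[..., BOTTOM] - anchors[..., TOP]
    anchor_width = anchors[..., RIGHT] - anchors[..., LEFT]
    anchor_y = (anchors[..., TOP] + anchors[..., BOTTOM]) / 2
    anchor_x = (anchors[..., LEFT] + anchors[..., RIGHT]) / 2
    # Undo the R-CNN parametrization: center offsets scale with the anchor size,
    # heights and widths are stored as logarithms of the size ratio.
    bbox_y = anchor_y + rcnns[..., 0] * anchor_height
    bbox_x = anchor_x + rcnns[..., 1] * anchor_width
    bbox_height = torch.exp(rcnns[..., 2]) * anchor_height
    bbox_width = torch.exp(rcnns[..., 3]) * anchor_width
    # Convert centers and sizes back to (top, left, bottom, right).
    return torch.stack([
        bbox_y - bbox_height / 2,
        bbox_x - bbox_width / 2,
        bbox_y + bbox_height / 2,
        bbox_x + bbox_width / 2,
    ], dim=-1)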


def bboxes_training(
    anchors: torch.Tensor, gold_classes: torch.Tensor, gold_bboxes: torch.Tensor, iou_threshold: float,
) -> tuple[torch.Tensor, torch.Tensor]:
    """Compute training data for object detection.

    Arguments:
    - `anchors` is an array of four-tuples (top, left, bottom, right)
    - `gold_classes` is an array of zero-based classes of the gold objects
    - `gold_bboxes` is an array of four-tuples (top, left, bottom, right)
      of the gold objects
    - `iou_threshold` is a given threshold

    Returns:
    - `anchor_classes` contains for every anchor either 0 for background
      (if no gold object is assigned) or `1 + gold_class` if a gold object
      with `gold_class` is assigned to it
    - `anchor_bboxes` contains for every anchor a four-tuple
      `(center_y, center_x, height, width)` representing the gold bbox of
      a chosen object using parametrization of R-CNN; zeros if no gold object
      was assigned to the anchor
    If the `anchors` shape is `[anchors_len, 4]`, the `anchor_classes` shape
    is `[anchors_len]` and the `anchor_bboxes` shape is `[anchors_len, 4]`.

    Algorithm:
    - First, for each gold object, assign it to an anchor with the largest IoU
      (the anchor with smaller index if there are several). In case several gold
      objects are assigned to a single anchor, use the gold object with smaller
      index.
    - For each unused anchor, find the gold object with the largest IoU
      (again the gold object with smaller index if there are several), and if
      the IoU is >= iou_threshold, assign the object to the anchor.
    """
    # TODO: First, for each gold object, assign it to an anchor with the
    # largest IoU (the anchor with smaller index if there are several). In case
    # several gold objects are assigned to a single anchor, use the gold object
    # with smaller index.

    # TODO: For each unused anchor, find the gold object with the largest IoU
    # (again the gold object with smaller index if there are several), and if
    # the IoU is >= iou_threshold, assign the object to the anchor.

    anchor_classes, anchor_bboxes = ..., ...

    return anchor_classes, anchor_bboxes
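

# An illustrative sketch of the two-step assignment described in the docstring above.
# It is not necessarily the intended solution; `_bboxes_training_sketch` and its use of
# the sketch helpers defined earlier are assumptions made only for this example.
def _bboxes_training_sketch(
    anchors: torch.Tensor, gold_classes: torch.Tensor, gold_bboxes: torch.Tensor, iou_threshold: float,
) -> tuple[torch.Tensor, torch.Tensor]:
    # Pairwise IoU of every anchor with every gold bbox, shape [anchors_len, golds_len].
    iou = bboxes_iou(anchors.unsqueeze(1).float(), gold_bboxes.unsqueeze(0).float())
    assigned_gold = torch.full((len(anchors),), -1, dtype=torch.int64)
    # Step 1: each gold object claims the anchor with the largest IoU. Iterating the
    # gold objects in reverse lets the one with the smaller index win on conflicts;
    # torch.argmax is documented (in recent releases) to return the first maximal
    # index, i.e. the anchor with the smaller index on ties.
    for gold in reversed(range(len(gold_bboxes))):
        assigned_gold[torch.argmax(iou[:, gold])] = gold
    # Step 2: every anchor still unused takes its best gold object if the IoU
    # reaches the threshold (torch.max likewise reports the first maximal index).
    best_iou, best_gold = torch.max(iou, dim=1)
    take = (assigned_gold < 0) & (best_iou >= iou_threshold)
    assigned_gold[take] = best_gold[take]
    # Assemble the outputs: background class 0 and zero bboxes for unassigned anchors,
    # otherwise 1 + gold class and the R-CNN parametrization of the assigned gold bbox.
    assigned = assigned_gold >= 0
    anchor_classes = torch.zeros(len(anchors), dtype=torch.int64)
    anchor_classes[assigned] = 1 + gold_classes[assigned_gold[assigned]]
    anchor_bboxes = torch.zeros(len(anchors), 4)
    anchor_bboxes[assigned] = _bboxes_to_rcnn_sketch(
        anchors[assigned].float(), gold_bboxes[assigned_gold[assigned]].float())
    return anchor_classes, anchor_bboxes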


def main(args: argparse.Namespace) -> tuple[Callable, Callable, Callable]:
    return bboxes_to_rcnn, bboxes_from_rcnn, bboxes_training


class Tests(unittest.TestCase):
    def test_bboxes_to_from_rcnn(self):
        data = [
            [[0, 0, 10, 10], [0, 0, 10, 10], [0, 0, 0, 0]],
            [[0, 0, 10, 10], [5, 0, 15, 10], [.5, 0, 0, 0]],
            [[0, 0, 10, 10], [0, 5, 10, 15], [0, .5, 0, 0]],
            [[0, 0, 10, 10], [0, 0, 20, 30], [.5, 1, log(2), log(3)]],
            [[0, 9, 10, 19], [2, 10, 5, 16], [-0.15, -0.1, -1.20397, -0.51083]],
            [[5, 3, 15, 13], [7, 7, 10, 9], [-0.15, 0, -1.20397, -1.60944]],
            [[7, 6, 17, 16], [9, 10, 12, 13], [-0.15, 0.05, -1.20397, -1.20397]],
            [[5, 6, 15, 16], [7, 7, 10, 10], [-0.15, -0.25, -1.20397, -1.20397]],
            [[6, 3, 16, 13], [8, 5, 12, 8], [-0.1, -0.15, -0.91629, -1.20397]],
            [[5, 2, 15, 12], [9, 6, 12, 8], [0.05, 0, -1.20397, -1.60944]],
            [[2, 10, 12, 20], [6, 11, 8, 17], [0, -0.1, -1.60944, -0.51083]],
            [[10, 9, 20, 19], [12, 13, 17, 16], [-0.05, 0.05, -0.69315, -1.20397]],
            [[6, 7, 16, 17], [10, 11, 12, 14], [0, 0.05, -1.60944, -1.20397]],
            [[2, 2, 12, 12], [3, 5, 8, 8], [-0.15, -0.05, -0.69315, -1.20397]],
        ]
        # First run on individual anchors, and then on all together
        for anchors, bboxes, rcnns in [map(lambda x: [x], row) for row in data] + [zip(*data)]:
            anchors, bboxes, rcnns = [torch.tensor(data, dtype=torch.float32) for data in [anchors, bboxes, rcnns]]
            torch.testing.assert_close(bboxes_to_rcnn(anchors, bboxes), rcnns, atol=1e-3, rtol=1e-3)
            torch.testing.assert_close(bboxes_from_rcnn(anchors, rcnns), bboxes, atol=1e-3, rtol=1e-3)

    def test_bboxes_training(self):
        anchors = torch.tensor([[0, 0, 10, 10], [0, 10, 10, 20], [10, 0, 20, 10], [10, 10, 20, 20]])
        for gold_classes, gold_bboxes, anchor_classes, anchor_bboxes, iou in [
            [[1], [[14, 14, 16, 16]], [0, 0, 0, 2], [[0, 0, 0, 0]] * 3 + [[0, 0, log(.2), log(.2)]], 0.5],
            [[2], [[0, 0, 20, 20]], [3, 0, 0, 0], [[.5, .5, log(2), log(2)]] + [[0, 0, 0, 0]] * 3, 0.26],
            [[2], [[0, 0, 20, 20]], [3, 3, 3, 3],
             [[y, x, log(2), log(2)] for y in [.5, -.5] for x in [.5, -.5]], 0.24],
            [[0, 1], [[3, 3, 20, 18], [10, 1, 18, 21]], [0, 0, 0, 1],
             [[0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0], [-0.35, -0.45, 0.53062, 0.40546]], 0.5],
            [[0, 1], [[3, 3, 20, 18], [10, 1, 18, 21]], [0, 0, 2, 1],
             [[0, 0, 0, 0], [0, 0, 0, 0], [-0.1, 0.6, -0.22314, 0.69314], [-0.35, -0.45, 0.53062, 0.40546]], 0.3],
            [[0, 1], [[3, 3, 20, 18], [10, 1, 18, 21]], [0, 1, 2, 1],
             [[0, 0, 0, 0], [0.65, -0.45, 0.53062, 0.40546], [-0.1, 0.6, -0.22314, 0.69314],
              [-0.35, -0.45, 0.53062, 0.40546]], 0.17],
        ]:
            gold_classes, anchor_classes = torch.tensor(gold_classes), torch.tensor(anchor_classes)
            gold_bboxes, anchor_bboxes = torch.tensor(gold_bboxes), torch.tensor(anchor_bboxes)
            computed_classes, computed_bboxes = bboxes_training(anchors, gold_classes, gold_bboxes, iou)
            torch.testing.assert_close(computed_classes, anchor_classes, atol=1e-3, rtol=1e-3)
            torch.testing.assert_close(computed_bboxes, anchor_bboxes, atol=1e-3, rtol=1e-3)


if __name__ == '__main__':
    unittest.main()