diff --git a/.gitignore b/.gitignore index 894a44cc066a027465cd26d634948d56d13af9af..ffbae97a51e885187c5fc0c0485e58bf6067e310 100644 --- a/.gitignore +++ b/.gitignore @@ -102,3 +102,6 @@ venv.bak/ # mypy .mypy_cache/ + +# cython generated cpp +mmdet/ops/nms/*.cpp \ No newline at end of file diff --git a/compile.sh b/compile.sh new file mode 100755 index 0000000000000000000000000000000000000000..8bf418054a26fc2ab5741298f3f3863273cd1c0a --- /dev/null +++ b/compile.sh @@ -0,0 +1,22 @@ +#!/usr/bin/env bash + +PYTHON=${PYTHON:-"python"} + +echo "Building roi align op..." +cd mmdet/ops/roi_align +if [ -d "build" ]; then + rm -r build +fi +$PYTHON setup.py build_ext --inplace + +echo "Building roi pool op..." +cd ../roi_pool +if [ -d "build" ]; then + rm -r build +fi +$PYTHON setup.py build_ext --inplace + +echo "Building nms op..." +cd ../nms +make clean +make PYTHON=${PYTHON} diff --git a/mmdet/__init__.py b/mmdet/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..58f3ace6c03d093337c9fa417ccbe8bc267b6c69 --- /dev/null +++ b/mmdet/__init__.py @@ -0,0 +1 @@ +from .version import __version__ diff --git a/mmdet/core/__init__.py b/mmdet/core/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..7992d8deb3ba0f6586c1bef0705f33a41a78d917 --- /dev/null +++ b/mmdet/core/__init__.py @@ -0,0 +1,6 @@ +from .anchor_generator import * +from .bbox_ops import * +from .mask_ops import * +from .eval import * +from .nn import * +from .targets import * diff --git a/mmdet/core/anchor_generator.py b/mmdet/core/anchor_generator.py new file mode 100644 index 0000000000000000000000000000000000000000..e7a1fa256fb6d4df69be77a341728ed194b54b7e --- /dev/null +++ b/mmdet/core/anchor_generator.py @@ -0,0 +1,80 @@ +import torch + + +class AnchorGenerator(object): + + def __init__(self, base_size, scales, ratios, scale_major=True): + self.base_size = base_size + self.scales = torch.Tensor(scales) + self.ratios = torch.Tensor(ratios) + self.scale_major = scale_major + self.base_anchors = self.gen_base_anchors() + + @property + def num_base_anchors(self): + return self.base_anchors.size(0) + + def gen_base_anchors(self): + base_anchor = torch.Tensor( + [0, 0, self.base_size - 1, self.base_size - 1]) + + w = base_anchor[2] - base_anchor[0] + 1 + h = base_anchor[3] - base_anchor[1] + 1 + x_ctr = base_anchor[0] + 0.5 * (w - 1) + y_ctr = base_anchor[1] + 0.5 * (h - 1) + + h_ratios = torch.sqrt(self.ratios) + w_ratios = 1 / h_ratios + if self.scale_major: + ws = (w * w_ratios[:, None] * self.scales[None, :]).view(-1) + hs = (h * h_ratios[:, None] * self.scales[None, :]).view(-1) + else: + ws = (w * self.scales[:, None] * w_ratios[None, :]).view(-1) + hs = (h * self.scales[:, None] * h_ratios[None, :]).view(-1) + + base_anchors = torch.stack( + [ + x_ctr - 0.5 * (ws - 1), y_ctr - 0.5 * (hs - 1), + x_ctr + 0.5 * (ws - 1), y_ctr + 0.5 * (hs - 1) + ], + dim=-1).round() + + return base_anchors + + def _meshgrid(self, x, y, row_major=True): + xx = x.repeat(len(y)) + yy = y.view(-1, 1).repeat(1, len(x)).view(-1) + if row_major: + return xx, yy + else: + return yy, xx + + def grid_anchors(self, featmap_size, stride=16, device='cuda'): + feat_h, feat_w = featmap_size + shift_x = torch.arange(0, feat_w, device=device) * stride + shift_y = torch.arange(0, feat_h, device=device) * stride + shift_xx, shift_yy = self._meshgrid(shift_x, shift_y) + shifts = torch.stack([shift_xx, shift_yy, shift_xx, shift_yy], dim=-1) + # first feat_w elements correspond to the first row of shifts + # add A 
anchors (1, A, 4) to K shifts (K, 1, 4) to get
+        # shifted anchors (K, A, 4), reshape to (K*A, 4)
+        base_anchors = self.base_anchors.to(device)
+        all_anchors = base_anchors[None, :, :] + shifts[:, None, :]
+        all_anchors = all_anchors.view(-1, 4)
+        # first A rows correspond to A anchors of (0, 0) in feature map,
+        # then (0, 1), (0, 2), ...
+        return all_anchors
+
+    def valid_flags(self, featmap_size, valid_size, device='cuda'):
+        feat_h, feat_w = featmap_size
+        valid_h, valid_w = valid_size
+        assert valid_h <= feat_h and valid_w <= feat_w
+        valid_x = torch.zeros(feat_w, dtype=torch.uint8, device=device)
+        valid_y = torch.zeros(feat_h, dtype=torch.uint8, device=device)
+        valid_x[:valid_w] = 1
+        valid_y[:valid_h] = 1
+        valid_xx, valid_yy = self._meshgrid(valid_x, valid_y)
+        valid = valid_xx & valid_yy
+        valid = valid[:, None].expand(
+            valid.size(0), self.num_base_anchors).contiguous().view(-1)
+        return valid
diff --git a/mmdet/core/bbox_ops/__init__.py b/mmdet/core/bbox_ops/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..4bf9aeb74a5db787f687bacf0147ae1e2b1054bf
--- /dev/null
+++ b/mmdet/core/bbox_ops/__init__.py
@@ -0,0 +1,12 @@
+from .geometry import bbox_overlaps
+from .sampling import (random_choice, bbox_assign, bbox_assign_via_overlaps,
+                       bbox_sampling, sample_positives, sample_negatives)
+from .transforms import (bbox_transform, bbox_transform_inv, bbox_flip,
+                         bbox_mapping, bbox_mapping_back, bbox2roi, roi2bbox)
+
+__all__ = [
+    'bbox_overlaps', 'random_choice', 'bbox_assign',
+    'bbox_assign_via_overlaps', 'bbox_sampling', 'sample_positives',
+    'sample_negatives', 'bbox_transform', 'bbox_transform_inv', 'bbox_flip',
+    'bbox_mapping', 'bbox_mapping_back', 'bbox2roi', 'roi2bbox'
+]
diff --git a/mmdet/core/bbox_ops/geometry.py b/mmdet/core/bbox_ops/geometry.py
new file mode 100644
index 0000000000000000000000000000000000000000..a852a06fb0c216569cf5f32385c356114c534904
--- /dev/null
+++ b/mmdet/core/bbox_ops/geometry.py
@@ -0,0 +1,63 @@
+import torch
+
+
+def bbox_overlaps(bboxes1, bboxes2, mode='iou', is_aligned=False):
+    """Calculate overlaps between two sets of bboxes.
+
+    If ``is_aligned`` is ``False``, then calculate the ious between each bbox
+    of bboxes1 and bboxes2, otherwise the ious between each aligned pair of
+    bboxes1 and bboxes2.
+
+    Args:
+        bboxes1 (Tensor): shape (m, 4)
+        bboxes2 (Tensor): shape (n, 4); if is_aligned is ``True``, then m and
+            n must be equal.
+        mode (str): "iou" (intersection over union) or "iof" (intersection
+            over foreground).
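+
+    Example (illustrative; note the inclusive "+1" pixel convention):
+        >>> bboxes1 = torch.Tensor([[0., 0., 10., 10.]])
+        >>> bboxes2 = torch.Tensor([[0., 0., 20., 20.]])
+        >>> bbox_overlaps(bboxes1, bboxes2)  # 121 / 441, about 0.274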
+
+    Returns:
+        ious(Tensor): shape (m, n) if is_aligned == False else shape (m, 1)
+    """
+
+    assert mode in ['iou', 'iof']
+
+    rows = bboxes1.size(0)
+    cols = bboxes2.size(0)
+    if is_aligned:
+        assert rows == cols
+
+    if rows * cols == 0:
+        return bboxes1.new(rows, 1) if is_aligned else bboxes1.new(rows, cols)
+
+    if is_aligned:
+        lt = torch.max(bboxes1[:, :2], bboxes2[:, :2])  # [rows, 2]
+        rb = torch.min(bboxes1[:, 2:], bboxes2[:, 2:])  # [rows, 2]
+
+        wh = (rb - lt + 1).clamp(min=0)  # [rows, 2]
+        overlap = wh[:, 0] * wh[:, 1]
+        area1 = (bboxes1[:, 2] - bboxes1[:, 0] + 1) * (
+            bboxes1[:, 3] - bboxes1[:, 1] + 1)
+
+        if mode == 'iou':
+            area2 = (bboxes2[:, 2] - bboxes2[:, 0] + 1) * (
+                bboxes2[:, 3] - bboxes2[:, 1] + 1)
+            ious = overlap / (area1 + area2 - overlap)
+        else:
+            ious = overlap / area1
+    else:
+        lt = torch.max(bboxes1[:, None, :2], bboxes2[:, :2])  # [rows, cols, 2]
+        rb = torch.min(bboxes1[:, None, 2:], bboxes2[:, 2:])  # [rows, cols, 2]
+
+        wh = (rb - lt + 1).clamp(min=0)  # [rows, cols, 2]
+        overlap = wh[:, :, 0] * wh[:, :, 1]
+        area1 = (bboxes1[:, 2] - bboxes1[:, 0] + 1) * (
+            bboxes1[:, 3] - bboxes1[:, 1] + 1)
+
+        if mode == 'iou':
+            area2 = (bboxes2[:, 2] - bboxes2[:, 0] + 1) * (
+                bboxes2[:, 3] - bboxes2[:, 1] + 1)
+            ious = overlap / (area1[:, None] + area2 - overlap)
+        else:
+            ious = overlap / (area1[:, None])
+
+    return ious
diff --git a/mmdet/core/bbox_ops/sampling.py b/mmdet/core/bbox_ops/sampling.py
new file mode 100644
index 0000000000000000000000000000000000000000..9825e3bd15ec87dc6bc9c31be4b2f11422fcda13
--- /dev/null
+++ b/mmdet/core/bbox_ops/sampling.py
@@ -0,0 +1,255 @@
+import numpy as np
+import torch
+
+from .geometry import bbox_overlaps
+
+
+def random_choice(gallery, num):
+    assert len(gallery) >= num
+    if isinstance(gallery, list):
+        gallery = np.array(gallery)
+    cands = np.arange(len(gallery))
+    np.random.shuffle(cands)
+    rand_inds = cands[:num]
+    if not isinstance(gallery, np.ndarray):
+        rand_inds = torch.from_numpy(rand_inds).long()
+        if gallery.is_cuda:
+            rand_inds = rand_inds.cuda(gallery.get_device())
+    return gallery[rand_inds]
+
+
+def bbox_assign(proposals,
+                gt_bboxes,
+                gt_crowd_bboxes=None,
+                gt_labels=None,
+                pos_iou_thr=0.5,
+                neg_iou_thr=0.5,
+                min_pos_iou=.0,
+                crowd_thr=-1):
+    """Assign a corresponding gt bbox or background to each proposal/anchor.
+    This function assigns a gt bbox to every proposal; each proposal will be
+    assigned -1, 0, or a positive number. -1 means don't care, 0 means
+    negative sample, and a positive number is the 1-based index of the assigned gt.
+ If gt_crowd_bboxes is not None, proposals which have iof(intersection over foreground) + with crowd bboxes over crowd_thr will be ignored + Args: + proposals(Tensor): proposals or RPN anchors, shape (n, 4) + gt_bboxes(Tensor): shape (k, 4) + gt_crowd_bboxes(Tensor): shape(m, 4) + gt_labels(Tensor, optional): shape (k, ) + pos_iou_thr(float): iou threshold for positive bboxes + neg_iou_thr(float or tuple): iou threshold for negative bboxes + min_pos_iou(float): minimum iou for a bbox to be considered as a positive bbox, + for RPN, it is usually set as 0, for Fast R-CNN, + it is usually set as pos_iou_thr + crowd_thr: ignore proposals which have iof(intersection over foreground) with + crowd bboxes over crowd_thr + Returns: + tuple: (assigned_gt_inds, argmax_overlaps, max_overlaps), shape (n, ) + """ + + # calculate overlaps between the proposals and the gt boxes + overlaps = bbox_overlaps(proposals, gt_bboxes) + if overlaps.numel() == 0: + raise ValueError('No gt bbox or proposals') + + # ignore proposals according to crowd bboxes + if (crowd_thr > 0) and (gt_crowd_bboxes is + not None) and (gt_crowd_bboxes.numel() > 0): + crowd_overlaps = bbox_overlaps(proposals, gt_crowd_bboxes, mode='iof') + crowd_max_overlaps, _ = crowd_overlaps.max(dim=1) + crowd_bboxes_inds = torch.nonzero( + crowd_max_overlaps > crowd_thr).long() + if crowd_bboxes_inds.numel() > 0: + overlaps[crowd_bboxes_inds, :] = -1 + + return bbox_assign_via_overlaps(overlaps, gt_labels, pos_iou_thr, + neg_iou_thr, min_pos_iou) + + +def bbox_assign_via_overlaps(overlaps, + gt_labels=None, + pos_iou_thr=0.5, + neg_iou_thr=0.5, + min_pos_iou=.0): + """Assign a corresponding gt bbox or background to each proposal/anchor + This function assign a gt bbox to every proposal, each proposals will be + assigned with -1, 0, or a positive number. -1 means don't care, 0 means + negative sample, positive number is the index (1-based) of assigned gt. + The assignment is done in following steps, the order matters: + 1. assign every anchor to -1 + 2. assign proposals whose iou with all gts < neg_iou_thr to 0 + 3. for each anchor, if the iou with its nearest gt >= pos_iou_thr, + assign it to that bbox + 4. for each gt bbox, assign its nearest proposals(may be more than one) + to itself + Args: + overlaps(Tensor): overlaps between n proposals and k gt_bboxes, shape(n, k) + gt_labels(Tensor, optional): shape (k, ) + pos_iou_thr(float): iou threshold for positive bboxes + neg_iou_thr(float or tuple): iou threshold for negative bboxes + min_pos_iou(float): minimum iou for a bbox to be considered as a positive bbox, + for RPN, it is usually set as 0, for Fast R-CNN, + it is usually set as pos_iou_thr + Returns: + tuple: (assigned_gt_inds, argmax_overlaps, max_overlaps), shape (n, ) + """ + num_bboxes, num_gts = overlaps.size(0), overlaps.size(1) + # 1. assign -1 by default + assigned_gt_inds = overlaps.new(num_bboxes).long().fill_(-1) + + if overlaps.numel() == 0: + raise ValueError('No gt bbox or proposals') + + assert overlaps.size() == (num_bboxes, num_gts) + # for each anchor, which gt best overlaps with it + # for each anchor, the max iou of all gts + max_overlaps, argmax_overlaps = overlaps.max(dim=1) + # for each gt, which anchor best overlaps with it + # for each gt, the max iou of all proposals + gt_max_overlaps, gt_argmax_overlaps = overlaps.max(dim=0) + + # 2. 
assign negative: below + if isinstance(neg_iou_thr, float): + assigned_gt_inds[(max_overlaps >= 0) + & (max_overlaps < neg_iou_thr)] = 0 + elif isinstance(neg_iou_thr, tuple): + assert len(neg_iou_thr) == 2 + assigned_gt_inds[(max_overlaps >= neg_iou_thr[0]) + & (max_overlaps < neg_iou_thr[1])] = 0 + + # 3. assign positive: above positive IoU threshold + pos_inds = max_overlaps >= pos_iou_thr + assigned_gt_inds[pos_inds] = argmax_overlaps[pos_inds] + 1 + + # 4. assign fg: for each gt, proposals with highest IoU + for i in range(num_gts): + if gt_max_overlaps[i] >= min_pos_iou: + assigned_gt_inds[overlaps[:, i] == gt_max_overlaps[i]] = i + 1 + + if gt_labels is None: + return assigned_gt_inds, argmax_overlaps, max_overlaps + else: + assigned_labels = assigned_gt_inds.new(num_bboxes).fill_(0) + pos_inds = torch.nonzero(assigned_gt_inds > 0).squeeze() + if pos_inds.numel() > 0: + assigned_labels[pos_inds] = gt_labels[assigned_gt_inds[pos_inds] - + 1] + return assigned_gt_inds, assigned_labels, argmax_overlaps, max_overlaps + + +def sample_positives(assigned_gt_inds, num_expected, balance_sampling=True): + """Balance sampling for positive bboxes/anchors + 1. calculate average positive num for each gt: num_per_gt + 2. sample at most num_per_gt positives for each gt + 3. random sampling from rest anchors if not enough fg + """ + pos_inds = torch.nonzero(assigned_gt_inds > 0) + if pos_inds.numel() != 0: + pos_inds = pos_inds.squeeze(1) + if pos_inds.numel() <= num_expected: + return pos_inds + elif not balance_sampling: + return random_choice(pos_inds, num_expected) + else: + unique_gt_inds = torch.unique(assigned_gt_inds[pos_inds].cpu()) + num_gts = len(unique_gt_inds) + num_per_gt = int(round(num_expected / float(num_gts)) + 1) + sampled_inds = [] + for i in unique_gt_inds: + inds = torch.nonzero(assigned_gt_inds == i.item()) + if inds.numel() != 0: + inds = inds.squeeze(1) + else: + continue + if len(inds) > num_per_gt: + inds = random_choice(inds, num_per_gt) + sampled_inds.append(inds) + sampled_inds = torch.cat(sampled_inds) + if len(sampled_inds) < num_expected: + num_extra = num_expected - len(sampled_inds) + extra_inds = np.array( + list(set(pos_inds.cpu()) - set(sampled_inds.cpu()))) + if len(extra_inds) > num_extra: + extra_inds = random_choice(extra_inds, num_extra) + extra_inds = torch.from_numpy(extra_inds).to( + assigned_gt_inds.device).long() + sampled_inds = torch.cat([sampled_inds, extra_inds]) + elif len(sampled_inds) > num_expected: + sampled_inds = random_choice(sampled_inds, num_expected) + return sampled_inds + + +def sample_negatives(assigned_gt_inds, + num_expected, + max_overlaps=None, + balance_thr=0, + hard_fraction=0.5): + """Balance sampling for negative bboxes/anchors + negative samples are split into 2 set: hard(balance_thr <= iou < neg_iou_thr) + and easy(iou < balance_thr), around equal number of bg are sampled + from each set. 
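+
+    Example (illustrative; the IoU values are made up):
+        >>> assigned = torch.LongTensor([0, 0, 0, 1, 0])
+        >>> overlaps = torch.Tensor([0.10, 0.20, 0.40, 0.70, 0.05])
+        >>> # one hard negative (0.40 >= balance_thr) plus one easy negative
+        >>> sample_negatives(assigned, 2, overlaps, balance_thr=0.3).numel()
+        2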
+ """ + neg_inds = torch.nonzero(assigned_gt_inds == 0) + if neg_inds.numel() != 0: + neg_inds = neg_inds.squeeze(1) + if len(neg_inds) <= num_expected: + return neg_inds + elif balance_thr <= 0: + # uniform sampling among all negative samples + return random_choice(neg_inds, num_expected) + else: + assert max_overlaps is not None + max_overlaps = max_overlaps.cpu().numpy() + # balance sampling for negative samples + neg_set = set(neg_inds.cpu().numpy()) + easy_set = set( + np.where( + np.logical_and(max_overlaps >= 0, + max_overlaps < balance_thr))[0]) + hard_set = set(np.where(max_overlaps >= balance_thr)[0]) + easy_neg_inds = list(easy_set & neg_set) + hard_neg_inds = list(hard_set & neg_set) + + num_expected_hard = int(num_expected * hard_fraction) + if len(hard_neg_inds) > num_expected_hard: + sampled_hard_inds = random_choice(hard_neg_inds, num_expected_hard) + else: + sampled_hard_inds = np.array(hard_neg_inds, dtype=np.int) + num_expected_easy = num_expected - len(sampled_hard_inds) + if len(easy_neg_inds) > num_expected_easy: + sampled_easy_inds = random_choice(easy_neg_inds, num_expected_easy) + else: + sampled_easy_inds = np.array(easy_neg_inds, dtype=np.int) + sampled_inds = np.concatenate((sampled_easy_inds, sampled_hard_inds)) + if len(sampled_inds) < num_expected: + num_extra = num_expected - len(sampled_inds) + extra_inds = np.array(list(neg_set - set(sampled_inds))) + if len(extra_inds) > num_extra: + extra_inds = random_choice(extra_inds, num_extra) + sampled_inds = np.concatenate((sampled_inds, extra_inds)) + sampled_inds = torch.from_numpy(sampled_inds).long().to( + assigned_gt_inds.device) + return sampled_inds + + +def bbox_sampling(assigned_gt_inds, + num_expected, + pos_fraction, + neg_pos_ub, + pos_balance_sampling=True, + max_overlaps=None, + neg_balance_thr=0, + neg_hard_fraction=0.5): + num_expected_pos = int(num_expected * pos_fraction) + pos_inds = sample_positives(assigned_gt_inds, num_expected_pos, + pos_balance_sampling) + num_sampled_pos = pos_inds.numel() + num_neg_max = int( + neg_pos_ub * + num_sampled_pos) if num_sampled_pos > 0 else int(neg_pos_ub) + num_expected_neg = min(num_neg_max, num_expected - num_sampled_pos) + neg_inds = sample_negatives(assigned_gt_inds, num_expected_neg, + max_overlaps, neg_balance_thr, + neg_hard_fraction) + return pos_inds, neg_inds diff --git a/mmdet/core/bbox_ops/transforms.py b/mmdet/core/bbox_ops/transforms.py new file mode 100644 index 0000000000000000000000000000000000000000..6f83a1dc56efdc214fe96c60b9a587a1cb81602b --- /dev/null +++ b/mmdet/core/bbox_ops/transforms.py @@ -0,0 +1,128 @@ +import mmcv +import numpy as np +import torch + + +def bbox_transform(proposals, gt, means=[0, 0, 0, 0], stds=[1, 1, 1, 1]): + assert proposals.size() == gt.size() + + proposals = proposals.float() + gt = gt.float() + px = (proposals[..., 0] + proposals[..., 2]) * 0.5 + py = (proposals[..., 1] + proposals[..., 3]) * 0.5 + pw = proposals[..., 2] - proposals[..., 0] + 1.0 + ph = proposals[..., 3] - proposals[..., 1] + 1.0 + + gx = (gt[..., 0] + gt[..., 2]) * 0.5 + gy = (gt[..., 1] + gt[..., 3]) * 0.5 + gw = gt[..., 2] - gt[..., 0] + 1.0 + gh = gt[..., 3] - gt[..., 1] + 1.0 + + dx = (gx - px) / pw + dy = (gy - py) / ph + dw = torch.log(gw / pw) + dh = torch.log(gh / ph) + deltas = torch.stack([dx, dy, dw, dh], dim=-1) + + means = deltas.new_tensor(means).unsqueeze(0) + stds = deltas.new_tensor(stds).unsqueeze(0) + deltas = deltas.sub_(means).div_(stds) + + return deltas + + +def bbox_transform_inv(rois, + deltas, + means=[0, 0, 0, 0], 
+ stds=[1, 1, 1, 1], + max_shape=None, + wh_ratio_clip=16 / 1000): + means = deltas.new_tensor(means).repeat(1, deltas.size(1) // 4) + stds = deltas.new_tensor(stds).repeat(1, deltas.size(1) // 4) + denorm_deltas = deltas * stds + means + dx = denorm_deltas[:, 0::4] + dy = denorm_deltas[:, 1::4] + dw = denorm_deltas[:, 2::4] + dh = denorm_deltas[:, 3::4] + max_ratio = np.abs(np.log(wh_ratio_clip)) + dw = dw.clamp(min=-max_ratio, max=max_ratio) + dh = dh.clamp(min=-max_ratio, max=max_ratio) + px = ((rois[:, 0] + rois[:, 2]) * 0.5).unsqueeze(1).expand_as(dx) + py = ((rois[:, 1] + rois[:, 3]) * 0.5).unsqueeze(1).expand_as(dy) + pw = (rois[:, 2] - rois[:, 0] + 1.0).unsqueeze(1).expand_as(dw) + ph = (rois[:, 3] - rois[:, 1] + 1.0).unsqueeze(1).expand_as(dh) + gw = pw * dw.exp() + gh = ph * dh.exp() + gx = torch.addcmul(px, 1, pw, dx) # gx = px + pw * dx + gy = torch.addcmul(py, 1, ph, dy) # gy = py + ph * dy + x1 = gx - gw * 0.5 + 0.5 + y1 = gy - gh * 0.5 + 0.5 + x2 = gx + gw * 0.5 - 0.5 + y2 = gy + gh * 0.5 - 0.5 + if max_shape is not None: + x1 = x1.clamp(min=0, max=max_shape[1] - 1) + y1 = y1.clamp(min=0, max=max_shape[0] - 1) + x2 = x2.clamp(min=0, max=max_shape[1] - 1) + y2 = y2.clamp(min=0, max=max_shape[0] - 1) + bboxes = torch.stack([x1, y1, x2, y2], dim=-1).view_as(deltas) + return bboxes + + +def bbox_flip(bboxes, img_shape): + """Flip bboxes horizontally + Args: + bboxes(Tensor): shape (..., 4*k) + img_shape(Tensor): image shape + """ + if isinstance(bboxes, torch.Tensor): + assert bboxes.shape[-1] % 4 == 0 + flipped = bboxes.clone() + flipped[:, 0::4] = img_shape[1] - bboxes[:, 2::4] - 1 + flipped[:, 2::4] = img_shape[1] - bboxes[:, 0::4] - 1 + return flipped + elif isinstance(bboxes, np.ndarray): + return mmcv.bbox_flip(bboxes, img_shape) + + +def bbox_mapping(bboxes, img_shape, flip): + """Map bboxes from the original image scale to testing scale""" + new_bboxes = bboxes * img_shape[-1] + if flip: + new_bboxes = bbox_flip(new_bboxes, img_shape) + return new_bboxes + + +def bbox_mapping_back(bboxes, img_shape, flip): + """Map bboxes from testing scale to original image scale""" + new_bboxes = bbox_flip(bboxes, img_shape) if flip else bboxes + new_bboxes = new_bboxes / img_shape[-1] + return new_bboxes + + +def bbox2roi(bbox_list): + """Convert a list of bboxes to roi format. 
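+
+    Example (illustrative):
+        >>> bbox_list = [torch.Tensor([[10., 10., 20., 20.]]),
+        ...              torch.Tensor([[30., 30., 40., 40.]])]
+        >>> bbox2roi(bbox_list)  # [[0, 10, 10, 20, 20], [1, 30, 30, 40, 40]]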
+ Args: + bbox_list (Tensor): a list of bboxes corresponding to a list of images + Returns: + Tensor: shape (n, 5), [batch_ind, x1, y1, x2, y2] + """ + rois_list = [] + for img_id, bboxes in enumerate(bbox_list): + if bboxes.size(0) > 0: + img_inds = bboxes.new_full((bboxes.size(0), 1), img_id) + rois = torch.cat([img_inds, bboxes[:, :4]], dim=-1) + else: + rois = bboxes.new_zeros((0, 5)) + rois_list.append(rois) + rois = torch.cat(rois_list, 0) + return rois + + +def roi2bbox(rois): + bbox_list = [] + img_ids = torch.unique(rois[:, 0].cpu(), sorted=True) + for img_id in img_ids: + inds = (rois[:, 0] == img_id.item()) + bbox = rois[inds, 1:] + bbox_list.append(bbox) + return bbox_list diff --git a/mmdet/core/eval/__init__.py b/mmdet/core/eval/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..fe4893a0af68ffff2633fcd702f7cf73cce93e76 --- /dev/null +++ b/mmdet/core/eval/__init__.py @@ -0,0 +1,13 @@ +from .class_names import (voc_classes, imagenet_det_classes, + imagenet_vid_classes, coco_classes, dataset_aliases, + get_classes) +from .mean_ap import average_precision, eval_map, print_map_summary +from .recall import (eval_recalls, print_recall_summary, plot_num_recall, + plot_iou_recall) + +__all__ = [ + 'voc_classes', 'imagenet_det_classes', 'imagenet_vid_classes', + 'coco_classes', 'dataset_aliases', 'get_classes', 'average_precision', + 'eval_map', 'print_map_summary', 'eval_recalls', 'print_recall_summary', + 'plot_num_recall', 'plot_iou_recall' +] diff --git a/mmdet/core/eval/bbox_overlaps.py b/mmdet/core/eval/bbox_overlaps.py new file mode 100644 index 0000000000000000000000000000000000000000..ad4c70523fdaa5d89a2b80ada559e1822d0ecd22 --- /dev/null +++ b/mmdet/core/eval/bbox_overlaps.py @@ -0,0 +1,49 @@ +import numpy as np + + +def bbox_overlaps(bboxes1, bboxes2, mode='iou'): + """Calculate the ious between each bbox of bboxes1 and bboxes2. 
+ + Args: + bboxes1(ndarray): shape (n, 4) + bboxes2(ndarray): shape (k, 4) + mode(str): iou (intersection over union) or iof (intersection + over foreground) + + Returns: + ious(ndarray): shape (n, k) + """ + + assert mode in ['iou', 'iof'] + + bboxes1 = bboxes1.astype(np.float32) + bboxes2 = bboxes2.astype(np.float32) + rows = bboxes1.shape[0] + cols = bboxes2.shape[0] + ious = np.zeros((rows, cols), dtype=np.float32) + if rows * cols == 0: + return ious + exchange = False + if bboxes1.shape[0] > bboxes2.shape[0]: + bboxes1, bboxes2 = bboxes2, bboxes1 + ious = np.zeros((cols, rows), dtype=np.float32) + exchange = True + area1 = (bboxes1[:, 2] - bboxes1[:, 0] + 1) * ( + bboxes1[:, 3] - bboxes1[:, 1] + 1) + area2 = (bboxes2[:, 2] - bboxes2[:, 0] + 1) * ( + bboxes2[:, 3] - bboxes2[:, 1] + 1) + for i in range(bboxes1.shape[0]): + x_start = np.maximum(bboxes1[i, 0], bboxes2[:, 0]) + y_start = np.maximum(bboxes1[i, 1], bboxes2[:, 1]) + x_end = np.minimum(bboxes1[i, 2], bboxes2[:, 2]) + y_end = np.minimum(bboxes1[i, 3], bboxes2[:, 3]) + overlap = np.maximum(x_end - x_start + 1, 0) * np.maximum( + y_end - y_start + 1, 0) + if mode == 'iou': + union = area1[i] + area2 - overlap + else: + union = area1[i] if not exchange else area2 + ious[i, :] = overlap / union + if exchange: + ious = ious.T + return ious diff --git a/mmdet/core/eval/class_names.py b/mmdet/core/eval/class_names.py new file mode 100644 index 0000000000000000000000000000000000000000..b68e9135dca366e93217e0c06959bea990ffda5e --- /dev/null +++ b/mmdet/core/eval/class_names.py @@ -0,0 +1,103 @@ +import mmcv + + +def voc_classes(): + return [ + 'aeroplane', 'bicycle', 'bird', 'boat', 'bottle', 'bus', 'car', 'cat', + 'chair', 'cow', 'diningtable', 'dog', 'horse', 'motorbike', 'person', + 'pottedplant', 'sheep', 'sofa', 'train', 'tvmonitor' + ] + + +def imagenet_det_classes(): + return [ + 'accordion', 'airplane', 'ant', 'antelope', 'apple', 'armadillo', + 'artichoke', 'axe', 'baby_bed', 'backpack', 'bagel', 'balance_beam', + 'banana', 'band_aid', 'banjo', 'baseball', 'basketball', 'bathing_cap', + 'beaker', 'bear', 'bee', 'bell_pepper', 'bench', 'bicycle', 'binder', + 'bird', 'bookshelf', 'bow_tie', 'bow', 'bowl', 'brassiere', 'burrito', + 'bus', 'butterfly', 'camel', 'can_opener', 'car', 'cart', 'cattle', + 'cello', 'centipede', 'chain_saw', 'chair', 'chime', 'cocktail_shaker', + 'coffee_maker', 'computer_keyboard', 'computer_mouse', 'corkscrew', + 'cream', 'croquet_ball', 'crutch', 'cucumber', 'cup_or_mug', 'diaper', + 'digital_clock', 'dishwasher', 'dog', 'domestic_cat', 'dragonfly', + 'drum', 'dumbbell', 'electric_fan', 'elephant', 'face_powder', 'fig', + 'filing_cabinet', 'flower_pot', 'flute', 'fox', 'french_horn', 'frog', + 'frying_pan', 'giant_panda', 'goldfish', 'golf_ball', 'golfcart', + 'guacamole', 'guitar', 'hair_dryer', 'hair_spray', 'hamburger', + 'hammer', 'hamster', 'harmonica', 'harp', 'hat_with_a_wide_brim', + 'head_cabbage', 'helmet', 'hippopotamus', 'horizontal_bar', 'horse', + 'hotdog', 'iPod', 'isopod', 'jellyfish', 'koala_bear', 'ladle', + 'ladybug', 'lamp', 'laptop', 'lemon', 'lion', 'lipstick', 'lizard', + 'lobster', 'maillot', 'maraca', 'microphone', 'microwave', 'milk_can', + 'miniskirt', 'monkey', 'motorcycle', 'mushroom', 'nail', 'neck_brace', + 'oboe', 'orange', 'otter', 'pencil_box', 'pencil_sharpener', 'perfume', + 'person', 'piano', 'pineapple', 'ping-pong_ball', 'pitcher', 'pizza', + 'plastic_bag', 'plate_rack', 'pomegranate', 'popsicle', 'porcupine', + 'power_drill', 'pretzel', 'printer', 'puck', 
'punching_bag', 'purse',
+        'rabbit', 'racket', 'ray', 'red_panda', 'refrigerator',
+        'remote_control', 'rubber_eraser', 'rugby_ball', 'ruler',
+        'salt_or_pepper_shaker', 'saxophone', 'scorpion', 'screwdriver',
+        'seal', 'sheep', 'ski', 'skunk', 'snail', 'snake', 'snowmobile',
+        'snowplow', 'soap_dispenser', 'soccer_ball', 'sofa', 'spatula',
+        'squirrel', 'starfish', 'stethoscope', 'stove', 'strainer',
+        'strawberry', 'stretcher', 'sunglasses', 'swimming_trunks', 'swine',
+        'syringe', 'table', 'tape_player', 'tennis_ball', 'tick', 'tie',
+        'tiger', 'toaster', 'traffic_light', 'train', 'trombone', 'trumpet',
+        'turtle', 'tv_or_monitor', 'unicycle', 'vacuum', 'violin',
+        'volleyball', 'waffle_iron', 'washer', 'water_bottle', 'watercraft',
+        'whale', 'wine_bottle', 'zebra'
+    ]
+
+
+def imagenet_vid_classes():
+    return [
+        'airplane', 'antelope', 'bear', 'bicycle', 'bird', 'bus', 'car',
+        'cattle', 'dog', 'domestic_cat', 'elephant', 'fox', 'giant_panda',
+        'hamster', 'horse', 'lion', 'lizard', 'monkey', 'motorcycle', 'rabbit',
+        'red_panda', 'sheep', 'snake', 'squirrel', 'tiger', 'train', 'turtle',
+        'watercraft', 'whale', 'zebra'
+    ]
+
+
+def coco_classes():
+    return [
+        'person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train',
+        'truck', 'boat', 'traffic light', 'fire hydrant', 'stop sign',
+        'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep',
+        'cow', 'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella',
+        'handbag', 'tie', 'suitcase', 'frisbee', 'skis', 'snowboard',
+        'sports ball', 'kite', 'baseball bat', 'baseball glove', 'skateboard',
+        'surfboard', 'tennis racket', 'bottle', 'wine glass', 'cup', 'fork',
+        'knife', 'spoon', 'bowl', 'banana', 'apple', 'sandwich', 'orange',
+        'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair',
+        'couch', 'potted plant', 'bed', 'dining table', 'toilet', 'tv',
+        'laptop', 'mouse', 'remote', 'keyboard', 'cell phone', 'microwave',
+        'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase',
+        'scissors', 'teddy bear', 'hair drier', 'toothbrush'
+    ]
+
+
+dataset_aliases = {
+    'voc': ['voc', 'pascal_voc', 'voc07', 'voc12'],
+    'imagenet_det': ['det', 'imagenet_det', 'ilsvrc_det'],
+    'imagenet_vid': ['vid', 'imagenet_vid', 'ilsvrc_vid'],
+    'coco': ['coco', 'mscoco', 'ms_coco']
+}
+
+
+def get_classes(dataset):
+    """Get class names of a dataset."""
+    alias2name = {}
+    for name, aliases in dataset_aliases.items():
+        for alias in aliases:
+            alias2name[alias] = name
+
+    if mmcv.is_str(dataset):
+        if dataset in alias2name:
+            labels = eval(alias2name[dataset] + '_classes()')
+        else:
+            raise ValueError('Unrecognized dataset: {}'.format(dataset))
+    else:
+        raise TypeError('dataset must be a str, but got {}'.format(type(dataset)))
+    return labels
diff --git a/mmdet/core/eval/mean_ap.py b/mmdet/core/eval/mean_ap.py
new file mode 100644
index 0000000000000000000000000000000000000000..9a33f7640409993db3e11cedd587f1cd14c38aa5
--- /dev/null
+++ b/mmdet/core/eval/mean_ap.py
@@ -0,0 +1,372 @@
+import numpy as np
+from terminaltables import AsciiTable
+
+from .bbox_overlaps import bbox_overlaps
+from .class_names import get_classes
+
+
+def average_precision(recalls, precisions, mode='area'):
+    """Calculate average precision (for single or multiple scales).
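+
+    Example (illustrative numbers, 'area' mode):
+        >>> recalls = np.array([0.2, 0.5, 1.0])
+        >>> precisions = np.array([1.0, 0.8, 0.5])
+        >>> # area under the interpolated PR curve:
+        >>> # 0.2 * 1.0 + 0.3 * 0.8 + 0.5 * 0.5 = 0.69
+        >>> average_precision(recalls, precisions)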
+ + Args: + recalls(ndarray): shape (num_scales, num_dets) or (num_dets, ) + precisions(ndarray): shape (num_scales, num_dets) or (num_dets, ) + mode(str): 'area' or '11points', 'area' means calculating the area + under precision-recall curve, '11points' means calculating + the average precision of recalls at [0, 0.1, ..., 1] + + Returns: + float or ndarray: calculated average precision + """ + no_scale = False + if recalls.ndim == 1: + no_scale = True + recalls = recalls[np.newaxis, :] + precisions = precisions[np.newaxis, :] + assert recalls.shape == precisions.shape and recalls.ndim == 2 + num_scales = recalls.shape[0] + ap = np.zeros(num_scales, dtype=np.float32) + if mode == 'area': + zeros = np.zeros((num_scales, 1), dtype=recalls.dtype) + ones = np.ones((num_scales, 1), dtype=recalls.dtype) + mrec = np.hstack((zeros, recalls, ones)) + mpre = np.hstack((zeros, precisions, zeros)) + for i in range(mpre.shape[1] - 1, 0, -1): + mpre[:, i - 1] = np.maximum(mpre[:, i - 1], mpre[:, i]) + for i in range(num_scales): + ind = np.where(mrec[i, 1:] != mrec[i, :-1])[0] + ap[i] = np.sum( + (mrec[i, ind + 1] - mrec[i, ind]) * mpre[i, ind + 1]) + elif mode == '11points': + for i in range(num_scales): + for thr in np.arange(0, 1 + 1e-3, 0.1): + precs = precisions[i, recalls[i, :] >= thr] + prec = precs.max() if precs.size > 0 else 0 + ap[i] += prec + ap /= 11 + else: + raise ValueError( + 'Unrecognized mode, only "area" and "11points" are supported') + if no_scale: + ap = ap[0] + return ap + + +def tpfp_imagenet(det_bboxes, + gt_bboxes, + gt_ignore, + default_iou_thr, + area_ranges=None): + """Check if detected bboxes are true positive or false positive. + + Args: + det_bbox(ndarray): the detected bbox + gt_bboxes(ndarray): ground truth bboxes of this image + gt_ignore(ndarray): indicate if gts are ignored for evaluation or not + default_iou_thr(float): the iou thresholds for medium and large bboxes + area_ranges(list or None): gt bbox area ranges + + Returns: + tuple: two arrays (tp, fp) whose elements are 0 and 1 + """ + num_dets = det_bboxes.shape[0] + num_gts = gt_bboxes.shape[0] + if area_ranges is None: + area_ranges = [(None, None)] + num_scales = len(area_ranges) + # tp and fp are of shape (num_scales, num_gts), each row is tp or fp + # of a certain scale. + tp = np.zeros((num_scales, num_dets), dtype=np.float32) + fp = np.zeros((num_scales, num_dets), dtype=np.float32) + if gt_bboxes.shape[0] == 0: + if area_ranges == [(None, None)]: + fp[...] 
= 1 + else: + det_areas = (det_bboxes[:, 2] - det_bboxes[:, 0] + 1) * ( + det_bboxes[:, 3] - det_bboxes[:, 1] + 1) + for i, (min_area, max_area) in enumerate(area_ranges): + fp[i, (det_areas >= min_area) & (det_areas < max_area)] = 1 + return tp, fp + ious = bbox_overlaps(det_bboxes, gt_bboxes - 1) + gt_w = gt_bboxes[:, 2] - gt_bboxes[:, 0] + 1 + gt_h = gt_bboxes[:, 3] - gt_bboxes[:, 1] + 1 + iou_thrs = np.minimum((gt_w * gt_h) / ((gt_w + 10.0) * (gt_h + 10.0)), + default_iou_thr) + # sort all detections by scores in descending order + sort_inds = np.argsort(-det_bboxes[:, -1]) + for k, (min_area, max_area) in enumerate(area_ranges): + gt_covered = np.zeros(num_gts, dtype=bool) + # if no area range is specified, gt_area_ignore is all False + if min_area is None: + gt_area_ignore = np.zeros_like(gt_ignore, dtype=bool) + else: + gt_areas = gt_w * gt_h + gt_area_ignore = (gt_areas < min_area) | (gt_areas >= max_area) + for i in sort_inds: + max_iou = -1 + matched_gt = -1 + # find best overlapped available gt + for j in range(num_gts): + # different from PASCAL VOC: allow finding other gts if the + # best overlaped ones are already matched by other det bboxes + if gt_covered[j]: + continue + elif ious[i, j] >= iou_thrs[j] and ious[i, j] > max_iou: + max_iou = ious[i, j] + matched_gt = j + # there are 4 cases for a det bbox: + # 1. this det bbox matches a gt, tp = 1, fp = 0 + # 2. this det bbox matches an ignored gt, tp = 0, fp = 0 + # 3. this det bbox matches no gt and within area range, tp = 0, fp = 1 + # 4. this det bbox matches no gt but is beyond area range, tp = 0, fp = 0 + if matched_gt >= 0: + gt_covered[matched_gt] = 1 + if not (gt_ignore[matched_gt] or gt_area_ignore[matched_gt]): + tp[k, i] = 1 + elif min_area is None: + fp[k, i] = 1 + else: + bbox = det_bboxes[i, :4] + area = (bbox[2] - bbox[0] + 1) * (bbox[3] - bbox[1] + 1) + if area >= min_area and area < max_area: + fp[k, i] = 1 + return tp, fp + + +def tpfp_default(det_bboxes, gt_bboxes, gt_ignore, iou_thr, area_ranges=None): + """Check if detected bboxes are true positive or false positive. + + Args: + det_bbox(ndarray): the detected bbox + gt_bboxes(ndarray): ground truth bboxes of this image + gt_ignore(ndarray): indicate if gts are ignored for evaluation or not + iou_thr(float): the iou thresholds + + Returns: + tuple: (tp, fp), two arrays whose elements are 0 and 1 + """ + num_dets = det_bboxes.shape[0] + num_gts = gt_bboxes.shape[0] + if area_ranges is None: + area_ranges = [(None, None)] + num_scales = len(area_ranges) + # tp and fp are of shape (num_scales, num_gts), each row is tp or fp of + # a certain scale + tp = np.zeros((num_scales, num_dets), dtype=np.float32) + fp = np.zeros((num_scales, num_dets), dtype=np.float32) + # if there is no gt bboxes in this image, then all det bboxes + # within area range are false positives + if gt_bboxes.shape[0] == 0: + if area_ranges == [(None, None)]: + fp[...] 
= 1 + else: + det_areas = (det_bboxes[:, 2] - det_bboxes[:, 0] + 1) * ( + det_bboxes[:, 3] - det_bboxes[:, 1] + 1) + for i, (min_area, max_area) in enumerate(area_ranges): + fp[i, (det_areas >= min_area) & (det_areas < max_area)] = 1 + return tp, fp + ious = bbox_overlaps(det_bboxes, gt_bboxes) + ious_max = ious.max(axis=1) + ious_argmax = ious.argmax(axis=1) + sort_inds = np.argsort(-det_bboxes[:, -1]) + for k, (min_area, max_area) in enumerate(area_ranges): + gt_covered = np.zeros(num_gts, dtype=bool) + # if no area range is specified, gt_area_ignore is all False + if min_area is None: + gt_area_ignore = np.zeros_like(gt_ignore, dtype=bool) + else: + gt_areas = (gt_bboxes[:, 2] - gt_bboxes[:, 0] + 1) * ( + gt_bboxes[:, 3] - gt_bboxes[:, 1] + 1) + gt_area_ignore = (gt_areas < min_area) | (gt_areas >= max_area) + for i in sort_inds: + if ious_max[i] >= iou_thr: + matched_gt = ious_argmax[i] + if not (gt_ignore[matched_gt] or gt_area_ignore[matched_gt]): + if not gt_covered[matched_gt]: + gt_covered[matched_gt] = True + tp[k, i] = 1 + else: + fp[k, i] = 1 + # otherwise ignore this detected bbox, tp = 0, fp = 0 + elif min_area is None: + fp[k, i] = 1 + else: + bbox = det_bboxes[i, :4] + area = (bbox[2] - bbox[0] + 1) * (bbox[3] - bbox[1] + 1) + if area >= min_area and area < max_area: + fp[k, i] = 1 + return tp, fp + + +def get_cls_results(det_results, gt_bboxes, gt_labels, gt_ignore, class_id): + """Get det results and gt information of a certain class.""" + cls_dets = [det[class_id] + for det in det_results] # det bboxes of this class + cls_gts = [] # gt bboxes of this class + cls_gt_ignore = [] + for j in range(len(gt_bboxes)): + gt_bbox = gt_bboxes[j] + cls_inds = (gt_labels[j] == class_id + 1) + cls_gt = gt_bbox[cls_inds, :] if gt_bbox.shape[0] > 0 else gt_bbox + cls_gts.append(cls_gt) + if gt_ignore is None: + cls_gt_ignore.append(np.zeros(cls_gt.shape[0], dtype=np.int32)) + else: + cls_gt_ignore.append(gt_ignore[j][cls_inds]) + return cls_dets, cls_gts, cls_gt_ignore + + +def eval_map(det_results, + gt_bboxes, + gt_labels, + gt_ignore=None, + scale_ranges=None, + iou_thr=0.5, + dataset=None, + print_summary=True): + """Evaluate mAP of a dataset. + + Args: + det_results(list): a list of list, [[cls1_det, cls2_det, ...], ...] + gt_bboxes(list): ground truth bboxes of each image, a list of K*4 array + gt_labels(list): ground truth labels of each image, a list of K array + gt_ignore(list): gt ignore indicators of each image, a list of K array + scale_ranges(list, optional): [(min1, max1), (min2, max2), ...] + iou_thr(float): IoU threshold + dataset(None or str): dataset name, there are minor differences in + metrics for different datsets, e.g. "voc07", "imagenet_det", etc. 
+ print_summary(bool): whether to print the mAP summary + + Returns: + tuple: (mAP, [dict, dict, ...]) + """ + assert len(det_results) == len(gt_bboxes) == len(gt_labels) + if gt_ignore is not None: + assert len(gt_ignore) == len(gt_labels) + for i in range(len(gt_ignore)): + assert len(gt_labels[i]) == len(gt_ignore[i]) + area_ranges = ([(rg[0]**2, rg[1]**2) for rg in scale_ranges] + if scale_ranges is not None else None) + num_scales = len(scale_ranges) if scale_ranges is not None else 1 + eval_results = [] + num_classes = len(det_results[0]) # positive class num + gt_labels = [ + label if label.ndim == 1 else label[:, 0] for label in gt_labels + ] + for i in range(num_classes): + # get gt and det bboxes of this class + cls_dets, cls_gts, cls_gt_ignore = get_cls_results( + det_results, gt_bboxes, gt_labels, gt_ignore, i) + # calculate tp and fp for each image + tpfp_func = (tpfp_imagenet + if dataset in ['det', 'vid'] else tpfp_default) + tpfp = [ + tpfp_func(cls_dets[j], cls_gts[j], cls_gt_ignore[j], iou_thr, + area_ranges) for j in range(len(cls_dets)) + ] + tp, fp = tuple(zip(*tpfp)) + # calculate gt number of each scale, gts ignored or beyond scale are not counted + num_gts = np.zeros(num_scales, dtype=int) + for j, bbox in enumerate(cls_gts): + if area_ranges is None: + num_gts[0] += np.sum(np.logical_not(cls_gt_ignore[j])) + else: + gt_areas = (bbox[:, 2] - bbox[:, 0] + 1) * ( + bbox[:, 3] - bbox[:, 1] + 1) + for k, (min_area, max_area) in enumerate(area_ranges): + num_gts[k] += np.sum( + np.logical_not(cls_gt_ignore[j]) & + (gt_areas >= min_area) & (gt_areas < max_area)) + # sort all det bboxes by score, also sort tp and fp + cls_dets = np.vstack(cls_dets) + num_dets = cls_dets.shape[0] + sort_inds = np.argsort(-cls_dets[:, -1]) + tp = np.hstack(tp)[:, sort_inds] + fp = np.hstack(fp)[:, sort_inds] + # calculate recall and precision with tp and fp + tp = np.cumsum(tp, axis=1) + fp = np.cumsum(fp, axis=1) + eps = np.finfo(np.float32).eps + recalls = tp / np.maximum(num_gts[:, np.newaxis], eps) + precisions = tp / np.maximum((tp + fp), eps) + # calculate AP + if scale_ranges is None: + recalls = recalls[0, :] + precisions = precisions[0, :] + num_gts = num_gts.item() + mode = 'area' if dataset != 'voc07' else '11points' + ap = average_precision(recalls, precisions, mode) + eval_results.append({ + 'num_gts': num_gts, + 'num_dets': num_dets, + 'recall': recalls, + 'precision': precisions, + 'ap': ap + }) + if scale_ranges is not None: + # shape (num_classes, num_scales) + all_ap = np.vstack([cls_result['ap'] for cls_result in eval_results]) + all_num_gts = np.vstack( + [cls_result['num_gts'] for cls_result in eval_results]) + mean_ap = [ + all_ap[all_num_gts[:, i] > 0, i].mean() + if np.any(all_num_gts[:, i] > 0) else 0.0 + for i in range(num_scales) + ] + else: + aps = [] + for cls_result in eval_results: + if cls_result['num_gts'] > 0: + aps.append(cls_result['ap']) + mean_ap = np.array(aps).mean().item() if aps else 0.0 + if print_summary: + print_map_summary(mean_ap, eval_results, dataset) + + return mean_ap, eval_results + + +def print_map_summary(mean_ap, results, dataset=None): + """Print mAP and results of each class. + + Args: + mean_ap(float): calculated from `eval_map` + results(list): calculated from `eval_map` + dataset(None or str or list): dataset name. 
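+
+    Example (hypothetical usage; det_results, gt_bboxes and gt_labels are
+    placeholders for real data):
+        >>> mean_ap, cls_results = eval_map(
+        ...     det_results, gt_bboxes, gt_labels, print_summary=False)
+        >>> print_map_summary(mean_ap, cls_results, dataset='voc')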
+ """ + num_scales = len(results[0]['ap']) if isinstance(results[0]['ap'], + np.ndarray) else 1 + num_classes = len(results) + + recalls = np.zeros((num_scales, num_classes), dtype=np.float32) + precisions = np.zeros((num_scales, num_classes), dtype=np.float32) + aps = np.zeros((num_scales, num_classes), dtype=np.float32) + num_gts = np.zeros((num_scales, num_classes), dtype=int) + for i, cls_result in enumerate(results): + if cls_result['recall'].size > 0: + recalls[:, i] = np.array(cls_result['recall'], ndmin=2)[:, -1] + precisions[:, i] = np.array( + cls_result['precision'], ndmin=2)[:, -1] + aps[:, i] = cls_result['ap'] + num_gts[:, i] = cls_result['num_gts'] + + if dataset is None: + label_names = [str(i) for i in range(1, num_classes + 1)] + else: + label_names = get_classes(dataset) + + if not isinstance(mean_ap, list): + mean_ap = [mean_ap] + header = ['class', 'gts', 'dets', 'recall', 'precision', 'ap'] + for i in range(num_scales): + table_data = [header] + for j in range(num_classes): + row_data = [ + label_names[j], num_gts[i, j], results[j]['num_dets'], + '{:.3f}'.format(recalls[i, j]), '{:.3f}'.format( + precisions[i, j]), '{:.3f}'.format(aps[i, j]) + ] + table_data.append(row_data) + table_data.append(['mAP', '', '', '', '', '{:.3f}'.format(mean_ap[i])]) + table = AsciiTable(table_data) + table.inner_footing_row_border = True + print(table.table) diff --git a/mmdet/core/eval/recall.py b/mmdet/core/eval/recall.py new file mode 100644 index 0000000000000000000000000000000000000000..2a56f42fdef33341d4b9ec7a654832282b44a7c2 --- /dev/null +++ b/mmdet/core/eval/recall.py @@ -0,0 +1,185 @@ +import numpy as np +from terminaltables import AsciiTable + +from .bbox_overlaps import bbox_overlaps + + +def _recalls(all_ious, proposal_nums, thrs): + + img_num = all_ious.shape[0] + total_gt_num = sum([ious.shape[0] for ious in all_ious]) + + _ious = np.zeros((proposal_nums.size, total_gt_num), dtype=np.float32) + for k, proposal_num in enumerate(proposal_nums): + tmp_ious = np.zeros(0) + for i in range(img_num): + ious = all_ious[i][:, :proposal_num].copy() + gt_ious = np.zeros((ious.shape[0])) + if ious.size == 0: + tmp_ious = np.hstack((tmp_ious, gt_ious)) + continue + for j in range(ious.shape[0]): + gt_max_overlaps = ious.argmax(axis=1) + max_ious = ious[np.arange(0, ious.shape[0]), gt_max_overlaps] + gt_idx = max_ious.argmax() + gt_ious[j] = max_ious[gt_idx] + box_idx = gt_max_overlaps[gt_idx] + ious[gt_idx, :] = -1 + ious[:, box_idx] = -1 + tmp_ious = np.hstack((tmp_ious, gt_ious)) + _ious[k, :] = tmp_ious + + _ious = np.fliplr(np.sort(_ious, axis=1)) + recalls = np.zeros((proposal_nums.size, thrs.size)) + for i, thr in enumerate(thrs): + recalls[:, i] = (_ious >= thr).sum(axis=1) / float(total_gt_num) + + return recalls + + +def set_recall_param(proposal_nums, iou_thrs): + """Check proposal_nums and iou_thrs and set correct format. + """ + if isinstance(proposal_nums, list): + _proposal_nums = np.array(proposal_nums) + elif isinstance(proposal_nums, int): + _proposal_nums = np.array([proposal_nums]) + else: + _proposal_nums = proposal_nums + + if iou_thrs is None: + _iou_thrs = np.array([0.5]) + elif isinstance(iou_thrs, list): + _iou_thrs = np.array(iou_thrs) + elif isinstance(iou_thrs, float): + _iou_thrs = np.array([iou_thrs]) + else: + _iou_thrs = iou_thrs + + return _proposal_nums, _iou_thrs + + +def eval_recalls(gts, + proposals, + proposal_nums=None, + iou_thrs=None, + print_summary=True): + """Calculate recalls. 
+ + Args: + gts(list or ndarray): a list of arrays of shape (n, 4) + proposals(list or ndarray): a list of arrays of shape (k, 4) or (k, 5) + proposal_nums(int or list of int or ndarray): top N proposals + thrs(float or list or ndarray): iou thresholds + + Returns: + ndarray: recalls of different ious and proposal nums + """ + + img_num = len(gts) + assert img_num == len(proposals) + + proposal_nums, iou_thrs = set_recall_param(proposal_nums, iou_thrs) + + all_ious = [] + for i in range(img_num): + if proposals[i].ndim == 2 and proposals[i].shape[1] == 5: + scores = proposals[i][:, 4] + sort_idx = np.argsort(scores)[::-1] + img_proposal = proposals[i][sort_idx, :] + else: + img_proposal = proposals[i] + prop_num = min(img_proposal.shape[0], proposal_nums[-1]) + if gts[i] is None or gts[i].shape[0] == 0: + ious = np.zeros((0, img_proposal.shape[0]), dtype=np.float32) + else: + ious = bbox_overlaps(gts[i], img_proposal[:prop_num, :4]) + all_ious.append(ious) + all_ious = np.array(all_ious) + recalls = _recalls(all_ious, proposal_nums, iou_thrs) + if print_summary: + print_recall_summary(recalls, proposal_nums, iou_thrs) + return recalls + + +def print_recall_summary(recalls, + proposal_nums, + iou_thrs, + row_idxs=None, + col_idxs=None): + """Print recalls in a table. + + Args: + recalls(ndarray): calculated from `bbox_recalls` + proposal_nums(ndarray or list): top N proposals + iou_thrs(ndarray or list): iou thresholds + row_idxs(ndarray): which rows(proposal nums) to print + col_idxs(ndarray): which cols(iou thresholds) to print + """ + proposal_nums = np.array(proposal_nums, dtype=np.int32) + iou_thrs = np.array(iou_thrs) + if row_idxs is None: + row_idxs = np.arange(proposal_nums.size) + if col_idxs is None: + col_idxs = np.arange(iou_thrs.size) + row_header = [''] + iou_thrs[col_idxs].tolist() + table_data = [row_header] + for i, num in enumerate(proposal_nums[row_idxs]): + row = [ + '{:.3f}'.format(val) + for val in recalls[row_idxs[i], col_idxs].tolist() + ] + row.insert(0, num) + table_data.append(row) + table = AsciiTable(table_data) + print(table.table) + + +def plot_num_recall(recalls, proposal_nums): + """Plot Proposal_num-Recalls curve. + + Args: + recalls(ndarray or list): shape (k,) + proposal_nums(ndarray or list): same shape as `recalls` + """ + if isinstance(proposal_nums, np.ndarray): + _proposal_nums = proposal_nums.tolist() + else: + _proposal_nums = proposal_nums + if isinstance(recalls, np.ndarray): + _recalls = recalls.tolist() + else: + _recalls = recalls + + import matplotlib.pyplot as plt + f = plt.figure() + plt.plot([0] + _proposal_nums, [0] + _recalls) + plt.xlabel('Proposal num') + plt.ylabel('Recall') + plt.axis([0, proposal_nums.max(), 0, 1]) + f.show() + + +def plot_iou_recall(recalls, iou_thrs): + """Plot IoU-Recalls curve. 
+ + Args: + recalls(ndarray or list): shape (k,) + iou_thrs(ndarray or list): same shape as `recalls` + """ + if isinstance(iou_thrs, np.ndarray): + _iou_thrs = iou_thrs.tolist() + else: + _iou_thrs = iou_thrs + if isinstance(recalls, np.ndarray): + _recalls = recalls.tolist() + else: + _recalls = recalls + + import matplotlib.pyplot as plt + f = plt.figure() + plt.plot(_iou_thrs + [1.0], _recalls + [0.]) + plt.xlabel('IoU') + plt.ylabel('Recall') + plt.axis([iou_thrs.min(), 1, 0, 1]) + f.show() diff --git a/mmdet/core/hooks.py b/mmdet/core/hooks.py new file mode 100644 index 0000000000000000000000000000000000000000..3347639d51ac19d5072bcb0a2e76c7747d686c77 --- /dev/null +++ b/mmdet/core/hooks.py @@ -0,0 +1,246 @@ +import os +import os.path as osp +import shutil +import time + +import mmcv +import numpy as np +import torch +from mmcv.torchpack import Hook +from mmdet import collate, scatter +from pycocotools.cocoeval import COCOeval + +from .eval import eval_recalls + + +class EmptyCacheHook(Hook): + + def before_epoch(self, runner): + torch.cuda.empty_cache() + + def after_epoch(self, runner): + torch.cuda.empty_cache() + + +class DistEvalHook(Hook): + + def __init__(self, dataset, interval=1): + self.dataset = dataset + self.interval = interval + self.lock_dir = None + + def _barrier(self, rank, world_size): + """Due to some issues with `torch.distributed.barrier()`, we have to + implement this ugly barrier function. + """ + if rank == 0: + for i in range(1, world_size): + tmp = osp.join(self.lock_dir, '{}.pkl'.format(i)) + while not (osp.exists(tmp)): + time.sleep(1) + for i in range(1, world_size): + tmp = osp.join(self.lock_dir, '{}.pkl'.format(i)) + os.remove(tmp) + else: + tmp = osp.join(self.lock_dir, '{}.pkl'.format(rank)) + mmcv.dump([], tmp) + while osp.exists(tmp): + time.sleep(1) + + def before_run(self, runner): + self.lock_dir = osp.join(runner.work_dir, '.lock_map_hook') + if runner.rank == 0: + if osp.exists(self.lock_dir): + shutil.rmtree(self.lock_dir) + mmcv.mkdir_or_exist(self.lock_dir) + + def after_train_epoch(self, runner): + if not self.every_n_epochs(runner, self.interval): + return + runner.model.eval() + results = [None for _ in range(len(self.dataset))] + prog_bar = mmcv.ProgressBar(len(self.dataset)) + for idx in range(runner.rank, len(self.dataset), runner.world_size): + data = self.dataset[idx] + device_id = torch.cuda.current_device() + imgs_data = tuple( + scatter(collate([data], samples_per_gpu=1), [device_id])[0]) + + # compute output + with torch.no_grad(): + result = runner.model( + *imgs_data, + return_loss=False, + return_bboxes=True, + rescale=True) + results[idx] = result + + batch_size = runner.world_size + for _ in range(batch_size): + prog_bar.update() + + if runner.rank == 0: + print('\n') + self._barrier(runner.rank, runner.world_size) + for i in range(1, runner.world_size): + tmp_file = osp.join(runner.work_dir, 'temp_{}.pkl'.format(i)) + tmp_results = mmcv.load(tmp_file) + for idx in range(i, len(results), runner.world_size): + results[idx] = tmp_results[idx] + os.remove(tmp_file) + self.evaluate(runner, results) + else: + tmp_file = osp.join(runner.work_dir, + 'temp_{}.pkl'.format(runner.rank)) + mmcv.dump(results, tmp_file) + self._barrier(runner.rank, runner.world_size) + self._barrier(runner.rank, runner.world_size) + + def evaluate(self): + raise NotImplementedError + + +class CocoEvalMixin(object): + + def _xyxy2xywh(self, bbox): + _bbox = bbox.tolist() + return [ + _bbox[0], + _bbox[1], + _bbox[2] - _bbox[0] + 1, + _bbox[3] - _bbox[1] 
+ 1, + ] + + def det2json(self, dataset, results): + json_results = [] + for idx in range(len(dataset)): + img_id = dataset.img_ids[idx] + result = results[idx] + for label in range(len(result)): + bboxes = result[label] + for i in range(bboxes.shape[0]): + data = dict() + data['image_id'] = img_id + data['bbox'] = self._xyxy2xywh(bboxes[i]) + data['score'] = float(bboxes[i][4]) + data['category_id'] = dataset.cat_ids[label] + json_results.append(data) + return json_results + + def segm2json(self, dataset, results): + json_results = [] + for idx in range(len(dataset)): + img_id = dataset.img_ids[idx] + det, seg = results[idx] + for label in range(len(det)): + bboxes = det[label] + segms = seg[label] + for i in range(bboxes.shape[0]): + data = dict() + data['image_id'] = img_id + data['bbox'] = self._xyxy2xywh(bboxes[i]) + data['score'] = float(bboxes[i][4]) + data['category_id'] = dataset.cat_ids[label] + segms[i]['counts'] = segms[i]['counts'].decode() + data['segmentation'] = segms[i] + json_results.append(data) + return json_results + + def proposal2json(self, dataset, results): + json_results = [] + for idx in range(len(dataset)): + img_id = dataset.img_ids[idx] + bboxes = results[idx] + for i in range(bboxes.shape[0]): + data = dict() + data['image_id'] = img_id + data['bbox'] = self._xyxy2xywh(bboxes[i]) + data['score'] = float(bboxes[i][4]) + data['category_id'] = 1 + json_results.append(data) + return json_results + + def results2json(self, dataset, results, out_file): + if isinstance(results[0], list): + json_results = self.det2json(dataset, results) + elif isinstance(results[0], tuple): + json_results = self.segm2json(dataset, results) + elif isinstance(results[0], np.ndarray): + json_results = self.proposal2json(dataset, results) + else: + raise TypeError('invalid type of results') + mmcv.dump(json_results, out_file, file_format='json') + + +class DistEvalRecallHook(DistEvalHook): + + def __init__(self, + dataset, + proposal_nums=(100, 300, 1000), + iou_thrs=np.arange(0.5, 0.96, 0.05)): + super(DistEvalRecallHook, self).__init__(dataset) + self.proposal_nums = np.array(proposal_nums, dtype=np.int32) + self.iou_thrs = np.array(iou_thrs, dtype=np.float32) + + def evaluate(self, runner, results): + # official coco evaluation is too slow, here we use our own + # implementation, which may get slightly different results + gt_bboxes = [] + for i in range(len(self.dataset)): + img_id = self.dataset.img_ids[i] + ann_ids = self.dataset.coco.getAnnIds(imgIds=img_id) + ann_info = self.dataset.coco.loadAnns(ann_ids) + if len(ann_info) == 0: + gt_bboxes.append(np.zeros((0, 4))) + continue + bboxes = [] + for ann in ann_info: + if ann.get('ignore', False) or ann['iscrowd']: + continue + x1, y1, w, h = ann['bbox'] + bboxes.append([x1, y1, x1 + w - 1, y1 + h - 1]) + bboxes = np.array(bboxes, dtype=np.float32) + if bboxes.shape[0] == 0: + bboxes = np.zeros((0, 4)) + gt_bboxes.append(bboxes) + + recalls = eval_recalls( + gt_bboxes, + results, + self.proposal_nums, + self.iou_thrs, + print_summary=False) + ar = recalls.mean(axis=1) + for i, num in enumerate(self.proposal_nums): + runner.log_buffer.output['AR@{}'.format(num)] = ar[i] + runner.log_buffer.ready = True + + +class CocoDistEvalmAPHook(DistEvalHook, CocoEvalMixin): + + def evaluate(self, runner, results): + tmp_file = osp.join(runner.work_dir, 'temp_0.json') + self.results2json(self.dataset, results, tmp_file) + + res_types = ['bbox', 'segm'] if runner.model.with_mask else ['bbox'] + cocoGt = self.dataset.coco + cocoDt = 
cocoGt.loadRes(tmp_file) + imgIds = cocoGt.getImgIds() + for res_type in res_types: + iou_type = res_type + cocoEval = COCOeval(cocoGt, cocoDt, iou_type) + cocoEval.params.imgIds = imgIds + cocoEval.evaluate() + cocoEval.accumulate() + cocoEval.summarize() + field = '{}_mAP'.format(res_type) + runner.log_buffer.output[field] = cocoEval.stats[0] + runner.log_buffer.ready = True + os.remove(tmp_file) + + +class CocoDistCascadeEvalmAPHook(CocoDistEvalmAPHook): + + def evaluate(self, runner, results): + results = [res[-1] for res in results] + super(CocoDistCascadeEvalmAPHook, self).evaluate(runner, results) diff --git a/mmdet/core/mask_ops/__init__.py b/mmdet/core/mask_ops/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..25850cdc62ae69271f3788288d960b86ef179452 --- /dev/null +++ b/mmdet/core/mask_ops/__init__.py @@ -0,0 +1,10 @@ +from .segms import (flip_segms, polys_to_mask, mask_to_bbox, + polys_to_mask_wrt_box, polys_to_boxes, rle_mask_voting, + rle_mask_nms, rle_masks_to_boxes) +from .utils import split_combined_gt_polys + +__all__ = [ + 'flip_segms', 'polys_to_mask', 'mask_to_bbox', 'polys_to_mask_wrt_box', + 'polys_to_boxes', 'rle_mask_voting', 'rle_mask_nms', 'rle_masks_to_boxes', + 'split_combined_gt_polys' +] diff --git a/mmdet/core/mask_ops/segms.py b/mmdet/core/mask_ops/segms.py new file mode 100644 index 0000000000000000000000000000000000000000..b2ae6b69a1ff206b085799fa82527e1d17be0a4f --- /dev/null +++ b/mmdet/core/mask_ops/segms.py @@ -0,0 +1,271 @@ +# This file is copied from Detectron. + +# Copyright (c) 2017-present, Facebook, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +############################################################################## +"""Functions for interacting with segmentation masks in the COCO format. +The following terms are used in this module + mask: a binary mask encoded as a 2D numpy array + segm: a segmentation mask in one of the two COCO formats (polygon or RLE) + polygon: COCO's polygon format + RLE: COCO's run length encoding format +""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function +from __future__ import unicode_literals + +import numpy as np +import pycocotools.mask as mask_util + + +def flip_segms(segms, height, width): + """Left/right flip each mask in a list of masks.""" + + def _flip_poly(poly, width): + flipped_poly = np.array(poly) + flipped_poly[0::2] = width - np.array(poly[0::2]) - 1 + return flipped_poly.tolist() + + def _flip_rle(rle, height, width): + if 'counts' in rle and type(rle['counts']) == list: + # Magic RLE format handling painfully discovered by looking at the + # COCO API showAnns function. 
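+            # frPyObjects converts the uncompressed (list-style) counts
+            # into a compressed RLE that mask_util.decode can consume.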
+ rle = mask_util.frPyObjects([rle], height, width) + mask = mask_util.decode(rle) + mask = mask[:, ::-1, :] + rle = mask_util.encode(np.array(mask, order='F', dtype=np.uint8)) + return rle + + flipped_segms = [] + for segm in segms: + if type(segm) == list: + # Polygon format + flipped_segms.append([_flip_poly(poly, width) for poly in segm]) + else: + # RLE format + assert type(segm) == dict + flipped_segms.append(_flip_rle(segm, height, width)) + return flipped_segms + + +def polys_to_mask(polygons, height, width): + """Convert from the COCO polygon segmentation format to a binary mask + encoded as a 2D array of data type numpy.float32. The polygon segmentation + is understood to be enclosed inside a height x width image. The resulting + mask is therefore of shape (height, width). + """ + rle = mask_util.frPyObjects(polygons, height, width) + mask = np.array(mask_util.decode(rle), dtype=np.float32) + # Flatten in case polygons was a list + mask = np.sum(mask, axis=2) + mask = np.array(mask > 0, dtype=np.float32) + return mask + + +def mask_to_bbox(mask): + """Compute the tight bounding box of a binary mask.""" + xs = np.where(np.sum(mask, axis=0) > 0)[0] + ys = np.where(np.sum(mask, axis=1) > 0)[0] + + if len(xs) == 0 or len(ys) == 0: + return None + + x0 = xs[0] + x1 = xs[-1] + y0 = ys[0] + y1 = ys[-1] + return np.array((x0, y0, x1, y1), dtype=np.float32) + + +def polys_to_mask_wrt_box(polygons, box, M): + """Convert from the COCO polygon segmentation format to a binary mask + encoded as a 2D array of data type numpy.float32. The polygon segmentation + is understood to be enclosed in the given box and rasterized to an M x M + mask. The resulting mask is therefore of shape (M, M). + """ + w = box[2] - box[0] + h = box[3] - box[1] + + w = np.maximum(w, 1) + h = np.maximum(h, 1) + + polygons_norm = [] + for poly in polygons: + p = np.array(poly, dtype=np.float32) + p[0::2] = (p[0::2] - box[0]) * M / w + p[1::2] = (p[1::2] - box[1]) * M / h + polygons_norm.append(p) + + rle = mask_util.frPyObjects(polygons_norm, M, M) + mask = np.array(mask_util.decode(rle), dtype=np.float32) + # Flatten in case polygons was a list + mask = np.sum(mask, axis=2) + mask = np.array(mask > 0, dtype=np.float32) + return mask + + +def polys_to_boxes(polys): + """Convert a list of polygons into an array of tight bounding boxes.""" + boxes_from_polys = np.zeros((len(polys), 4), dtype=np.float32) + for i in range(len(polys)): + poly = polys[i] + x0 = min(min(p[::2]) for p in poly) + x1 = max(max(p[::2]) for p in poly) + y0 = min(min(p[1::2]) for p in poly) + y1 = max(max(p[1::2]) for p in poly) + boxes_from_polys[i, :] = [x0, y0, x1, y1] + + return boxes_from_polys + + +def rle_mask_voting(top_masks, + all_masks, + all_dets, + iou_thresh, + binarize_thresh, + method='AVG'): + """Returns new masks (in correspondence with `top_masks`) by combining + multiple overlapping masks coming from the pool of `all_masks`. Two methods + for combining masks are supported: 'AVG' uses a weighted average of + overlapping mask pixels; 'UNION' takes the union of all mask pixels. 
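As a toy illustration of the two voting rules (2x2 masks and scores invented, not part of the source):

```python
import numpy as np

masks = np.stack([np.array([[1, 1], [0, 0]], dtype=np.float32),
                  np.array([[1, 0], [1, 0]], dtype=np.float32)])
weights = np.array([0.9, 0.6])  # detection scores acting as per-mask weights

# 'AVG': weighted soft mask, then binarize at a threshold
soft = np.average(masks, axis=0, weights=weights)
avg_mask = (soft > 0.5).astype(np.uint8)                  # [[1, 1], [0, 0]]

# 'UNION': any pixel that is on in some mask joins the result
union_mask = (masks.sum(axis=0) > 1e-5).astype(np.uint8)  # [[1, 1], [1, 0]]
```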
+ """ + if len(top_masks) == 0: + return + + all_not_crowd = [False] * len(all_masks) + top_to_all_overlaps = mask_util.iou(top_masks, all_masks, all_not_crowd) + decoded_all_masks = [ + np.array(mask_util.decode(rle), dtype=np.float32) for rle in all_masks + ] + decoded_top_masks = [ + np.array(mask_util.decode(rle), dtype=np.float32) for rle in top_masks + ] + all_boxes = all_dets[:, :4].astype(np.int32) + all_scores = all_dets[:, 4] + + # Fill box support with weights + mask_shape = decoded_all_masks[0].shape + mask_weights = np.zeros((len(all_masks), mask_shape[0], mask_shape[1])) + for k in range(len(all_masks)): + ref_box = all_boxes[k] + x_0 = max(ref_box[0], 0) + x_1 = min(ref_box[2] + 1, mask_shape[1]) + y_0 = max(ref_box[1], 0) + y_1 = min(ref_box[3] + 1, mask_shape[0]) + mask_weights[k, y_0:y_1, x_0:x_1] = all_scores[k] + mask_weights = np.maximum(mask_weights, 1e-5) + + top_segms_out = [] + for k in range(len(top_masks)): + # Corner case of empty mask + if decoded_top_masks[k].sum() == 0: + top_segms_out.append(top_masks[k]) + continue + + inds_to_vote = np.where(top_to_all_overlaps[k] >= iou_thresh)[0] + # Only matches itself + if len(inds_to_vote) == 1: + top_segms_out.append(top_masks[k]) + continue + + masks_to_vote = [decoded_all_masks[i] for i in inds_to_vote] + if method == 'AVG': + ws = mask_weights[inds_to_vote] + soft_mask = np.average(masks_to_vote, axis=0, weights=ws) + mask = np.array(soft_mask > binarize_thresh, dtype=np.uint8) + elif method == 'UNION': + # Any pixel that's on joins the mask + soft_mask = np.sum(masks_to_vote, axis=0) + mask = np.array(soft_mask > 1e-5, dtype=np.uint8) + else: + raise NotImplementedError('Method {} is unknown'.format(method)) + rle = mask_util.encode(np.array(mask[:, :, np.newaxis], order='F'))[0] + top_segms_out.append(rle) + + return top_segms_out + + +def rle_mask_nms(masks, dets, thresh, mode='IOU'): + """Performs greedy non-maximum suppression based on an overlap measurement + between masks. The type of measurement is determined by `mode` and can be + either 'IOU' (standard intersection over union) or 'IOMA' (intersection over + mininum area). + """ + if len(masks) == 0: + return [] + if len(masks) == 1: + return [0] + + if mode == 'IOU': + # Computes ious[m1, m2] = area(intersect(m1, m2)) / area(union(m1, m2)) + all_not_crowds = [False] * len(masks) + ious = mask_util.iou(masks, masks, all_not_crowds) + elif mode == 'IOMA': + # Computes ious[m1, m2] = area(intersect(m1, m2)) / min(area(m1), area(m2)) + all_crowds = [True] * len(masks) + # ious[m1, m2] = area(intersect(m1, m2)) / area(m2) + ious = mask_util.iou(masks, masks, all_crowds) + # ... 
= max(area(intersect(m1, m2)) / area(m2), + # area(intersect(m2, m1)) / area(m1)) + ious = np.maximum(ious, ious.transpose()) + elif mode == 'CONTAINMENT': + # Computes ious[m1, m2] = area(intersect(m1, m2)) / area(m2) + # Which measures how much m2 is contained inside m1 + all_crowds = [True] * len(masks) + ious = mask_util.iou(masks, masks, all_crowds) + else: + raise NotImplementedError('Mode {} is unknown'.format(mode)) + + scores = dets[:, 4] + order = np.argsort(-scores) + + keep = [] + while order.size > 0: + i = order[0] + keep.append(i) + ovr = ious[i, order[1:]] + inds_to_keep = np.where(ovr <= thresh)[0] + order = order[inds_to_keep + 1] + + return keep + + +def rle_masks_to_boxes(masks): + """Computes the bounding box of each mask in a list of RLE encoded masks.""" + if len(masks) == 0: + return [] + + decoded_masks = [ + np.array(mask_util.decode(rle), dtype=np.float32) for rle in masks + ] + + def get_bounds(flat_mask): + inds = np.where(flat_mask > 0)[0] + return inds.min(), inds.max() + + boxes = np.zeros((len(decoded_masks), 4)) + keep = [True] * len(decoded_masks) + for i, mask in enumerate(decoded_masks): + if mask.sum() == 0: + keep[i] = False + continue + flat_mask = mask.sum(axis=0) + x0, x1 = get_bounds(flat_mask) + flat_mask = mask.sum(axis=1) + y0, y1 = get_bounds(flat_mask) + boxes[i, :] = (x0, y0, x1, y1) + + return boxes, np.where(keep)[0] diff --git a/mmdet/core/mask_ops/utils.py b/mmdet/core/mask_ops/utils.py new file mode 100644 index 0000000000000000000000000000000000000000..2802430007e7b239bcb18ba20a26c0609c62245c --- /dev/null +++ b/mmdet/core/mask_ops/utils.py @@ -0,0 +1,35 @@ +import cvbase as cvb +import numpy as np +import pycocotools.mask as mask_utils + +import mmcv + + +def split_combined_gt_polys(gt_polys, gt_poly_lens, num_polys_per_mask): + """Split the combined 1-D polys into masks. + + A mask is represented as a list of polys, and a poly is represented as + a 1-D array. In dataset, all masks are concatenated into a single 1-D + tensor. Here we need to split the tensor into original representations. 
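With hypothetical numbers, the two-level split described here looks like this (the document itself performs both splits with `mmcv.slice_list` below):

```python
import mmcv

flat_polys = list(range(18))   # all coordinates of one image, concatenated
poly_lens = [6, 6, 6]          # length of each poly (>= 6, i.e. >= 3 points)
polys_per_mask = [2, 1]        # first mask owns 2 polys, second owns 1

polys = mmcv.slice_list(flat_polys, poly_lens)
# -> [[0..5], [6..11], [12..17]]
masks = mmcv.slice_list(polys, polys_per_mask)
# -> [[[0..5], [6..11]], [[12..17]]]
```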
+ + Args: + gt_polys (list): a list (length = image num) of 1-D tensors + gt_poly_lens (list): a list (length = image num) of poly length + num_polys_per_mask (list): a list (length = image num) of poly number + of each mask + + Returns: + list: a list (length = image num) of list (length = mask num) of + list (length = poly num) of numpy array + """ + mask_polys_list = [] + for img_id in range(len(gt_polys)): + gt_polys_single = gt_polys[img_id].cpu().numpy() + gt_polys_lens_single = gt_poly_lens[img_id].cpu().numpy().tolist() + num_polys_per_mask_single = num_polys_per_mask[ + img_id].cpu().numpy().tolist() + + split_gt_polys = mmcv.slice_list(gt_polys_single, gt_polys_lens_single) + mask_polys = mmcv.slice_list(split_gt_polys, num_polys_per_mask_single) + mask_polys_list.append(mask_polys) + return mask_polys_list diff --git a/mmdet/core/post_processing/__init__.py b/mmdet/core/post_processing/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..1b24a3fc68525de1c73d687404990bd521bdf5b0 --- /dev/null +++ b/mmdet/core/post_processing/__init__.py @@ -0,0 +1,8 @@ +from .bbox_nms import multiclass_nms +from .merge_augs import (merge_aug_proposals, merge_aug_bboxes, + merge_aug_scores, merge_aug_masks) + +__all__ = [ + 'multiclass_nms', 'merge_aug_proposals', 'merge_aug_bboxes', + 'merge_aug_scores', 'merge_aug_masks' +] diff --git a/mmdet/core/post_processing/bbox_nms.py b/mmdet/core/post_processing/bbox_nms.py new file mode 100644 index 0000000000000000000000000000000000000000..f619d2682a035344c6fda6974cd03c5cbfeb0f26 --- /dev/null +++ b/mmdet/core/post_processing/bbox_nms.py @@ -0,0 +1,54 @@ +import torch + +from mmdet.ops import nms + + +def multiclass_nms(multi_bboxes, multi_scores, score_thr, nms_thr, max_num=-1): + """NMS for multi-class bboxes. + + Args: + multi_bboxes (Tensor): shape (n, #class*4) or (n, 4) + multi_scores (Tensor): shape (n, #class) + score_thr (float): bbox threshold, bboxes with scores lower than it + will not be considered. + nms_thr (float): NMS IoU threshold + max_num (int): if there are more than max_num bboxes after NMS, + only top max_num will be kept. + + Returns: + tuple: (bboxes, labels), tensors of shape (k, 5) and (k, 1). Labels + are 0-based. 
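A hedged usage sketch (tensor values invented; the import path assumes the `post_processing` package above, and the compiled `mmdet.ops` nms must be available):

```python
import torch
from mmdet.core.post_processing import multiclass_nms

# two class-agnostic boxes scored over 3 classes (class 0 is background)
boxes = torch.tensor([[0., 0., 10., 10.],
                      [20., 20., 30., 30.]])
scores = torch.tensor([[0.1, 0.8, 0.1],
                       [0.2, 0.1, 0.7]])
dets, labels = multiclass_nms(boxes, scores, score_thr=0.3, nms_thr=0.5)
# dets: (k, 5) rows of [x1, y1, x2, y2, score]
# labels: 0-based foreground classes, so 0 and 1 here
```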
+ """ + num_classes = multi_scores.shape[1] + bboxes, labels = [], [] + for i in range(1, num_classes): + cls_inds = multi_scores[:, i] > score_thr + if not cls_inds.any(): + continue + # get bboxes and scores of this class + if multi_bboxes.shape[1] == 4: + _bboxes = multi_bboxes[cls_inds, :] + else: + _bboxes = multi_bboxes[cls_inds, i * 4:(i + 1) * 4] + _scores = multi_scores[cls_inds, i] + cls_dets = torch.cat([_bboxes, _scores[:, None]], dim=1) + # perform nms + nms_keep = nms(cls_dets, nms_thr) + cls_dets = cls_dets[nms_keep, :] + cls_labels = multi_bboxes.new_full( + (len(nms_keep), ), i - 1, dtype=torch.long) + bboxes.append(cls_dets) + labels.append(cls_labels) + if bboxes: + bboxes = torch.cat(bboxes) + labels = torch.cat(labels) + if bboxes.shape[0] > max_num: + _, inds = bboxes[:, -1].sort(descending=True) + inds = inds[:max_num] + bboxes = bboxes[inds] + labels = labels[inds] + else: + bboxes = multi_bboxes.new_zeros((0, 5)) + labels = multi_bboxes.new_zeros((0, ), dtype=torch.long) + + return bboxes, labels diff --git a/mmdet/core/post_processing/merge_augs.py b/mmdet/core/post_processing/merge_augs.py new file mode 100644 index 0000000000000000000000000000000000000000..5d56e481e5aee2ce113cea7adcb11ebe0aaede5b --- /dev/null +++ b/mmdet/core/post_processing/merge_augs.py @@ -0,0 +1,96 @@ +import torch + +from mmcv.ops import nms +import numpy as np + +from ..bbox_ops import bbox_mapping_back + + +def merge_aug_proposals(aug_proposals, img_metas, rpn_test_cfg): + """Merge augmented proposals (multiscale, flip, etc.) + + Args: + aug_proposals (list[Tensor]): proposals from different testing + schemes, shape (n, 5). Note that they are not rescaled to the + original image size. + img_metas (list[dict]): image info including "shape_scale" and "flip". + rpn_test_cfg (dict): rpn test config. + + Returns: + Tensor: shape (n, 4), proposals corresponding to original image scale. + """ + recovered_proposals = [] + for proposals, img_info in zip(aug_proposals, img_metas): + shape_scale = img_info['shape_scale'][0] + flip = img_info['flip'][0] + _proposals = proposals.clone() + _proposals[:, :4] = bbox_mapping_back(_proposals[:, :4], shape_scale, + flip) + recovered_proposals.append(_proposals) + aug_proposals = torch.cat(recovered_proposals, dim=0) + nms_keep = nms(aug_proposals, rpn_test_cfg.nms_thr, + aug_proposals.get_device()) + merged_proposals = aug_proposals[nms_keep, :] + scores = merged_proposals[:, 4] + _, order = scores.sort(0, descending=True) + num = min(rpn_test_cfg.max_num, merged_proposals.shape[0]) + order = order[:num] + merged_proposals = merged_proposals[order, :] + return merged_proposals + + +def merge_aug_bboxes(aug_bboxes, aug_scores, img_metas, rcnn_test_cfg): + """Merge augmented detection bboxes and scores. + + Args: + aug_bboxes (list[Tensor]): shape (n, 4*#class) + aug_scores (list[Tensor] or None): shape (n, #class) + img_shapes (list[Tensor]): shape (3, ). + rcnn_test_cfg (dict): rcnn test config. 
+
+ Returns:
+ tuple: (bboxes, scores), or bboxes alone if aug_scores is None
+ """
+ recovered_bboxes = []
+ for bboxes, img_info in zip(aug_bboxes, img_metas):
+ shape_scale = img_info['shape_scale'][0]
+ flip = img_info['flip'][0]
+ bboxes = bbox_mapping_back(bboxes, shape_scale, flip)
+ recovered_bboxes.append(bboxes)
+ bboxes = torch.stack(recovered_bboxes).mean(dim=0)
+ if aug_scores is None:
+ return bboxes
+ else:
+ scores = torch.stack(aug_scores).mean(dim=0)
+ return bboxes, scores
+
+
+def merge_aug_scores(aug_scores):
+ """Merge augmented bbox scores."""
+ if isinstance(aug_scores[0], torch.Tensor):
+ return torch.mean(torch.stack(aug_scores), dim=0)
+ else:
+ return np.mean(aug_scores, axis=0)
+
+
+def merge_aug_masks(aug_masks, bboxes, img_metas, rcnn_test_cfg, weights=None):
+ """Merge augmented mask prediction.
+
+ Args:
+ aug_masks (list[ndarray]): shape (n, #class, h, w)
+ img_metas (list[dict]): image info including "flip".
+ rcnn_test_cfg (dict): rcnn test config.
+
+ Returns:
+ ndarray: masks merged over augmentations, shape (n, #class, h, w)
+ """
+ recovered_masks = [
+ mask if not img_info['flip'][0] else mask[..., ::-1]
+ for mask, img_info in zip(aug_masks, img_metas)
+ ]
+ if weights is None:
+ merged_masks = np.mean(recovered_masks, axis=0)
+ else:
+ merged_masks = np.average(
+ np.array(recovered_masks), axis=0, weights=np.array(weights))
+ return merged_masks
diff --git a/mmdet/core/targets/__init__.py b/mmdet/core/targets/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..b3b2567efff687ba503b8a37d9f096597a0c8780
--- /dev/null
+++ b/mmdet/core/targets/__init__.py
@@ -0,0 +1,5 @@
+from .anchor_target import anchor_target
+from .bbox_target import bbox_target
+from .mask_target import mask_target
+
+__all__ = ['anchor_target', 'bbox_target', 'mask_target']
diff --git a/mmdet/core/targets/anchor_target.py b/mmdet/core/targets/anchor_target.py
new file mode 100644
index 0000000000000000000000000000000000000000..ec2389f90885da0c92f0598dc6d45f59c0ab6dac
--- /dev/null
+++ b/mmdet/core/targets/anchor_target.py
@@ -0,0 +1,2 @@
+def anchor_target():
+ pass
diff --git a/mmdet/core/targets/bbox_target.py b/mmdet/core/targets/bbox_target.py
new file mode 100644
index 0000000000000000000000000000000000000000..49642c2298735b163b98ad832a3a6a9ee9941c45
--- /dev/null
+++ b/mmdet/core/targets/bbox_target.py
@@ -0,0 +1,2 @@
+def bbox_target():
+ pass
diff --git a/mmdet/core/targets/mask_target.py b/mmdet/core/targets/mask_target.py
new file mode 100644
index 0000000000000000000000000000000000000000..4c330e13b81e8cb27e35a8705e2e89b00792ddaa
--- /dev/null
+++ b/mmdet/core/targets/mask_target.py
@@ -0,0 +1,2 @@
+def mask_target():
+ pass
diff --git a/mmdet/datasets/__init__.py b/mmdet/datasets/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..6045c2b0923993243a999f0008b79443126d0e26
--- /dev/null
+++ b/mmdet/datasets/__init__.py
@@ -0,0 +1,4 @@
+from .coco import CocoDataset
+from .collate import *
+from .sampler import *
+from .transforms import *
diff --git a/mmdet/datasets/coco.py b/mmdet/datasets/coco.py
new file mode 100644
index 0000000000000000000000000000000000000000..e0705e79b6168c2ccf45610af3609013082ddb48
--- /dev/null
+++ b/mmdet/datasets/coco.py
@@ -0,0 +1,288 @@
+import os.path as osp
+
+import mmcv
+import numpy as np
+from pycocotools.coco import COCO
+from torch.utils.data import Dataset
+
+from .transforms import (ImageTransform, BboxTransform, PolyMaskTransform,
+ Numpy2Tensor)
+from .utils import show_ann, random_scale
+from .utils import DataContainer as DC
+
+
+def parse_ann_info(ann_info,
cat2label, with_mask=True): + """Parse bbox and mask annotation. + + Args: + ann_info (list[dict]): Annotation info of an image. + cat2label (dict): The mapping from category ids to labels. + with_mask (bool): Whether to parse mask annotations. + + Returns: + tuple: gt_bboxes, gt_labels and gt_mask_info + """ + gt_bboxes = [] + gt_labels = [] + gt_bboxes_ignore = [] + # each mask consists of one or several polys, each poly is a list of float. + if with_mask: + gt_mask_polys = [] + gt_poly_lens = [] + for i, ann in enumerate(ann_info): + if ann.get('ignore', False): + continue + x1, y1, w, h = ann['bbox'] + if ann['area'] <= 0 or w < 1 or h < 1: + continue + bbox = [x1, y1, x1 + w - 1, y1 + h - 1] + if ann['iscrowd']: + gt_bboxes_ignore.append(bbox) + else: + gt_bboxes.append(bbox) + gt_labels.append(cat2label[ann['category_id']]) + if with_mask: + # Note polys are not resized + mask_polys = [ + p for p in ann['segmentation'] if len(p) >= 6 + ] # valid polygons have >= 3 points (6 coordinates) + poly_lens = [len(p) for p in mask_polys] + gt_mask_polys.append(mask_polys) + gt_poly_lens.extend(poly_lens) + if gt_bboxes: + gt_bboxes = np.array(gt_bboxes, dtype=np.float32) + gt_labels = np.array(gt_labels, dtype=np.int64) + else: + gt_bboxes = np.zeros((0, 4), dtype=np.float32) + gt_labels = np.array([], dtype=np.int64) + + if gt_bboxes_ignore: + gt_bboxes_ignore = np.array(gt_bboxes_ignore, dtype=np.float32) + else: + gt_bboxes_ignore = np.zeros((0, 4), dtype=np.float32) + + ann = dict( + bboxes=gt_bboxes, labels=gt_labels, bboxes_ignore=gt_bboxes_ignore) + + if with_mask: + ann['mask_polys'] = gt_mask_polys + ann['poly_lens'] = gt_poly_lens + return ann + + +class CocoDataset(Dataset): + + def __init__(self, + ann_file, + img_prefix, + img_scale, + img_norm_cfg, + size_divisor=None, + proposal_file=None, + num_max_proposals=1000, + flip_ratio=0, + with_mask=True, + with_crowd=True, + with_label=True, + test_mode=False, + debug=False): + # path of the data file + self.coco = COCO(ann_file) + # filter images with no annotation during training + if not test_mode: + self.img_ids, self.img_infos = self._filter_imgs() + else: + self.img_ids = self.coco.getImgIds() + self.img_infos = [ + self.coco.loadImgs(idx)[0] for idx in self.img_ids + ] + assert len(self.img_ids) == len(self.img_infos) + # get the mapping from original category ids to labels + self.cat_ids = self.coco.getCatIds() + self.cat2label = { + cat_id: i + 1 + for i, cat_id in enumerate(self.cat_ids) + } + # prefix of images path + self.img_prefix = img_prefix + # (long_edge, short_edge) or [(long1, short1), (long2, short2), ...] 
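A hypothetical instantiation showing the expected argument formats (paths and normalization values are placeholders, not from the source):

```python
train_dataset = CocoDataset(
    ann_file='data/coco/annotations/instances_train2017.json',
    img_prefix='data/coco/train2017/',
    img_scale=(1333, 800),      # one (long_edge, short_edge) tuple, or a list
    img_norm_cfg=dict(
        mean=(123.675, 116.28, 103.53),
        std=(58.395, 57.12, 57.375),
        to_rgb=True),
    size_divisor=32,            # pad so FPN strides divide the image size
    flip_ratio=0.5)
```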
+ self.img_scales = img_scale if isinstance(img_scale, + list) else [img_scale] + assert mmcv.is_list_of(self.img_scales, tuple) + # color channel order and normalize configs + self.img_norm_cfg = img_norm_cfg + # proposals + self.proposals = mmcv.load( + proposal_file) if proposal_file is not None else None + self.num_max_proposals = num_max_proposals + # flip ratio + self.flip_ratio = flip_ratio + assert flip_ratio >= 0 and flip_ratio <= 1 + # padding border to ensure the image size can be divided by + # size_divisor (used for FPN) + self.size_divisor = size_divisor + # with crowd or not, False when using RetinaNet + self.with_crowd = with_crowd + # with mask or not + self.with_mask = with_mask + # with label is False for RPN + self.with_label = with_label + # in test mode or not + self.test_mode = test_mode + # debug mode or not + self.debug = debug + + # set group flag for the sampler + self._set_group_flag() + # transforms + self.img_transform = ImageTransform( + size_divisor=self.size_divisor, **self.img_norm_cfg) + self.bbox_transform = BboxTransform() + self.mask_transform = PolyMaskTransform() + self.numpy2tensor = Numpy2Tensor() + + def __len__(self): + return len(self.img_ids) + + def _filter_imgs(self, min_size=32): + """Filter images too small or without ground truths.""" + img_ids = list(set([_['image_id'] for _ in self.coco.anns.values()])) + valid_ids = [] + img_infos = [] + for i in img_ids: + info = self.coco.loadImgs(i)[0] + if min(info['width'], info['height']) >= min_size: + valid_ids.append(i) + img_infos.append(info) + return valid_ids, img_infos + + def _load_ann_info(self, idx): + img_id = self.img_ids[idx] + ann_ids = self.coco.getAnnIds(imgIds=img_id) + ann_info = self.coco.loadAnns(ann_ids) + return ann_info + + def _set_group_flag(self): + """Set flag according to image aspect ratio. + + Images with aspect ratio greater than 1 will be set as group 1, + otherwise group 0. + """ + self.flag = np.zeros(len(self.img_ids), dtype=np.uint8) + for i in range(len(self.img_ids)): + img_info = self.img_infos[i] + if img_info['width'] / img_info['height'] > 1: + self.flag[i] = 1 + + def _rand_another(self, idx): + pool = np.where(self.flag == self.flag[idx])[0] + return np.random.choice(pool) + + def __getitem__(self, idx): + if self.test_mode: + return self.prepare_test_img(idx) + while True: + img_info = self.img_infos[idx] + ann_info = self._load_ann_info(idx) + + # load image + img = mmcv.imread(osp.join(self.img_prefix, img_info['file_name'])) + if self.debug: + show_ann(self.coco, img, ann_info) + + # load proposals if necessary + if self.proposals is not None: + proposals = self.proposals[idx][:self.num_max_proposals, :4] + # TODO: Handle empty proposals properly. Currently images with + # no proposals are just ignored, but they can be used for + # training in concept. 
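The aspect-ratio grouping set up in `_set_group_flag` above can be pictured with made-up image sizes; the samplers later batch images only within the same group:

```python
import numpy as np

widths = np.array([640, 480, 800])
heights = np.array([480, 640, 800])
flag = (widths / heights > 1).astype(np.uint8)  # array([1, 0, 0], dtype=uint8)
```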
+ if len(proposals) == 0: + idx = self._rand_another(idx) + continue + + ann = parse_ann_info(ann_info, self.cat2label, self.with_mask) + gt_bboxes = ann['bboxes'] + gt_labels = ann['labels'] + gt_bboxes_ignore = ann['bboxes_ignore'] + # skip the image if there is no valid gt bbox + if len(gt_bboxes) == 0: + idx = self._rand_another(idx) + continue + + # apply transforms + flip = True if np.random.rand() < self.flip_ratio else False + img_scale = random_scale(self.img_scales) # sample a scale + img, img_shape, scale_factor = self.img_transform( + img, img_scale, flip) + if self.proposals is not None: + proposals = self.bbox_transform(proposals, img_shape, + scale_factor, flip) + gt_bboxes = self.bbox_transform(gt_bboxes, img_shape, scale_factor, + flip) + gt_bboxes_ignore = self.bbox_transform(gt_bboxes_ignore, img_shape, + scale_factor, flip) + + if self.with_mask: + gt_mask_polys, gt_poly_lens, num_polys_per_mask = \ + self.mask_transform( + ann['mask_polys'], ann['poly_lens'], + img_info['height'], img_info['width'], flip) + + ori_shape = (img_info['height'], img_info['width']) + img_meta = dict( + ori_shape=DC(ori_shape), + img_shape=DC(img_shape), + scale_factor=DC(scale_factor), + flip=DC(flip)) + + data = dict( + img=DC(img, stack=True), + img_meta=img_meta, + gt_bboxes=DC(gt_bboxes)) + if self.proposals is not None: + data['proposals'] = DC(proposals) + if self.with_label: + data['gt_labels'] = DC(gt_labels) + if self.with_crowd: + data['gt_bboxes_ignore'] = DC(gt_bboxes_ignore) + if self.with_mask: + data['gt_mask_polys'] = DC(gt_mask_polys) + data['gt_poly_lens'] = DC(gt_poly_lens) + data['num_polys_per_mask'] = DC(num_polys_per_mask) + return data + + def prepare_test_img(self, idx): + """Prepare an image for testing (multi-scale and flipping)""" + img_info = self._load_info(idx, with_ann=False) + img_file = osp.join(self.prefix, img_info['file_name']) + proposal = (self.proposals[idx][:, :4] + if self.proposals is not None else None) + + def prepare_single(img_file, scale, flip, proposal=None): + img_np, shape_scale_np = self.img_transform(img_file, scale, flip) + img, shape_scale = self.numpy2tensor(img_np, shape_scale_np) + img_meta = dict(shape_scale=shape_scale, flip=flip) + if proposal is not None: + proposal = self.bbox_transform(proposal, shape_scale_np, flip) + proposal = self.numpy2tensor(proposal) + return img, img_meta, proposal + + imgs = [] + img_metas = [] + proposals = [] + for scale in self.img_scale: + img, img_meta, proposal = prepare_single(img_file, scale, False, + proposal) + imgs.append(img) + img_metas.append(img_meta) + proposals.append(proposal) + if self.flip_ratio > 0: + img, img_meta, prop = prepare_single(img_file, scale, True, + proposal) + imgs.append(img) + img_metas.append(img_meta) + proposals.append(prop) + if self.proposals is None: + return imgs, img_metas + else: + return imgs, img_metas, proposals diff --git a/mmdet/datasets/collate.py b/mmdet/datasets/collate.py new file mode 100644 index 0000000000000000000000000000000000000000..44117d6f2d01d3aaa4c06996c2d8bf657e4a1ce5 --- /dev/null +++ b/mmdet/datasets/collate.py @@ -0,0 +1,57 @@ +import collections + +import torch +import torch.nn.functional as F +from torch.utils.data.dataloader import default_collate + +from .utils import DataContainer + +# https://github.com/pytorch/pytorch/issues/973 +import resource +rlimit = resource.getrlimit(resource.RLIMIT_NOFILE) +resource.setrlimit(resource.RLIMIT_NOFILE, (4096, rlimit[1])) + +__all__ = ['collate'] + + +def collate(batch, samples_per_gpu=1): 
+
+ if not isinstance(batch, collections.Sequence):
+ raise TypeError("{} is not supported.".format(type(batch)))
+
+ if isinstance(batch[0], DataContainer):
+ assert len(batch) % samples_per_gpu == 0
+ stacked = []
+ if batch[0].stack:
+ for i in range(0, len(batch), samples_per_gpu):
+ assert isinstance(batch[i].data, torch.Tensor)
+ # TODO: handle tensors other than 3d
+ assert batch[i].dim() == 3
+ # pad each GPU group to its own max height/width
+ c, h, w = batch[i].size()
+ for sample in batch[i:i + samples_per_gpu]:
+ assert c == sample.size(0)
+ h = max(h, sample.size(1))
+ w = max(w, sample.size(2))
+ padded_samples = [
+ F.pad(
+ sample.data,
+ (0, w - sample.size(2), 0, h - sample.size(1)),
+ value=sample.padding_value)
+ for sample in batch[i:i + samples_per_gpu]
+ ]
+ stacked.append(default_collate(padded_samples))
+ else:
+ for i in range(0, len(batch), samples_per_gpu):
+ stacked.append(
+ [sample.data for sample in batch[i:i + samples_per_gpu]])
+ return DataContainer(stacked, batch[0].stack, batch[0].padding_value)
+ elif isinstance(batch[0], collections.Sequence):
+ transposed = zip(*batch)
+ return [collate(samples, samples_per_gpu) for samples in transposed]
+ elif isinstance(batch[0], collections.Mapping):
+ return {
+ key: collate([d[key] for d in batch], samples_per_gpu)
+ for key in batch[0]
+ }
+ else:
+ return default_collate(batch)
diff --git a/mmdet/datasets/sampler.py b/mmdet/datasets/sampler.py
new file mode 100644
index 0000000000000000000000000000000000000000..74089821bf17a7bdc6f1f728c0340e382adb3046
--- /dev/null
+++ b/mmdet/datasets/sampler.py
@@ -0,0 +1,134 @@
+from __future__ import division
+
+import math
+import torch
+import numpy as np
+
+from torch.distributed import get_world_size, get_rank
+from torch.utils.data.sampler import Sampler
+
+__all__ = ['GroupSampler', 'DistributedGroupSampler']
+
+
+class GroupSampler(Sampler):
+
+ def __init__(self, dataset, samples_per_gpu=1):
+ assert hasattr(dataset, 'flag')
+ self.dataset = dataset
+ self.samples_per_gpu = samples_per_gpu
+ self.flag = dataset.flag.astype(np.int64)
+ self.group_sizes = np.bincount(self.flag)
+ self.num_samples = 0
+ for i, size in enumerate(self.group_sizes):
+ self.num_samples += int(np.ceil(
+ size / self.samples_per_gpu)) * self.samples_per_gpu
+
+ def __iter__(self):
+ indices = []
+ for i, size in enumerate(self.group_sizes):
+ if size == 0:
+ continue
+ indice = np.where(self.flag == i)[0]
+ assert len(indice) == size
+ np.random.shuffle(indice)
+ num_extra = int(np.ceil(size / self.samples_per_gpu)
+ ) * self.samples_per_gpu - len(indice)
+ indice = np.concatenate([indice, indice[:num_extra]])
+ indices.append(indice)
+ indices = np.concatenate(indices)
+ indices = [
+ indices[i * self.samples_per_gpu:(i + 1) * self.samples_per_gpu]
+ for i in np.random.permutation(
+ range(len(indices) // self.samples_per_gpu))
+ ]
+ indices = np.concatenate(indices)
+ indices = torch.from_numpy(indices).long()
+ assert len(indices) == self.num_samples
+ return iter(indices)
+
+ def __len__(self):
+ return self.num_samples
+
+
+class DistributedGroupSampler(Sampler):
+ """Sampler that restricts data loading to a subset of the dataset.
+ It is especially useful in conjunction with
+ :class:`torch.nn.parallel.DistributedDataParallel`. In such case, each
+ process can pass a DistributedSampler instance as a DataLoader sampler,
+ and load a subset of the original dataset that is exclusive to it.
+ .. note::
+ Dataset is assumed to be of constant size.
+ Arguments:
+ dataset: Dataset used for sampling.
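The per-group padding arithmetic used by both samplers, with hypothetical group sizes:

```python
import numpy as np

group_sizes = np.array([5, 3])   # images with flag 0 and flag 1
samples_per_gpu = 4
padded = (np.ceil(group_sizes / samples_per_gpu) * samples_per_gpu).astype(int)
# padded -> array([8, 4]); each group is topped up by repeating its own
# indices so that no GPU batch ever mixes aspect-ratio groups
```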
+ num_replicas (optional): Number of processes participating in + distributed training. + rank (optional): Rank of the current process within num_replicas. + """ + + def __init__(self, + dataset, + samples_per_gpu=1, + num_replicas=None, + rank=None): + if num_replicas is None: + num_replicas = get_world_size() + if rank is None: + rank = get_rank() + self.dataset = dataset + self.samples_per_gpu = samples_per_gpu + self.num_replicas = num_replicas + self.rank = rank + self.epoch = 0 + + assert hasattr(self.dataset, 'flag') + self.flag = self.dataset.flag + self.group_sizes = np.bincount(self.flag) + + self.num_samples = 0 + for i, j in enumerate(self.group_sizes): + self.num_samples += int( + math.ceil(self.group_sizes[i] * 1.0 / self.samples_per_gpu / + self.num_replicas)) * self.samples_per_gpu + self.total_size = self.num_samples * self.num_replicas + + def __iter__(self): + # deterministically shuffle based on epoch + g = torch.Generator() + g.manual_seed(self.epoch) + + indices = [] + for i, size in enumerate(self.group_sizes): + if size > 0: + indice = np.where(self.flag == i)[0] + assert len(indice) == size + indice = indice[list(torch.randperm(int(size), + generator=g))].tolist() + extra = int( + math.ceil( + size * 1.0 / self.samples_per_gpu / self.num_replicas) + ) * self.samples_per_gpu * self.num_replicas - len(indice) + indice += indice[:extra] + indices += indice + + assert len(indices) == self.total_size + + indices = [ + indices[j] for i in list( + torch.randperm( + len(indices) // self.samples_per_gpu, generator=g)) + for j in range(i * self.samples_per_gpu, (i + 1) * + self.samples_per_gpu) + ] + + # subsample + offset = self.num_samples * self.rank + indices = indices[offset:offset + self.num_samples] + assert len(indices) == self.num_samples + + return iter(indices) + + def __len__(self): + return self.num_samples + + def set_epoch(self, epoch): + self.epoch = epoch diff --git a/mmdet/datasets/transforms.py b/mmdet/datasets/transforms.py new file mode 100644 index 0000000000000000000000000000000000000000..81f3a627d0d20a5890ea9c5f597e814ea373b9e5 --- /dev/null +++ b/mmdet/datasets/transforms.py @@ -0,0 +1,208 @@ +import mmcv +# import cvbase as cvb +import numpy as np +import torch + +from mmdet.core import segms + +__all__ = [ + 'ImageTransform', 'BboxTransform', 'PolyMaskTransform', 'Numpy2Tensor' +] + + +class ImageTransform(object): + """Preprocess an image + 1. rescale the image to expected size + 2. normalize the image + 3. flip the image (if needed) + 4. pad the image (if needed) + 5. 
transpose to (c, h, w) + """ + + def __init__(self, + mean=(0, 0, 0), + std=(1, 1, 1), + to_rgb=True, + size_divisor=None): + self.mean = np.array(mean, dtype=np.float32) + self.std = np.array(std, dtype=np.float32) + self.to_rgb = to_rgb + self.size_divisor = size_divisor + + def __call__(self, img, scale, flip=False): + img, scale_factor = mmcv.imrescale(img, scale, True) + img_shape = img.shape + img = mmcv.imnorm(img, self.mean, self.std, self.to_rgb) + if flip: + img = mmcv.imflip(img) + if self.size_divisor is not None: + img = mmcv.impad_to_multiple(img, self.size_divisor) + img = img.transpose(2, 0, 1) + return img, img_shape, scale_factor + + # img, scale = cvb.resize_keep_ar(img_or_path, max_long_edge, + # max_short_edge, True) + # shape_scale = np.array(img.shape + (scale, ), dtype=np.float32) + # if flip: + # img = img[:, ::-1, :].copy() + # if self.color_order == 'RGB': + # img = cvb.bgr2rgb(img) + # img = img.astype(np.float32) + # img -= self.color_mean + # img /= self.color_std + # if self.size_divisor is None: + # padded_img = img + # else: + # pad_h = int(np.ceil( + # img.shape[0] / self.size_divisor)) * self.size_divisor + # pad_w = int(np.ceil( + # img.shape[1] / self.size_divisor)) * self.size_divisor + # padded_img = cvb.pad_img(img, (pad_h, pad_w), pad_val=0) + # padded_img = padded_img.transpose(2, 0, 1) + # return padded_img, shape_scale + + +class ImageCrop(object): + """crop image patches and resize patches into fixed size + 1. (read and) flip image (if needed) + 2. crop image patches according to given bboxes + 3. resize patches into fixed size (default 224x224) + 4. normalize the image (if needed) + 5. transpose to (c, h, w) (if needed) + """ + + def __init__(self, + normalize=True, + transpose=True, + color_order='RGB', + color_mean=(0, 0, 0), + color_std=(1, 1, 1)): + self.normalize = normalize + self.transpose = transpose + + assert color_order in ['RGB', 'BGR'] + self.color_order = color_order + self.color_mean = np.array(color_mean, dtype=np.float32) + self.color_std = np.array(color_std, dtype=np.float32) + + def __call__(self, + img_or_path, + bboxes, + crop_size, + scale_ratio=1.0, + flip=False): + img = cvb.read_img(img_or_path) + if flip: + img = img[:, ::-1, :].copy() + crop_imgs = cvb.crop_img( + img, + bboxes[:, :4], + scale_ratio=scale_ratio, + pad_fill=self.color_mean) + processed_crop_imgs_list = [] + for i in range(len(crop_imgs)): + crop_img = crop_imgs[i] + crop_img = cvb.resize(crop_img, crop_size) + crop_img = crop_img.astype(np.float32) + crop_img -= self.color_mean + crop_img /= self.color_std + processed_crop_imgs_list.append(crop_img) + processed_crop_imgs = np.stack(processed_crop_imgs_list, axis=0) + processed_crop_imgs = processed_crop_imgs.transpose(0, 3, 1, 2) + return processed_crop_imgs + + +class BboxTransform(object): + """Preprocess gt bboxes + 1. rescale bboxes according to image size + 2. flip bboxes (if needed) + 3. 
pad the first dimension to `max_num_gts` + """ + + def __init__(self, max_num_gts=None): + self.max_num_gts = max_num_gts + + def __call__(self, bboxes, img_shape, scale_factor, flip=False): + gt_bboxes = bboxes * scale_factor + if flip: + gt_bboxes = mmcv.bbox_flip(gt_bboxes, img_shape) + if self.max_num_gts is None: + return gt_bboxes + else: + num_gts = gt_bboxes.shape[0] + padded_bboxes = np.zeros((self.max_num_gts, 4), dtype=np.float32) + padded_bboxes[:num_gts, :] = gt_bboxes + return padded_bboxes + + +class PolyMaskTransform(object): + + def __init__(self): + pass + + def __call__(self, gt_mask_polys, gt_poly_lens, img_h, img_w, flip=False): + """ + Args: + gt_mask_polys(list): a list of masks, each mask is a list of polys, + each poly is a list of numbers + gt_poly_lens(list): a list of int, indicating the size of each poly + """ + if flip: + gt_mask_polys = segms.flip_segms(gt_mask_polys, img_h, img_w) + num_polys_per_mask = np.array( + [len(mask_polys) for mask_polys in gt_mask_polys], dtype=np.int64) + gt_poly_lens = np.array(gt_poly_lens, dtype=np.int64) + gt_mask_polys = [ + np.concatenate(mask_polys).astype(np.float32) + for mask_polys in gt_mask_polys + ] + gt_mask_polys = np.concatenate(gt_mask_polys) + return gt_mask_polys, gt_poly_lens, num_polys_per_mask + + +class MaskTransform(object): + """Preprocess masks + 1. resize masks to expected size and stack to a single array + 2. flip the masks (if needed) + 3. pad the masks (if needed) + """ + + def __init__(self, max_num_gts, pad_size=None): + self.max_num_gts = max_num_gts + self.pad_size = pad_size + + def __call__(self, masks, img_size, flip=False): + max_long_edge = max(img_size) + max_short_edge = min(img_size) + masks = [ + cvb.resize_keep_ar( + mask, + max_long_edge, + max_short_edge, + interpolation=cvb.INTER_NEAREST) for mask in masks + ] + masks = np.stack(masks, axis=0) + if flip: + masks = masks[:, ::-1, :] + if self.pad_size is None: + pad_h = masks.shape[1] + pad_w = masks.shape[2] + else: + pad_size = self.pad_size if self.pad_size > 0 else max_long_edge + pad_h = pad_w = pad_size + padded_masks = np.zeros( + (self.max_num_gts, pad_h, pad_w), dtype=masks.dtype) + padded_masks[:masks.shape[0], :masks.shape[1], :masks.shape[2]] = masks + return padded_masks + + +class Numpy2Tensor(object): + + def __init__(self): + pass + + def __call__(self, *args): + if len(args) == 1: + return torch.from_numpy(args[0]) + else: + return tuple([torch.from_numpy(array) for array in args]) diff --git a/mmdet/datasets/utils/__init__.py b/mmdet/datasets/utils/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..de3ea43bdf4e4cc526119054954fdd1acf811c38 --- /dev/null +++ b/mmdet/datasets/utils/__init__.py @@ -0,0 +1,2 @@ +from .data_container import DataContainer +from .misc import * diff --git a/mmdet/datasets/utils/data_container.py b/mmdet/datasets/utils/data_container.py new file mode 100644 index 0000000000000000000000000000000000000000..c27beab37bbd28aeb37c1231b8ff94a335702216 --- /dev/null +++ b/mmdet/datasets/utils/data_container.py @@ -0,0 +1,80 @@ +import functools +from collections import Sequence + +import mmcv +import numpy as np +import torch + + +def to_tensor(data): + """Convert objects of various python types to :obj:`torch.Tensor`. + + Supported types are: :class:`numpy.ndarray`, :class:`torch.Tensor`, + :class:`Sequence`, :class:`int` and :class:`float`. 
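The conversion rules implemented below can be summarized by a few checks (a sketch; `to_tensor` is the function that follows):

```python
import numpy as np
import torch

assert to_tensor(np.zeros((2, 3))).shape == (2, 3)   # ndarray -> Tensor
assert to_tensor([1, 2, 3]).tolist() == [1, 2, 3]    # sequence -> Tensor
assert to_tensor(1).dtype == torch.long              # int -> LongTensor([1])
assert to_tensor(0.5).dtype == torch.float32         # float -> FloatTensor([0.5])
```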
+ """ + if isinstance(data, np.ndarray): + return torch.from_numpy(data) + elif isinstance(data, torch.Tensor): + return data + elif isinstance(data, Sequence) and not mmcv.is_str(data): + return torch.tensor(data) + elif isinstance(data, int): + return torch.LongTensor([data]) + elif isinstance(data, float): + return torch.FloatTensor([data]) + else: + raise TypeError('type {} cannot be converted to tensor.'.format( + type(data))) + + +def assert_tensor_type(func): + + @functools.wraps(func) + def wrapper(*args, **kwargs): + if not isinstance(args[0].data, torch.Tensor): + raise AttributeError('{} has no attribute {} for type {}'.format( + args[0].__class__.__name__, func.__name__, args[0].datatype)) + return func(*args, **kwargs) + + return wrapper + + +class DataContainer(object): + + def __init__(self, data, stack=False, padding_value=0): + if isinstance(data, list): + self._data = data + else: + self._data = to_tensor(data) + self._stack = stack + self._padding_value = padding_value + + def __repr__(self): + return '{}({})'.format(self.__class__.__name__, repr(self.data)) + + @property + def data(self): + return self._data + + @property + def datatype(self): + if isinstance(self.data, torch.Tensor): + return self.data.type() + else: + return type(self.data) + + @property + def stack(self): + return self._stack + + @property + def padding_value(self): + return self._padding_value + + @assert_tensor_type + def size(self, *args, **kwargs): + return self.data.size(*args, **kwargs) + + @assert_tensor_type + def dim(self): + return self.data.dim() diff --git a/mmdet/datasets/utils/misc.py b/mmdet/datasets/utils/misc.py new file mode 100644 index 0000000000000000000000000000000000000000..419c11ad08462268b9dfe6b43182a9ec4725b00c --- /dev/null +++ b/mmdet/datasets/utils/misc.py @@ -0,0 +1,62 @@ +import mmcv + +import matplotlib.pyplot as plt +import numpy as np +import pycocotools.mask as maskUtils + + +def random_scale(img_scales, mode='range'): + """Randomly select a scale from a list of scales or scale ranges. + + Args: + img_scales (list[tuple]): Image scale or scale range. + mode (str): "range" or "value". + + Returns: + tuple: Sampled image scale. 
+ """ + num_scales = len(img_scales) + if num_scales == 1: # fixed scale is specified + img_scale = img_scales[0] + elif num_scales == 2: # randomly sample a scale + if mode == 'range': + img_scale_long = [max(s) for s in img_scales] + img_scale_short = [min(s) for s in img_scales] + long_edge = np.random.randint( + min(img_scale_long), + max(img_scale_long) + 1) + short_edge = np.random.randint( + min(img_scale_short), + max(img_scale_short) + 1) + img_scale = (long_edge, short_edge) + elif mode == 'value': + img_scale = img_scales[np.random.randint(num_scales)] + else: + if mode != 'value': + raise ValueError( + 'Only "value" mode supports more than 2 image scales') + img_scale = img_scales[np.random.randint(num_scales)] + return img_scale + + +def show_ann(coco, img, ann_info): + plt.imshow(mmcv.bgr2rgb(img)) + plt.axis('off') + coco.showAnns(ann_info) + plt.show() + + +def draw_bbox_and_segm(img, results, dataset, score_thr=0.5): + bbox_results, segm_results = results + hi_bboxes = [] + for cls_bboxes, cls_segms in zip(bbox_results, segm_results): + if len(cls_bboxes) == 0: + hi_bboxes.append(cls_bboxes) + continue + inds = np.where(cls_bboxes[:, -1] > score_thr)[0] + hi_bboxes.append(cls_bboxes[inds, :]) + color_mask = np.random.random((1, 3)) + for i in inds: + mask = maskUtils.decode(cls_segms[i]).astype(np.bool) + img[mask] = img[mask] * 0.5 + color_mask * 0.5 + mmcv.draw_bboxes_with_label(np.ascontiguousarray(img), hi_bboxes, dataset) diff --git a/mmdet/models/__init__.py b/mmdet/models/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/mmdet/models/backbones/__init__.py b/mmdet/models/backbones/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..f9e21e83d1469167d35de22c6511f6c09c260727 --- /dev/null +++ b/mmdet/models/backbones/__init__.py @@ -0,0 +1 @@ +from .resnet import resnet diff --git a/mmdet/models/backbones/resnet.py b/mmdet/models/backbones/resnet.py new file mode 100644 index 0000000000000000000000000000000000000000..f8203accd4b335886b7ebffd59517bdc8568769e --- /dev/null +++ b/mmdet/models/backbones/resnet.py @@ -0,0 +1,325 @@ +import math +import torch.nn as nn +import torch.utils.checkpoint as cp +from torchpack import load_checkpoint + + +def conv3x3(in_planes, out_planes, stride=1, dilation=1): + "3x3 convolution with padding" + return nn.Conv2d( + in_planes, + out_planes, + kernel_size=3, + stride=stride, + padding=dilation, + dilation=dilation, + bias=False) + + +class BasicBlock(nn.Module): + expansion = 1 + + def __init__(self, + inplanes, + planes, + stride=1, + dilation=1, + downsample=None, + style='fb'): + super(BasicBlock, self).__init__() + self.conv1 = conv3x3(inplanes, planes, stride, dilation) + self.bn1 = nn.BatchNorm2d(planes) + self.relu = nn.ReLU(inplace=True) + self.conv2 = conv3x3(planes, planes) + self.bn2 = nn.BatchNorm2d(planes) + self.downsample = downsample + self.stride = stride + self.dilation = dilation + + def forward(self, x): + residual = x + + out = self.conv1(x) + out = self.bn1(out) + out = self.relu(out) + + out = self.conv2(out) + out = self.bn2(out) + + if self.downsample is not None: + residual = self.downsample(x) + + out += residual + out = self.relu(out) + + return out + + +class Bottleneck(nn.Module): + expansion = 4 + + def __init__(self, + inplanes, + planes, + stride=1, + dilation=1, + downsample=None, + style='fb', + with_cp=False): + """Bottleneck block + if style is "fb", the stride-two layer is the 3x3 conv 
layer, + if style is "msra", the stride-two layer is the first 1x1 conv layer + """ + super(Bottleneck, self).__init__() + assert style in ['fb', 'msra'] + if style == 'fb': + conv1_stride = 1 + conv2_stride = stride + else: + conv1_stride = stride + conv2_stride = 1 + self.conv1 = nn.Conv2d( + inplanes, planes, kernel_size=1, stride=conv1_stride, bias=False) + self.conv2 = nn.Conv2d( + planes, + planes, + kernel_size=3, + stride=conv2_stride, + padding=dilation, + dilation=dilation, + bias=False) + + self.bn1 = nn.BatchNorm2d(planes) + self.bn2 = nn.BatchNorm2d(planes) + self.conv3 = nn.Conv2d( + planes, planes * self.expansion, kernel_size=1, bias=False) + self.bn3 = nn.BatchNorm2d(planes * self.expansion) + self.relu = nn.ReLU(inplace=True) + self.downsample = downsample + self.stride = stride + self.dilation = dilation + self.with_cp = with_cp + + def forward(self, x): + + def _inner_forward(x): + residual = x + + out = self.conv1(x) + out = self.bn1(out) + out = self.relu(out) + + out = self.conv2(out) + out = self.bn2(out) + out = self.relu(out) + + out = self.conv3(out) + out = self.bn3(out) + + if self.downsample is not None: + residual = self.downsample(x) + + out += residual + + return out + + if self.with_cp and x.requires_grad: + out = cp.checkpoint(_inner_forward, x) + else: + out = _inner_forward(x) + + out = self.relu(out) + + return out + + +def make_res_layer(block, + inplanes, + planes, + blocks, + stride=1, + dilation=1, + style='fb', + with_cp=False): + downsample = None + if stride != 1 or inplanes != planes * block.expansion: + downsample = nn.Sequential( + nn.Conv2d( + inplanes, + planes * block.expansion, + kernel_size=1, + stride=stride, + bias=False), + nn.BatchNorm2d(planes * block.expansion), + ) + + layers = [] + layers.append( + block( + inplanes, + planes, + stride, + dilation, + downsample, + style=style, + with_cp=with_cp)) + inplanes = planes * block.expansion + for i in range(1, blocks): + layers.append( + block(inplanes, planes, 1, dilation, style=style, with_cp=with_cp)) + + return nn.Sequential(*layers) + + +class ResHead(nn.Module): + + def __init__(self, block, num_blocks, stride=2, dilation=1, style='fb'): + self.layer4 = make_res_layer( + block, + 1024, + 512, + num_blocks, + stride=stride, + dilation=dilation, + style=style) + + def forward(self, x): + return self.layer4(x) + + +class ResNet(nn.Module): + + def __init__(self, + block, + layers, + strides=(1, 2, 2, 2), + dilations=(1, 1, 1, 1), + out_indices=(0, 1, 2, 3), + frozen_stages=-1, + style='fb', + sync_bn=False, + with_cp=False): + super(ResNet, self).__init__() + if not len(layers) == len(strides) == len(dilations): + raise ValueError( + 'The number of layers, strides and dilations must be equal, ' + 'but found have {} layers, {} strides and {} dilations'.format( + len(layers), len(strides), len(dilations))) + assert max(out_indices) < len(layers) + self.out_indices = out_indices + self.frozen_stages = frozen_stages + self.style = style + self.sync_bn = sync_bn + self.inplanes = 64 + self.conv1 = nn.Conv2d( + 3, 64, kernel_size=7, stride=2, padding=3, bias=False) + self.bn1 = nn.BatchNorm2d(64) + self.relu = nn.ReLU(inplace=True) + self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) + self.res_layers = [] + for i, num_blocks in enumerate(layers): + + stride = strides[i] + dilation = dilations[i] + + layer_name = 'layer{}'.format(i + 1) + planes = 64 * 2**i + res_layer = make_res_layer( + block, + self.inplanes, + planes, + num_blocks, + stride=stride, + dilation=dilation, + 
style=self.style, + with_cp=with_cp) + self.inplanes = planes * block.expansion + setattr(self, layer_name, res_layer) + self.res_layers.append(layer_name) + self.feat_dim = block.expansion * 64 * 2**(len(layers) - 1) + self.with_cp = with_cp + + def init_weights(self, pretrained=None): + if isinstance(pretrained, str): + load_checkpoint(self, pretrained, strict=False) + elif pretrained is None: + for m in self.modules(): + if isinstance(m, nn.Conv2d): + n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels + nn.init.normal_(m.weight, 0, math.sqrt(2. / n)) + elif isinstance(m, nn.BatchNorm2d): + nn.init.constant_(m.weight, 1) + nn.init.constant_(m.bias, 0) + else: + raise TypeError('pretrained must be a str or None') + + def forward(self, x): + x = self.conv1(x) + x = self.bn1(x) + x = self.relu(x) + x = self.maxpool(x) + outs = [] + for i, layer_name in enumerate(self.res_layers): + res_layer = getattr(self, layer_name) + x = res_layer(x) + if i in self.out_indices: + outs.append(x) + if len(outs) == 1: + return outs[0] + else: + return tuple(outs) + + def train(self, mode=True): + super(ResNet, self).train(mode) + if not self.sync_bn: + for m in self.modules(): + if isinstance(m, nn.BatchNorm2d): + m.eval() + if mode and self.frozen_stages >= 0: + for param in self.conv1.parameters(): + param.requires_grad = False + for param in self.bn1.parameters(): + param.requires_grad = False + self.bn1.eval() + self.bn1.weight.requires_grad = False + self.bn1.bias.requires_grad = False + for i in range(1, self.frozen_stages + 1): + mod = getattr(self, 'layer{}'.format(i)) + mod.eval() + for param in mod.parameters(): + param.requires_grad = False + + +resnet_cfg = { + 18: (BasicBlock, (2, 2, 2, 2)), + 34: (BasicBlock, (3, 4, 6, 3)), + 50: (Bottleneck, (3, 4, 6, 3)), + 101: (Bottleneck, (3, 4, 23, 3)), + 152: (Bottleneck, (3, 8, 36, 3)) +} + + +def resnet(depth, + num_stages=4, + strides=(1, 2, 2, 2), + dilations=(1, 1, 1, 1), + out_indices=(2, ), + frozen_stages=-1, + style='fb', + sync_bn=False, + with_cp=False): + """Constructs a ResNet model. 
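A hedged construction example for the factory below (input size invented; `out_indices` counts stages from 0):

```python
import torch

backbone = resnet(depth=50, out_indices=(0, 1, 2, 3), frozen_stages=1)
backbone.init_weights(pretrained=None)
feats = backbone(torch.randn(1, 3, 224, 224))
# feats is a tuple of 4 maps with strides 4, 8, 16, 32 and
# channels 256, 512, 1024, 2048 (Bottleneck expansion = 4)
```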
+ + Args: + depth (int): depth of resnet, from {18, 34, 50, 101, 152} + num_stages (int): num of resnet stages, normally 4 + strides (list): strides of the first block of each stage + dilations (list): dilation of each stage + out_indices (list): output from which stages + """ + if depth not in resnet_cfg: + raise KeyError('invalid depth {} for resnet'.format(depth)) + block, layers = resnet_cfg[depth] + model = ResNet(block, layers[:num_stages], strides, dilations, out_indices, + frozen_stages, style, sync_bn, with_cp) + return model diff --git a/mmdet/models/bbox_heads/__init__.py b/mmdet/models/bbox_heads/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e6709af6176d5d574bf7f4a5bdf8e67691787536 --- /dev/null +++ b/mmdet/models/bbox_heads/__init__.py @@ -0,0 +1,3 @@ +from .bbox_head import BBoxHead + +__all__ = ['BBoxHead'] diff --git a/mmdet/models/bbox_heads/bbox_head.py b/mmdet/models/bbox_heads/bbox_head.py new file mode 100644 index 0000000000000000000000000000000000000000..9f0c188a459286ee5c0e5ab71f8305da0d1ab761 --- /dev/null +++ b/mmdet/models/bbox_heads/bbox_head.py @@ -0,0 +1,123 @@ +import torch.nn as nn +import torch.nn.functional as F + +from mmdet.core import (bbox_transform_inv, bbox_target, multiclass_nms, + weighted_cross_entropy, weighted_smoothl1, accuracy) + + +class BBoxHead(nn.Module): + """Simplest RoI head, with only two fc layers for classification and + regression respectively""" + + def __init__(self, + exclude_mal_box=True, + with_avg_pool=False, + with_cls=True, + with_reg=True, + roi_feat_size=7, + in_channels=256, + num_classes=81, + target_means=[0., 0., 0., 0.], + target_stds=[0.1, 0.1, 0.2, 0.2], + reg_class_agnostic=False): + super(BBoxHead, self).__init__() + assert with_cls or with_reg + self.with_avg_pool = with_avg_pool + self.with_cls = with_cls + self.with_reg = with_reg + self.roi_feat_size = roi_feat_size + self.in_channels = in_channels + self.num_classes = num_classes + self.target_means = target_means + self.target_stds = target_stds + self.reg_class_agnostic = reg_class_agnostic + self.exclude_mal_box = exclude_mal_box + + in_channels = self.in_channels + if self.with_avg_pool: + self.avg_pool = nn.AvgPool2d(roi_feat_size) + else: + in_channels *= (self.roi_feat_size * self.roi_feat_size) + if self.with_cls: + self.fc_cls = nn.Linear(in_channels, num_classes) + if self.with_reg: + out_dim_reg = 4 if reg_class_agnostic else 4 * num_classes + self.fc_reg = nn.Linear(in_channels, out_dim_reg) + self.debug_imgs = None + + def init_weights(self): + if self.with_cls: + nn.init.normal_(self.fc_cls.weight, 0, 0.01) + nn.init.constant_(self.fc_cls.bias, 0) + if self.with_reg: + nn.init.normal_(self.fc_reg.weight, 0, 0.001) + nn.init.constant_(self.fc_reg.bias, 0) + + def forward(self, x): + if self.with_avg_pool: + x = self.avg_pool(x) + x = x.view(x.size(0), -1) + cls_score = self.fc_cls(x) if self.with_cls else None + bbox_pred = self.fc_reg(x) if self.with_reg else None + return cls_score, bbox_pred + + def bbox_target(self, pos_proposals, neg_proposals, pos_gt_bboxes, + pos_gt_labels, rcnn_train_cfg): + reg_num_classes = 1 if self.reg_class_agnostic else self.num_classes + cls_reg_targets = bbox_target( + pos_proposals, + neg_proposals, + pos_gt_bboxes, + pos_gt_labels, + self.target_means, + self.target_stds, + rcnn_train_cfg, + reg_num_classes, + debug_imgs=self.debug_imgs) + return cls_reg_targets + + def loss(self, cls_score, bbox_pred, labels, label_weights, bbox_targets, + bbox_weights): + losses = dict() + if 
cls_score is not None: + losses['loss_cls'] = weighted_cross_entropy( + cls_score, labels, label_weights) + losses['acc'] = accuracy(cls_score, labels) + if bbox_pred is not None: + losses['loss_reg'] = weighted_smoothl1( + bbox_pred, + bbox_targets, + bbox_weights, + ave_factor=bbox_targets.size(0)) + return losses + + def get_det_bboxes(self, + rois, + cls_score, + bbox_pred, + img_shape, + rescale=False, + nms_cfg=None): + if isinstance(cls_score, list): + cls_score = sum(cls_score) / float(len(cls_score)) + scores = F.softmax(cls_score, dim=1) if cls_score is not None else None + + if bbox_pred is not None: + bboxes = bbox_transform_inv(rois[:, 1:], bbox_pred, + self.target_means, self.target_stds, + img_shape) + else: + bboxes = rois[:, 1:] + # TODO: add clip here + + if rescale: + bboxes /= img_shape[-1] + + if nms_cfg is None: + return bboxes, scores + else: + det_bboxes, det_labels = multiclass_nms( + bboxes, scores, nms_cfg.score_thr, nms_cfg.nms_thr, + nms_cfg.max_per_img) + + return det_bboxes, det_labels diff --git a/mmdet/models/builder.py b/mmdet/models/builder.py new file mode 100644 index 0000000000000000000000000000000000000000..f109d851397a5106c33d173eda8986ee1c0f8b06 --- /dev/null +++ b/mmdet/models/builder.py @@ -0,0 +1,47 @@ +import mmcv +from torch import nn + +from . import (backbones, necks, roi_extractors, rpn_heads, bbox_heads, + mask_heads) + +__all__ = [ + 'build_backbone', 'build_neck', 'build_rpn_head', 'build_roi_extractor', + 'build_bbox_head', 'build_mask_head' +] + + +def _build_module(cfg, parrent=None): + return cfg if isinstance(cfg, nn.Module) else mmcv.obj_from_dict( + cfg, parrent) + + +def build(cfg, parrent=None): + if isinstance(cfg, list): + modules = [_build_module(cfg_, parrent) for cfg_ in cfg] + return nn.Sequential(*modules) + else: + return _build_module(cfg, parrent) + + +def build_backbone(cfg): + return build(cfg, backbones) + + +def build_neck(cfg): + return build(cfg, necks) + + +def build_rpn_head(cfg): + return build(cfg, rpn_heads) + + +def build_roi_extractor(cfg): + return build(cfg, roi_extractors) + + +def build_bbox_head(cfg): + return build(cfg, bbox_heads) + + +def build_mask_head(cfg): + return build(cfg, mask_heads) diff --git a/mmdet/models/common/__init__.py b/mmdet/models/common/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..1a611c251065f2addc6c069d61c7e1f18fbd7da2 --- /dev/null +++ b/mmdet/models/common/__init__.py @@ -0,0 +1,4 @@ +from .conv_module import ConvModule +from .norm import build_norm_layer + +__all__ = ['ConvModule', 'build_norm_layer'] diff --git a/mmdet/models/common/conv_module.py b/mmdet/models/common/conv_module.py new file mode 100644 index 0000000000000000000000000000000000000000..25121972da29d8e4e83fb2301b8f8d25a1727f7e --- /dev/null +++ b/mmdet/models/common/conv_module.py @@ -0,0 +1,95 @@ +import warnings + +import torch.nn as nn + +from .norm import build_norm_layer + + +class ConvModule(nn.Module): + + def __init__(self, + in_channels, + out_channels, + kernel_size, + stride=1, + padding=0, + dilation=1, + groups=1, + bias=True, + normalize=None, + activation='relu', + inplace=True, + activate_last=True): + super(ConvModule, self).__init__() + self.with_norm = normalize is not None + self.with_activatation = activation is not None + self.with_bias = bias + self.activation = activation + self.activate_last = activate_last + + if self.with_norm and self.with_bias: + warnings.warn('ConvModule has norm and bias at the same time') + + self.conv = nn.Conv2d( + 
in_channels, + out_channels, + kernel_size, + stride, + padding, + dilation, + groups, + bias=bias) + + self.in_channels = self.conv.in_channels + self.out_channels = self.conv.out_channels + self.kernel_size = self.conv.kernel_size + self.stride = self.conv.stride + self.padding = self.conv.padding + self.dilation = self.conv.dilation + self.transposed = self.conv.transposed + self.output_padding = self.conv.output_padding + self.groups = self.conv.groups + + if self.with_norm: + # self.norm_type, self.norm_params = parse_norm(normalize) + # assert self.norm_type in [None, 'BN', 'SyncBN', 'GN', 'SN'] + # self.Norm2d = norm_cfg[self.norm_type] + if self.activate_last: + self.norm = build_norm_layer(normalize, out_channels) + # self.norm = self.Norm2d(out_channels, **self.norm_params) + else: + self.norm = build_norm_layer(normalize, in_channels) + # self.norm = self.Norm2d(in_channels, **self.norm_params) + + if self.with_activatation: + assert activation in ['relu'], 'Only ReLU supported.' + if self.activation == 'relu': + self.activate = nn.ReLU(inplace=inplace) + + # Default using msra init + self.init_weights() + + def init_weights(self): + nonlinearity = 'relu' if self.activation is None else self.activation + nn.init.kaiming_normal_( + self.conv.weight, mode='fan_out', nonlinearity=nonlinearity) + if self.with_bias: + nn.init.constant_(self.conv.bias, 0) + if self.with_norm: + nn.init.constant_(self.norm.weight, 1) + nn.init.constant_(self.norm.bias, 0) + + def forward(self, x, activate=True, norm=True): + if self.activate_last: + x = self.conv(x) + if norm and self.with_norm: + x = self.norm(x) + if activate and self.with_activatation: + x = self.activate(x) + else: + if norm and self.with_norm: + x = self.norm(x) + if activate and self.with_activatation: + x = self.activate(x) + x = self.conv(x) + return x diff --git a/mmdet/models/common/norm.py b/mmdet/models/common/norm.py new file mode 100644 index 0000000000000000000000000000000000000000..7b82cd046e82e8ece24c5552687ae2952cfd9932 --- /dev/null +++ b/mmdet/models/common/norm.py @@ -0,0 +1,17 @@ +import torch.nn as nn + +norm_cfg = {'BN': nn.BatchNorm2d, 'SyncBN': None, 'GN': None} + + +def build_norm_layer(cfg, num_features): + assert isinstance(cfg, dict) and 'type' in cfg + cfg_ = cfg.copy() + cfg_.setdefault('eps', 1e-5) + layer_type = cfg_.pop('type') + + if layer_type not in norm_cfg: + raise KeyError('Unrecognized norm type {}'.format(layer_type)) + elif norm_cfg[layer_type] is None: + raise NotImplementedError + + return norm_cfg[layer_type](num_features, **cfg_) diff --git a/mmdet/models/detectors/__init__.py b/mmdet/models/detectors/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/mmdet/models/detectors/rpn.py b/mmdet/models/detectors/rpn.py new file mode 100644 index 0000000000000000000000000000000000000000..6d80c9d9b10a12c07155f11ab00b24542f805cc6 --- /dev/null +++ b/mmdet/models/detectors/rpn.py @@ -0,0 +1,100 @@ +import torch.nn as nn + +from mmdet.core import tensor2imgs, merge_aug_proposals, bbox_mapping +from .. 
import builder + + +class RPN(nn.Module): + + def __init__(self, + backbone, + neck, + rpn_head, + rpn_train_cfg, + rpn_test_cfg, + pretrained=None): + super(RPN, self).__init__() + self.backbone = builder.build_backbone(backbone) + self.neck = builder.build_neck(neck) if neck is not None else None + self.rpn_head = builder.build_rpn_head(rpn_head) + self.rpn_train_cfg = rpn_train_cfg + self.rpn_test_cfg = rpn_test_cfg + self.init_weights(pretrained=pretrained) + + def init_weights(self, pretrained=None): + if pretrained is not None: + print('load model from: {}'.format(pretrained)) + self.backbone.init_weights(pretrained=pretrained) + if self.neck is not None: + self.neck.init_weights() + self.rpn_head.init_weights() + + def forward(self, + img, + img_meta, + gt_bboxes=None, + return_loss=True, + return_bboxes=False, + rescale=False): + if not return_loss: + return self.test(img, img_meta, rescale) + + img_shapes = img_meta['shape_scale'] + + if self.rpn_train_cfg.get('debug', False): + self.rpn_head.debug_imgs = tensor2imgs(img) + + x = self.backbone(img) + if self.neck is not None: + x = self.neck(x) + rpn_outs = self.rpn_head(x) + + rpn_loss_inputs = rpn_outs + (gt_bboxes, img_shapes, + self.rpn_train_cfg) + losses = self.rpn_head.loss(*rpn_loss_inputs) + return losses + + def test(self, imgs, img_metas, rescale=False): + """Test w/ or w/o augmentations.""" + assert isinstance(imgs, list) and isinstance(img_metas, list) + assert len(imgs) == len(img_metas) + img_per_gpu = imgs[0].size(0) + assert img_per_gpu == 1 + if len(imgs) == 1: + return self.simple_test(imgs[0], img_metas[0], rescale) + else: + return self.aug_test(imgs, img_metas, rescale) + + def simple_test(self, img, img_meta, rescale=False): + img_shapes = img_meta['shape_scale'] + # get feature maps + x = self.backbone(img) + if self.neck is not None: + x = self.neck(x) + rpn_outs = self.rpn_head(x) + proposal_inputs = rpn_outs + (img_shapes, self.rpn_test_cfg) + proposals = self.rpn_head.get_proposals(*proposal_inputs)[0] + if rescale: + proposals[:, :4] /= img_shapes[0][-1] + return proposals.cpu().numpy() + + def aug_test(self, imgs, img_metas, rescale=False): + aug_proposals = [] + for img, img_meta in zip(imgs, img_metas): + x = self.backbone(img) + if self.neck is not None: + x = self.neck(x) + rpn_outs = self.rpn_head(x) + proposal_inputs = rpn_outs + (img_meta['shape_scale'], + self.rpn_test_cfg) + proposal_list = self.rpn_head.get_proposals(*proposal_inputs) + assert len(proposal_list) == 1 + aug_proposals.append(proposal_list[0]) # len(proposal_list) = 1 + merged_proposals = merge_aug_proposals(aug_proposals, img_metas, + self.rpn_test_cfg) + if not rescale: + img_shape = img_metas[0]['shape_scale'][0] + flip = img_metas[0]['flip'][0] + merged_proposals[:, :4] = bbox_mapping(merged_proposals[:, :4], + img_shape, flip) + return merged_proposals.cpu().numpy() diff --git a/mmdet/models/detectors/two_stage.py b/mmdet/models/detectors/two_stage.py new file mode 100644 index 0000000000000000000000000000000000000000..0c057d606fba6c322733490591d5352a42b426a5 --- /dev/null +++ b/mmdet/models/detectors/two_stage.py @@ -0,0 +1,329 @@ +import torch +import torch.nn as nn + +from .. 
import builder
+from mmdet.core.utils import tensor2imgs
+from mmdet.core import (bbox2roi, bbox_mapping, split_combined_gt_polys,
+                        bbox_sampling, multiclass_nms, merge_aug_proposals,
+                        merge_aug_bboxes, merge_aug_masks, bbox2result)
+
+
+class TwoStageDetector(nn.Module):
+
+    def __init__(self,
+                 backbone,
+                 neck,
+                 rpn_head,
+                 roi_block,
+                 bbox_head,
+                 rpn_train_cfg,
+                 rpn_test_cfg,
+                 rcnn_train_cfg,
+                 rcnn_test_cfg,
+                 mask_block=None,
+                 mask_head=None,
+                 pretrained=None):
+        super(TwoStageDetector, self).__init__()
+        self.backbone = builder.build_backbone(backbone)
+        self.neck = builder.build_neck(neck) if neck is not None else None
+        self.rpn_head = builder.build_rpn_head(rpn_head)
+        self.bbox_roi_extractor = builder.build_roi_extractor(roi_block)
+        self.bbox_head = builder.build_bbox_head(bbox_head)
+        self.mask_roi_extractor = builder.build_roi_extractor(mask_block) if (
+            mask_block is not None) else None
+        self.mask_head = builder.build_mask_head(mask_head) if (
+            mask_head is not None) else None
+        self.with_mask = False if self.mask_head is None else True
+
+        self.rpn_train_cfg = rpn_train_cfg
+        self.rpn_test_cfg = rpn_test_cfg
+        self.rcnn_train_cfg = rcnn_train_cfg
+        self.rcnn_test_cfg = rcnn_test_cfg
+        self.init_weights(pretrained=pretrained)
+
+    def init_weights(self, pretrained=None):
+        if pretrained is not None:
+            print('load model from: {}'.format(pretrained))
+        self.backbone.init_weights(pretrained=pretrained)
+        if self.neck is not None:
+            if isinstance(self.neck, nn.Sequential):
+                for m in self.neck:
+                    m.init_weights()
+            else:
+                self.neck.init_weights()
+        self.rpn_head.init_weights()
+        self.bbox_roi_extractor.init_weights()
+        self.bbox_head.init_weights()
+        if self.mask_roi_extractor is not None:
+            self.mask_roi_extractor.init_weights()
+        if self.mask_head is not None:
+            self.mask_head.init_weights()
+
+    def forward(self,
+                img,
+                img_meta,
+                gt_bboxes=None,
+                gt_labels=None,
+                gt_ignore=None,
+                gt_polys=None,
+                gt_poly_lens=None,
+                num_polys_per_mask=None,
+                return_loss=True,
+                return_bboxes=False,
+                rescale=False):
+        if not return_loss:
+            return self.test(img, img_meta, rescale)
+
+        if not self.with_mask:
+            assert (gt_polys is None and gt_poly_lens is None
+                    and num_polys_per_mask is None)
+        else:
+            assert (gt_polys is not None and gt_poly_lens is not None
+                    and num_polys_per_mask is not None)
+            gt_polys = split_combined_gt_polys(gt_polys, gt_poly_lens,
+                                               num_polys_per_mask)
+
+        if self.rpn_train_cfg.get('debug', False):
+            self.rpn_head.debug_imgs = tensor2imgs(img)
+        if self.rcnn_train_cfg.get('debug', False):
+            self.bbox_head.debug_imgs = tensor2imgs(img)
+            if self.mask_head is not None:
+                self.mask_head.debug_imgs = tensor2imgs(img)
+
+        img_shapes = img_meta['shape_scale']
+
+        x = self.backbone(img)
+        if self.neck is not None:
+            x = self.neck(x)
+
+        rpn_outs = self.rpn_head(x)
+        proposal_inputs = rpn_outs + (img_shapes, self.rpn_test_cfg)
+        proposal_list = self.rpn_head.get_proposals(*proposal_inputs)
+
+        (pos_inds, neg_inds, pos_proposals, neg_proposals,
+         pos_assigned_gt_inds, pos_gt_bboxes, pos_gt_labels) = bbox_sampling(
+             proposal_list, gt_bboxes, gt_ignore, gt_labels,
+             self.rcnn_train_cfg)
+
+        labels, label_weights, bbox_targets, bbox_weights = \
+            self.bbox_head.proposal_target(
+                pos_proposals, neg_proposals, pos_gt_bboxes, pos_gt_labels,
+                self.rcnn_train_cfg)
+
+        rois = bbox2roi([
+            torch.cat([pos, neg], dim=0)
+            for pos, neg in zip(pos_proposals, neg_proposals)
+        ])
+        # TODO: a more flexible way to configure feat maps
+        roi_feats = self.bbox_roi_extractor(
x[:self.bbox_roi_extractor.num_inputs], rois) + cls_score, bbox_pred = self.bbox_head(roi_feats) + + losses = dict() + rpn_loss_inputs = rpn_outs + (gt_bboxes, img_shapes, + self.rpn_train_cfg) + rpn_losses = self.rpn_head.loss(*rpn_loss_inputs) + losses.update(rpn_losses) + + loss_bbox = self.bbox_head.loss(cls_score, bbox_pred, labels, + label_weights, bbox_targets, + bbox_weights) + losses.update(loss_bbox) + + if self.with_mask: + mask_targets = self.mask_head.mask_target( + pos_proposals, pos_assigned_gt_inds, gt_polys, img_shapes, + self.rcnn_train_cfg) + pos_rois = bbox2roi(pos_proposals) + mask_feats = self.mask_roi_extractor( + x[:self.mask_roi_extractor.num_inputs], pos_rois) + mask_pred = self.mask_head(mask_feats) + losses['loss_mask'] = self.mask_head.loss(mask_pred, mask_targets, + torch.cat(pos_gt_labels)) + return losses + + def test(self, imgs, img_metas, rescale=False): + """Test w/ or w/o augmentations.""" + assert isinstance(imgs, list) and isinstance(img_metas, list) + assert len(imgs) == len(img_metas) + img_per_gpu = imgs[0].size(0) + assert img_per_gpu == 1 + if len(imgs) == 1: + return self.simple_test(imgs[0], img_metas[0], rescale) + else: + return self.aug_test(imgs, img_metas, rescale) + + def simple_test_bboxes(self, x, img_meta, rescale=False): + """Test only det bboxes without augmentation.""" + + img_shapes = img_meta['shape_scale'] + rpn_outs = self.rpn_head(x) + proposal_inputs = rpn_outs + (img_shapes, self.rpn_test_cfg) + proposal_list = self.rpn_head.get_proposals(*proposal_inputs) + + rois = bbox2roi(proposal_list) + roi_feats = self.bbox_roi_extractor( + x[:len(self.bbox_roi_extractor.featmap_strides)], rois) + cls_score, bbox_pred = self.bbox_head(roi_feats) + # image shape of the first image in the batch (only one) + img_shape = img_shapes[0] + det_bboxes, det_labels = self.bbox_head.get_det_bboxes( + rois, + cls_score, + bbox_pred, + img_shape, + rescale=rescale, + nms_cfg=self.rcnn_test_cfg) + return det_bboxes, det_labels + + def simple_test_mask(self, + x, + img_meta, + det_bboxes, + det_labels, + rescale=False): + # image shape of the first image in the batch (only one) + img_shape = img_meta['shape_scale'][0] + if det_bboxes.shape[0] == 0: + segm_result = [[] for _ in range(self.mask_head.num_classes - 1)] + else: + # if det_bboxes is rescaled to the original image size, we need to + # rescale it back to the testing scale to obtain RoIs. + _bboxes = (det_bboxes[:, :4] * img_shape[-1] + if rescale else det_bboxes) + mask_rois = bbox2roi([_bboxes]) + mask_feats = self.mask_roi_extractor( + x[:len(self.mask_roi_extractor.featmap_strides)], mask_rois) + mask_pred = self.mask_head(mask_feats) + segm_result = self.mask_head.get_seg_masks( + mask_pred, det_bboxes, det_labels, img_shape, + self.rcnn_test_cfg, rescale) + return segm_result + + def simple_test(self, img, img_meta, rescale=False): + """Test without augmentation.""" + # get feature maps + x = self.backbone(img) + if self.neck is not None: + x = self.neck(x) + det_bboxes, det_labels = self.simple_test_bboxes( + x, img_meta, rescale=rescale) + bbox_result = bbox2result(det_bboxes, det_labels, + self.bbox_head.num_classes) + if not self.with_mask: + return bbox_result + + segm_result = self.simple_test_mask( + x, img_meta, det_bboxes, det_labels, rescale=rescale) + + return bbox_result, segm_result + + def aug_test_bboxes(self, imgs, img_metas): + """Test with augmentations for det bboxes.""" + # step 1: get RPN proposals for augmented images, apply NMS to the + # union of all proposals. 
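Note: merge_aug_proposals is imported from mmdet.core but not defined in this patch. A minimal sketch of the step-1 merge described above, assuming every entry of aug_proposals is an (n, 5) tensor of [x1, y1, x2, y2, score] already mapped back to the original image frame (the real helper also consumes img_metas to undo flips and rescaling), reusing the nms wrapper from mmdet.ops:

    import torch

    from mmdet.ops import nms

    def merge_aug_proposals_sketch(aug_proposals, rpn_test_cfg):
        # Union of all augmented proposals, deduplicated by NMS; keep the
        # top max_num survivors by score. Illustrative only.
        merged = torch.cat(aug_proposals, dim=0)
        keep = nms(merged, rpn_test_cfg.nms_thr)
        merged = merged[keep, :]
        num = min(rpn_test_cfg.max_num, merged.shape[0])
        _, order = merged[:, 4].sort(0, descending=True)
        return merged[order[:num], :]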
+ aug_proposals = [] + for img, img_meta in zip(imgs, img_metas): + x = self.backbone(img) + if self.neck is not None: + x = self.neck(x) + rpn_outs = self.rpn_head(x) + proposal_inputs = rpn_outs + (img_meta['shape_scale'], + self.rpn_test_cfg) + proposal_list = self.rpn_head.get_proposals(*proposal_inputs) + assert len(proposal_list) == 1 + aug_proposals.append(proposal_list[0]) # len(proposal_list) = 1 + # after merging, proposals will be rescaled to the original image size + merged_proposals = merge_aug_proposals(aug_proposals, img_metas, + self.rpn_test_cfg) + # step 2: Given merged proposals, predict bboxes for augmented images, + # output the union of these bboxes. + aug_bboxes = [] + aug_scores = [] + for img, img_meta in zip(imgs, img_metas): + # only one image in the batch + img_shape = img_meta['shape_scale'][0] + flip = img_meta['flip'][0] + proposals = bbox_mapping(merged_proposals[:, :4], img_shape, flip) + rois = bbox2roi([proposals]) + # recompute feature maps to save GPU memory + x = self.backbone(img) + if self.neck is not None: + x = self.neck(x) + roi_feats = self.bbox_roi_extractor( + x[:len(self.bbox_roi_extractor.featmap_strides)], rois) + cls_score, bbox_pred = self.bbox_head(roi_feats) + bboxes, scores = self.bbox_head.get_det_bboxes( + rois, + cls_score, + bbox_pred, + img_shape, + rescale=False, + nms_cfg=None) + aug_bboxes.append(bboxes) + aug_scores.append(scores) + # after merging, bboxes will be rescaled to the original image size + merged_bboxes, merged_scores = merge_aug_bboxes( + aug_bboxes, aug_scores, img_metas, self.rcnn_test_cfg) + det_bboxes, det_labels = multiclass_nms( + merged_bboxes, merged_scores, self.rcnn_test_cfg.score_thr, + self.rcnn_test_cfg.nms_thr, self.rcnn_test_cfg.max_per_img) + return det_bboxes, det_labels + + def aug_test_mask(self, + imgs, + img_metas, + det_bboxes, + det_labels, + rescale=False): + # step 3: Given merged bboxes, predict masks for augmented images, + # scores of masks are averaged across augmented images. + if rescale: + _det_bboxes = det_bboxes + else: + _det_bboxes = det_bboxes.clone() + _det_bboxes[:, :4] *= img_metas[0]['shape_scale'][0][-1] + if det_bboxes.shape[0] == 0: + segm_result = [[] for _ in range(self.mask_head.num_classes - 1)] + else: + aug_masks = [] + for img, img_meta in zip(imgs, img_metas): + img_shape = img_meta['shape_scale'][0] + flip = img_meta['flip'][0] + _bboxes = bbox_mapping(det_bboxes[:, :4], img_shape, flip) + mask_rois = bbox2roi([_bboxes]) + x = self.backbone(img) + if self.neck is not None: + x = self.neck(x) + mask_feats = self.mask_roi_extractor( + x[:len(self.mask_roi_extractor.featmap_strides)], + mask_rois) + mask_pred = self.mask_head(mask_feats) + # convert to numpy array to save memory + aug_masks.append(mask_pred.sigmoid().cpu().numpy()) + merged_masks = merge_aug_masks(aug_masks, img_metas, + self.rcnn_test_cfg) + segm_result = self.mask_head.get_seg_masks( + merged_masks, _det_bboxes, det_labels, + img_metas[0]['shape_scale'][0], self.rcnn_test_cfg, rescale) + return segm_result + + def aug_test(self, imgs, img_metas, rescale=False): + """Test with augmentations. + If rescale is False, then returned bboxes and masks will fit the scale + if imgs[0]. 
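Note: bbox_mapping and its inverse, used throughout the augmented test paths above, project boxes between the original frame and an augmented view via a rescale plus an optional horizontal flip. A sketch of the flip half under that convention, assuming img_shape carries (h, w, scale) and using the same inclusive-coordinate (w - 1 - x) arithmetic as the rest of this patch:

    import torch

    def flip_boxes_sketch(bboxes, img_shape):
        # Mirror [x1, y1, x2, y2] boxes horizontally inside an image of
        # width img_shape[1]; x1 and x2 swap roles under the flip.
        w = img_shape[1]
        flipped = bboxes.clone()
        flipped[:, 0] = w - 1 - bboxes[:, 2]
        flipped[:, 2] = w - 1 - bboxes[:, 0]
        return flipped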
+ """ + # aug test det bboxes + det_bboxes, det_labels = self.aug_test_bboxes(imgs, img_metas) + if rescale: + _det_bboxes = det_bboxes + else: + _det_bboxes = det_bboxes.clone() + _det_bboxes[:, :4] *= img_metas[0]['shape_scale'][0][-1] + bbox_result = bbox2result(_det_bboxes, det_labels, + self.bbox_head.num_classes) + if not self.with_mask: + return bbox_result + segm_result = self.aug_test_mask( + imgs, img_metas, det_bboxes, det_labels, rescale=rescale) + return bbox_result, segm_result diff --git a/mmdet/models/mask_heads/__init__.py b/mmdet/models/mask_heads/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..a21ae9add5a78d23781bf36a696b28606e19b0ce --- /dev/null +++ b/mmdet/models/mask_heads/__init__.py @@ -0,0 +1,3 @@ +from .fcn_mask_head import FCNMaskHead + +__all__ = ['FCNMaskHead'] diff --git a/mmdet/models/mask_heads/fcn_mask_head.py b/mmdet/models/mask_heads/fcn_mask_head.py new file mode 100644 index 0000000000000000000000000000000000000000..28865a68f006a4cd04753a1eb6caeda9ce3fc284 --- /dev/null +++ b/mmdet/models/mask_heads/fcn_mask_head.py @@ -0,0 +1,175 @@ +import mmcv +import numpy as np +import pycocotools.mask as mask_util +import torch +import torch.nn as nn +import torch.utils.checkpoint as cp + +from ..common import ConvModule +from mmdet.core import mask_target, mask_cross_entropy + + +class FCNMaskHead(nn.Module): + + def __init__(self, + num_convs=4, + roi_feat_size=14, + in_channels=256, + conv_kernel_size=3, + conv_out_channels=256, + upsample_method='deconv', + upsample_ratio=2, + num_classes=81, + class_agnostic=False, + with_cp=False, + normalize=None): + super(FCNMaskHead, self).__init__() + if upsample_method not in [None, 'deconv', 'nearest', 'bilinear']: + raise ValueError( + 'Invalid upsample method {}, accepted methods ' + 'are "deconv", "nearest", "bilinear"'.format(upsample_method)) + self.num_convs = num_convs + self.roi_feat_size = roi_feat_size # WARN: not used and reserved + self.in_channels = in_channels + self.conv_kernel_size = conv_kernel_size + self.conv_out_channels = conv_out_channels + self.upsample_method = upsample_method + self.upsample_ratio = upsample_ratio + self.num_classes = num_classes + self.class_agnostic = class_agnostic + self.normalize = normalize + self.with_bias = normalize is None + self.with_cp = with_cp + + self.convs = nn.ModuleList() + for i in range(self.num_convs): + in_channels = (self.in_channels + if i == 0 else self.conv_out_channels) + padding = (self.conv_kernel_size - 1) // 2 + self.convs.append( + ConvModule( + in_channels, + self.conv_out_channels, + 3, + padding=padding, + normalize=normalize, + bias=self.with_bias)) + if self.upsample_method is None: + self.upsample = None + elif self.upsample_method == 'deconv': + self.upsample = nn.ConvTranspose2d( + self.conv_out_channels, + self.conv_out_channels, + self.upsample_ratio, + stride=self.upsample_ratio) + else: + self.upsample = nn.Upsample( + scale_factor=self.upsample_ratio, mode=self.upsample_method) + + out_channels = 1 if self.class_agnostic else self.num_classes + self.conv_logits = nn.Conv2d(self.conv_out_channels, out_channels, 1) + self.relu = nn.ReLU(inplace=True) + self.debug_imgs = None + + def init_weights(self): + for m in [self.upsample, self.conv_logits]: + if m is None: + continue + nn.init.kaiming_normal_( + m.weight, mode='fan_out', nonlinearity='relu') + nn.init.constant_(m.bias, 0) + + def convs_forward(self, x): + + def m_lvl_convs_forward(x): + for conv in self.convs[1:-1]: + x = conv(x) + return x + + 
if self.num_convs > 0: + x = self.convs[0](x) + if self.num_convs > 1: + if self.with_cp and x.requires_grad: + x = cp.checkpoint(m_lvl_convs_forward, x) + else: + x = m_lvl_convs_forward(x) + x = self.convs[-1](x) + return x + + def forward(self, x): + x = self.convs_forward(x) + if self.upsample is not None: + x = self.upsample(x) + if self.upsample_method == 'deconv': + x = self.relu(x) + mask_pred = self.conv_logits(x) + return mask_pred + + def mask_target(self, pos_proposals, pos_assigned_gt_inds, gt_masks, + img_shapes, rcnn_train_cfg): + mask_targets = mask_target(pos_proposals, pos_assigned_gt_inds, + gt_masks, img_shapes, rcnn_train_cfg) + return mask_targets + + def loss(self, mask_pred, mask_targets, labels): + loss_mask = mask_cross_entropy(mask_pred, mask_targets, labels) + return loss_mask + + def get_seg_masks(self, + mask_pred, + det_bboxes, + det_labels, + img_shape, + rcnn_test_cfg, + ori_scale, + rescale=True): + """Get segmentation masks from mask_pred and bboxes + Args: + mask_pred (Tensor or ndarray): shape (n, #class+1, h, w). + For single-scale testing, mask_pred is the direct output of + model, whose type is Tensor, while for multi-scale testing, + it will be converted to numpy array outside of this method. + det_bboxes (Tensor): shape (n, 4/5) + det_labels (Tensor): shape (n, ) + img_shape (Tensor): shape (3, ) + rcnn_test_cfg (dict): rcnn testing config + rescale (bool): whether rescale masks to original image size + Returns: + list[list]: encoded masks + """ + if isinstance(mask_pred, torch.Tensor): + mask_pred = mask_pred.sigmoid().cpu().numpy() + assert isinstance(mask_pred, np.ndarray) + cls_segms = [[] for _ in range(self.num_classes - 1)] + bboxes = det_bboxes.cpu().numpy()[:, :4] + labels = det_labels.cpu().numpy() + 1 + scale_factor = img_shape[-1] if rescale else 1.0 + img_h = ori_scale['height'] if rescale else np.round( + ori_scale['height'].item() * img_shape[-1].item()).astype(np.int32) + img_w = ori_scale['width'] if rescale else np.round( + ori_scale['width'].item() * img_shape[-1].item()).astype(np.int32) + + for i in range(bboxes.shape[0]): + bbox = (bboxes[i, :] / float(scale_factor)).astype(int) + label = labels[i] + w = bbox[2] - bbox[0] + 1 + h = bbox[3] - bbox[1] + 1 + w = max(w, 1) + h = max(h, 1) + + if not self.class_agnostic: + mask_pred_ = mask_pred[i, label, :, :] + else: + mask_pred_ = mask_pred[i, 0, :, :] + + im_mask = np.zeros((img_h, img_w), dtype=np.float32) + + im_mask[bbox[1]:bbox[1] + h, bbox[0]:bbox[0] + w] = mmcv.resize( + mask_pred_, (w, h)) + # im_mask = cv2.resize(im_mask, (img_w, img_h)) + im_mask = np.array( + im_mask > rcnn_test_cfg.mask_thr_binary, dtype=np.uint8) + rle = mask_util.encode( + np.array(im_mask[:, :, np.newaxis], order='F'))[0] + cls_segms[label - 1].append(rle) + return cls_segms diff --git a/mmdet/models/misc.py b/mmdet/models/misc.py new file mode 100644 index 0000000000000000000000000000000000000000..ad52b587ac126ed2cfbf5e2ed5c98356e1499c5f --- /dev/null +++ b/mmdet/models/misc.py @@ -0,0 +1,9 @@ +from functools import partial + +from six.moves import map, zip + + +def multi_apply(func, *args, **kwargs): + pfunc = partial(func, **kwargs) if kwargs else func + map_results = map(pfunc, *args) + return tuple(map(list, zip(*map_results))) diff --git a/mmdet/models/necks/__init__.py b/mmdet/models/necks/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..0093021ebac1e46fbb798ed6ee96a192dbd8604c --- /dev/null +++ b/mmdet/models/necks/__init__.py @@ -0,0 +1,3 @@ +from .fpn 
import FPN + +__all__ = ['FPN'] diff --git a/mmdet/models/necks/fpn.py b/mmdet/models/necks/fpn.py new file mode 100644 index 0000000000000000000000000000000000000000..c4734e18621bec4cdb8e33052935c6d7f3a495e2 --- /dev/null +++ b/mmdet/models/necks/fpn.py @@ -0,0 +1,125 @@ +import torch.nn as nn +import torch.nn.functional as F +from ..common import ConvModule +from ..weight_init import xavier_init + + +class FPN(nn.Module): + + def __init__(self, + in_channels, + out_channels, + num_outs, + start_level=0, + end_level=-1, + add_extra_convs=False, + normalize=None, + activation=None): + super(FPN, self).__init__() + assert isinstance(in_channels, list) + self.in_channels = in_channels + self.out_channels = out_channels + self.num_ins = len(in_channels) + self.num_outs = num_outs + self.activation = activation + self.with_bias = normalize is None + + if end_level == -1: + self.backbone_end_level = self.num_ins + assert num_outs >= self.num_ins - start_level + else: + # if end_level < inputs, no extra level is allowed + self.backbone_end_level = end_level + assert end_level <= len(in_channels) + assert num_outs == end_level - start_level + self.start_level = start_level + self.end_level = end_level + self.add_extra_convs = add_extra_convs + + self.lateral_convs = nn.ModuleList() + self.fpn_convs = nn.ModuleList() + + for i in range(self.start_level, self.backbone_end_level): + l_conv = ConvModule( + in_channels[i], + out_channels, + 1, + normalize=normalize, + bias=self.with_bias, + activation=self.activation, + inplace=False) + fpn_conv = ConvModule( + out_channels, + out_channels, + 3, + padding=1, + normalize=normalize, + bias=self.with_bias, + activation=self.activation, + inplace=False) + + self.lateral_convs.append(l_conv) + self.fpn_convs.append(fpn_conv) + + # lvl_id = i - self.start_level + # setattr(self, 'lateral_conv{}'.format(lvl_id), l_conv) + # setattr(self, 'fpn_conv{}'.format(lvl_id), fpn_conv) + + # add extra conv layers (e.g., RetinaNet) + extra_levels = num_outs - self.backbone_end_level + self.start_level + if add_extra_convs and extra_levels >= 1: + for i in range(extra_levels): + in_channels = (self.in_channels[self.backbone_end_level - 1] + if i == 0 else out_channels) + extra_fpn_conv = ConvModule( + in_channels, + out_channels, + 3, + stride=2, + padding=1, + normalize=normalize, + bias=self.with_bias, + activation=self.activation, + inplace=False) + self.fpn_convs.append(extra_fpn_conv) + + # default init_weights for conv(msra) and norm in ConvModule + def init_weights(self): + for m in self.modules(): + if isinstance(m, nn.Conv2d): + xavier_init(m, distribution='uniform') + + def forward(self, inputs): + assert len(inputs) == len(self.in_channels) + + # build laterals + laterals = [ + lateral_conv(inputs[i + self.start_level]) + for i, lateral_conv in enumerate(self.lateral_convs) + ] + + # build top-down path + used_backbone_levels = len(laterals) + for i in range(used_backbone_levels - 1, 0, -1): + laterals[i - 1] += F.upsample( + laterals[i], scale_factor=2, mode='nearest') + + # build outputs + # part 1: from original levels + outs = [ + self.fpn_convs[i](laterals[i]) for i in range(used_backbone_levels) + ] + # part 2: add extra levels + if self.num_outs > len(outs): + # use max pool to get more levels on top of outputs (Faster R-CNN, Mask R-CNN) + if not self.add_extra_convs: + for i in range(self.num_outs - used_backbone_levels): + outs.append(F.max_pool2d(outs[-1], 1, stride=2)) + # add conv layers on top of original feature maps (RetinaNet) + else: + orig = 
inputs[self.backbone_end_level - 1] + outs.append(self.fpn_convs[used_backbone_levels](orig)) + for i in range(used_backbone_levels + 1, self.num_outs): + # BUG: we should add relu before each extra conv + outs.append(self.fpn_convs[i](outs[-1])) + return tuple(outs) diff --git a/mmdet/models/roi_extractors/__init__.py b/mmdet/models/roi_extractors/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e76e689753f10e87b3f6d9482e880b902f9b747e --- /dev/null +++ b/mmdet/models/roi_extractors/__init__.py @@ -0,0 +1,3 @@ +from .single_level import SingleLevelRoI + +__all__ = ['SingleLevelRoI'] diff --git a/mmdet/models/roi_extractors/single_level.py b/mmdet/models/roi_extractors/single_level.py new file mode 100644 index 0000000000000000000000000000000000000000..3e37ac83d6ffb7beab56926329f71311f7eef116 --- /dev/null +++ b/mmdet/models/roi_extractors/single_level.py @@ -0,0 +1,73 @@ +from __future__ import division + +import torch +import torch.nn as nn + +from mmdet import ops + + +class SingleLevelRoI(nn.Module): + """Extract RoI features from a single level feature map. Each RoI is + mapped to a level according to its scale.""" + + def __init__(self, + roi_layer, + out_channels, + featmap_strides, + finest_scale=56): + super(SingleLevelRoI, self).__init__() + self.roi_layers = self.build_roi_layers(roi_layer, featmap_strides) + self.out_channels = out_channels + self.featmap_strides = featmap_strides + self.finest_scale = finest_scale + + @property + def num_inputs(self): + return len(self.featmap_strides) + + def init_weights(self): + pass + + def build_roi_layers(self, layer_cfg, featmap_strides): + cfg = layer_cfg.copy() + layer_type = cfg.pop('type') + assert hasattr(ops, layer_type) + layer_cls = getattr(ops, layer_type) + roi_layers = nn.ModuleList( + [layer_cls(spatial_scale=1 / s, **cfg) for s in featmap_strides]) + return roi_layers + + def map_roi_levels(self, rois, num_levels): + """Map rois to corresponding feature levels (0-based) by scales. + + scale < finest_scale: level 0 + finest_scale <= scale < finest_scale * 2: level 1 + finest_scale * 2 <= scale < finest_scale * 4: level 2 + scale >= finest_scale * 4: level 3 + """ + scale = torch.sqrt( + (rois[:, 3] - rois[:, 1] + 1) * (rois[:, 4] - rois[:, 2] + 1)) + target_lvls = torch.floor(torch.log2(scale / self.finest_scale + 1e-6)) + target_lvls = target_lvls.clamp(min=0, max=num_levels - 1).long() + return target_lvls + + def forward(self, feats, rois): + """Extract roi features with the roi layer. If multiple feature levels + are used, then rois are mapped to corresponding levels according to + their scales. 
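Note: to make the level mapping above concrete: with finest_scale=56, a 112x112 RoI has scale sqrt(112 * 112) = 112, and floor(log2(112 / 56)) = 1, so it pools from level 1; a 500x375 RoI (scale ~433) gives floor(log2(433 / 56)) = 2, level 2. A standalone check of the same formula, assuming rois rows are [batch_ind, x1, y1, x2, y2]:

    import torch

    def map_roi_levels_check(rois, num_levels, finest_scale=56):
        # Same formula as SingleLevelRoI.map_roi_levels: assign each RoI to
        # a pyramid level by the log2 of its scale relative to finest_scale.
        scale = torch.sqrt(
            (rois[:, 3] - rois[:, 1] + 1) * (rois[:, 4] - rois[:, 2] + 1))
        target_lvls = torch.floor(torch.log2(scale / finest_scale + 1e-6))
        return target_lvls.clamp(min=0, max=num_levels - 1).long()

    rois = torch.tensor([[0., 0., 0., 111., 111.],   # 112x112 -> level 1
                         [0., 0., 0., 499., 374.]])  # scale ~433 -> level 2
    print(map_roi_levels_check(rois, num_levels=4))  # tensor([1, 2])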
+ """ + if len(feats) == 1: + return self.roi_layers[0](feats[0], rois) + + out_size = self.roi_layers[0].out_size + num_levels = len(feats) + target_lvls = self.map_roi_levels(rois, num_levels) + roi_feats = torch.cuda.FloatTensor(rois.size()[0], self.out_channels, + out_size, out_size).fill_(0) + for i in range(num_levels): + inds = target_lvls == i + if inds.any(): + rois_ = rois[inds, :] + roi_feats_t = self.roi_layers[i](feats[i], rois_) + roi_feats[inds] += roi_feats_t + return roi_feats diff --git a/mmdet/models/rpn_heads/__init__.py b/mmdet/models/rpn_heads/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..fbc4b3affbf31059fdcbb1b4b43eeb1544c631f0 --- /dev/null +++ b/mmdet/models/rpn_heads/__init__.py @@ -0,0 +1,3 @@ +from .rpn_head import RPNHead + +__all__ = ['RPNHead'] diff --git a/mmdet/models/rpn_heads/rpn_head.py b/mmdet/models/rpn_heads/rpn_head.py new file mode 100644 index 0000000000000000000000000000000000000000..f2fce9ebe7aa5c820139fa0188e2f6a25322ed66 --- /dev/null +++ b/mmdet/models/rpn_heads/rpn_head.py @@ -0,0 +1,237 @@ +from __future__ import division + +import numpy as np +import torch +import torch.nn as nn +import torch.nn.functional as F + +from mmdet.core import (AnchorGenerator, anchor_target, bbox_transform_inv, + weighted_cross_entropy, weighted_smoothl1, + weighted_binary_cross_entropy) +from mmdet.ops import nms +from ..misc import multi_apply +from ..weight_init import normal_init + + +class RPNHead(nn.Module): + + def __init__(self, + in_channels, + feat_channels=512, + coarsest_stride=32, + anchor_scales=[8, 16, 32], + anchor_ratios=[0.5, 1.0, 2.0], + anchor_strides=[4, 8, 16, 32, 64], + anchor_base_sizes=None, + target_means=(.0, .0, .0, .0), + target_stds=(1.0, 1.0, 1.0, 1.0), + use_sigmoid_cls=False): + super(RPNHead, self).__init__() + self.in_channels = in_channels + self.feat_channels = feat_channels + self.coarsest_stride = coarsest_stride + self.anchor_scales = anchor_scales + self.anchor_ratios = anchor_ratios + self.anchor_strides = anchor_strides + self.anchor_base_sizes = anchor_strides.copy( + ) if anchor_base_sizes is None else anchor_base_sizes + self.target_means = target_means + self.target_stds = target_stds + self.use_sigmoid_cls = use_sigmoid_cls + + self.anchor_generators = [] + for anchor_base in self.anchor_base_sizes: + self.anchor_generators.append( + AnchorGenerator(anchor_base, anchor_scales, anchor_ratios)) + self.rpn_conv = nn.Conv2d(in_channels, feat_channels, 3, padding=1) + self.relu = nn.ReLU(inplace=True) + self.num_anchors = len(self.anchor_ratios) * len(self.anchor_scales) + out_channels = (self.num_anchors + if self.use_sigmoid_cls else self.num_anchors * 2) + self.rpn_cls = nn.Conv2d(feat_channels, out_channels, 1) + self.rpn_reg = nn.Conv2d(feat_channels, self.num_anchors * 4, 1) + self.debug_imgs = None + + def init_weights(self): + normal_init(self.rpn_conv, std=0.01) + normal_init(self.rpn_cls, std=0.01) + normal_init(self.rpn_reg, std=0.01) + + def forward_single(self, x): + rpn_feat = self.relu(self.rpn_conv(x)) + rpn_cls_score = self.rpn_cls(rpn_feat) + rpn_bbox_pred = self.rpn_reg(rpn_feat) + return rpn_cls_score, rpn_bbox_pred + + def forward(self, feats): + return multi_apply(self.forward_single, feats) + + def get_anchors(self, featmap_sizes, img_shapes): + """Get anchors given a list of feature map sizes, and get valid flags + at the same time. 
(Extra padding regions should be marked as invalid) + """ + # calculate actual image shapes + padded_img_shapes = [] + for img_shape in img_shapes: + h, w = img_shape[:2] + padded_h = int( + np.ceil(h / self.coarsest_stride) * self.coarsest_stride) + padded_w = int( + np.ceil(w / self.coarsest_stride) * self.coarsest_stride) + padded_img_shapes.append((padded_h, padded_w)) + # generate anchors for different feature levels + # len = feature levels + anchor_list = [] + # len = imgs per gpu + valid_flag_list = [[] for _ in range(len(img_shapes))] + for i in range(len(featmap_sizes)): + anchor_stride = self.anchor_strides[i] + anchors = self.anchor_generators[i].grid_anchors( + featmap_sizes[i], anchor_stride) + anchor_list.append(anchors) + # for each image in this feature level, get valid flags + featmap_size = featmap_sizes[i] + for img_id, (h, w) in enumerate(padded_img_shapes): + valid_feat_h = min( + int(np.ceil(h / anchor_stride)), featmap_size[0]) + valid_feat_w = min( + int(np.ceil(w / anchor_stride)), featmap_size[1]) + flags = self.anchor_generators[i].valid_flags( + featmap_size, (valid_feat_h, valid_feat_w)) + valid_flag_list[img_id].append(flags) + return anchor_list, valid_flag_list + + def loss_single(self, rpn_cls_score, rpn_bbox_pred, labels, label_weights, + bbox_targets, bbox_weights, num_total_samples, cfg): + labels = labels.contiguous().view(-1) + label_weights = label_weights.contiguous().view(-1) + bbox_targets = bbox_targets.contiguous().view(-1, 4) + bbox_weights = bbox_weights.contiguous().view(-1, 4) + if self.use_sigmoid_cls: + rpn_cls_score = rpn_cls_score.permute(0, 2, 3, + 1).contiguous().view(-1) + loss_cls = weighted_binary_cross_entropy( + rpn_cls_score, + labels, + label_weights, + ave_factor=num_total_samples) + else: + rpn_cls_score = rpn_cls_score.permute(0, 2, 3, + 1).contiguous().view(-1, 2) + loss_cls = weighted_cross_entropy( + rpn_cls_score, + labels, + label_weights, + ave_factor=num_total_samples) + rpn_bbox_pred = rpn_bbox_pred.permute(0, 2, 3, 1).contiguous().view( + -1, 4) + loss_reg = weighted_smoothl1( + rpn_bbox_pred, + bbox_targets, + bbox_weights, + beta=cfg.smoothl1_beta, + ave_factor=num_total_samples) + return loss_cls, loss_reg + + def loss(self, rpn_cls_scores, rpn_bbox_preds, gt_bboxes, img_shapes, cfg): + featmap_sizes = [featmap.size()[-2:] for featmap in rpn_cls_scores] + assert len(featmap_sizes) == len(self.anchor_generators) + + anchor_list, valid_flag_list = self.get_anchors( + featmap_sizes, img_shapes) + cls_reg_targets = anchor_target( + anchor_list, valid_flag_list, featmap_sizes, gt_bboxes, img_shapes, + self.target_means, self.target_stds, cfg) + if cls_reg_targets is None: + return None + (labels_list, label_weights_list, bbox_targets_list, bbox_weights_list, + num_total_samples) = cls_reg_targets + losses_cls, losses_reg = multi_apply( + self.loss_single, + rpn_cls_scores, + rpn_bbox_preds, + labels_list, + label_weights_list, + bbox_targets_list, + bbox_weights_list, + num_total_samples=num_total_samples, + cfg=cfg) + return dict(loss_rpn_cls=losses_cls, loss_rpn_reg=losses_reg) + + def get_proposals(self, rpn_cls_scores, rpn_bbox_preds, img_shapes, cfg): + img_per_gpu = len(img_shapes) + featmap_sizes = [featmap.size()[-2:] for featmap in rpn_cls_scores] + mlvl_anchors = [ + self.anchor_generators[idx].grid_anchors(featmap_sizes[idx], + self.anchor_strides[idx]) + for idx in range(len(featmap_sizes)) + ] + proposal_list = [] + for img_id in range(img_per_gpu): + rpn_cls_score_list = [ + 
rpn_cls_scores[idx][img_id].detach() + for idx in range(len(rpn_cls_scores)) + ] + rpn_bbox_pred_list = [ + rpn_bbox_preds[idx][img_id].detach() + for idx in range(len(rpn_bbox_preds)) + ] + assert len(rpn_cls_score_list) == len(rpn_bbox_pred_list) + img_shape = img_shapes[img_id] + proposals = self._get_proposals_single( + rpn_cls_score_list, rpn_bbox_pred_list, mlvl_anchors, + img_shape, cfg) + proposal_list.append(proposals) + return proposal_list + + def _get_proposals_single(self, rpn_cls_scores, rpn_bbox_preds, + mlvl_anchors, img_shape, cfg): + mlvl_proposals = [] + for idx in range(len(rpn_cls_scores)): + rpn_cls_score = rpn_cls_scores[idx] + rpn_bbox_pred = rpn_bbox_preds[idx] + assert rpn_cls_score.size()[-2:] == rpn_bbox_pred.size()[-2:] + anchors = mlvl_anchors[idx] + if self.use_sigmoid_cls: + rpn_cls_score = rpn_cls_score.permute(1, 2, + 0).contiguous().view(-1) + rpn_cls_prob = F.sigmoid(rpn_cls_score) + scores = rpn_cls_prob + else: + rpn_cls_score = rpn_cls_score.permute(1, 2, + 0).contiguous().view( + -1, 2) + rpn_cls_prob = F.softmax(rpn_cls_score, dim=1) + scores = rpn_cls_prob[:, 1] + rpn_bbox_pred = rpn_bbox_pred.permute(1, 2, 0).contiguous().view( + -1, 4) + _, order = scores.sort(0, descending=True) + if cfg.nms_pre > 0: + order = order[:cfg.nms_pre] + rpn_bbox_pred = rpn_bbox_pred[order, :] + anchors = anchors[order, :] + scores = scores[order] + proposals = bbox_transform_inv(anchors, rpn_bbox_pred, + self.target_means, self.target_stds, + img_shape) + w = proposals[:, 2] - proposals[:, 0] + 1 + h = proposals[:, 3] - proposals[:, 1] + 1 + valid_inds = torch.nonzero((w >= cfg.min_bbox_size) & + (h >= cfg.min_bbox_size)).squeeze() + proposals = proposals[valid_inds, :] + scores = scores[valid_inds] + proposals = torch.cat([proposals, scores.unsqueeze(-1)], dim=-1) + nms_keep = nms(proposals, cfg.nms_thr)[:cfg.nms_post] + proposals = proposals[nms_keep, :] + mlvl_proposals.append(proposals) + proposals = torch.cat(mlvl_proposals, 0) + if cfg.nms_across_levels: + nms_keep = nms(proposals, cfg.nms_thr)[:cfg.max_num] + proposals = proposals[nms_keep, :] + else: + scores = proposals[:, 4] + _, order = scores.sort(0, descending=True) + num = min(cfg.max_num, proposals.shape[0]) + order = order[:num] + proposals = proposals[order, :] + return proposals diff --git a/mmdet/models/weight_init.py b/mmdet/models/weight_init.py new file mode 100644 index 0000000000000000000000000000000000000000..2e9b13b4fbc17d6d1986da876108c1a813190c2d --- /dev/null +++ b/mmdet/models/weight_init.py @@ -0,0 +1,39 @@ +import torch.nn as nn + + +def xavier_init(module, gain=1, bias=0, distribution='normal'): + assert distribution in ['uniform', 'normal'] + if distribution == 'uniform': + nn.init.xavier_uniform_(module.weight, gain=gain) + else: + nn.init.xavier_normal_(module.weight, gain=gain) + if hasattr(module, 'bias'): + nn.init.constant_(module.bias, bias) + + +def normal_init(module, mean=0, std=1, bias=0): + nn.init.normal_(module.weight, mean, std) + if hasattr(module, 'bias'): + nn.init.constant_(module.bias, bias) + + +def uniform_init(module, a=0, b=1, bias=0): + nn.init.uniform_(module.weight, a, b) + if hasattr(module, 'bias'): + nn.init.constant_(module.bias, bias) + + +def kaiming_init(module, + mode='fan_out', + nonlinearity='relu', + bias=0, + distribution='normal'): + assert distribution in ['uniform', 'normal'] + if distribution == 'uniform': + nn.init.kaiming_uniform_( + module.weight, mode=mode, nonlinearity=nonlinearity) + else: + nn.init.kaiming_normal_( + module.weight, 
mode=mode, nonlinearity=nonlinearity) + if hasattr(module, 'bias'): + nn.init.constant_(module.bias, bias) diff --git a/mmdet/nn/__init__.py b/mmdet/nn/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..1b627f5e7b807b1c6ae321c775c8fc8d03266238 --- /dev/null +++ b/mmdet/nn/__init__.py @@ -0,0 +1 @@ +from .parallel import MMDataParallel, MMDistributedDataParallel diff --git a/mmdet/nn/parallel/__init__.py b/mmdet/nn/parallel/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..0ea0a58e4a53737372b7995f3f9d570cba50dddb --- /dev/null +++ b/mmdet/nn/parallel/__init__.py @@ -0,0 +1,7 @@ +from .data_parallel import MMDataParallel +from .distributed import MMDistributedDataParallel +from .scatter_gather import scatter, scatter_kwargs + +__all__ = [ + 'MMDataParallel', 'MMDistributedDataParallel', 'scatter', 'scatter_kwargs' +] diff --git a/mmdet/nn/parallel/_functions.py b/mmdet/nn/parallel/_functions.py new file mode 100644 index 0000000000000000000000000000000000000000..75bb954dce440f7634c47d4a021360df53f3509e --- /dev/null +++ b/mmdet/nn/parallel/_functions.py @@ -0,0 +1,74 @@ +import torch +from torch.nn.parallel._functions import _get_stream + + +def scatter(input, devices, streams=None): + """Scatters tensor across multiple GPUs. + """ + if streams is None: + streams = [None] * len(devices) + + if isinstance(input, list): + chunk_size = (len(input) - 1) // len(devices) + 1 + outputs = [ + scatter(input[i], [devices[i // chunk_size]], + [streams[i // chunk_size]]) for i in range(len(input)) + ] + return outputs + elif isinstance(input, torch.Tensor): + output = input.contiguous() + # TODO: copy to a pinned buffer first (if copying from CPU) + stream = streams[0] if output.numel() > 0 else None + with torch.cuda.device(devices[0]), torch.cuda.stream(stream): + output = output.cuda(devices[0], non_blocking=True) + return output + else: + raise Exception('Unknown type {}.'.format(type(input))) + + +def synchronize_stream(output, devices, streams): + if isinstance(output, list): + chunk_size = len(output) // len(devices) + for i in range(len(devices)): + for j in range(chunk_size): + synchronize_stream(output[i * chunk_size + j], [devices[i]], + [streams[i]]) + elif isinstance(output, torch.Tensor): + if output.numel() != 0: + with torch.cuda.device(devices[0]): + main_stream = torch.cuda.current_stream() + main_stream.wait_stream(streams[0]) + output.record_stream(main_stream) + else: + raise Exception('Unknown type {}.'.format(type(output))) + + +def get_input_device(input): + if isinstance(input, list): + for item in input: + input_device = get_input_device(item) + if input_device != -1: + return input_device + return -1 + elif isinstance(input, torch.Tensor): + return input.get_device() if input.is_cuda else -1 + else: + raise Exception('Unknown type {}.'.format(type(input))) + + +class Scatter(object): + + @staticmethod + def forward(target_gpus, input): + input_device = get_input_device(input) + streams = None + if input_device == -1: + # Perform CPU to GPU copies in a background stream + streams = [_get_stream(device) for device in target_gpus] + + outputs = scatter(input, target_gpus, streams) + # Synchronize with the copy stream + if streams is not None: + synchronize_stream(outputs, target_gpus, streams) + + return tuple(outputs) diff --git a/mmdet/nn/parallel/data_parallel.py b/mmdet/nn/parallel/data_parallel.py new file mode 100644 index 0000000000000000000000000000000000000000..6735cb4afb7b512c5e9f757e962612ad1073ae12 --- 
/dev/null +++ b/mmdet/nn/parallel/data_parallel.py @@ -0,0 +1,9 @@ +from torch.nn.parallel import DataParallel + +from .scatter_gather import scatter_kwargs + + +class MMDataParallel(DataParallel): + + def scatter(self, inputs, kwargs, device_ids): + return scatter_kwargs(inputs, kwargs, device_ids, dim=self.dim) diff --git a/mmdet/nn/parallel/distributed.py b/mmdet/nn/parallel/distributed.py new file mode 100644 index 0000000000000000000000000000000000000000..2809778ad93951650677a546b57190cb7659302d --- /dev/null +++ b/mmdet/nn/parallel/distributed.py @@ -0,0 +1,9 @@ +from torch.nn.parallel import DistributedDataParallel + +from .scatter_gather import scatter_kwargs + + +class MMDistributedDataParallel(DistributedDataParallel): + + def scatter(self, inputs, kwargs, device_ids): + return scatter_kwargs(inputs, kwargs, device_ids, dim=self.dim) diff --git a/mmdet/nn/parallel/scatter_gather.py b/mmdet/nn/parallel/scatter_gather.py new file mode 100644 index 0000000000000000000000000000000000000000..82511fd1db12774e1df1468e93353f2a963ed962 --- /dev/null +++ b/mmdet/nn/parallel/scatter_gather.py @@ -0,0 +1,48 @@ +import torch +from ._functions import Scatter +from torch.nn.parallel._functions import Scatter as OrigScatter +from detkit.datasets.utils import DataContainer + + +def scatter(inputs, target_gpus, dim=0): + """Scatter inputs to target gpus. + + The only difference from original :func:`scatter` is to add support for + :type:`~mmdet.DataContainer`. + """ + + def scatter_map(obj): + if isinstance(obj, torch.Tensor): + return OrigScatter.apply(target_gpus, None, dim, obj) + if isinstance(obj, DataContainer) and isinstance(obj.data, list): + return Scatter.forward(target_gpus, obj.data) + if isinstance(obj, tuple) and len(obj) > 0: + return list(zip(*map(scatter_map, obj))) + if isinstance(obj, list) and len(obj) > 0: + return list(map(list, zip(*map(scatter_map, obj)))) + if isinstance(obj, dict) and len(obj) > 0: + return list(map(type(obj), zip(*map(scatter_map, obj.items())))) + return [obj for targets in target_gpus] + + # After scatter_map is called, a scatter_map cell will exist. This cell + # has a reference to the actual function scatter_map, which has references + # to a closure that has a reference to the scatter_map cell (because the + # fn is recursive). 
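Note: MMDataParallel and MMDistributedDataParallel above change nothing but the scatter hook, so they are drop-in replacements for the stock torch wrappers; the hook exists so DataContainer fields, which hold a pre-chunked list with one chunk per GPU, are copied across devices instead of being stacked and sliced. A usage sketch, where detector, img, img_meta and gt_bboxes are hypothetical placeholders standing in for the detectors defined earlier in this patch:

    from mmdet.nn import MMDataParallel

    # Plain tensors in the batch are sliced along dim 0 as usual;
    # DataContainer-wrapped metadata is scattered chunk-by-chunk by the
    # custom Scatter defined above.
    model = MMDataParallel(detector.cuda(), device_ids=[0, 1])
    losses = model(img, img_meta, gt_bboxes=gt_bboxes)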
To avoid this reference cycle, we set the function to + # None, clearing the cell + try: + return scatter_map(inputs) + finally: + scatter_map = None + + +def scatter_kwargs(inputs, kwargs, target_gpus, dim=0): + """Scatter with support for kwargs dictionary""" + inputs = scatter(inputs, target_gpus, dim) if inputs else [] + kwargs = scatter(kwargs, target_gpus, dim) if kwargs else [] + if len(inputs) < len(kwargs): + inputs.extend([() for _ in range(len(kwargs) - len(inputs))]) + elif len(kwargs) < len(inputs): + kwargs.extend([{} for _ in range(len(inputs) - len(kwargs))]) + inputs = tuple(inputs) + kwargs = tuple(kwargs) + return inputs, kwargs diff --git a/mmdet/ops/__init__.py b/mmdet/ops/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..52e5808016cb94e63a7501cef7b1292805eb3491 --- /dev/null +++ b/mmdet/ops/__init__.py @@ -0,0 +1,3 @@ +from .nms import nms, soft_nms +from .roi_align import RoIAlign, roi_align +from .roi_pool import RoIPool, roi_pool diff --git a/mmdet/ops/nms/.gitignore b/mmdet/ops/nms/.gitignore new file mode 100644 index 0000000000000000000000000000000000000000..ce1da4c53c0301615c1f0ba3b01a859ad68259cb --- /dev/null +++ b/mmdet/ops/nms/.gitignore @@ -0,0 +1 @@ +*.cpp diff --git a/mmdet/ops/nms/Makefile b/mmdet/ops/nms/Makefile new file mode 100644 index 0000000000000000000000000000000000000000..39556dd28ba76300d0f491cd5e66d4a4d19fc8ee --- /dev/null +++ b/mmdet/ops/nms/Makefile @@ -0,0 +1,8 @@ +PYTHON=${PYTHON:-python} + +all: + echo "Compiling nms kernels..." + $(PYTHON) setup.py build_ext --inplace + +clean: + rm *.so diff --git a/mmdet/ops/nms/__init__.py b/mmdet/ops/nms/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..1cf8569b97b3a568458428776b1dbd6737882389 --- /dev/null +++ b/mmdet/ops/nms/__init__.py @@ -0,0 +1 @@ +from .nms_wrapper import nms, soft_nms diff --git a/mmdet/ops/nms/cpu_nms.pyx b/mmdet/ops/nms/cpu_nms.pyx new file mode 100644 index 0000000000000000000000000000000000000000..1d0bef3321d78fc73556906649ab61eaaea60d86 --- /dev/null +++ b/mmdet/ops/nms/cpu_nms.pyx @@ -0,0 +1,68 @@ +# -------------------------------------------------------- +# Fast R-CNN +# Copyright (c) 2015 Microsoft +# Licensed under The MIT License [see LICENSE for details] +# Written by Ross Girshick +# -------------------------------------------------------- + +import numpy as np +cimport numpy as np + +cdef inline np.float32_t max(np.float32_t a, np.float32_t b): + return a if a >= b else b + +cdef inline np.float32_t min(np.float32_t a, np.float32_t b): + return a if a <= b else b + +def cpu_nms(np.ndarray[np.float32_t, ndim=2] dets, np.float thresh): + cdef np.ndarray[np.float32_t, ndim=1] x1 = dets[:, 0] + cdef np.ndarray[np.float32_t, ndim=1] y1 = dets[:, 1] + cdef np.ndarray[np.float32_t, ndim=1] x2 = dets[:, 2] + cdef np.ndarray[np.float32_t, ndim=1] y2 = dets[:, 3] + cdef np.ndarray[np.float32_t, ndim=1] scores = dets[:, 4] + + cdef np.ndarray[np.float32_t, ndim=1] areas = (x2 - x1 + 1) * (y2 - y1 + 1) + cdef np.ndarray[np.int_t, ndim=1] order = scores.argsort()[::-1] + + cdef int ndets = dets.shape[0] + cdef np.ndarray[np.int_t, ndim=1] suppressed = \ + np.zeros((ndets), dtype=np.int) + + # nominal indices + cdef int _i, _j + # sorted indices + cdef int i, j + # temp variables for box i's (the box currently under consideration) + cdef np.float32_t ix1, iy1, ix2, iy2, iarea + # variables for computing overlap with box j (lower scoring box) + cdef np.float32_t xx1, yy1, xx2, yy2 + cdef np.float32_t w, h + cdef 
np.float32_t inter, ovr + + keep = [] + for _i in range(ndets): + i = order[_i] + if suppressed[i] == 1: + continue + keep.append(i) + ix1 = x1[i] + iy1 = y1[i] + ix2 = x2[i] + iy2 = y2[i] + iarea = areas[i] + for _j in range(_i + 1, ndets): + j = order[_j] + if suppressed[j] == 1: + continue + xx1 = max(ix1, x1[j]) + yy1 = max(iy1, y1[j]) + xx2 = min(ix2, x2[j]) + yy2 = min(iy2, y2[j]) + w = max(0.0, xx2 - xx1 + 1) + h = max(0.0, yy2 - yy1 + 1) + inter = w * h + ovr = inter / (iarea + areas[j] - inter) + if ovr >= thresh: + suppressed[j] = 1 + + return keep diff --git a/mmdet/ops/nms/cpu_soft_nms.pyx b/mmdet/ops/nms/cpu_soft_nms.pyx new file mode 100644 index 0000000000000000000000000000000000000000..05ec5a5446221d3593a10edfd4d714bfa6192309 --- /dev/null +++ b/mmdet/ops/nms/cpu_soft_nms.pyx @@ -0,0 +1,123 @@ +# ---------------------------------------------------------- +# Soft-NMS: Improving Object Detection With One Line of Code +# Copyright (c) University of Maryland, College Park +# Licensed under The MIT License [see LICENSE for details] +# Written by Navaneeth Bodla and Bharat Singh +# ---------------------------------------------------------- + +import numpy as np +cimport numpy as np + + +cdef inline np.float32_t max(np.float32_t a, np.float32_t b): + return a if a >= b else b + +cdef inline np.float32_t min(np.float32_t a, np.float32_t b): + return a if a <= b else b + +def cpu_soft_nms( + np.ndarray[float, ndim=2] boxes_in, + float sigma=0.5, + float Nt=0.3, + float threshold=0.001, + unsigned int method=0 +): + boxes = boxes_in.copy() + cdef unsigned int N = boxes.shape[0] + cdef float iw, ih, box_area + cdef float ua + cdef int pos = 0 + cdef float maxscore = 0 + cdef int maxpos = 0 + cdef float x1, x2, y1, y2, tx1, tx2, ty1, ty2, ts, area, weight, ov + inds = np.arange(N) + + for i in range(N): + maxscore = boxes[i, 4] + maxpos = i + + tx1 = boxes[i,0] + ty1 = boxes[i,1] + tx2 = boxes[i,2] + ty2 = boxes[i,3] + ts = boxes[i,4] + ti = inds[i] + + pos = i + 1 + # get max box + while pos < N: + if maxscore < boxes[pos, 4]: + maxscore = boxes[pos, 4] + maxpos = pos + pos = pos + 1 + + # add max box as a detection + boxes[i,0] = boxes[maxpos,0] + boxes[i,1] = boxes[maxpos,1] + boxes[i,2] = boxes[maxpos,2] + boxes[i,3] = boxes[maxpos,3] + boxes[i,4] = boxes[maxpos,4] + inds[i] = inds[maxpos] + + # swap ith box with position of max box + boxes[maxpos,0] = tx1 + boxes[maxpos,1] = ty1 + boxes[maxpos,2] = tx2 + boxes[maxpos,3] = ty2 + boxes[maxpos,4] = ts + inds[maxpos] = ti + + tx1 = boxes[i,0] + ty1 = boxes[i,1] + tx2 = boxes[i,2] + ty2 = boxes[i,3] + ts = boxes[i,4] + + pos = i + 1 + # NMS iterations, note that N changes if detection boxes fall below + # threshold + while pos < N: + x1 = boxes[pos, 0] + y1 = boxes[pos, 1] + x2 = boxes[pos, 2] + y2 = boxes[pos, 3] + s = boxes[pos, 4] + + area = (x2 - x1 + 1) * (y2 - y1 + 1) + iw = (min(tx2, x2) - max(tx1, x1) + 1) + if iw > 0: + ih = (min(ty2, y2) - max(ty1, y1) + 1) + if ih > 0: + ua = float((tx2 - tx1 + 1) * (ty2 - ty1 + 1) + area - iw * ih) + ov = iw * ih / ua #iou between max box and detection box + + if method == 1: # linear + if ov > Nt: + weight = 1 - ov + else: + weight = 1 + elif method == 2: # gaussian + weight = np.exp(-(ov * ov)/sigma) + else: # original NMS + if ov > Nt: + weight = 0 + else: + weight = 1 + + boxes[pos, 4] = weight*boxes[pos, 4] + + # if box score falls below threshold, discard the box by + # swapping with last box update N + if boxes[pos, 4] < threshold: + boxes[pos,0] = boxes[N-1, 0] + boxes[pos,1] = 
boxes[N-1, 1] + boxes[pos,2] = boxes[N-1, 2] + boxes[pos,3] = boxes[N-1, 3] + boxes[pos,4] = boxes[N-1, 4] + inds[pos] = inds[N-1] + N = N - 1 + pos = pos - 1 + + pos = pos + 1 + + return boxes[:N], inds[:N] \ No newline at end of file diff --git a/mmdet/ops/nms/gpu_nms.hpp b/mmdet/ops/nms/gpu_nms.hpp new file mode 100644 index 0000000000000000000000000000000000000000..2d45e344aeb93c00262f98153dd3e1300a9adcce --- /dev/null +++ b/mmdet/ops/nms/gpu_nms.hpp @@ -0,0 +1,3 @@ +void _nms(int* keep_out, int* num_out, const float* boxes_host, int boxes_num, + int boxes_dim, float nms_overlap_thresh, int device_id, size_t base); +size_t nms_Malloc(); diff --git a/mmdet/ops/nms/gpu_nms.pyx b/mmdet/ops/nms/gpu_nms.pyx new file mode 100644 index 0000000000000000000000000000000000000000..e5ae72578731c38150bf0c79866fcabfcb936ceb --- /dev/null +++ b/mmdet/ops/nms/gpu_nms.pyx @@ -0,0 +1,43 @@ +# -------------------------------------------------------- +# Faster R-CNN +# Copyright (c) 2015 Microsoft +# Licensed under The MIT License [see LICENSE for details] +# Written by Ross Girshick +# -------------------------------------------------------- + +import numpy as np +cimport numpy as np + +assert sizeof(int) == sizeof(np.int32_t) + +cdef extern from "gpu_nms.hpp": + void _nms(np.int32_t*, int*, np.float32_t*, int, int, float, int, size_t) nogil + size_t nms_Malloc() nogil + +memory_pool = {} + +def gpu_nms(np.ndarray[np.float32_t, ndim=2] dets, np.float thresh, + np.int32_t device_id=0): + cdef int boxes_num = dets.shape[0] + cdef int boxes_dim = dets.shape[1] + cdef int num_out + cdef size_t base + cdef np.ndarray[np.int32_t, ndim=1] \ + keep = np.zeros(boxes_num, dtype=np.int32) + cdef np.ndarray[np.float32_t, ndim=1] \ + scores = dets[:, 4] + cdef np.ndarray[np.int_t, ndim=1] \ + order = scores.argsort()[::-1] + cdef np.ndarray[np.float32_t, ndim=2] \ + sorted_dets = dets[order, :] + cdef float cthresh = thresh + if device_id not in memory_pool: + with nogil: + base = nms_Malloc() + memory_pool[device_id] = base + # print "malloc", base + base = memory_pool[device_id] + with nogil: + _nms(&keep[0], &num_out, &sorted_dets[0, 0], boxes_num, boxes_dim, cthresh, device_id, base) + keep = keep[:num_out] + return list(order[keep]) diff --git a/mmdet/ops/nms/nms_kernel.cu b/mmdet/ops/nms/nms_kernel.cu new file mode 100644 index 0000000000000000000000000000000000000000..4c5f0ec5e1096260e57ff314074f9c36da0a4e72 --- /dev/null +++ b/mmdet/ops/nms/nms_kernel.cu @@ -0,0 +1,188 @@ +// ------------------------------------------------------------------ +// Faster R-CNN +// Copyright (c) 2015 Microsoft +// Licensed under The MIT License [see fast-rcnn/LICENSE for details] +// Written by Shaoqing Ren +// ------------------------------------------------------------------ + +#include <stdio.h> +#include <iostream> +#include <vector> +#include "gpu_nms.hpp" + +#define CUDA_CHECK(condition) \ + /* Code block avoids redefinition of cudaError_t error */ \ + do { \ + cudaError_t error = condition; \ + if (error != cudaSuccess) { \ + std::cout << cudaGetErrorString(error) << std::endl; \ + } \ + } while (0) + +#define DIVUP(m, n) ((m) / (n) + ((m) % (n) > 0)) +#define MULTIPLIER 16 +#define LONGLONG_SIZE 64 + +int const threadsPerBlock = + sizeof(unsigned long long) * 8 * + MULTIPLIER; // number of bits for a long long variable + +__device__ inline float devIoU(float const* const a, float const* const b) { + float left = max(a[0], b[0]), right = min(a[2], b[2]); + float top = max(a[1], b[1]), bottom = min(a[3], b[3]); + float 
width = max(right - left + 1, 0.f), + height = max(bottom - top + 1, 0.f); + float interS = width * height; + float Sa = (a[2] - a[0] + 1) * (a[3] - a[1] + 1); + float Sb = (b[2] - b[0] + 1) * (b[3] - b[1] + 1); + return interS / (Sa + Sb - interS); +} + +__global__ void nms_kernel(const int n_boxes, const float nms_overlap_thresh, + const float* dev_boxes, + unsigned long long* dev_mask) { + const int row_start = blockIdx.y; + const int col_start = blockIdx.x; + + // if (row_start > col_start) return; + + const int row_size = + min(n_boxes - row_start * threadsPerBlock, threadsPerBlock); + const int col_size = + min(n_boxes - col_start * threadsPerBlock, threadsPerBlock); + + __shared__ float block_boxes[threadsPerBlock * 5]; + if (threadIdx.x < col_size) { + block_boxes[threadIdx.x * 5 + 0] = + dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 0]; + block_boxes[threadIdx.x * 5 + 1] = + dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 1]; + block_boxes[threadIdx.x * 5 + 2] = + dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 2]; + block_boxes[threadIdx.x * 5 + 3] = + dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 3]; + block_boxes[threadIdx.x * 5 + 4] = + dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 4]; + } + __syncthreads(); + + unsigned long long ts[MULTIPLIER]; + + if (threadIdx.x < row_size) { +#pragma unroll + for (int i = 0; i < MULTIPLIER; ++i) { + ts[i] = 0; + } + const int cur_box_idx = threadsPerBlock * row_start + threadIdx.x; + const float* cur_box = dev_boxes + cur_box_idx * 5; + int i = 0; + int start = 0; + if (row_start == col_start) { + start = threadIdx.x + 1; + } + for (i = start; i < col_size; i++) { + if (devIoU(cur_box, block_boxes + i * 5) > nms_overlap_thresh) { + ts[i / LONGLONG_SIZE] |= 1ULL << (i % LONGLONG_SIZE); + } + } + const int col_blocks = DIVUP(n_boxes, threadsPerBlock); + +#pragma unroll + for (int i = 0; i < MULTIPLIER; ++i) { + dev_mask[(cur_box_idx * col_blocks + col_start) * MULTIPLIER + i] = + ts[i]; + } + } +} + +void _set_device(int device_id) { + int current_device; + CUDA_CHECK(cudaGetDevice(¤t_device)); + if (current_device == device_id) { + return; + } + // The call to cudaSetDevice must come before any calls to Get, which + // may perform initialization using the GPU. 
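Note: some arithmetic behind the kernel above: with MULTIPLIER=16, each block covers threadsPerBlock = 64 * 16 = 1024 boxes, and each box's row of dev_mask stores DIVUP(n_boxes, 1024) groups of 16 long longs, one bit per pairwise suppression decision. The IoU test itself treats coordinates as inclusive pixel indices (the +1 terms); the same convention in plain Python, for checking:

    def dev_iou(a, b):
        # Mirrors devIoU in nms_kernel.cu, inclusive +1 convention included.
        left, right = max(a[0], b[0]), min(a[2], b[2])
        top, bottom = max(a[1], b[1]), min(a[3], b[3])
        width = max(right - left + 1, 0.)
        height = max(bottom - top + 1, 0.)
        inter = width * height
        area_a = (a[2] - a[0] + 1) * (a[3] - a[1] + 1)
        area_b = (b[2] - b[0] + 1) * (b[3] - b[1] + 1)
        return inter / (area_a + area_b - inter)

    print(dev_iou([0, 0, 9, 9], [5, 0, 14, 9]))  # 1/3: 5x10 overlap, two 10x10 boxes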
+ CUDA_CHECK(cudaSetDevice(device_id)); +} + +const size_t MEMORY_SIZE = 500000000; +size_t nms_Malloc() { + float* boxes_dev = NULL; + CUDA_CHECK(cudaMalloc(&boxes_dev, MEMORY_SIZE)); + return size_t(boxes_dev); +} + +void _nms(int* keep_out, int* num_out, const float* boxes_host, int boxes_num, + int boxes_dim, float nms_overlap_thresh, int device_id, size_t base) { + _set_device(device_id); + + float* boxes_dev = NULL; + unsigned long long* mask_dev = NULL; + + const int col_blocks = DIVUP(boxes_num, threadsPerBlock); + + if (base > 0) { + size_t require_mem = + boxes_num * boxes_dim * sizeof(float) + + boxes_num * col_blocks * sizeof(unsigned long long) * MULTIPLIER; + if (require_mem >= MEMORY_SIZE) { + std::cout << "require_mem: " << require_mem << std::endl; + } + boxes_dev = (float*)(base); + mask_dev = + (unsigned long long*)(base + + 512 * ((unsigned long long)(boxes_num * + boxes_dim * + sizeof(float) / + 512) + + 1)); + } else { + CUDA_CHECK( + cudaMalloc(&boxes_dev, boxes_num * boxes_dim * sizeof(float))); + CUDA_CHECK(cudaMalloc(&mask_dev, MULTIPLIER * boxes_num * col_blocks * + sizeof(unsigned long long))); + } + CUDA_CHECK(cudaMemcpy(boxes_dev, boxes_host, + boxes_num * boxes_dim * sizeof(float), + cudaMemcpyHostToDevice)); + + dim3 blocks(DIVUP(boxes_num, threadsPerBlock), + DIVUP(boxes_num, threadsPerBlock)); + dim3 threads(threadsPerBlock); + nms_kernel<<<blocks, threads>>>(boxes_num, nms_overlap_thresh, boxes_dev, + mask_dev); + + std::vector<unsigned long long> mask_host(boxes_num * col_blocks * + MULTIPLIER); + CUDA_CHECK(cudaMemcpy( + &mask_host[0], mask_dev, + sizeof(unsigned long long) * boxes_num * col_blocks * MULTIPLIER, + cudaMemcpyDeviceToHost)); + + std::vector<unsigned long long> remv(col_blocks * MULTIPLIER); + memset(&remv[0], 0, sizeof(unsigned long long) * col_blocks * MULTIPLIER); + + int num_to_keep = 0; + for (int i = 0; i < boxes_num; i++) { + int nblock = i / threadsPerBlock; + int inblock = i % threadsPerBlock; + int offset = inblock / LONGLONG_SIZE; + int bit_pos = inblock % LONGLONG_SIZE; + + if (!(remv[nblock * MULTIPLIER + offset] & (1ULL << bit_pos))) { + keep_out[num_to_keep++] = i; + unsigned long long* p = &mask_host[0] + i * col_blocks * MULTIPLIER; + for (int j = nblock * MULTIPLIER + offset; + j < col_blocks * MULTIPLIER; j++) { + remv[j] |= p[j]; + } + } + } + *num_out = num_to_keep; + + if (!base) { + CUDA_CHECK(cudaFree(boxes_dev)); + CUDA_CHECK(cudaFree(mask_dev)); + } +} diff --git a/mmdet/ops/nms/nms_wrapper.py b/mmdet/ops/nms/nms_wrapper.py new file mode 100644 index 0000000000000000000000000000000000000000..43d5e5c6e5c038467f2084d46d85b97bb2a943f1 --- /dev/null +++ b/mmdet/ops/nms/nms_wrapper.py @@ -0,0 +1,46 @@ +import numpy as np +import torch + +from .gpu_nms import gpu_nms +from .cpu_nms import cpu_nms +from .cpu_soft_nms import cpu_soft_nms + + +def nms(dets, thresh, device_id=None): + """Dispatch to either CPU or GPU NMS implementations.""" + + if isinstance(dets, torch.Tensor): + if dets.is_cuda: + device_id = dets.get_device() + dets = dets.detach().cpu().numpy() + assert isinstance(dets, np.ndarray) + + if dets.shape[0] == 0: + inds = [] + else: + inds = (gpu_nms(dets, thresh, device_id=device_id) + if device_id is not None else cpu_nms(dets, thresh)) + + if isinstance(dets, torch.Tensor): + return dets.new_tensor(inds, dtype=torch.long) + else: + return np.array(inds, dtype=np.int) + + +def soft_nms(dets, Nt=0.3, method=1, sigma=0.5, min_score=0): + if isinstance(dets, torch.Tensor): + _dets = 
+
+
+def soft_nms(dets, Nt=0.3, method=1, sigma=0.5, min_score=0):
+    if isinstance(dets, torch.Tensor):
+        _dets = dets.detach().cpu().numpy()
+    else:
+        _dets = dets.copy()
+    assert isinstance(_dets, np.ndarray)
+
+    new_dets, inds = cpu_soft_nms(
+        _dets, Nt=Nt, method=method, sigma=sigma, threshold=min_score)
+
+    if isinstance(dets, torch.Tensor):
+        return dets.new_tensor(
+            inds, dtype=torch.long), dets.new_tensor(new_dets)
+    else:
+        return np.array(
+            inds, dtype=np.int), np.array(
+                new_dets, dtype=np.float32)
diff --git a/mmdet/ops/nms/setup.py b/mmdet/ops/nms/setup.py
new file mode 100644
index 0000000000000000000000000000000000000000..98bf57c8f135805927205ec638d865177b070d8c
--- /dev/null
+++ b/mmdet/ops/nms/setup.py
@@ -0,0 +1,91 @@
+import os
+from distutils.core import setup
+from distutils.extension import Extension
+
+import numpy as np
+from Cython.Build import cythonize
+from Cython.Distutils import build_ext
+
+CUDA_ROOT = '/usr/local/cuda'
+CUDA = {
+    "include": os.path.join(CUDA_ROOT, 'include'),
+    "lib": os.path.join(CUDA_ROOT, 'lib64'),
+    "nvcc": os.path.join(CUDA_ROOT, 'bin', "nvcc")
+}
+
+inc_dirs = [CUDA['include'], np.get_include()]
+
+lib_dirs = [CUDA['lib']]
+
+# extensions
+ext_args = dict(
+    include_dirs=inc_dirs,
+    library_dirs=lib_dirs,
+    language='c++',
+    libraries=['cudart'],
+    extra_compile_args={
+        "cc": ['-Wno-unused-function', '-Wno-write-strings'],
+        "nvcc": [
+            '-arch=sm_52', '--ptxas-options=-v', '-c', '--compiler-options',
+            '-fPIC'
+        ],
+    },
+)
+
+extensions = [
+    Extension('cpu_nms', ['cpu_nms.pyx'], **ext_args),
+    Extension('gpu_nms', ['gpu_nms.pyx', 'nms_kernel.cu'], **ext_args),
+    Extension('cpu_soft_nms', ['cpu_soft_nms.pyx'], **ext_args),
+]
+
+
+def customize_compiler_for_nvcc(self):
+    """Inject deep into distutils to customize how the dispatch
+    to cc/nvcc works.
+    If you subclass UnixCCompiler, it's not trivial to get your subclass
+    injected in, and still have the right customizations (i.e.
+    distutils.sysconfig.customize_compiler) run on it. So instead of going
+    the OO route, I have this. Note, it's kind of like a weird functional
+    subclassing going on."""
+
+    # tell the compiler it can process .cu source files
+    self.src_extensions.append('.cu')
+
+    # save references to the default compiler_so and _compile methods
+    default_compiler_so = self.compiler_so
+    super = self._compile
+
+    # now redefine the _compile method. This gets executed for each
+    # object but distutils doesn't have the ability to change compilers
+    # based on source extension: we add it.
+ def _compile(obj, src, ext, cc_args, extra_postargs, pp_opts): + if os.path.splitext(src)[1] == '.cu': + # use the cuda for .cu files + self.set_executable('compiler_so', CUDA['nvcc']) + # use only a subset of the extra_postargs, which are 1-1 translated + # from the extra_compile_args in the Extension class + postargs = extra_postargs['nvcc'] + else: + postargs = extra_postargs['cc'] + + super(obj, src, ext, cc_args, postargs, pp_opts) + # reset the default compiler_so, which we might have changed for cuda + self.compiler_so = default_compiler_so + + # inject our redefined _compile method into the class + self._compile = _compile + + +# run the customize_compiler +class custom_build_ext(build_ext): + + def build_extensions(self): + customize_compiler_for_nvcc(self.compiler) + build_ext.build_extensions(self) + + +setup( + name='nms', + cmdclass={'build_ext': custom_build_ext}, + ext_modules=cythonize(extensions), +) diff --git a/mmdet/ops/roi_align/__init__.py b/mmdet/ops/roi_align/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..ae27e21d6c78e9ffd8d13e8c71017ef6f365fb5e --- /dev/null +++ b/mmdet/ops/roi_align/__init__.py @@ -0,0 +1,2 @@ +from .functions.roi_align import roi_align +from .modules.roi_align import RoIAlign diff --git a/mmdet/ops/roi_align/functions/__init__.py b/mmdet/ops/roi_align/functions/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/mmdet/ops/roi_align/functions/roi_align.py b/mmdet/ops/roi_align/functions/roi_align.py new file mode 100644 index 0000000000000000000000000000000000000000..0e546fe59527570a2331f6f79bb6113f1cc1abb9 --- /dev/null +++ b/mmdet/ops/roi_align/functions/roi_align.py @@ -0,0 +1,61 @@ +from torch.autograd import Function, Variable + +from .. 
import roi_align_cuda + + +class RoIAlignFunction(Function): + + @staticmethod + def forward(ctx, features, rois, out_size, spatial_scale, sample_num=0): + if isinstance(out_size, int): + out_h = out_size + out_w = out_size + elif isinstance(out_size, tuple): + assert len(out_size) == 2 + assert isinstance(out_size[0], int) + assert isinstance(out_size[1], int) + out_h, out_w = out_size + else: + raise TypeError( + '"out_size" must be an integer or tuple of integers') + ctx.spatial_scale = spatial_scale + ctx.sample_num = sample_num + ctx.save_for_backward(rois) + ctx.feature_size = features.size() + + batch_size, num_channels, data_height, data_width = features.size() + num_rois = rois.size(0) + + output = features.new_zeros(num_rois, num_channels, out_h, out_w) + if features.is_cuda: + roi_align_cuda.forward(features, rois, out_h, out_w, spatial_scale, + sample_num, output) + else: + raise NotImplementedError + + return output + + @staticmethod + def backward(ctx, grad_output): + feature_size = ctx.feature_size + spatial_scale = ctx.spatial_scale + sample_num = ctx.sample_num + rois = ctx.saved_tensors[0] + assert (feature_size is not None and grad_output.is_cuda) + + batch_size, num_channels, data_height, data_width = feature_size + out_w = grad_output.size(3) + out_h = grad_output.size(2) + + grad_input = grad_rois = None + if ctx.needs_input_grad[0]: + grad_input = Variable( + rois.new(batch_size, num_channels, data_height, data_width) + .zero_()) + roi_align_cuda.backward(grad_output, rois, out_h, out_w, + spatial_scale, sample_num, grad_input) + + return grad_input, grad_rois, None, None, None + + +roi_align = RoIAlignFunction.apply diff --git a/mmdet/ops/roi_align/gradcheck.py b/mmdet/ops/roi_align/gradcheck.py new file mode 100644 index 0000000000000000000000000000000000000000..e2c51e64bb7b5eba9da3087d83cfa1083f965bbc --- /dev/null +++ b/mmdet/ops/roi_align/gradcheck.py @@ -0,0 +1,29 @@ +import numpy as np +import torch +from torch.autograd import gradcheck + +import os.path as osp +import sys +sys.path.append(osp.abspath(osp.join(__file__, '../../'))) +from roi_align import RoIAlign + +feat_size = 15 +spatial_scale = 1.0 / 8 +img_size = feat_size / spatial_scale +num_imgs = 2 +num_rois = 20 + +batch_ind = np.random.randint(num_imgs, size=(num_rois, 1)) +rois = np.random.rand(num_rois, 4) * img_size * 0.5 +rois[:, 2:] += img_size * 0.5 +rois = np.hstack((batch_ind, rois)) + +feat = torch.randn( + num_imgs, 16, feat_size, feat_size, requires_grad=True, device='cuda:0') +rois = torch.from_numpy(rois).float().cuda() +inputs = (feat, rois) +print('Gradcheck for roi align...') +test = gradcheck(RoIAlign(3, spatial_scale), inputs, atol=1e-3, eps=1e-3) +print(test) +test = gradcheck(RoIAlign(3, spatial_scale, 2), inputs, atol=1e-3, eps=1e-3) +print(test) diff --git a/mmdet/ops/roi_align/modules/__init__.py b/mmdet/ops/roi_align/modules/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/mmdet/ops/roi_align/modules/roi_align.py b/mmdet/ops/roi_align/modules/roi_align.py new file mode 100644 index 0000000000000000000000000000000000000000..b83b74e6b7c151eaf627c2b6d3530823ce8cda05 --- /dev/null +++ b/mmdet/ops/roi_align/modules/roi_align.py @@ -0,0 +1,16 @@ +from torch.nn.modules.module import Module +from ..functions.roi_align import RoIAlignFunction + + +class RoIAlign(Module): + + def __init__(self, out_size, spatial_scale, sample_num=0): + super(RoIAlign, self).__init__() + + self.out_size = out_size + 
self.spatial_scale = float(spatial_scale) + self.sample_num = int(sample_num) + + def forward(self, features, rois): + return RoIAlignFunction.apply(features, rois, self.out_size, + self.spatial_scale, self.sample_num) diff --git a/mmdet/ops/roi_align/setup.py b/mmdet/ops/roi_align/setup.py new file mode 100644 index 0000000000000000000000000000000000000000..f02a5ea30d66f51761038c7802d948f039871c8c --- /dev/null +++ b/mmdet/ops/roi_align/setup.py @@ -0,0 +1,12 @@ +from setuptools import setup +from torch.utils.cpp_extension import BuildExtension, CUDAExtension + +setup( + name='roi_align_cuda', + ext_modules=[ + CUDAExtension('roi_align_cuda', [ + 'src/roi_align_cuda.cpp', + 'src/roi_align_kernel.cu', + ]), + ], + cmdclass={'build_ext': BuildExtension}) diff --git a/mmdet/ops/roi_align/src/roi_align_cuda.cpp b/mmdet/ops/roi_align/src/roi_align_cuda.cpp new file mode 100644 index 0000000000000000000000000000000000000000..e4c28c142268d4caf3ff2800dcfe9b24e8e99c66 --- /dev/null +++ b/mmdet/ops/roi_align/src/roi_align_cuda.cpp @@ -0,0 +1,85 @@ +#include <torch/torch.h> + +#include <cmath> +#include <vector> + +int ROIAlignForwardLaucher(const at::Tensor features, const at::Tensor rois, + const float spatial_scale, const int sample_num, + const int channels, const int height, + const int width, const int num_rois, + const int pooled_height, const int pooled_width, + at::Tensor output); + +int ROIAlignBackwardLaucher(const at::Tensor top_grad, const at::Tensor rois, + const float spatial_scale, const int sample_num, + const int channels, const int height, + const int width, const int num_rois, + const int pooled_height, const int pooled_width, + at::Tensor bottom_grad); + +#define CHECK_CUDA(x) AT_ASSERT(x.type().is_cuda(), #x " must be a CUDAtensor ") +#define CHECK_CONTIGUOUS(x) \ + AT_ASSERT(x.is_contiguous(), #x " must be contiguous ") +#define CHECK_INPUT(x) \ + CHECK_CUDA(x); \ + CHECK_CONTIGUOUS(x) + +int roi_align_forward_cuda(at::Tensor features, at::Tensor rois, + int pooled_height, int pooled_width, + float spatial_scale, int sample_num, + at::Tensor output) { + CHECK_INPUT(features); + CHECK_INPUT(rois); + CHECK_INPUT(output); + + // Number of ROIs + int num_rois = rois.size(0); + int size_rois = rois.size(1); + + if (size_rois != 5) { + printf("wrong roi size\n"); + return 0; + } + + int num_channels = features.size(1); + int data_height = features.size(2); + int data_width = features.size(3); + + ROIAlignForwardLaucher(features, rois, spatial_scale, sample_num, + num_channels, data_height, data_width, num_rois, + pooled_height, pooled_width, output); + + return 1; +} + +int roi_align_backward_cuda(at::Tensor top_grad, at::Tensor rois, + int pooled_height, int pooled_width, + float spatial_scale, int sample_num, + at::Tensor bottom_grad) { + CHECK_INPUT(top_grad); + CHECK_INPUT(rois); + CHECK_INPUT(bottom_grad); + + // Number of ROIs + int num_rois = rois.size(0); + int size_rois = rois.size(1); + if (size_rois != 5) { + printf("wrong roi size\n"); + return 0; + } + + int num_channels = bottom_grad.size(1); + int data_height = bottom_grad.size(2); + int data_width = bottom_grad.size(3); + + ROIAlignBackwardLaucher(top_grad, rois, spatial_scale, sample_num, + num_channels, data_height, data_width, num_rois, + pooled_height, pooled_width, bottom_grad); + + return 1; +} + +PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { + m.def("forward", &roi_align_forward_cuda, "Roi_Align forward (CUDA)"); + m.def("backward", &roi_align_backward_cuda, "Roi_Align backward (CUDA)"); +} diff --git 
a/mmdet/ops/roi_align/src/roi_align_kernel.cu b/mmdet/ops/roi_align/src/roi_align_kernel.cu new file mode 100644 index 0000000000000000000000000000000000000000..31be093c038872ff0b48c79157e5048d25a416cf --- /dev/null +++ b/mmdet/ops/roi_align/src/roi_align_kernel.cu @@ -0,0 +1,319 @@ +#include <ATen/ATen.h> + +#include <cuda.h> +#include <cuda_runtime.h> + +#include <math.h> +#include <stdio.h> +#include <vector> + +#define CUDA_1D_KERNEL_LOOP(i, n) \ + for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < n; \ + i += blockDim.x * gridDim.x) + +#define THREADS_PER_BLOCK 1024 + +inline int GET_BLOCKS(const int N) { + int optimal_block_num = (N + THREADS_PER_BLOCK - 1) / THREADS_PER_BLOCK; + int max_block_num = 65000; + return min(optimal_block_num, max_block_num); +} + +template <typename scalar_t> +__device__ scalar_t bilinear_interpolate(const scalar_t *bottom_data, + const int height, const int width, + scalar_t y, scalar_t x) { + // deal with cases that inverse elements are out of feature map boundary + if (y < -1.0 || y > height || x < -1.0 || x > width) { + return 0; + } + + if (y <= 0) + y = 0; + if (x <= 0) + x = 0; + + int y_low = (int)y; + int x_low = (int)x; + int y_high; + int x_high; + + if (y_low >= height - 1) { + y_high = y_low = height - 1; + y = (scalar_t)y_low; + } else { + y_high = y_low + 1; + } + + if (x_low >= width - 1) { + x_high = x_low = width - 1; + x = (scalar_t)x_low; + } else { + x_high = x_low + 1; + } + + scalar_t ly = y - y_low; + scalar_t lx = x - x_low; + scalar_t hy = 1. - ly; + scalar_t hx = 1. - lx; + // do bilinear interpolation + scalar_t lt = bottom_data[y_low * width + x_low]; + scalar_t rt = bottom_data[y_low * width + x_high]; + scalar_t lb = bottom_data[y_high * width + x_low]; + scalar_t rb = bottom_data[y_high * width + x_high]; + scalar_t w1 = hy * hx, w2 = hy * lx, w3 = ly * hx, w4 = ly * lx; + + scalar_t val = (w1 * lt + w2 * rt + w3 * lb + w4 * rb); + + return val; +} + +template <typename scalar_t> +__global__ void +ROIAlignForward(const int nthreads, const scalar_t *bottom_data, + const scalar_t *bottom_rois, const scalar_t spatial_scale, + const int sample_num, const int channels, const int height, + const int width, const int pooled_height, + const int pooled_width, scalar_t *top_data) { + CUDA_1D_KERNEL_LOOP(index, nthreads) { + // (n, c, ph, pw) is an element in the aligned output + int pw = index % pooled_width; + int ph = (index / pooled_width) % pooled_height; + int c = (index / pooled_width / pooled_height) % channels; + int n = index / pooled_width / pooled_height / channels; + + const scalar_t *offset_bottom_rois = bottom_rois + n * 5; + int roi_batch_ind = offset_bottom_rois[0]; + scalar_t roi_start_w = offset_bottom_rois[1] * spatial_scale; + scalar_t roi_start_h = offset_bottom_rois[2] * spatial_scale; + scalar_t roi_end_w = (offset_bottom_rois[3] + 1) * spatial_scale; + scalar_t roi_end_h = (offset_bottom_rois[4] + 1) * spatial_scale; + + // Force malformed ROIs to be 1x1 + scalar_t roi_width = fmaxf((scalar_t)roi_end_w - roi_start_w, 0.); + scalar_t roi_height = fmaxf((scalar_t)roi_end_h - roi_start_h, 0.); + + scalar_t bin_size_h = roi_height / pooled_height; + scalar_t bin_size_w = roi_width / pooled_width; + + const scalar_t *offset_bottom_data = + bottom_data + (roi_batch_ind * channels + c) * height * width; + + int sample_num_h = (sample_num > 0) + ? sample_num + : ceil(roi_height / pooled_height); // e.g., = 2 + int sample_num_w = + (sample_num > 0) ? 
sample_num : ceil(roi_width / pooled_width); + + scalar_t h = (scalar_t)(ph + 0.5) * bin_size_h + roi_start_h; + scalar_t w = (scalar_t)(pw + 0.5) * bin_size_w + roi_start_w; + + int hstart = fminf(floor(h), height - 2); + int wstart = fminf(floor(w), width - 2); + + scalar_t output_val = 0; + for (int iy = 0; iy < sample_num_h; iy++) { + const scalar_t y = roi_start_h + ph * bin_size_h + + (scalar_t)(iy + scalar_t(.5f)) * bin_size_h / + (scalar_t)(sample_num_h); + for (int ix = 0; ix < sample_num_w; ix++) { + const scalar_t x = roi_start_w + pw * bin_size_w + + (scalar_t)(ix + scalar_t(.5f)) * bin_size_w / + (scalar_t)(sample_num_w); + scalar_t val = bilinear_interpolate<scalar_t>(offset_bottom_data, + height, width, y, x); + output_val += val; + } + } + output_val /= (sample_num_h * sample_num_w); + top_data[index] = output_val; + } +} + +int ROIAlignForwardLaucher(const at::Tensor features, const at::Tensor rois, + const float spatial_scale, const int sample_num, + const int channels, const int height, + const int width, const int num_rois, + const int pooled_height, const int pooled_width, + at::Tensor output) { + const int output_size = num_rois * pooled_height * pooled_width * channels; + AT_DISPATCH_FLOATING_TYPES( + features.type(), "ROIAlignLaucherForward", ([&] { + const scalar_t *bottom_data = features.data<scalar_t>(); + const scalar_t *rois_data = rois.data<scalar_t>(); + scalar_t *top_data = output.data<scalar_t>(); + + ROIAlignForward< + scalar_t><<<GET_BLOCKS(output_size), THREADS_PER_BLOCK>>>( + output_size, bottom_data, rois_data, scalar_t(spatial_scale), + sample_num, channels, height, width, pooled_height, pooled_width, + top_data); + })); + cudaError_t err = cudaGetLastError(); + if (cudaSuccess != err) { + fprintf(stderr, "cudaCheckError() failed : %s\n", cudaGetErrorString(err)); + exit(-1); + } + + return 1; +} + +template <typename scalar_t> +__device__ void +bilinear_interpolate_gradient(const int height, const int width, scalar_t y, + scalar_t x, scalar_t &w1, scalar_t &w2, + scalar_t &w3, scalar_t &w4, int &x_low, + int &x_high, int &y_low, int &y_high) { + // deal with cases that inverse elements are out of feature map boundary + if (y < -1.0 || y > height || x < -1.0 || x > width) { + w1 = w2 = w3 = w4 = 0.; + x_low = x_high = y_low = y_high = -1; + return; + } + + if (y <= 0) + y = 0; + if (x <= 0) + x = 0; + + y_low = (int)y; + x_low = (int)x; + + if (y_low >= height - 1) { + y_high = y_low = height - 1; + y = (scalar_t)y_low; + } else { + y_high = y_low + 1; + } + + if (x_low >= width - 1) { + x_high = x_low = width - 1; + x = (scalar_t)x_low; + } else { + x_high = x_low + 1; + } + + scalar_t ly = y - y_low; + scalar_t lx = x - x_low; + scalar_t hy = 1. - ly; + scalar_t hx = 1. 
- lx; + + w1 = hy * hx, w2 = hy * lx, w3 = ly * hx, w4 = ly * lx; + + return; +} + +template <typename scalar_t> +__global__ void +ROIAlignBackward(const int nthreads, const scalar_t *top_diff, + const scalar_t *bottom_rois, const scalar_t spatial_scale, + const int sample_num, const int channels, const int height, + const int width, const int pooled_height, + const int pooled_width, scalar_t *bottom_diff) { + CUDA_1D_KERNEL_LOOP(index, nthreads) { + // (n, c, ph, pw) is an element in the aligned output + int pw = index % pooled_width; + int ph = (index / pooled_width) % pooled_height; + int c = (index / pooled_width / pooled_height) % channels; + int n = index / pooled_width / pooled_height / channels; + + const scalar_t *offset_bottom_rois = bottom_rois + n * 5; + int roi_batch_ind = offset_bottom_rois[0]; + scalar_t roi_start_w = offset_bottom_rois[1] * spatial_scale; + scalar_t roi_start_h = offset_bottom_rois[2] * spatial_scale; + scalar_t roi_end_w = (offset_bottom_rois[3] + 1) * spatial_scale; + scalar_t roi_end_h = (offset_bottom_rois[4] + 1) * spatial_scale; + + // Force malformed ROIs to be 1x1 + scalar_t roi_width = fmaxf((scalar_t)roi_end_w - roi_start_w, 0.); + scalar_t roi_height = fmaxf((scalar_t)roi_end_h - roi_start_h, 0.); + + scalar_t bin_size_h = roi_height / pooled_height; + scalar_t bin_size_w = roi_width / pooled_width; + + scalar_t *offset_bottom_diff = + bottom_diff + (roi_batch_ind * channels + c) * height * width; + int offset_top = (n * channels + c) * pooled_height * pooled_width + + ph * pooled_width + pw; + scalar_t offset_top_diff = top_diff[offset_top]; + + int sample_num_h = (sample_num > 0) + ? sample_num + : ceil(roi_height / pooled_height); // e.g., = 2 + int sample_num_w = + (sample_num > 0) ? sample_num : ceil(roi_width / pooled_width); + + const scalar_t count = (scalar_t)(sample_num_h * sample_num_w); + + scalar_t h = (scalar_t)(ph + 0.5) * bin_size_h + roi_start_h; + scalar_t w = (scalar_t)(pw + 0.5) * bin_size_w + roi_start_w; + + int hstart = fminf(floor(h), height - 2); + int wstart = fminf(floor(w), width - 2); + + for (int iy = 0; iy < sample_num_h; iy++) { + const scalar_t y = + roi_start_h + ph * bin_size_h + + (scalar_t)(iy + .5f) * bin_size_h / (scalar_t)(sample_num_h); + for (int ix = 0; ix < sample_num_w; ix++) { + const scalar_t x = + roi_start_w + pw * bin_size_w + + (scalar_t)(ix + .5f) * bin_size_w / (scalar_t)(sample_num_w); + scalar_t w1, w2, w3, w4; + int x_low, x_high, y_low, y_high; + + bilinear_interpolate_gradient<scalar_t>( + height, width, y, x, w1, w2, w3, w4, x_low, x_high, y_low, y_high); + scalar_t g1 = offset_top_diff * w1 / count; + scalar_t g2 = offset_top_diff * w2 / count; + scalar_t g3 = offset_top_diff * w3 / count; + scalar_t g4 = offset_top_diff * w4 / count; + if (x_low >= 0 && x_high >= 0 && y_low >= 0 && y_high >= 0) { + atomicAdd(offset_bottom_diff + y_low * width + x_low, g1); + atomicAdd(offset_bottom_diff + y_low * width + x_high, g2); + atomicAdd(offset_bottom_diff + y_high * width + x_low, g3); + atomicAdd(offset_bottom_diff + y_high * width + x_high, g4); + } + } + } + } +} + +template <> +__global__ void ROIAlignBackward<double>( + const int nthreads, const double *top_diff, const double *bottom_rois, + const double spatial_scale, const int sample_num, const int channels, + const int height, const int width, const int pooled_height, + const int pooled_width, double *bottom_diff) {} + +int ROIAlignBackwardLaucher(const at::Tensor top_grad, const at::Tensor rois, + const float spatial_scale, const int 
sample_num, + const int channels, const int height, + const int width, const int num_rois, + const int pooled_height, const int pooled_width, + at::Tensor bottom_grad) { + const int output_size = num_rois * pooled_height * pooled_width * channels; + + AT_DISPATCH_FLOATING_TYPES( + top_grad.type(), "ROIAlignLaucherBackward", ([&] { + const scalar_t *top_diff = top_grad.data<scalar_t>(); + const scalar_t *rois_data = rois.data<scalar_t>(); + scalar_t *bottom_diff = bottom_grad.data<scalar_t>(); + if (sizeof(scalar_t) == sizeof(double)) { + fprintf(stderr, "double is not supported\n"); + exit(-1); + } + + ROIAlignBackward< + scalar_t><<<GET_BLOCKS(output_size), THREADS_PER_BLOCK>>>( + output_size, top_diff, rois_data, spatial_scale, sample_num, + channels, height, width, pooled_height, pooled_width, bottom_diff); + })); + cudaError_t err = cudaGetLastError(); + if (cudaSuccess != err) { + fprintf(stderr, "cudaCheckError() failed : %s\n", cudaGetErrorString(err)); + exit(-1); + } + + return 1; +} diff --git a/mmdet/ops/roi_pool/__init__.py b/mmdet/ops/roi_pool/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..9c8506d319d3c9c2300860a6c0d64259e43e7916 --- /dev/null +++ b/mmdet/ops/roi_pool/__init__.py @@ -0,0 +1,2 @@ +from .functions.roi_pool import roi_pool +from .modules.roi_pool import RoIPool diff --git a/mmdet/ops/roi_pool/functions/__init__.py b/mmdet/ops/roi_pool/functions/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/mmdet/ops/roi_pool/functions/roi_pool.py b/mmdet/ops/roi_pool/functions/roi_pool.py new file mode 100644 index 0000000000000000000000000000000000000000..78ba1395fb9b653673c3ad57d076def78887b5ff --- /dev/null +++ b/mmdet/ops/roi_pool/functions/roi_pool.py @@ -0,0 +1,56 @@ +import torch +from torch.autograd import Function + +from .. 
import roi_pool_cuda
+
+
+class RoIPoolFunction(Function):
+
+    @staticmethod
+    def forward(ctx, features, rois, out_size, spatial_scale):
+        if isinstance(out_size, int):
+            out_h = out_size
+            out_w = out_size
+        elif isinstance(out_size, tuple):
+            assert len(out_size) == 2
+            assert isinstance(out_size[0], int)
+            assert isinstance(out_size[1], int)
+            out_h, out_w = out_size
+        else:
+            raise TypeError(
+                '"out_size" must be an integer or tuple of integers')
+        assert features.is_cuda
+        ctx.save_for_backward(rois)
+        num_channels = features.size(1)
+        num_rois = rois.size(0)
+        out_size = (num_rois, num_channels, out_h, out_w)
+        output = features.new_zeros(*out_size)
+
+        argmax = features.new_zeros(*out_size, dtype=torch.int)
+        roi_pool_cuda.forward(features, rois, out_h, out_w, spatial_scale,
+                              output, argmax)
+        ctx.spatial_scale = spatial_scale
+        ctx.feature_size = features.size()
+        ctx.argmax = argmax
+
+        return output
+
+    @staticmethod
+    def backward(ctx, grad_output):
+        assert grad_output.is_cuda
+        spatial_scale = ctx.spatial_scale
+        feature_size = ctx.feature_size
+        argmax = ctx.argmax
+        rois = ctx.saved_tensors[0]
+        assert feature_size is not None
+
+        grad_input = grad_rois = None
+        if ctx.needs_input_grad[0]:
+            grad_input = grad_output.new(feature_size).zero_()
+            roi_pool_cuda.backward(grad_output, rois, argmax, spatial_scale,
+                                   grad_input)
+
+        return grad_input, grad_rois, None, None
+
+
+roi_pool = RoIPoolFunction.apply
diff --git a/mmdet/ops/roi_pool/gradcheck.py b/mmdet/ops/roi_pool/gradcheck.py
new file mode 100644
index 0000000000000000000000000000000000000000..dfc08b2e138855e913a2ac1f3c365a570aba661d
--- /dev/null
+++ b/mmdet/ops/roi_pool/gradcheck.py
@@ -0,0 +1,15 @@
+import torch
+from torch.autograd import gradcheck
+
+import os.path as osp
+import sys
+sys.path.append(osp.abspath(osp.join(__file__, '../../')))
+from roi_pool import RoIPool
+
+feat = torch.randn(4, 16, 15, 15, requires_grad=True).cuda()
+rois = torch.Tensor([[0, 0, 0, 50, 50], [0, 10, 30, 43, 55],
+                     [1, 67, 40, 110, 120]]).cuda()
+inputs = (feat, rois)
+print('Gradcheck for roi pooling...')
+test = gradcheck(RoIPool(4, 1.0 / 8), inputs, eps=1e-5, atol=1e-3)
+print(test)
diff --git a/mmdet/ops/roi_pool/modules/__init__.py b/mmdet/ops/roi_pool/modules/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/mmdet/ops/roi_pool/modules/roi_pool.py b/mmdet/ops/roi_pool/modules/roi_pool.py
new file mode 100644
index 0000000000000000000000000000000000000000..d7fffd08c656ee7301aeed5a8262714f4be4157d
--- /dev/null
+++ b/mmdet/ops/roi_pool/modules/roi_pool.py
@@ -0,0 +1,14 @@
+from torch.nn.modules.module import Module
+from ..functions.roi_pool import roi_pool
+
+
+class RoIPool(Module):
+
+    def __init__(self, out_size, spatial_scale):
+        super(RoIPool, self).__init__()
+
+        self.out_size = out_size
+        self.spatial_scale = float(spatial_scale)
+
+    def forward(self, features, rois):
+        return roi_pool(features, rois, self.out_size, self.spatial_scale)
diff --git a/mmdet/ops/roi_pool/setup.py b/mmdet/ops/roi_pool/setup.py
new file mode 100644
index 0000000000000000000000000000000000000000..16991b889220f9ae4c7763460033754c6ff38f77
--- /dev/null
+++ b/mmdet/ops/roi_pool/setup.py
@@ -0,0 +1,12 @@
+from setuptools import setup
+from torch.utils.cpp_extension import BuildExtension, CUDAExtension
+
+setup(
+    name='roi_pool',
+    ext_modules=[
+        CUDAExtension('roi_pool_cuda', [
+            'src/roi_pool_cuda.cpp',
+            'src/roi_pool_kernel.cu',
+        ])
+    ],
+    cmdclass={'build_ext': BuildExtension})
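An illustrative usage sketch for the module above (not part of the patch; it assumes the extension was built with `python setup.py build_ext --inplace` and that mmdet.ops is importable). Rows of `rois` are (batch_ind, x1, y1, x2, y2), matching the size_rois != 5 checks in the C++ bindings that follow:

import torch
from mmdet.ops.roi_pool import RoIPool  # exported by the package __init__ above

feat = torch.randn(2, 16, 32, 32, device='cuda')
rois = torch.tensor([[0., 4., 4., 28., 28.],
                     [1., 0., 0., 15., 15.]], device='cuda')
pool = RoIPool(out_size=7, spatial_scale=1.0 / 8)
out = pool(feat, rois)  # shape (2, 16, 7, 7): one max-pooled map per roi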
diff --git a/mmdet/ops/roi_pool/src/roi_pool_cuda.cpp b/mmdet/ops/roi_pool/src/roi_pool_cuda.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..799c151d192911f03e446ea9c1ad7bb18fa3b1d1
--- /dev/null
+++ b/mmdet/ops/roi_pool/src/roi_pool_cuda.cpp
@@ -0,0 +1,86 @@
+#include <torch/torch.h>
+
+#include <cmath>
+#include <vector>
+
+int ROIPoolForwardLaucher(const at::Tensor features, const at::Tensor rois,
+                          const float spatial_scale, const int channels,
+                          const int height, const int width, const int num_rois,
+                          const int pooled_h, const int pooled_w,
+                          at::Tensor output, at::Tensor argmax);
+
+int ROIPoolBackwardLaucher(const at::Tensor top_grad, const at::Tensor rois,
+                           const at::Tensor argmax, const float spatial_scale,
+                           const int batch_size, const int channels,
+                           const int height, const int width,
+                           const int num_rois, const int pooled_h,
+                           const int pooled_w, at::Tensor bottom_grad);
+
+#define CHECK_CUDA(x) AT_ASSERT(x.type().is_cuda(), #x " must be a CUDAtensor ")
+#define CHECK_CONTIGUOUS(x) \
+  AT_ASSERT(x.is_contiguous(), #x " must be contiguous ")
+#define CHECK_INPUT(x) \
+  CHECK_CUDA(x);       \
+  CHECK_CONTIGUOUS(x)
+
+int roi_pooling_forward_cuda(at::Tensor features, at::Tensor rois,
+                             int pooled_height, int pooled_width,
+                             float spatial_scale, at::Tensor output,
+                             at::Tensor argmax) {
+  CHECK_INPUT(features);
+  CHECK_INPUT(rois);
+  CHECK_INPUT(output);
+  CHECK_INPUT(argmax);
+
+  // Number of ROIs
+  int num_rois = rois.size(0);
+  int size_rois = rois.size(1);
+
+  if (size_rois != 5) {
+    printf("wrong roi size\n");
+    return 0;
+  }
+
+  int channels = features.size(1);
+  int height = features.size(2);
+  int width = features.size(3);
+
+  ROIPoolForwardLaucher(features, rois, spatial_scale, channels, height, width,
+                        num_rois, pooled_height, pooled_width, output, argmax);
+
+  return 1;
+}
+
+int roi_pooling_backward_cuda(at::Tensor top_grad, at::Tensor rois,
+                              at::Tensor argmax, float spatial_scale,
+                              at::Tensor bottom_grad) {
+  CHECK_INPUT(top_grad);
+  CHECK_INPUT(rois);
+  CHECK_INPUT(argmax);
+  CHECK_INPUT(bottom_grad);
+
+  int pooled_height = top_grad.size(2);
+  int pooled_width = top_grad.size(3);
+  int num_rois = rois.size(0);
+  int size_rois = rois.size(1);
+
+  if (size_rois != 5) {
+    printf("wrong roi size\n");
+    return 0;
+  }
+  int batch_size = bottom_grad.size(0);
+  int channels = bottom_grad.size(1);
+  int height = bottom_grad.size(2);
+  int width = bottom_grad.size(3);
+
+  ROIPoolBackwardLaucher(top_grad, rois, argmax, spatial_scale, batch_size,
+                         channels, height, width, num_rois, pooled_height,
+                         pooled_width, bottom_grad);
+
+  return 1;
+}
+
+PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
+  m.def("forward", &roi_pooling_forward_cuda, "Roi_Pooling forward (CUDA)");
+  m.def("backward", &roi_pooling_backward_cuda, "Roi_Pooling backward (CUDA)");
+}
diff --git a/mmdet/ops/roi_pool/src/roi_pool_kernel.cu b/mmdet/ops/roi_pool/src/roi_pool_kernel.cu
new file mode 100644
index 0000000000000000000000000000000000000000..c94a9cd78503c19995db88dd71f2b1ce5a36d629
--- /dev/null
+++ b/mmdet/ops/roi_pool/src/roi_pool_kernel.cu
@@ -0,0 +1,193 @@
+#include <ATen/ATen.h>
+
+#include <cuda.h>
+#include <cuda_runtime.h>
+
+#include <math.h>
+#include <stdio.h>
+#include <vector>
+
+#define CUDA_1D_KERNEL_LOOP(i, n)                            \
+  for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < n; \
+       i += blockDim.x * gridDim.x)
+
+#define THREADS_PER_BLOCK 1024
+
+inline int GET_BLOCKS(const int N) {
+  int optimal_block_num = (N + THREADS_PER_BLOCK - 1) / THREADS_PER_BLOCK;
+  int max_block_num = 65000;
+  return min(optimal_block_num, max_block_num);
+}
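The launch helper above and CUDA_1D_KERNEL_LOOP define the indexing scheme used by every kernel in this patch: GET_BLOCKS(N) launches ceil(N / 1024) blocks, capped at 65000, and the grid-stride loop covers any remainder. A worked decode of one flat output index, written in Python for brevity (illustrative, not part of the patch):

pooled_w, pooled_h, channels = 7, 7, 256         # example sizes
index = 13000                                    # one thread's flat output index
pw = index % pooled_w                            # -> 1
ph = (index // pooled_w) % pooled_h              # -> 2
c = (index // (pooled_w * pooled_h)) % channels  # -> 9
n = index // (pooled_w * pooled_h * channels)    # -> 1 (roi index)
assert ((n * channels + c) * pooled_h + ph) * pooled_w + pw == index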
+
+template <typename scalar_t>
+__global__ void ROIPoolForward(const int nthreads, const scalar_t *bottom_data,
+                               const scalar_t *rois,
+                               const scalar_t spatial_scale, const int channels,
+                               const int height, const int width,
+                               const int pooled_h, const int pooled_w,
+                               scalar_t *top_data, int *argmax_data) {
+  CUDA_1D_KERNEL_LOOP(index, nthreads) {
+    // (n, c, ph, pw) is an element in the pooled output
+    int pw = index % pooled_w;
+    int ph = (index / pooled_w) % pooled_h;
+    int c = (index / pooled_w / pooled_h) % channels;
+    int n = index / pooled_w / pooled_h / channels;
+
+    const scalar_t *offset_rois = rois + n * 5;
+    int roi_batch_ind = offset_rois[0];
+    // calculate the roi region on feature maps
+    scalar_t roi_x1 = offset_rois[1] * spatial_scale;
+    scalar_t roi_y1 = offset_rois[2] * spatial_scale;
+    scalar_t roi_x2 = (offset_rois[3] + 1) * spatial_scale;
+    scalar_t roi_y2 = (offset_rois[4] + 1) * spatial_scale;
+
+    // skip malformed rois (non-positive width or height)
+    scalar_t roi_w = roi_x2 - roi_x1;
+    scalar_t roi_h = roi_y2 - roi_y1;
+    if (roi_w <= 0 || roi_h <= 0)
+      continue;
+
+    scalar_t bin_size_w = roi_w / static_cast<scalar_t>(pooled_w);
+    scalar_t bin_size_h = roi_h / static_cast<scalar_t>(pooled_h);
+
+    // the corresponding bin region
+    int bin_x1 = floor(static_cast<scalar_t>(pw) * bin_size_w + roi_x1);
+    int bin_y1 = floor(static_cast<scalar_t>(ph) * bin_size_h + roi_y1);
+    int bin_x2 = ceil(static_cast<scalar_t>(pw + 1) * bin_size_w + roi_x1);
+    int bin_y2 = ceil(static_cast<scalar_t>(ph + 1) * bin_size_h + roi_y1);
+
+    // add roi offsets and clip to input boundaries
+    bin_x1 = min(max(bin_x1, 0), width);
+    bin_y1 = min(max(bin_y1, 0), height);
+    bin_x2 = min(max(bin_x2, 0), width);
+    bin_y2 = min(max(bin_y2, 0), height);
+    bool is_empty = (bin_y2 <= bin_y1) || (bin_x2 <= bin_x1);
+
+    // If nothing is pooled, argmax = -1 causes nothing to be backprop'd
+    int max_idx = -1;
+    bottom_data += (roi_batch_ind * channels + c) * height * width;
+
+    // Define an empty pooling region to be zero
+    scalar_t max_val = is_empty ?
0 : bottom_data[bin_y1 * width + bin_x1] - 1; + + for (int h = bin_y1; h < bin_y2; ++h) { + for (int w = bin_x1; w < bin_x2; ++w) { + int offset = h * width + w; + if (bottom_data[offset] > max_val) { + max_val = bottom_data[offset]; + max_idx = offset; + } + } + } + top_data[index] = max_val; + if (argmax_data != NULL) + argmax_data[index] = max_idx; + } +} + +int ROIPoolForwardLaucher(const at::Tensor features, const at::Tensor rois, + const float spatial_scale, const int channels, + const int height, const int width, const int num_rois, + const int pooled_h, const int pooled_w, + at::Tensor output, at::Tensor argmax) { + const int output_size = num_rois * channels * pooled_h * pooled_w; + + AT_DISPATCH_FLOATING_TYPES( + features.type(), "ROIPoolLaucherForward", ([&] { + const scalar_t *bottom_data = features.data<scalar_t>(); + const scalar_t *rois_data = rois.data<scalar_t>(); + scalar_t *top_data = output.data<scalar_t>(); + int *argmax_data = argmax.data<int>(); + + ROIPoolForward< + scalar_t><<<GET_BLOCKS(output_size), THREADS_PER_BLOCK>>>( + output_size, bottom_data, rois_data, scalar_t(spatial_scale), + channels, height, width, pooled_h, pooled_w, top_data, argmax_data); + })); + cudaError_t err = cudaGetLastError(); + if (cudaSuccess != err) { + fprintf(stderr, "cudaCheckError() failed : %s\n", cudaGetErrorString(err)); + exit(-1); + } + return 1; +} + +template <typename scalar_t> +__global__ void ROIPoolBackward(const int nthreads, const scalar_t *top_diff, + const scalar_t *rois, const int *argmax_data, + const scalar_t spatial_scale, + const int channels, const int height, + const int width, const int pooled_h, + const int pooled_w, scalar_t *bottom_diff) { + CUDA_1D_KERNEL_LOOP(index, nthreads) { + int pw = index % pooled_w; + int ph = (index / pooled_w) % pooled_h; + int c = (index / pooled_w / pooled_h) % channels; + int n = index / pooled_w / pooled_h / channels; + + int roi_batch_ind = rois[n * 5]; + int bottom_index = argmax_data[(n * channels + c) * pooled_h * pooled_w + + ph * pooled_w + pw]; + + atomicAdd(bottom_diff + (roi_batch_ind * channels + c) * height * width + + bottom_index, + top_diff[index]); + } +} + +template <> +__global__ void +ROIPoolBackward<double>(const int nthreads, const double *top_diff, + const double *rois, const int *argmax_data, + const double spatial_scale, const int channels, + const int height, const int width, const int pooled_h, + const int pooled_w, double *bottom_diff) { + // CUDA_1D_KERNEL_LOOP(index, nthreads) { + // int pw = index % pooled_w; + // int ph = (index / pooled_w) % pooled_h; + // int c = (index / pooled_w / pooled_h) % channels; + // int n = index / pooled_w / pooled_h / channels; + + // int roi_batch_ind = rois[n * 5]; + // int bottom_index = argmax_data[(n * channels + c) * pooled_h * pooled_w + + // ph * pooled_w + pw]; + + // *(bottom_diff + (roi_batch_ind * channels + c) * height * width + + // bottom_index) +=top_diff[index]; + // } +} + +int ROIPoolBackwardLaucher(const at::Tensor top_grad, const at::Tensor rois, + const at::Tensor argmax, const float spatial_scale, + const int batch_size, const int channels, + const int height, const int width, + const int num_rois, const int pooled_h, + const int pooled_w, at::Tensor bottom_grad) { + const int output_size = num_rois * pooled_h * pooled_w * channels; + + AT_DISPATCH_FLOATING_TYPES( + top_grad.type(), "ROIPoolLaucherBackward", ([&] { + const scalar_t *top_diff = top_grad.data<scalar_t>(); + const scalar_t *rois_data = rois.data<scalar_t>(); + const int 
*argmax_data = argmax.data<int>();
+        scalar_t *bottom_diff = bottom_grad.data<scalar_t>();
+
+        if (sizeof(scalar_t) == sizeof(double)) {
+          fprintf(stderr, "double is not supported\n");
+          exit(-1);
+        }
+
+        ROIPoolBackward<
+            scalar_t><<<GET_BLOCKS(output_size), THREADS_PER_BLOCK>>>(
+            output_size, top_diff, rois_data, argmax_data,
+            scalar_t(spatial_scale), channels, height, width, pooled_h,
+            pooled_w, bottom_diff);
+      }));
+  cudaError_t err = cudaGetLastError();
+  if (cudaSuccess != err) {
+    fprintf(stderr, "cudaCheckError() failed : %s\n", cudaGetErrorString(err));
+    exit(-1);
+  }
+
+  return 1;
+}
diff --git a/mmdet/version.py b/mmdet/version.py
new file mode 100644
index 0000000000000000000000000000000000000000..2b8877c505752cd3aaa805b09b88791d3ca0c9bb
--- /dev/null
+++ b/mmdet/version.py
@@ -0,0 +1 @@
+__version__ = '0.5.0'
diff --git a/setup.py b/setup.py
new file mode 100644
index 0000000000000000000000000000000000000000..8ed19bd5a810692f308f99617f20fe2e07e86f5a
--- /dev/null
+++ b/setup.py
@@ -0,0 +1,40 @@
+from setuptools import find_packages, setup
+
+
+def readme():
+    with open('README.md') as f:
+        content = f.read()
+    return content
+
+
+def get_version():
+    version_file = 'mmdet/version.py'
+    with open(version_file, 'r') as f:
+        exec(compile(f.read(), version_file, 'exec'))
+    return locals()['__version__']
+
+
+setup(
+    name='mmdet',
+    version=get_version(),
+    description='Open MMLab Detection Toolbox',
+    long_description=readme(),
+    keywords='computer vision, object detection',
+    packages=find_packages(),
+    classifiers=[
+        'Development Status :: 4 - Beta',
+        'License :: OSI Approved :: GNU General Public License v3 (GPLv3)',
+        'Operating System :: OS Independent',
+        'Programming Language :: Python :: 2',
+        'Programming Language :: Python :: 2.7',
+        'Programming Language :: Python :: 3',
+        'Programming Language :: Python :: 3.4',
+        'Programming Language :: Python :: 3.5',
+        'Programming Language :: Python :: 3.6',
+        'Topic :: Utilities',
+    ],
+    license='GPLv3',
+    setup_requires=['pytest-runner'],
+    tests_require=['pytest'],
+    install_requires=['numpy', 'matplotlib', 'six', 'terminaltables'],
+    zip_safe=False)
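To close, a sketch of how the pieces in this patch compose at inference time (illustrative, not part of the patch; it assumes all extensions are built and that mmdet.ops is importable as a package):

import torch
from mmdet.ops.nms.nms_wrapper import nms
from mmdet.ops.roi_align import RoIAlign

feat = torch.randn(1, 256, 50, 50, device='cuda')    # stride-8 feature map
dets = torch.tensor([[  8.,   8., 120., 120., 0.95],
                     [ 12.,  10., 124., 118., 0.90], # near-duplicate of the first
                     [200., 200., 280., 300., 0.80]], device='cuda')
keep = nms(dets, 0.5)                     # drops the near-duplicate box
boxes = dets[keep, :4]
batch_inds = boxes.new_zeros(boxes.size(0), 1)       # all rois from image 0
rois = torch.cat([batch_inds, boxes], dim=1)         # (batch_ind, x1, y1, x2, y2)
align = RoIAlign(out_size=7, spatial_scale=1.0 / 8, sample_num=2)
roi_feats = align(feat, rois)             # -> (2, 256, 7, 7)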