Commit 7ed8d51e authored by Wenwei Zhang, committed by GitHub

Change V2.0 coords (#2380)

* Refactor (all): change coordinate system

* Fix (mask_head): fix cat -1 bug in mask_paste

* Fix (unittest): modify unittest and pass CI

* reformat to pass CI

* Fix round coordinates bugs

* clean file

* Fix (test): use cpu version of aligned roi_align in tests

* Refactor (mask): clean np.stack

* Refactor (head): reformat code and fix missing -1

* Reformat: reformat and add doc strings

* Refactor (mask_head): cleaner docstring
parent 5db9b2e3
Showing 289 additions and 172 deletions
@@ -8,10 +8,10 @@ class AnchorGenerator(object):
         >>> self = AnchorGenerator(9, [1.], [1.])
         >>> all_anchors = self.grid_anchors((2, 2), device='cpu')
         >>> print(all_anchors)
-        tensor([[ 0.,  0.,  8.,  8.],
-                [16.,  0., 24.,  8.],
-                [ 0., 16.,  8., 24.],
-                [16., 16., 24., 24.]])
+        tensor([[-4.5000, -4.5000,  4.5000,  4.5000],
+                [11.5000, -4.5000, 20.5000,  4.5000],
+                [-4.5000, 11.5000,  4.5000, 20.5000],
+                [11.5000, 11.5000, 20.5000, 20.5000]])
     """

     def __init__(self, base_size, scales, ratios, scale_major=True, ctr=None):
@@ -30,8 +30,8 @@ class AnchorGenerator(object):
         w = self.base_size
         h = self.base_size
         if self.ctr is None:
-            x_ctr = 0.5 * (w - 1)
-            y_ctr = 0.5 * (h - 1)
+            x_ctr = 0.
+            y_ctr = 0.
         else:
             x_ctr, y_ctr = self.ctr
@@ -44,14 +44,13 @@ class AnchorGenerator(object):
         ws = (w * self.scales[:, None] * w_ratios[None, :]).view(-1)
         hs = (h * self.scales[:, None] * h_ratios[None, :]).view(-1)
-        # yapf: disable
-        base_anchors = torch.stack(
-            [
-                x_ctr - 0.5 * (ws - 1), y_ctr - 0.5 * (hs - 1),
-                x_ctr + 0.5 * (ws - 1), y_ctr + 0.5 * (hs - 1)
-            ],
-            dim=-1).round()
-        # yapf: enable
+        # use float anchor and the anchor's center is aligned with the
+        # pixel center
+        base_anchors = [
+            x_ctr - 0.5 * ws, y_ctr - 0.5 * hs, x_ctr + 0.5 * ws,
+            y_ctr + 0.5 * hs
+        ]
+        base_anchors = torch.stack(base_anchors, dim=-1)
         return base_anchors
......
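Under the new convention the base anchor stays a float box centered on the origin instead of being rounded and shifted by half a pixel. A minimal doctest-style check of the arithmetic above (assuming gen_base_anchors() is the method being patched; with base_size=9 the box is simply +/-4.5 around (0, 0)):

>>> self = AnchorGenerator(9, [1.], [1.])
>>> self.gen_base_anchors()
tensor([[-4.5000, -4.5000,  4.5000,  4.5000]])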
@@ -22,10 +22,10 @@ def calc_region(bbox, ratio, featmap_size=None):
     x2 = torch.round(ratio * bbox[0] + (1 - ratio) * bbox[2]).long()
     y2 = torch.round(ratio * bbox[1] + (1 - ratio) * bbox[3]).long()
     if featmap_size is not None:
-        x1 = x1.clamp(min=0, max=featmap_size[1] - 1)
-        y1 = y1.clamp(min=0, max=featmap_size[0] - 1)
-        x2 = x2.clamp(min=0, max=featmap_size[1] - 1)
-        y2 = y2.clamp(min=0, max=featmap_size[0] - 1)
+        x1 = x1.clamp(min=0, max=featmap_size[1])
+        y1 = y1.clamp(min=0, max=featmap_size[0])
+        x2 = x2.clamp(min=0, max=featmap_size[1])
+        y2 = y2.clamp(min=0, max=featmap_size[0])
     return (x1, y1, x2, y2)
@@ -76,8 +76,8 @@ def ga_loc_target(gt_bboxes_list,
             all_ignore_map.append(ignore_map)
         for img_id in range(img_per_gpu):
             gt_bboxes = gt_bboxes_list[img_id]
-            scale = torch.sqrt((gt_bboxes[:, 2] - gt_bboxes[:, 0] + 1) *
-                               (gt_bboxes[:, 3] - gt_bboxes[:, 1] + 1))
+            scale = torch.sqrt((gt_bboxes[:, 2] - gt_bboxes[:, 0]) *
+                               (gt_bboxes[:, 3] - gt_bboxes[:, 1]))
             min_anchor_size = scale.new_full(
                 (1, ), float(anchor_scale * anchor_strides[0]))
             # assign gt bboxes to different feature levels w.r.t. their scales
......
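The level-assignment scale now uses the continuous width and height. A quick check of the patched expression (sqrt(32 * 64) = sqrt(2048)):

>>> gt_bboxes = torch.Tensor([[0., 0., 32., 64.]])
>>> torch.sqrt((gt_bboxes[:, 2] - gt_bboxes[:, 0]) *
...            (gt_bboxes[:, 3] - gt_bboxes[:, 1]))
tensor([45.2548])

The old +1 convention would have given sqrt(33 * 65), roughly 46.31, for the same box.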
@@ -30,8 +30,8 @@ def bbox_overlaps(bboxes1, bboxes2, mode='iou', is_aligned=False):
        >>>     [10, 10, 20, 20],
        >>> ])
        >>> bbox_overlaps(bboxes1, bboxes2)
-       tensor([[0.5238, 0.0500, 0.0041],
-               [0.0323, 0.0452, 1.0000],
-               [0.0000, 0.0000, 0.0000]])
+       tensor([[0.5000, 0.0000, 0.0000],
+               [0.0000, 0.0000, 1.0000],
+               [0.0000, 0.0000, 0.0000]])

    Example:
@@ -58,14 +58,14 @@ def bbox_overlaps(bboxes1, bboxes2, mode='iou', is_aligned=False):
        lt = torch.max(bboxes1[:, :2], bboxes2[:, :2])  # [rows, 2]
        rb = torch.min(bboxes1[:, 2:], bboxes2[:, 2:])  # [rows, 2]
-       wh = (rb - lt + 1).clamp(min=0)  # [rows, 2]
+       wh = (rb - lt).clamp(min=0)  # [rows, 2]
        overlap = wh[:, 0] * wh[:, 1]
-       area1 = (bboxes1[:, 2] - bboxes1[:, 0] + 1) * (
-           bboxes1[:, 3] - bboxes1[:, 1] + 1)
+       area1 = (bboxes1[:, 2] - bboxes1[:, 0]) * (
+           bboxes1[:, 3] - bboxes1[:, 1])
        if mode == 'iou':
-           area2 = (bboxes2[:, 2] - bboxes2[:, 0] + 1) * (
-               bboxes2[:, 3] - bboxes2[:, 1] + 1)
+           area2 = (bboxes2[:, 2] - bboxes2[:, 0]) * (
+               bboxes2[:, 3] - bboxes2[:, 1])
            ious = overlap / (area1 + area2 - overlap)
        else:
            ious = overlap / area1
@@ -73,14 +73,14 @@ def bbox_overlaps(bboxes1, bboxes2, mode='iou', is_aligned=False):
        lt = torch.max(bboxes1[:, None, :2], bboxes2[:, :2])  # [rows, cols, 2]
        rb = torch.min(bboxes1[:, None, 2:], bboxes2[:, 2:])  # [rows, cols, 2]
-       wh = (rb - lt + 1).clamp(min=0)  # [rows, cols, 2]
+       wh = (rb - lt).clamp(min=0)  # [rows, cols, 2]
        overlap = wh[:, :, 0] * wh[:, :, 1]
-       area1 = (bboxes1[:, 2] - bboxes1[:, 0] + 1) * (
-           bboxes1[:, 3] - bboxes1[:, 1] + 1)
+       area1 = (bboxes1[:, 2] - bboxes1[:, 0]) * (
+           bboxes1[:, 3] - bboxes1[:, 1])
        if mode == 'iou':
-           area2 = (bboxes2[:, 2] - bboxes2[:, 0] + 1) * (
-               bboxes2[:, 3] - bboxes2[:, 1] + 1)
+           area2 = (bboxes2[:, 2] - bboxes2[:, 0]) * (
+               bboxes2[:, 3] - bboxes2[:, 1])
            ious = overlap / (area1[:, None] + area2 - overlap)
        else:
            ious = overlap / (area1[:, None])
......
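A worked check of the new IoU arithmetic: two 10x10 boxes offset by 5 pixels intersect in a 5x5 region, so the IoU is 25 / (100 + 100 - 25) = 1/7:

>>> bboxes1 = torch.FloatTensor([[0, 0, 10, 10]])
>>> bboxes2 = torch.FloatTensor([[5, 5, 15, 15]])
>>> bbox_overlaps(bboxes1, bboxes2)
tensor([[0.1429]])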
@@ -10,13 +10,13 @@ def bbox2delta(proposals, gt, means=[0, 0, 0, 0], stds=[1, 1, 1, 1]):
    gt = gt.float()
    px = (proposals[..., 0] + proposals[..., 2]) * 0.5
    py = (proposals[..., 1] + proposals[..., 3]) * 0.5
-   pw = proposals[..., 2] - proposals[..., 0] + 1.0
-   ph = proposals[..., 3] - proposals[..., 1] + 1.0
+   pw = proposals[..., 2] - proposals[..., 0]
+   ph = proposals[..., 3] - proposals[..., 1]
    gx = (gt[..., 0] + gt[..., 2]) * 0.5
    gy = (gt[..., 1] + gt[..., 3]) * 0.5
-   gw = gt[..., 2] - gt[..., 0] + 1.0
-   gh = gt[..., 3] - gt[..., 1] + 1.0
+   gw = gt[..., 2] - gt[..., 0]
+   gh = gt[..., 3] - gt[..., 1]
    dx = (gx - px) / pw
    dy = (gy - py) / ph
@@ -71,9 +71,9 @@ def delta2bbox(rois,
        >>>                        [ 0.7, -1.9, -0.5,  0.3]])
        >>> delta2bbox(rois, deltas, max_shape=(32, 32))
        tensor([[0.0000, 0.0000, 1.0000, 1.0000],
-               [0.2817, 0.2817, 4.7183, 4.7183],
-               [0.0000, 0.6321, 7.3891, 0.3679],
-               [5.8967, 2.9251, 5.5033, 3.2749]])
+               [0.1409, 0.1409, 2.8591, 2.8591],
+               [0.0000, 0.3161, 4.1945, 0.6839],
+               [5.0000, 5.0000, 5.0000, 5.0000]])
    """
    means = deltas.new_tensor(means).repeat(1, deltas.size(1) // 4)
    stds = deltas.new_tensor(stds).repeat(1, deltas.size(1) // 4)
@@ -89,8 +89,8 @@ def delta2bbox(rois,
    px = ((rois[:, 0] + rois[:, 2]) * 0.5).unsqueeze(1).expand_as(dx)
    py = ((rois[:, 1] + rois[:, 3]) * 0.5).unsqueeze(1).expand_as(dy)
    # Compute width/height of each roi
-   pw = (rois[:, 2] - rois[:, 0] + 1.0).unsqueeze(1).expand_as(dw)
-   ph = (rois[:, 3] - rois[:, 1] + 1.0).unsqueeze(1).expand_as(dh)
+   pw = (rois[:, 2] - rois[:, 0]).unsqueeze(1).expand_as(dw)
+   ph = (rois[:, 3] - rois[:, 1]).unsqueeze(1).expand_as(dh)
    # Use exp(network energy) to enlarge/shrink each roi
    gw = pw * dw.exp()
    gh = ph * dh.exp()
@@ -98,15 +98,15 @@ def delta2bbox(rois,
    gx = torch.addcmul(px, 1, pw, dx)  # gx = px + pw * dx
    gy = torch.addcmul(py, 1, ph, dy)  # gy = py + ph * dy
    # Convert center-xy/width/height to top-left, bottom-right
-   x1 = gx - gw * 0.5 + 0.5
-   y1 = gy - gh * 0.5 + 0.5
-   x2 = gx + gw * 0.5 - 0.5
-   y2 = gy + gh * 0.5 - 0.5
+   x1 = gx - gw * 0.5
+   y1 = gy - gh * 0.5
+   x2 = gx + gw * 0.5
+   y2 = gy + gh * 0.5
    if max_shape is not None:
-       x1 = x1.clamp(min=0, max=max_shape[1] - 1)
-       y1 = y1.clamp(min=0, max=max_shape[0] - 1)
-       x2 = x2.clamp(min=0, max=max_shape[1] - 1)
-       y2 = y2.clamp(min=0, max=max_shape[0] - 1)
+       x1 = x1.clamp(min=0, max=max_shape[1])
+       y1 = y1.clamp(min=0, max=max_shape[0])
+       x2 = x2.clamp(min=0, max=max_shape[1])
+       y2 = y2.clamp(min=0, max=max_shape[0])
    bboxes = torch.stack([x1, y1, x2, y2], dim=-1).view_as(deltas)
    return bboxes
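With the half-pixel offsets removed, an all-zero delta decodes a RoI back to itself directly under the width-based convention. A sketch with the default means/stds:

>>> rois = torch.Tensor([[0., 0., 10., 10.]])
>>> deltas = torch.Tensor([[0., 0., 0., 0.]])
>>> delta2bbox(rois, deltas)
tensor([[ 0.,  0., 10., 10.]])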
@@ -124,8 +124,8 @@ def bbox_flip(bboxes, img_shape):
    if isinstance(bboxes, torch.Tensor):
        assert bboxes.shape[-1] % 4 == 0
        flipped = bboxes.clone()
-       flipped[:, 0::4] = img_shape[1] - bboxes[:, 2::4] - 1
-       flipped[:, 2::4] = img_shape[1] - bboxes[:, 0::4] - 1
+       flipped[:, 0::4] = img_shape[1] - bboxes[:, 2::4]
+       flipped[:, 2::4] = img_shape[1] - bboxes[:, 0::4]
        return flipped
    elif isinstance(bboxes, np.ndarray):
        return mmcv.bbox_flip(bboxes, img_shape)
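Flipping is now an exact mirror about the image width, with no off-by-one. For a 100-pixel-wide image (img_shape is (h, w)):

>>> bboxes = torch.Tensor([[10., 10., 20., 20.]])
>>> bbox_flip(bboxes, (50, 100))
tensor([[80., 10., 90., 20.]])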
@@ -216,8 +216,8 @@ def distance2bbox(points, distance, max_shape=None):
    x2 = points[:, 0] + distance[:, 2]
    y2 = points[:, 1] + distance[:, 3]
    if max_shape is not None:
-       x1 = x1.clamp(min=0, max=max_shape[1] - 1)
-       y1 = y1.clamp(min=0, max=max_shape[0] - 1)
-       x2 = x2.clamp(min=0, max=max_shape[1] - 1)
-       y2 = y2.clamp(min=0, max=max_shape[0] - 1)
+       x1 = x1.clamp(min=0, max=max_shape[1])
+       y1 = y1.clamp(min=0, max=max_shape[0])
+       x2 = x2.clamp(min=0, max=max_shape[1])
+       y2 = y2.clamp(min=0, max=max_shape[0])
    return torch.stack([x1, y1, x2, y2], -1)
@@ -28,17 +28,15 @@ def bbox_overlaps(bboxes1, bboxes2, mode='iou'):
        bboxes1, bboxes2 = bboxes2, bboxes1
        ious = np.zeros((cols, rows), dtype=np.float32)
        exchange = True
-   area1 = (bboxes1[:, 2] - bboxes1[:, 0] + 1) * (
-       bboxes1[:, 3] - bboxes1[:, 1] + 1)
-   area2 = (bboxes2[:, 2] - bboxes2[:, 0] + 1) * (
-       bboxes2[:, 3] - bboxes2[:, 1] + 1)
+   area1 = (bboxes1[:, 2] - bboxes1[:, 0]) * (bboxes1[:, 3] - bboxes1[:, 1])
+   area2 = (bboxes2[:, 2] - bboxes2[:, 0]) * (bboxes2[:, 3] - bboxes2[:, 1])
    for i in range(bboxes1.shape[0]):
        x_start = np.maximum(bboxes1[i, 0], bboxes2[:, 0])
        y_start = np.maximum(bboxes1[i, 1], bboxes2[:, 1])
        x_end = np.minimum(bboxes1[i, 2], bboxes2[:, 2])
        y_end = np.minimum(bboxes1[i, 3], bboxes2[:, 3])
-       overlap = np.maximum(x_end - x_start + 1, 0) * np.maximum(
-           y_end - y_start + 1, 0)
+       overlap = np.maximum(x_end - x_start, 0) * np.maximum(
+           y_end - y_start, 0)
        if mode == 'iou':
            union = area1[i] + area2 - overlap
        else:
......
@@ -98,14 +98,14 @@ def tpfp_imagenet(det_bboxes,
        if area_ranges == [(None, None)]:
            fp[...] = 1
        else:
-           det_areas = (det_bboxes[:, 2] - det_bboxes[:, 0] + 1) * (
-               det_bboxes[:, 3] - det_bboxes[:, 1] + 1)
+           det_areas = (det_bboxes[:, 2] - det_bboxes[:, 0]) * (
+               det_bboxes[:, 3] - det_bboxes[:, 1])
            for i, (min_area, max_area) in enumerate(area_ranges):
                fp[i, (det_areas >= min_area) & (det_areas < max_area)] = 1
        return tp, fp
    ious = bbox_overlaps(det_bboxes, gt_bboxes - 1)
-   gt_w = gt_bboxes[:, 2] - gt_bboxes[:, 0] + 1
-   gt_h = gt_bboxes[:, 3] - gt_bboxes[:, 1] + 1
+   gt_w = gt_bboxes[:, 2] - gt_bboxes[:, 0]
+   gt_h = gt_bboxes[:, 3] - gt_bboxes[:, 1]
    iou_thrs = np.minimum((gt_w * gt_h) / ((gt_w + 10.0) * (gt_h + 10.0)),
                          default_iou_thr)
    # sort all detections by scores in descending order
@@ -144,7 +144,7 @@ def tpfp_imagenet(det_bboxes,
                    fp[k, i] = 1
            else:
                bbox = det_bboxes[i, :4]
-               area = (bbox[2] - bbox[0] + 1) * (bbox[3] - bbox[1] + 1)
+               area = (bbox[2] - bbox[0]) * (bbox[3] - bbox[1])
                if area >= min_area and area < max_area:
                    fp[k, i] = 1
    return tp, fp
@@ -194,8 +194,8 @@ def tpfp_default(det_bboxes,
        if area_ranges == [(None, None)]:
            fp[...] = 1
        else:
-           det_areas = (det_bboxes[:, 2] - det_bboxes[:, 0] + 1) * (
-               det_bboxes[:, 3] - det_bboxes[:, 1] + 1)
+           det_areas = (det_bboxes[:, 2] - det_bboxes[:, 0]) * (
+               det_bboxes[:, 3] - det_bboxes[:, 1])
            for i, (min_area, max_area) in enumerate(area_ranges):
                fp[i, (det_areas >= min_area) & (det_areas < max_area)] = 1
        return tp, fp
@@ -213,8 +213,8 @@ def tpfp_default(det_bboxes,
        if min_area is None:
            gt_area_ignore = np.zeros_like(gt_ignore_inds, dtype=bool)
        else:
-           gt_areas = (gt_bboxes[:, 2] - gt_bboxes[:, 0] + 1) * (
-               gt_bboxes[:, 3] - gt_bboxes[:, 1] + 1)
+           gt_areas = (gt_bboxes[:, 2] - gt_bboxes[:, 0]) * (
+               gt_bboxes[:, 3] - gt_bboxes[:, 1])
            gt_area_ignore = (gt_areas < min_area) | (gt_areas >= max_area)
    for i in sort_inds:
        if ious_max[i] >= iou_thr:
@@ -231,7 +231,7 @@ def tpfp_default(det_bboxes,
                    fp[k, i] = 1
            else:
                bbox = det_bboxes[i, :4]
-               area = (bbox[2] - bbox[0] + 1) * (bbox[3] - bbox[1] + 1)
+               area = (bbox[2] - bbox[0]) * (bbox[3] - bbox[1])
                if area >= min_area and area < max_area:
                    fp[k, i] = 1
    return tp, fp
@@ -332,8 +332,8 @@ def eval_map(det_results,
            if area_ranges is None:
                num_gts[0] += bbox.shape[0]
            else:
-               gt_areas = (bbox[:, 2] - bbox[:, 0] + 1) * (
-                   bbox[:, 3] - bbox[:, 1] + 1)
+               gt_areas = (bbox[:, 2] - bbox[:, 0]) * (
+                   bbox[:, 3] - bbox[:, 1])
                for k, (min_area, max_area) in enumerate(area_ranges):
                    num_gts[k] += np.sum((gt_areas >= min_area)
                                         & (gt_areas < max_area))
......
@@ -13,21 +13,21 @@ def mask_target(pos_proposals_list, pos_assigned_gt_inds_list, gt_masks_list,

 def mask_target_single(pos_proposals, pos_assigned_gt_inds, gt_masks, cfg):
+    device = pos_proposals.device
     mask_size = _pair(cfg.mask_size)
     num_pos = pos_proposals.size(0)
     if num_pos > 0:
         proposals_np = pos_proposals.cpu().numpy()
         maxh, maxw = gt_masks.height, gt_masks.width
-        proposals_np[:, [0, 2]] = np.clip(proposals_np[:, [0, 2]], 0, maxw - 1)
-        proposals_np[:, [1, 3]] = np.clip(proposals_np[:, [1, 3]], 0, maxh - 1)
-        proposals_np = proposals_np.astype(np.int32)
+        proposals_np[:, [0, 2]] = np.clip(proposals_np[:, [0, 2]], 0, maxw)
+        proposals_np[:, [1, 3]] = np.clip(proposals_np[:, [1, 3]], 0, maxh)
         pos_assigned_gt_inds = pos_assigned_gt_inds.cpu().numpy()
         mask_targets = gt_masks.crop_and_resize(
-            proposals_np, mask_size, inds=pos_assigned_gt_inds).to_ndarray()
-        mask_targets = torch.from_numpy(np.stack(mask_targets)).float().to(
-            pos_proposals.device)
+            proposals_np, mask_size, device=device,
+            inds=pos_assigned_gt_inds).to_ndarray()
+        mask_targets = torch.from_numpy(mask_targets).float().to(device)
     else:
         mask_targets = pos_proposals.new_zeros((0, ) + mask_size)
......
@@ -5,6 +5,8 @@ import numpy as np
 import pycocotools.mask as maskUtils
 import torch

+from mmdet.ops.roi_align import roi_align
+

 class BaseInstanceMasks(metaclass=ABCMeta):
@@ -185,11 +187,11 @@ class BitmapMasks(BaseInstanceMasks):
         # clip the boundary
         bbox = bbox.copy()
-        bbox[0::2] = np.clip(bbox[0::2], 0, self.width - 1)
-        bbox[1::2] = np.clip(bbox[1::2], 0, self.height - 1)
+        bbox[0::2] = np.clip(bbox[0::2], 0, self.width)
+        bbox[1::2] = np.clip(bbox[1::2], 0, self.height)
         x1, y1, x2, y2 = bbox
-        w = np.maximum(x2 - x1 + 1, 1)
-        h = np.maximum(y2 - y1 + 1, 1)
+        w = np.maximum(x2 - x1, 1)
+        h = np.maximum(y2 - y1, 1)
         if len(self.masks) == 0:
             cropped_masks = np.empty((0, h, w), dtype=np.uint8)
@@ -201,6 +203,7 @@ class BitmapMasks(BaseInstanceMasks):
                         bboxes,
                         out_shape,
                         inds,
+                        device='cpu',
                         interpolation='bilinear'):
         """Crop and resize masks by the given bboxes.
@@ -209,9 +212,10 @@ class BitmapMasks(BaseInstanceMasks):
         assigned bbox and resize to the size of (mask_h, mask_w)

         Args:
-            bboxes (ndarray): bboxes in format [x1, y1, x2, y2], shape (N, 4)
+            bboxes (Tensor): bboxes in format [x1, y1, x2, y2], shape (N, 4)
             out_shape (tuple[int]): target (h, w) of resized mask
             inds (ndarray): indexes to assign masks to each bbox
+            device (str): device of bboxes
             interpolation (str): see `mmcv.imresize`

         Return:
@@ -221,19 +225,26 @@ class BitmapMasks(BaseInstanceMasks):
             empty_masks = np.empty((0, *out_shape), dtype=np.uint8)
             return BitmapMasks(empty_masks, *out_shape)

-        resized_masks = []
-        for i in range(len(bboxes)):
-            mask = self.masks[inds[i]]
-            bbox = bboxes[i, :].astype(np.int32)
-            x1, y1, x2, y2 = bbox
-            w = np.maximum(x2 - x1 + 1, 1)
-            h = np.maximum(y2 - y1 + 1, 1)
-            resized_masks.append(
-                mmcv.imresize(
-                    mask[y1:y1 + h, x1:x1 + w],
-                    out_shape,
-                    interpolation=interpolation))
-        return BitmapMasks(np.stack(resized_masks), *out_shape)
+        # convert bboxes to tensor
+        if isinstance(bboxes, np.ndarray):
+            bboxes = torch.from_numpy(bboxes).to(device=device)
+        if isinstance(inds, np.ndarray):
+            inds = torch.from_numpy(inds).to(device=device)
+
+        num_bbox = bboxes.shape[0]
+        fake_inds = torch.arange(
+            num_bbox, device=device).to(dtype=bboxes.dtype)[:, None]
+        rois = torch.cat([fake_inds, bboxes], dim=1)  # Nx5
+        rois = rois.to(device=device)
+        if num_bbox > 0:
+            gt_masks_th = torch.from_numpy(self.masks).to(device).index_select(
+                0, inds).to(dtype=rois.dtype)
+            targets = roi_align(gt_masks_th[:, None, :, :], rois, out_shape,
+                                1.0, 0, True).squeeze(1)
+            resized_masks = (targets >= 0.5).cpu().numpy()
+        else:
+            resized_masks = []
+        return BitmapMasks(resized_masks, *out_shape)
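A hedged usage sketch of the tensor-based crop_and_resize above; the shapes and dtypes here are illustrative assumptions, not part of the commit:

import numpy as np
# two 32x32 binary masks; crop mask 0 to a box and resize it to 28x28
masks = BitmapMasks(np.zeros((2, 32, 32), dtype=np.uint8), 32, 32)
bboxes = np.array([[0., 0., 16., 16.]], dtype=np.float32)
inds = np.array([0], dtype=np.int64)
out = masks.crop_and_resize(bboxes, (28, 28), inds, device='cpu')
# out is a BitmapMasks whose masks array has shape (1, 28, 28); ndarray
# inputs are converted to tensors internally, sampled with the aligned
# roi_align, then binarized at 0.5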
     def expand(self, expanded_h, expanded_w, top, left):
         """see `transforms.Expand`."""
@@ -355,7 +366,7 @@ class PolygonMasks(BaseInstanceMasks):
             flipped_poly_per_obj = []
             for p in poly_per_obj:
                 p = p.copy()
-                p[idx::2] = dim - p[idx::2] - 1
+                p[idx::2] = dim - p[idx::2]
                 flipped_poly_per_obj.append(p)
             flipped_masks.append(flipped_poly_per_obj)
         flipped_masks = PolygonMasks(flipped_masks, self.height,
@@ -369,11 +380,11 @@ class PolygonMasks(BaseInstanceMasks):
         # clip the boundary
         bbox = bbox.copy()
-        bbox[0::2] = np.clip(bbox[0::2], 0, self.width - 1)
-        bbox[1::2] = np.clip(bbox[1::2], 0, self.height - 1)
+        bbox[0::2] = np.clip(bbox[0::2], 0, self.width)
+        bbox[1::2] = np.clip(bbox[1::2], 0, self.height)
         x1, y1, x2, y2 = bbox
-        w = np.maximum(x2 - x1 + 1, 1)
-        h = np.maximum(y2 - y1 + 1, 1)
+        w = np.maximum(x2 - x1, 1)
+        h = np.maximum(y2 - y1, 1)
         if len(self.masks) == 0:
             cropped_masks = PolygonMasks([], h, w)
@@ -402,6 +413,7 @@ class PolygonMasks(BaseInstanceMasks):
                         bboxes,
                         out_shape,
                         inds,
+                        device='cpu',
                         interpolation='bilinear'):
         """see BitmapMasks.crop_and_resize"""
         out_h, out_w = out_shape
@@ -413,8 +425,8 @@ class PolygonMasks(BaseInstanceMasks):
             mask = self.masks[inds[i]]
             bbox = bboxes[i, :].astype(np.int32)
             x1, y1, x2, y2 = bbox
-            w = np.maximum(x2 - x1 + 1, 1)
-            h = np.maximum(y2 - y1 + 1, 1)
+            w = np.maximum(x2 - x1, 1)
+            h = np.maximum(y2 - y1, 1)
             h_scale = out_h / h
             w_scale = out_w / w
......
@@ -60,7 +60,7 @@ class CityscapesDataset(CocoDataset):
            x1, y1, w, h = ann['bbox']
            if ann['area'] <= 0 or w < 1 or h < 1:
                continue
-           bbox = [x1, y1, x1 + w - 1, y1 + h - 1]
+           bbox = [x1, y1, x1 + w, y1 + h]
            if ann.get('iscrowd', False):
                gt_bboxes_ignore.append(bbox)
            else:
......
@@ -86,7 +86,7 @@ class CocoDataset(CustomDataset):
            x1, y1, w, h = ann['bbox']
            if ann['area'] <= 0 or w < 1 or h < 1:
                continue
-           bbox = [x1, y1, x1 + w - 1, y1 + h - 1]
+           bbox = [x1, y1, x1 + w, y1 + h]
            if ann.get('iscrowd', False):
                gt_bboxes_ignore.append(bbox)
            else:
@@ -122,8 +122,8 @@ class CocoDataset(CustomDataset):
        return [
            _bbox[0],
            _bbox[1],
-           _bbox[2] - _bbox[0] + 1,
-           _bbox[3] - _bbox[1] + 1,
+           _bbox[2] - _bbox[0],
+           _bbox[3] - _bbox[1],
        ]

    def _proposal2json(self, results):
@@ -249,7 +249,7 @@ class CocoDataset(CustomDataset):
            if ann.get('ignore', False) or ann['iscrowd']:
                continue
            x1, y1, w, h = ann['bbox']
-           bboxes.append([x1, y1, x1 + w - 1, y1 + h - 1])
+           bboxes.append([x1, y1, x1 + w, y1 + h])
        bboxes = np.array(bboxes, dtype=np.float32)
        if bboxes.shape[0] == 0:
            bboxes = np.zeros((0, 4))
......
@@ -44,7 +44,8 @@ class InstaBoost(object):
            bbox = bboxes[i]
            mask = masks[i]
            x1, y1, x2, y2 = bbox
-           bbox = [x1, y1, x2 - x1 + 1, y2 - y1 + 1]
+           # assert (x2 - x1) >= 1 and (y2 - y1) >= 1
+           bbox = [x1, y1, x2 - x1, y2 - y1]
            anns.append({
                'category_id': label,
                'segmentation': mask,
@@ -59,7 +60,10 @@ class InstaBoost(object):
        gt_masks_ann = []
        for ann in anns:
            x1, y1, w, h = ann['bbox']
-           bbox = [x1, y1, x1 + w - 1, y1 + h - 1]
+           # TODO: a more essential bug needs to be fixed in instaboost
+           if w <= 0 or h <= 0:
+               continue
+           bbox = [x1, y1, x1 + w, y1 + h]
            gt_bboxes.append(bbox)
            gt_labels.append(ann['category_id'])
            gt_masks_ann.append(ann['segmentation'])
@@ -73,6 +77,7 @@ class InstaBoost(object):
    def __call__(self, results):
        img = results['img']
+       orig_type = img.dtype
        anns = self._load_anns(results)
        if np.random.choice([0, 1], p=[1 - self.aug_ratio, self.aug_ratio]):
            try:
@@ -81,8 +86,9 @@ class InstaBoost(object):
                raise ImportError('Please run "pip install instaboostfast" '
                                  'to install instaboostfast first.')
            anns, img = instaboost.get_new_data(
-               anns, img, self.cfg, background=None)
-       results = self._parse_anns(results, anns, img)
+               anns, img.astype(np.uint8), self.cfg, background=None)
+       results = self._parse_anns(results, anns, img.astype(orig_type))
        return results

    def __repr__(self):
......
@@ -143,8 +143,8 @@ class Resize(object):
        img_shape = results['img_shape']
        for key in results.get('bbox_fields', []):
            bboxes = results[key] * results['scale_factor']
-           bboxes[:, 0::2] = np.clip(bboxes[:, 0::2], 0, img_shape[1] - 1)
-           bboxes[:, 1::2] = np.clip(bboxes[:, 1::2], 0, img_shape[0] - 1)
+           bboxes[:, 0::2] = np.clip(bboxes[:, 0::2], 0, img_shape[1])
+           bboxes[:, 1::2] = np.clip(bboxes[:, 1::2], 0, img_shape[0])
            results[key] = bboxes

    def _resize_masks(self, results):
@@ -215,12 +215,12 @@ class RandomFlip(object):
        flipped = bboxes.copy()
        if direction == 'horizontal':
            w = img_shape[1]
-           flipped[..., 0::4] = w - bboxes[..., 2::4] - 1
-           flipped[..., 2::4] = w - bboxes[..., 0::4] - 1
+           flipped[..., 0::4] = w - bboxes[..., 2::4]
+           flipped[..., 2::4] = w - bboxes[..., 0::4]
        elif direction == 'vertical':
            h = img_shape[0]
-           flipped[..., 1::4] = h - bboxes[..., 3::4] - 1
-           flipped[..., 3::4] = h - bboxes[..., 1::4] - 1
+           flipped[..., 1::4] = h - bboxes[..., 3::4]
+           flipped[..., 3::4] = h - bboxes[..., 1::4]
        else:
            raise ValueError(
                'Invalid flipping direction "{}"'.format(direction))
@@ -372,8 +372,8 @@ class RandomCrop(object):
            bbox_offset = np.array([offset_w, offset_h, offset_w, offset_h],
                                   dtype=np.float32)
            bboxes = results[key] - bbox_offset
-           bboxes[:, 0::2] = np.clip(bboxes[:, 0::2], 0, img_shape[1] - 1)
-           bboxes[:, 1::2] = np.clip(bboxes[:, 1::2], 0, img_shape[0] - 1)
+           bboxes[:, 0::2] = np.clip(bboxes[:, 0::2], 0, img_shape[1])
+           bboxes[:, 1::2] = np.clip(bboxes[:, 1::2], 0, img_shape[0])
            results[key] = bboxes

        # crop semantic seg
......
@@ -47,6 +47,7 @@ class XMLDataset(CustomDataset):
                label = self.cat2label[name]
                difficult = int(obj.find('difficult').text)
                bnd_box = obj.find('bndbox')
+               # TODO: check whether it is necessary to use int
                # Coordinates may be float type
                bbox = [
                    int(float(bnd_box.find('xmin').text)),
......
@@ -369,8 +369,8 @@ class FCOSHead(nn.Module):
            return gt_labels.new_zeros(num_points), \
                   gt_bboxes.new_zeros((num_points, 4))
-       areas = (gt_bboxes[:, 2] - gt_bboxes[:, 0] + 1) * (
-           gt_bboxes[:, 3] - gt_bboxes[:, 1] + 1)
+       areas = (gt_bboxes[:, 2] - gt_bboxes[:, 0]) * (
+           gt_bboxes[:, 3] - gt_bboxes[:, 1])
        # TODO: figure out why these two are different
        # areas = areas[None].expand(num_points, num_gts)
        areas = areas[None].repeat(num_points, 1)
......
@@ -103,8 +103,8 @@ class GARPNHead(GuidedAnchorHead):
                                  self.target_stds, img_shape)
        # filter out too small bboxes
        if cfg.min_bbox_size > 0:
-           w = proposals[:, 2] - proposals[:, 0] + 1
-           h = proposals[:, 3] - proposals[:, 1] + 1
+           w = proposals[:, 2] - proposals[:, 0]
+           h = proposals[:, 3] - proposals[:, 1]
            valid_inds = torch.nonzero((w >= cfg.min_bbox_size) &
                                       (h >= cfg.min_bbox_size)).squeeze()
            proposals = proposals[valid_inds, :]
......
@@ -82,8 +82,8 @@ class RPNHead(AnchorHead):
            proposals = delta2bbox(anchors, rpn_bbox_pred, self.target_means,
                                   self.target_stds, img_shape)
            if cfg.min_bbox_size > 0:
-               w = proposals[:, 2] - proposals[:, 0] + 1
-               h = proposals[:, 3] - proposals[:, 1] + 1
+               w = proposals[:, 2] - proposals[:, 0]
+               h = proposals[:, 3] - proposals[:, 1]
                valid_inds = torch.nonzero((w >= cfg.min_bbox_size) &
                                           (h >= cfg.min_bbox_size)).squeeze()
                proposals = proposals[valid_inds, :]
......
@@ -75,7 +75,7 @@ class SSDHead(AnchorHead):
        for k in range(len(anchor_strides)):
            base_size = min_sizes[k]
            stride = anchor_strides[k]
-           ctr = ((stride - 1) / 2., (stride - 1) / 2.)
+           ctr = ((stride) / 2., (stride) / 2.)
            scales = [1., np.sqrt(max_sizes[k] / min_sizes[k])]
            ratios = [1.]
            for r in anchor_ratios[k]:
......
@@ -154,8 +154,8 @@ class BBoxHead(nn.Module):
        else:
            bboxes = rois[:, 1:].clone()
            if img_shape is not None:
-               bboxes[:, [0, 2]].clamp_(min=0, max=img_shape[1] - 1)
-               bboxes[:, [1, 3]].clamp_(min=0, max=img_shape[0] - 1)
+               bboxes[:, [0, 2]].clamp_(min=0, max=img_shape[1])
+               bboxes[:, [1, 3]].clamp_(min=0, max=img_shape[0])

        if rescale:
            if isinstance(scale_factor, float):
......
@@ -40,13 +40,13 @@ def bounded_iou_loss(pred, target, beta=0.2, eps=1e-3):
    """
    pred_ctrx = (pred[:, 0] + pred[:, 2]) * 0.5
    pred_ctry = (pred[:, 1] + pred[:, 3]) * 0.5
-   pred_w = pred[:, 2] - pred[:, 0] + 1
-   pred_h = pred[:, 3] - pred[:, 1] + 1
+   pred_w = pred[:, 2] - pred[:, 0]
+   pred_h = pred[:, 3] - pred[:, 1]
    with torch.no_grad():
        target_ctrx = (target[:, 0] + target[:, 2]) * 0.5
        target_ctry = (target[:, 1] + target[:, 3]) * 0.5
-       target_w = target[:, 2] - target[:, 0] + 1
-       target_h = target[:, 3] - target[:, 1] + 1
+       target_w = target[:, 2] - target[:, 0]
+       target_h = target[:, 3] - target[:, 1]

    dx = target_ctrx - pred_ctrx
    dy = target_ctry - pred_ctry
@@ -91,12 +91,12 @@ def giou_loss(pred, target, eps=1e-7):
    # overlap
    lt = torch.max(pred[:, :2], target[:, :2])
    rb = torch.min(pred[:, 2:], target[:, 2:])
-   wh = (rb - lt + 1).clamp(min=0)
+   wh = (rb - lt).clamp(min=0)
    overlap = wh[:, 0] * wh[:, 1]

    # union
-   ap = (pred[:, 2] - pred[:, 0] + 1) * (pred[:, 3] - pred[:, 1] + 1)
-   ag = (target[:, 2] - target[:, 0] + 1) * (target[:, 3] - target[:, 1] + 1)
+   ap = (pred[:, 2] - pred[:, 0]) * (pred[:, 3] - pred[:, 1])
+   ag = (target[:, 2] - target[:, 0]) * (target[:, 3] - target[:, 1])
    union = ap + ag - overlap + eps

    # IoU
@@ -105,7 +105,7 @@ def giou_loss(pred, target, eps=1e-7):
    # enclose area
    enclose_x1y1 = torch.min(pred[:, :2], target[:, :2])
    enclose_x2y2 = torch.max(pred[:, 2:], target[:, 2:])
-   enclose_wh = (enclose_x2y2 - enclose_x1y1 + 1).clamp(min=0)
+   enclose_wh = (enclose_x2y2 - enclose_x1y1).clamp(min=0)
    enclose_area = enclose_wh[:, 0] * enclose_wh[:, 1] + eps

    # GIoU
......
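A worked check of the continuous GIoU arithmetic for a single pair:

>>> pred = torch.FloatTensor([[0., 0., 10., 10.]])
>>> target = torch.FloatTensor([[5., 5., 15., 15.]])
>>> # overlap = 5 * 5 = 25, union = 100 + 100 - 25 = 175
>>> # enclosing box [0, 0, 15, 15] has area 225
>>> # GIoU = 25/175 - (225 - 175)/225 = -0.0794, so the loss is about 1.0794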
-import mmcv
 import numpy as np
 import pycocotools.mask as mask_util
 import torch
@@ -8,9 +7,15 @@ from torch.nn.modules.utils import _pair
 from mmdet.core import auto_fp16, force_fp32, mask_target
 from mmdet.ops import ConvModule, build_upsample_layer
 from mmdet.ops.carafe import CARAFEPack
+from mmdet.ops.grid_sampler import grid_sample
 from ..builder import build_loss
 from ..registry import HEADS

+BYTES_PER_FLOAT = 4
+# TODO: This memory limit may be too much or too little. It would be better to
+# determine it based on available resources.
+GPU_MEM_LIMIT = 1024**3  # 1 GB memory limit
+

 @HEADS.register_module
 class FCNMaskHead(nn.Module):
@@ -144,7 +149,7 @@ class FCNMaskHead(nn.Module):
        """Get segmentation masks from mask_pred and bboxes.

        Args:
-           mask_pred (Tensor or ndarray): shape (n, #class+1, h, w).
+           mask_pred (Tensor or ndarray): shape (n, #class, h, w).
                For single-scale testing, mask_pred is the direct output of
                model, whose type is Tensor, while for multi-scale testing,
                it will be converted to numpy array outside of this method.
@@ -158,15 +163,15 @@ class FCNMaskHead(nn.Module):
            list[list]: encoded masks
        """
        if isinstance(mask_pred, torch.Tensor):
-           mask_pred = mask_pred.sigmoid().cpu().numpy()
-       assert isinstance(mask_pred, np.ndarray)
-       # when enabling mixed precision training, mask_pred may be float16
-       # numpy array
-       mask_pred = mask_pred.astype(np.float32)
+           mask_pred = mask_pred.sigmoid()
+       else:
+           mask_pred = det_bboxes.new_tensor(mask_pred)

-       cls_segms = [[] for _ in range(self.num_classes - 1)]
-       bboxes = det_bboxes.cpu().numpy()[:, :4]
-       labels = det_labels.cpu().numpy() + 1
+       device = mask_pred.device
+       cls_segms = [[] for _ in range(self.num_classes)
+                    ]  # BG is not included in num_classes
+       bboxes = det_bboxes[:, :4]
+       labels = det_labels + 1  # TODO: remove + 1 in cat -1

        if rescale:
            img_h, img_w = ori_shape[:2]
@@ -175,34 +180,130 @@ class FCNMaskHead(nn.Module):
            img_w = np.round(ori_shape[1] * scale_factor).astype(np.int32)
            scale_factor = 1.0

-       for i in range(bboxes.shape[0]):
-           if not isinstance(scale_factor, (float, np.ndarray)):
-               scale_factor = scale_factor.cpu().numpy()
-           bbox = (bboxes[i, :] / scale_factor).astype(np.int32)
-           label = labels[i]
-           w = max(bbox[2] - bbox[0] + 1, 1)
-           h = max(bbox[3] - bbox[1] + 1, 1)
-           if not self.class_agnostic:
-               mask_pred_ = mask_pred[i, label, :, :]
-           else:
-               mask_pred_ = mask_pred[i, 0, :, :]
+       if not isinstance(scale_factor, (float, torch.Tensor)):
+           scale_factor = bboxes.new_tensor(scale_factor)
+       bboxes = bboxes / scale_factor

-           bbox_mask = mmcv.imresize(mask_pred_, (w, h))
-           bbox_mask = (bbox_mask > rcnn_test_cfg.mask_thr_binary).astype(
-               np.uint8)
+       N = len(mask_pred)
+       # The actual implementation splits the input into chunks,
+       # and pastes them chunk by chunk.
+       if device.type == 'cpu':
+           # CPU is most efficient when they are pasted one by one with
+           # skip_empty=True, so that it performs minimal number of
+           # operations.
+           num_chunks = N
+       else:
+           # GPU benefits from parallelism for larger chunks,
+           # but may run into memory issues
+           num_chunks = int(
+               np.ceil(N * img_h * img_w * BYTES_PER_FLOAT / GPU_MEM_LIMIT))
+           assert (num_chunks <=
+                   N), 'Default GPU_MEM_LIMIT is too small; try increasing it'
+       chunks = torch.chunk(torch.arange(N, device=device), num_chunks)

-           if rcnn_test_cfg.get('crop_mask', False):
-               im_mask = bbox_mask
-           else:
-               im_mask = np.zeros((img_h, img_w), dtype=np.uint8)
-               im_mask[bbox[1]:bbox[1] + h, bbox[0]:bbox[0] + w] = bbox_mask
+       threshold = rcnn_test_cfg.mask_thr_binary
+       im_mask = torch.zeros(
+           N,
+           img_h,
+           img_w,
+           device=device,
+           dtype=torch.bool if threshold >= 0 else torch.uint8)

-           if rcnn_test_cfg.get('rle_mask_encode', True):
-               rle = mask_util.encode(
-                   np.array(im_mask[:, :, np.newaxis], order='F'))[0]
-               cls_segms[label - 1].append(rle)
-           else:
-               cls_segms[label - 1].append(im_mask)
+       if not self.class_agnostic:
+           mask_pred = mask_pred[range(N), labels][:, None]
+
+       for inds in chunks:
+           masks_chunk, spatial_inds = _do_paste_mask(
+               mask_pred[inds],
+               bboxes[inds],
+               img_h,
+               img_w,
+               skip_empty=device.type == 'cpu')
+
+           if threshold >= 0:
+               masks_chunk = (masks_chunk >= threshold).to(dtype=torch.bool)
+           else:
+               # for visualization and debugging
+               masks_chunk = (masks_chunk * 255).to(dtype=torch.uint8)
+
+           im_mask[(inds, ) + spatial_inds] = masks_chunk
+
+       for i in range(N):
+           rle = mask_util.encode(
+               np.array(
+                   im_mask[i][:, :, None].cpu().numpy(),
+                   order='F',
+                   dtype='uint8'))[0]
+           cls_segms[labels[i] - 1].append(rle)  # TODO: remove -1 in cat -1

        return cls_segms
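As a sanity check on the chunking arithmetic in the GPU branch: pasting N = 100 masks into an 800 x 1333 image at float32 needs roughly 100 * 800 * 1333 * 4 bytes, about 0.4 GB, which fits under GPU_MEM_LIMIT, so everything goes in a single chunk:

>>> N, img_h, img_w = 100, 800, 1333
>>> int(np.ceil(N * img_h * img_w * BYTES_PER_FLOAT / GPU_MEM_LIMIT))
1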
+
+def _do_paste_mask(masks, boxes, img_h, img_w, skip_empty=True):
+    """Paste instance masks according to boxes.
+
+    This implementation is modified from
+    https://github.com/facebookresearch/detectron2/
+
+    Args:
+        masks (Tensor): N, 1, H, W
+        boxes (Tensor): N, 4
+        img_h (int): Height of the image to be pasted.
+        img_w (int): Width of the image to be pasted.
+        skip_empty (bool): Only paste masks within the region that
+            tightly bounds all boxes, and return the results for this
+            region only. An important optimization for CPU.
+
+    Returns:
+        tuple: (Tensor, tuple). The first item is the mask tensor, the
+            second one is the slice object.
+        If skip_empty == False, the whole image will be pasted. It will
+            return a mask of shape (N, img_h, img_w) and an empty tuple.
+        If skip_empty == True, only the area around the mask will be pasted.
+            A mask of shape (N, h', w') and its start and end coordinates
+            in the original image will be returned.
+    """
+    # On GPU, paste all masks together (up to chunk size)
+    # by using the entire image to sample the masks.
+    # Compared to pasting them one by one,
+    # this has more operations but is faster on COCO-scale datasets.
+    device = masks.device
+    if skip_empty:
+        x0_int, y0_int = torch.clamp(
+            boxes.min(dim=0).values.floor()[:2] - 1,
+            min=0).to(dtype=torch.int32)
+        x1_int = torch.clamp(
+            boxes[:, 2].max().ceil() + 1, max=img_w).to(dtype=torch.int32)
+        y1_int = torch.clamp(
+            boxes[:, 3].max().ceil() + 1, max=img_h).to(dtype=torch.int32)
+    else:
+        x0_int, y0_int = 0, 0
+        x1_int, y1_int = img_w, img_h
+    x0, y0, x1, y1 = torch.split(boxes, 1, dim=1)  # each is Nx1
+
+    N = masks.shape[0]
+
+    img_y = torch.arange(
+        y0_int, y1_int, device=device, dtype=torch.float32) + 0.5
+    img_x = torch.arange(
+        x0_int, x1_int, device=device, dtype=torch.float32) + 0.5
+    img_y = (img_y - y0) / (y1 - y0) * 2 - 1
+    img_x = (img_x - x0) / (x1 - x0) * 2 - 1
+    # img_x, img_y have shapes (N, w), (N, h)
+    if torch.isinf(img_x).any():
+        inds = torch.where(torch.isinf(img_x))
+        img_x[inds] = 0
+    if torch.isinf(img_y).any():
+        inds = torch.where(torch.isinf(img_y))
+        img_y[inds] = 0
+
+    gx = img_x[:, None, :].expand(N, img_y.size(1), img_x.size(1))
+    gy = img_y[:, :, None].expand(N, img_y.size(1), img_x.size(1))
+    grid = torch.stack([gx, gy], dim=3)
+
+    img_masks = grid_sample(
+        masks.to(dtype=torch.float32), grid, align_corners=False)
+
+    if skip_empty:
+        return img_masks[:, 0], (slice(y0_int, y1_int), slice(x0_int, x1_int))
+    else:
+        return img_masks[:, 0], ()
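A minimal sketch of calling the helper directly (shapes are illustrative; the real caller is get_seg_masks above):

masks = torch.rand(2, 1, 28, 28)  # N, 1, H, W mask probabilities
boxes = torch.Tensor([[0., 0., 50., 50.],
                      [10., 10., 80., 60.]])  # N, 4 in image coordinates
pasted, _ = _do_paste_mask(masks, boxes, img_h=100, img_w=120,
                           skip_empty=False)
# pasted has shape (2, 100, 120): each 28x28 mask is resampled into its
# box with grid_sample(align_corners=False); callers threshold it afterwards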