Unverified commit 263ae90c authored by Haian Huang(深度眸), committed by GitHub
[Feature]: Support ONNX-traceable batch inference for Faster R-CNN (#4785)

* Support faster rcnn

* Fix Lint

* Add docstr

* Fix docstr

* Update docstr

* Update code

* Update docstr
parent e3857b5f
No related branches found
No related tags found
No related merge requests found
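ONNX tracing records only tensor operations, so batched post-processing must avoid in-place ops and Python-side control flow that depends on runtime shapes; this commit therefore rewrites the single-image post-processing (in-place `clamp_`, per-image `split` loops in the test mixin) into broadcasted tensor ops built on `torch.where`. Below is a minimal, self-contained sketch of the idea (a toy module, not mmdet code; `opset_version=11` is assumed so `flip` exports):

    # Toy module: clamp (B, N, 4) boxes to an (H, W) bound with pure tensor
    # ops, the same torch.where pattern the diff uses instead of clamp_.
    import io

    import torch


    class ClampBoxes(torch.nn.Module):

        def forward(self, boxes, max_shape):
            # (H, W) -> (W, H, W, H): per-coordinate upper bounds (x, y, x, y)
            max_xy = torch.cat([max_shape] * 2, dim=-1).flip(-1).unsqueeze(-2)
            min_xy = boxes.new_tensor(0)
            boxes = torch.where(boxes < min_xy, min_xy, boxes)
            return torch.where(boxes > max_xy, max_xy, boxes)


    boxes = torch.rand(2, 100, 4) * 2000
    max_shape = torch.tensor([800.0, 1333.0])  # (H, W)
    torch.onnx.export(
        ClampBoxes(), (boxes, max_shape), io.BytesIO(),
        input_names=['boxes', 'max_shape'],
        dynamic_axes={'boxes': {0: 'batch', 1: 'num_boxes'}},
        opset_version=11)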
@@ -275,35 +275,102 @@ class BBoxHead(nn.Module):
                   scale_factor,
                   rescale=False,
                   cfg=None):
        """Transform network output for a batch into bbox predictions.

        If the input rois has a batch dimension, the function runs in
        `batch_mode` and returns a tuple[list[Tensor], list[Tensor]];
        otherwise it returns a tuple[Tensor, Tensor].

        Args:
            rois (Tensor): Boxes to be transformed. Has shape (num_boxes, 5)
                or (B, num_boxes, 5).
            cls_score (list[Tensor] or Tensor): Box scores for
                each scale level, each is a 4D-tensor, the channel number is
                num_points * num_classes.
            bbox_pred (Tensor, optional): Box energies / deltas for each scale
                level, each is a 4D-tensor, the channel number is
                num_classes * 4.
            img_shape (Sequence[int] or torch.Tensor or Sequence[
                Sequence[int]], optional): Maximum bounds for boxes, specifies
                (H, W, C) or (H, W). If rois has shape (B, num_boxes, 4), then
                the max_shape should be a Sequence[Sequence[int]] whose
                length is also B.
            scale_factor (tuple[ndarray] or ndarray): Scale factor of the
                image, arranged as (w_scale, h_scale, w_scale, h_scale). In
                `batch_mode`, scale_factor is a tuple[ndarray].
            rescale (bool): If True, return boxes in original image space.
                Default: False.
            cfg (obj:`ConfigDict`): `test_cfg` of Bbox Head. Default: None.

        Returns:
            tuple[list[Tensor], list[Tensor]] or tuple[Tensor, Tensor]:
                If the input has a batch dimension, the return value is
                a tuple of two lists. The first list contains the boxes of
                each image in the batch; each tensor has shape
                (num_boxes, 5), where the last dimension represents
                (tl_x, tl_y, br_x, br_y, score). Each tensor in the
                second list holds the labels, with shape (num_boxes, ).
                The length of both lists equals batch_size. Otherwise,
                the return value is a tuple of two tensors: the first is
                the boxes with scores, the second is the labels, both
                shaped as in the first case.
        """
        if isinstance(cls_score, list):
            cls_score = sum(cls_score) / float(len(cls_score))

        scores = F.softmax(
            cls_score, dim=-1) if cls_score is not None else None

        batch_mode = True
        if rois.ndim == 2:
            # e.g. AugTest, Cascade R-CNN, HTC, SCNet...
            batch_mode = False

            # add batch dimension
            if scores is not None:
                scores = scores.unsqueeze(0)
            if bbox_pred is not None:
                bbox_pred = bbox_pred.unsqueeze(0)
            rois = rois.unsqueeze(0)

        if bbox_pred is not None:
            bboxes = self.bbox_coder.decode(
                rois[..., 1:], bbox_pred, max_shape=img_shape)
        else:
            bboxes = rois[..., 1:].clone()
            if img_shape is not None:
                max_shape = bboxes.new_tensor(img_shape)[..., :2]
                min_xy = bboxes.new_tensor(0)
                max_xy = torch.cat(
                    [max_shape] * 2, dim=-1).flip(-1).unsqueeze(-2)
                bboxes = torch.where(bboxes < min_xy, min_xy, bboxes)
                bboxes = torch.where(bboxes > max_xy, max_xy, bboxes)

        if rescale and bboxes.size(-2) > 0:
            if not isinstance(scale_factor, tuple):
                scale_factor = tuple([scale_factor])
            # B, 1, bboxes.size(-1)
            scale_factor = bboxes.new_tensor(scale_factor).unsqueeze(1).repeat(
                1, 1,
                bboxes.size(-1) // 4)
            bboxes /= scale_factor

        det_bboxes = []
        det_labels = []
        for (bbox, score) in zip(bboxes, scores):
            if cfg is not None:
                det_bbox, det_label = multiclass_nms(bbox, score,
                                                     cfg.score_thr, cfg.nms,
                                                     cfg.max_per_img)
            else:
                det_bbox, det_label = bbox, score
            det_bboxes.append(det_bbox)
            det_labels.append(det_label)

        if not batch_mode:
            det_bboxes = det_bboxes[0]
            det_labels = det_labels[0]
        return det_bboxes, det_labels

    @force_fp32(apply_to=('bbox_preds', ))
    def refine_bboxes(self, rois, labels, bbox_preds, pos_is_gts, img_metas):
...
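The rescale branch above builds a (B, 1, 4k) scale-factor tensor so each image in the batch is divided by its own factor in a single broadcast. A standalone sketch with illustrative values (the two ndarrays play the role of the per-image `scale_factors` gathered in the test mixin below):

    import numpy as np
    import torch

    # Two images, three boxes each, class-agnostic regression (4 coords/box).
    bboxes = torch.rand(2, 3, 4) * 100
    # Per-image (w_scale, h_scale, w_scale, h_scale).
    scale_factor = (np.array([2.0, 2.0, 2.0, 2.0]),
                    np.array([1.5, 1.5, 1.5, 1.5]))

    # Mirrors the rescale branch: (B, 4) -> (B, 1, 4); the trailing repeat
    # widens it when the head predicts 4 * num_classes coordinates per box.
    sf = bboxes.new_tensor(scale_factor).unsqueeze(1).repeat(
        1, 1, bboxes.size(-1) // 4)
    rescaled = bboxes / sf  # each image is divided by its own factor
    assert rescaled.shape == bboxes.shape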
@@ -55,51 +55,113 @@ class BBoxTestMixin(object):
                           proposals,
                           rcnn_test_cfg,
                           rescale=False):
        """Test only det bboxes without augmentation.

        Args:
            x (tuple[Tensor]): Feature maps of all scale levels.
            img_metas (list[dict]): Image meta info.
            proposals (Tensor or list[Tensor]): Region proposals.
            rcnn_test_cfg (obj:`ConfigDict`): `test_cfg` of R-CNN.
            rescale (bool): If True, return boxes in original image space.
                Default: False.

        Returns:
            tuple[list[Tensor], list[Tensor]]: The first list contains
                the boxes of each image in the batch; each tensor has
                shape (num_boxes, 5), where the last dimension represents
                (tl_x, tl_y, br_x, br_y, score). Each tensor in the
                second list holds the labels, with shape (num_boxes, ).
                The length of both lists equals batch_size.
        """
        # get origin input shape to support onnx dynamic input shape
        if torch.onnx.is_in_onnx_export():
            assert len(
                img_metas
            ) == 1, 'Only support one input image while in exporting to ONNX'
            img_shapes = img_metas[0]['img_shape_for_onnx']
        else:
            img_shapes = tuple(meta['img_shape'] for meta in img_metas)
        scale_factors = tuple(meta['scale_factor'] for meta in img_metas)

        # The length of proposals of different batches may be different.
        # In order to form a batch, a padding operation is required.
        if isinstance(proposals, list):
            # padding to form a batch
            max_size = max([proposal.size(0) for proposal in proposals])
            for i, proposal in enumerate(proposals):
                supplement = proposal.new_full(
                    (max_size - proposal.size(0), 5), 0)
                proposals[i] = torch.cat((supplement, proposal), dim=0)
            rois = torch.stack(proposals, dim=0)
        else:
            rois = proposals

        batch_index = torch.arange(
            rois.size(0), device=rois.device).float().view(-1, 1, 1).expand(
                rois.size(0), rois.size(1), 1)
        rois = torch.cat([batch_index, rois[..., :4]], dim=-1)
        batch_size = rois.shape[0]
        num_proposals_per_img = rois.shape[1]

        # Eliminate the batch dimension
        rois = rois.view(-1, 5)
        bbox_results = self._bbox_forward(x, rois)
        cls_score = bbox_results['cls_score']
        bbox_pred = bbox_results['bbox_pred']

        # Recover the batch dimension
        rois = rois.reshape(batch_size, num_proposals_per_img, -1)
        cls_score = cls_score.reshape(batch_size, num_proposals_per_img, -1)

        if not torch.onnx.is_in_onnx_export():
            # remove padding
            supplement_mask = rois[..., -1] == 0
            cls_score[supplement_mask, :] = 0

        # bbox_pred would be None in some detectors when with_reg is False,
        # e.g. Grid R-CNN.
        if bbox_pred is not None:
            # the bbox prediction of some detectors like SABL is not Tensor
            if isinstance(bbox_pred, torch.Tensor):
                bbox_pred = bbox_pred.reshape(batch_size,
                                              num_proposals_per_img, -1)
                if not torch.onnx.is_in_onnx_export():
                    bbox_pred[supplement_mask, :] = 0
            else:
                # TODO: Looking forward to a better way
                # For SABL
                bbox_preds = self.bbox_head.bbox_pred_split(
                    bbox_pred, num_proposals_per_img)
                # apply bbox post-processing to each image individually
                det_bboxes = []
                det_labels = []
                for i in range(len(proposals)):
                    # remove padding
                    supplement_mask = proposals[i][..., -1] == 0
                    for bbox in bbox_preds[i]:
                        bbox[supplement_mask] = 0
                    det_bbox, det_label = self.bbox_head.get_bboxes(
                        rois[i],
                        cls_score[i],
                        bbox_preds[i],
                        img_shapes[i],
                        scale_factors[i],
                        rescale=rescale,
                        cfg=rcnn_test_cfg)
                    det_bboxes.append(det_bbox)
                    det_labels.append(det_label)
                return det_bboxes, det_labels
        else:
            bbox_pred = None

        return self.bbox_head.get_bboxes(
            rois,
            cls_score,
            bbox_pred,
            img_shapes,
            scale_factors,
            rescale=rescale,
            cfg=rcnn_test_cfg)

    def aug_test_bboxes(self, feats, img_metas, proposal_list, rcnn_test_cfg):
        """Test det bboxes with test time augmentation."""
...
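Because each image can have a different number of proposals, `simple_test_bboxes` pads every proposal list with zero rows at the front before stacking, prepends the image index that RoI extractors expect, and later identifies the padded rows by their zero coordinates. A standalone sketch of that padding path (illustrative shapes, not the committed code):

    import torch

    # Variable-length proposals for a batch of two images, shaped like the
    # (x1, y1, x2, y2, score) proposals the mixin receives.
    proposals = [torch.rand(3, 5), torch.rand(5, 5)]

    # Pad shorter lists with zero rows at the front so the batch stacks.
    max_size = max(p.size(0) for p in proposals)
    padded = [
        torch.cat((p.new_full((max_size - p.size(0), 5), 0), p), dim=0)
        for p in proposals
    ]
    rois = torch.stack(padded, dim=0)  # (B, max_size, 5)

    # Replace the score column with the image index RoI extractors expect,
    # then flatten so a single forward pass covers the whole batch.
    batch_index = torch.arange(rois.size(0)).float().view(-1, 1, 1).expand(
        rois.size(0), rois.size(1), 1)
    rois = torch.cat([batch_index, rois[..., :4]], dim=-1)  # (B, max_size, 5)
    flat_rois = rois.view(-1, 5)  # (B * max_size, 5)

    # Padded rows stay identifiable: their coordinates are all zero.
    supplement_mask = rois[..., -1] == 0
    assert supplement_mask.sum() == 2  # two zero rows were added to image 0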
import mmcv
import pytest
import torch

from mmdet.core import bbox2roi
...
@@ -64,6 +65,31 @@ def test_bbox_head_loss():
    assert losses.get('loss_bbox', 0) > 0, 'box-loss should be non-zero'


@pytest.mark.parametrize(['num_sample', 'num_batch'], [[2, 2], [0, 2], [0, 0]])
def test_bbox_head_get_bboxes(num_sample, num_batch):
    self = BBoxHead(reg_class_agnostic=True)
    num_class = 6
    rois = torch.rand((num_sample, 5))
    cls_score = torch.rand((num_sample, num_class))
    bbox_pred = torch.rand((num_sample, 4))
    scale_factor = 2.0
    # 2-D rois: single-image mode returns a (Tensor, Tensor) pair
    det_bboxes, det_labels = self.get_bboxes(
        rois, cls_score, bbox_pred, None, scale_factor, rescale=True)
    if num_sample == 0:
        assert len(det_bboxes) == 0 and len(det_labels) == 0
    else:
        assert det_bboxes.shape == bbox_pred.shape
        assert det_labels.shape == cls_score.shape

    # 3-D rois: batch mode returns per-image lists of length num_batch
    rois = torch.rand((num_batch, num_sample, 5))
    cls_score = torch.rand((num_batch, num_sample, num_class))
    bbox_pred = torch.rand((num_batch, num_sample, 4))
    det_bboxes, det_labels = self.get_bboxes(
        rois, cls_score, bbox_pred, None, scale_factor, rescale=True)
    assert len(det_bboxes) == num_batch and len(det_labels) == num_batch


def test_refine_boxes():
    """Mirrors the doctest in
    ``mmdet.models.bbox_heads.bbox_head.BBoxHead.refine_boxes`` but checks for
...
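As a quick cross-check of the two calling conventions exercised by the test above, the single-image and batch paths of `get_bboxes` should agree on identical inputs. A hedged sketch, assuming mmdet is installed and `BBoxHead` is importable from `mmdet.models.roi_heads.bbox_heads` (not part of the commit):

    import torch
    from mmdet.models.roi_heads.bbox_heads import BBoxHead

    head = BBoxHead(reg_class_agnostic=True)
    rois = torch.rand(4, 5)
    cls_score = torch.rand(4, 6)
    bbox_pred = torch.rand(4, 4)

    # 2-D rois: single-image mode, returns a (Tensor, Tensor) pair.
    single = head.get_bboxes(
        rois, cls_score, bbox_pred, None, 2.0, rescale=True)

    # The same inputs with a leading batch dimension: batch mode,
    # returns (list[Tensor], list[Tensor]) of length 1.
    batched = head.get_bboxes(
        rois[None], cls_score[None], bbox_pred[None], None, 2.0, rescale=True)

    assert torch.allclose(single[0], batched[0][0])
    assert torch.allclose(single[1], batched[1][0])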