Commit b287273c authored by Jon Crall, committed by Kai Chen

Expand docstring docs (#1477)

* Expand docs

* Fix flake8 errors

* Clarify Collect docstring

* revert to the original version
parent f83c8638
@@ -12,7 +12,13 @@ def merge_aug_proposals(aug_proposals, img_metas, rpn_test_cfg):
aug_proposals (list[Tensor]): proposals from different testing
schemes, shape (n, 5). Note that they are not rescaled to the
original image size.
img_metas (list[dict]): image info including "shape_scale" and "flip".
img_metas (list[dict]): list of image info dicts, where each dict has:
'img_shape', 'scale_factor', 'flip', and may also contain
'filename', 'ori_shape', 'pad_shape', and 'img_norm_cfg'.
For details on the values of these keys see
`mmdet/datasets/pipelines/formatting.py:Collect`.
rpn_test_cfg (dict): rpn test config.
Returns:
......
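For reference, a single entry of `img_metas` with the keys described above might look like the sketch below; every value is made up for illustration and the filename is hypothetical.

img_meta = dict(
    filename='data/coco/val2017/000000000139.jpg',  # hypothetical path
    ori_shape=(426, 640, 3),      # original image shape (h, w, c)
    img_shape=(800, 1202, 3),     # shape after resizing, before padding
    pad_shape=(800, 1216, 3),     # shape after padding to a size divisor
    scale_factor=1.879,           # resize ratio applied during preprocessing
    flip=False,                   # whether a horizontal flip was applied
    img_norm_cfg=dict(
        mean=[123.675, 116.28, 103.53],  # per-channel mean subtracted
        std=[58.395, 57.12, 57.375],     # per-channel std divisor
        to_rgb=True),                    # BGR input converted to RGB
)
img_metas = [img_meta]  # one dict per image / augmentation in the batch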
@@ -134,6 +134,35 @@ class DefaultFormatBundle(object):
@PIPELINES.register_module
class Collect(object):
"""
Collect data from the loader relevant to the specific task.
This is usually the last stage of the data loader pipeline. Typically "keys"
is set to some subset of "img", "proposals", "gt_bboxes",
"gt_bboxes_ignore", "gt_labels", and/or "gt_masks".
The "img_meta" item is always populated. The contents of the "img_meta"
dictionary depend on "meta_keys". By default this includes:
- "img_shape": shape of the image input to the network as a tuple
(h, w, c). Note that images may be zero padded on the bottom/right
if the batch tensor is larger than this shape.
- "scale_factor": a float indicating the preprocessing scale
- "flip": a boolean indicating if image flip transform was used
- "filename": path to the image file
- "ori_shape": original shape of the image as a tuple (h, w, c)
- "pad_shape": image shape after padding
- "img_norm_cfg": a dict of normalization information:
- mean - per channel mean subtraction
- std - per channel std divisor
- to_rgb - bool indicating if bgr was converted to rgb
"""
def __init__(self,
keys,
......
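To make the role of Collect concrete, here is a minimal training-pipeline sketch loosely modelled on the configs shipped with mmdetection; the transforms and their arguments are illustrative and may differ from any particular config file.

train_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='LoadAnnotations', with_bbox=True),
    dict(type='Resize', img_scale=(1333, 800), keep_ratio=True),
    dict(type='RandomFlip', flip_ratio=0.5),
    dict(type='Normalize',
         mean=[123.675, 116.28, 103.53],
         std=[58.395, 57.12, 57.375],
         to_rgb=True),
    dict(type='Pad', size_divisor=32),
    dict(type='DefaultFormatBundle'),
    # Collect keeps only the keys the task needs; everything named in its
    # (default) meta_keys is packed into the single "img_meta" item.
    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']),
]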
@@ -45,6 +45,20 @@ class BaseDetector(nn.Module):
@abstractmethod
def forward_train(self, imgs, img_metas, **kwargs):
"""
Args:
imgs (list[Tensor]): list of tensors of shape (1, C, H, W).
Typically these should be mean centered and std scaled.
img_metas (list[dict]): list of image info dicts, where each dict
has: 'img_shape', 'scale_factor', 'flip', and may also contain
'filename', 'ori_shape', 'pad_shape', and 'img_norm_cfg'.
For details on the values of these keys see
`mmdet/datasets/pipelines/formatting.py:Collect`.
**kwargs: specific to concrete implementation
"""
pass
@abstractmethod
......
@@ -157,6 +157,34 @@ class CascadeRCNN(BaseDetector, RPNTestMixin):
gt_bboxes_ignore=None,
gt_masks=None,
proposals=None):
"""
Args:
img (Tensor): of shape (B, C, H, W) encoding input images.
Typically these should be mean centered and std scaled.
img_meta (list[dict]): list of image info dicts, where each dict has:
'img_shape', 'scale_factor', 'flip', and may also contain
'filename', 'ori_shape', 'pad_shape', and 'img_norm_cfg'.
For details on the values of these keys see
`mmdet/datasets/pipelines/formatting.py:Collect`.
gt_bboxes (list[Tensor]): each item is the ground truth boxes for one
image, in [tl_x, tl_y, br_x, br_y] format.
gt_labels (list[Tensor]): class indices corresponding to each box.
gt_bboxes_ignore (None | list[Tensor]): specify which bounding
boxes can be ignored when computing the loss.
gt_masks (None | Tensor): true segmentation masks for each box
used if the architecture supports a segmentation task.
proposals: override rpn proposals with custom proposals. Use when
`with_rpn` is False.
Returns:
dict[str, Tensor]: a dictionary of loss components
"""
x = self.extract_feat(img)
losses = dict()
@@ -270,7 +298,21 @@ class CascadeRCNN(BaseDetector, RPNTestMixin):
return losses
def simple_test(self, img, img_meta, proposals=None, rescale=False):
"""Run inference on a single image.
Args:
img (Tensor): must be in shape (B, C, H, W)
img_meta (list[dict]): a list with one dictionary element.
See `mmdet/datasets/pipelines/formatting.py:Collect` for
details of meta dicts.
proposals: if specified, overrides rpn proposals
rescale (bool): if True, returns boxes in original image space
Returns:
dict: results
"""
x = self.extract_feat(img)
proposal_list = self.simple_test_rpn(
x, img_meta, self.test_cfg.rpn) if proposals is None else proposals
......
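As a rough illustration of the inputs documented in forward_train above, the sketch below builds a batch of two images with made-up boxes and labels; the final call is commented out because it assumes an already-constructed detector object.

import torch

img = torch.randn(2, 3, 800, 1216)  # (B, C, H, W), mean centered and std scaled
gt_bboxes = [
    torch.tensor([[10., 20., 200., 300.]]),      # image 0: one box
    torch.tensor([[50., 60., 120., 140.],
                  [300., 310., 400., 420.]]),    # image 1: two boxes
]  # each box is [tl_x, tl_y, br_x, br_y] in pixels of the resized image
gt_labels = [
    torch.tensor([0]),     # class index of the single box in image 0
    torch.tensor([2, 5]),  # class indices of the two boxes in image 1
]
# losses = detector.forward_train(img, img_meta, gt_bboxes, gt_labels)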
@@ -8,6 +8,11 @@ from .base import BaseDetector
@DETECTORS.register_module
class SingleStageDetector(BaseDetector):
"""Base class for single-stage detectors.
Single-stage detectors directly and densely predict bounding boxes on the
output features of the backbone+neck.
"""
def __init__(self,
backbone,
@@ -37,12 +42,18 @@ class SingleStageDetector(BaseDetector):
self.bbox_head.init_weights()
def extract_feat(self, img):
"""Directly extract features from the backbone+neck
"""
x = self.backbone(img)
if self.with_neck:
x = self.neck(x)
return x
def forward_dummy(self, img):
"""Used for computing network flops.
See `mmdetection/tools/get_flops.py`.
"""
x = self.extract_feat(img)
outs = self.bbox_head(x)
return outs
......
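The forward_dummy path needs nothing but an image tensor, which is what makes it convenient for complexity profiling. The toy module below is a self-contained stand-in, not an mmdetection class, that mimics the same extract_feat / forward_dummy split.

import torch
import torch.nn as nn

class TinyDetector(nn.Module):
    """Made-up toy illustrating the forward_dummy pattern."""

    def __init__(self):
        super().__init__()
        self.backbone = nn.Conv2d(3, 8, 3, stride=2, padding=1)
        self.bbox_head = nn.Conv2d(8, 4, 1)

    def extract_feat(self, img):
        return self.backbone(img)

    def forward_dummy(self, img):
        # no gt boxes or img_meta are needed on this path
        return self.bbox_head(self.extract_feat(img))

model = TinyDetector()
with torch.no_grad():
    outs = model.forward_dummy(torch.randn(1, 3, 64, 64))
print(outs.shape)                                   # raw head output shape
print(sum(p.numel() for p in model.parameters()))   # parameter count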
@@ -11,6 +11,11 @@ from .test_mixins import BBoxTestMixin, MaskTestMixin, RPNTestMixin
@DETECTORS.register_module
class TwoStageDetector(BaseDetector, RPNTestMixin, BBoxTestMixin,
MaskTestMixin):
"""Base class for two-stage detectors.
Two-stage detectors typically consist of a region proposal network and a
task-specific regression head.
"""
def __init__(self,
backbone,
@@ -82,12 +87,18 @@ class TwoStageDetector(BaseDetector, RPNTestMixin, BBoxTestMixin,
self.mask_roi_extractor.init_weights()
def extract_feat(self, img):
"""Directly extract features from the backbone+neck
"""
x = self.backbone(img)
if self.with_neck:
x = self.neck(x)
return x
def forward_dummy(self, img):
"""Used for computing network flops.
See `mmdetection/tools/get_flops.py`.
"""
outs = ()
# backbone
x = self.extract_feat(img)
@@ -124,6 +135,34 @@ class TwoStageDetector(BaseDetector, RPNTestMixin, BBoxTestMixin,
gt_bboxes_ignore=None,
gt_masks=None,
proposals=None):
"""
Args:
img (Tensor): of shape (B, C, H, W) encoding input images.
Typically these should be mean centered and std scaled.
img_meta (list[dict]): list of image info dicts, where each dict has:
'img_shape', 'scale_factor', 'flip', and may also contain
'filename', 'ori_shape', 'pad_shape', and 'img_norm_cfg'.
For details on the values of these keys see
`mmdet/datasets/pipelines/formatting.py:Collect`.
gt_bboxes (list[Tensor]): each item is the ground truth boxes for one
image, in [tl_x, tl_y, br_x, br_y] format.
gt_labels (list[Tensor]): class indices corresponding to each box.
gt_bboxes_ignore (None | list[Tensor]): specify which bounding
boxes can be ignored when computing the loss.
gt_masks (None | Tensor): true segmentation masks for each box
used if the architecture supports a segmentation task.
proposals: override rpn proposals with custom proposals. Use when
`with_rpn` is False.
Returns:
dict[str, Tensor]: a dictionary of loss components
"""
x = self.extract_feat(img)
losses = dict()
......
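For context on the dict[str, Tensor] returned by forward_train, a training loop eventually reduces it to one scalar before calling backward(); the key names below are illustrative only, the real keys depend on which heads are enabled, and mmdetection performs this reduction itself during training, so the snippet is just a sketch of the idea.

import torch

losses = {
    'loss_rpn_cls': torch.tensor(0.21),   # illustrative values only
    'loss_rpn_bbox': torch.tensor(0.05),
    'loss_cls': torch.tensor(0.48),
    'loss_bbox': torch.tensor(0.13),
}
total_loss = sum(v for key, v in losses.items() if 'loss' in key)
print(total_loss)  # the single scalar a training step would backpropagate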