Commit b287273c authored by Jon Crall, committed by Kai Chen

Expand docstring docs (#1477)

* Expand docs

* Fix flake8 errors

* Clarify Collect docstring

* revert to the original version
parent f83c8638
@@ -12,7 +12,13 @@ def merge_aug_proposals(aug_proposals, img_metas, rpn_test_cfg):
aug_proposals (list[Tensor]): proposals from different testing
schemes, shape (n, 5). Note that they are not rescaled to the
original image size.
img_metas (list[dict]): image info including "shape_scale" and "flip".
img_metas (list[dict]): list of image info dicts, where each dict has:
'img_shape', 'scale_factor', 'flip', and may also contain
'filename', 'ori_shape', 'pad_shape', and 'img_norm_cfg'.
For details on the values of these keys see
`mmdet/datasets/pipelines/formatting.py:Collect`.
rpn_test_cfg (dict): rpn test config.
Returns:
......
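For reference, a single entry of `img_metas` with the keys described above might look like the sketch below; every value is made up for illustration and the filename is hypothetical.

img_meta = dict(
    filename='data/coco/val2017/000000000139.jpg',  # hypothetical path
    ori_shape=(426, 640, 3),      # original image shape (h, w, c)
    img_shape=(800, 1202, 3),     # shape after resizing, before padding
    pad_shape=(800, 1216, 3),     # shape after padding to a size divisor
    scale_factor=1.879,           # resize ratio applied during preprocessing
    flip=False,                   # whether a horizontal flip was applied
    img_norm_cfg=dict(
        mean=[123.675, 116.28, 103.53],  # per-channel mean subtracted
        std=[58.395, 57.12, 57.375],     # per-channel std divisor
        to_rgb=True),                    # BGR input converted to RGB
)
img_metas = [img_meta]  # one dict per image / augmentation in the batch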
@@ -134,6 +134,35 @@ class DefaultFormatBundle(object):
@PIPELINES.register_module
class Collect(object):
"""
Collect data from the loader relevant to the specific task.
This is usually the last stage of the data loader pipeline. Typically "keys"
is set to some subset of "img", "proposals", "gt_bboxes",
"gt_bboxes_ignore", "gt_labels", and/or "gt_masks".
The "img_meta" item is always populated. The contents of the "img_meta"
dictionary depend on "meta_keys". By default this includes:
- "img_shape": shape of the image input to the network as a tuple
(h, w, c). Note that images may be zero padded on the bottom/right
if the batch tensor is larger than this shape.
- "scale_factor": a float indicating the preprocessing scale
- "flip": a boolean indicating if image flip transform was used
- "filename": path to the image file
- "ori_shape": original shape of the image as a tuple (h, w, c)
- "pad_shape": image shape after padding
- "img_norm_cfg": a dict of normalization information:
- mean - per channel mean subtraction
- std - per channel std divisor
- to_rgb - bool indicating if bgr was converted to rgb
"""
def __init__(self,
keys,
......
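To make the role of Collect concrete, here is a minimal training-pipeline sketch loosely modelled on the configs shipped with mmdetection; the transforms and their arguments are illustrative and may differ from any particular config file.

train_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='LoadAnnotations', with_bbox=True),
    dict(type='Resize', img_scale=(1333, 800), keep_ratio=True),
    dict(type='RandomFlip', flip_ratio=0.5),
    dict(type='Normalize',
         mean=[123.675, 116.28, 103.53],
         std=[58.395, 57.12, 57.375],
         to_rgb=True),
    dict(type='Pad', size_divisor=32),
    dict(type='DefaultFormatBundle'),
    # Collect keeps only the keys the task needs; everything named in its
    # (default) meta_keys is packed into the single "img_meta" item.
    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']),
]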
@@ -45,6 +45,20 @@ class BaseDetector(nn.Module):
@abstractmethod
def forward_train(self, imgs, img_metas, **kwargs):
"""
Args:
imgs (list[Tensor]): list of tensors of shape (1, C, H, W).
Typically these should be mean centered and std scaled.
img_metas (list[dict]): list of image info dicts, where each dict
has: 'img_shape', 'scale_factor', 'flip', and may also contain
'filename', 'ori_shape', 'pad_shape', and 'img_norm_cfg'.
For details on the values of these keys see
`mmdet/datasets/pipelines/formatting.py:Collect`.
**kwargs: specific to concrete implementation
"""
pass
@abstractmethod
......
@@ -157,6 +157,34 @@ class CascadeRCNN(BaseDetector, RPNTestMixin):
gt_bboxes_ignore=None,
gt_masks=None,
proposals=None):
"""
Args:
img (Tensor): of shape (B, C, H, W) encoding input images.
Typically these should be mean centered and std scaled.
img_meta (list[dict]): list of image info dicts, where each dict has:
'img_shape', 'scale_factor', 'flip', and may also contain
'filename', 'ori_shape', 'pad_shape', and 'img_norm_cfg'.
For details on the values of these keys see
`mmdet/datasets/pipelines/formatting.py:Collect`.
gt_bboxes (list[Tensor]): each item is the ground truth boxes for one
image, in [tl_x, tl_y, br_x, br_y] format.
gt_labels (list[Tensor]): class indices corresponding to each box.
gt_bboxes_ignore (None | list[Tensor]): specify which bounding
boxes can be ignored when computing the loss.
gt_masks (None | Tensor): true segmentation masks for each box
used if the architecture supports a segmentation task.
proposals: override rpn proposals with custom proposals. Use when
`with_rpn` is False.
Returns:
dict[str, Tensor]: a dictionary of loss components
"""
x = self.extract_feat(img)
losses = dict()
@@ -270,7 +298,21 @@ class CascadeRCNN(BaseDetector, RPNTestMixin):
return losses
def simple_test(self, img, img_meta, proposals=None, rescale=False):
"""Run inference on a single image.
Args:
img (Tensor): must be in shape (B, C, H, W)
img_meta (list[dict]): a list with one dictionary element.
See `mmdet/datasets/pipelines/formatting.py:Collect` for
details of meta dicts.
proposals: if specified, overrides rpn proposals
rescale (bool): if True, returns boxes in original image space
Returns:
dict: results
"""
x = self.extract_feat(img)
proposal_list = self.simple_test_rpn(
x, img_meta, self.test_cfg.rpn) if proposals is None else proposals
......
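As a rough illustration of the inputs documented in forward_train above, the sketch below builds a batch of two images with made-up boxes and labels; the final call is commented out because it assumes an already-constructed detector object.

import torch

img = torch.randn(2, 3, 800, 1216)  # (B, C, H, W), mean centered and std scaled
gt_bboxes = [
    torch.tensor([[10., 20., 200., 300.]]),      # image 0: one box
    torch.tensor([[50., 60., 120., 140.],
                  [300., 310., 400., 420.]]),    # image 1: two boxes
]  # each box is [tl_x, tl_y, br_x, br_y] in pixels of the resized image
gt_labels = [
    torch.tensor([0]),     # class index of the single box in image 0
    torch.tensor([2, 5]),  # class indices of the two boxes in image 1
]
# losses = detector.forward_train(img, img_meta, gt_bboxes, gt_labels)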
@@ -8,6 +8,11 @@ from .base import BaseDetector
@DETECTORS.register_module
class SingleStageDetector(BaseDetector):
"""Base class for single-stage detectors.
Single-stage detectors directly and densely predict bounding boxes on the
output features of the backbone+neck.
"""
def __init__(self,
backbone,
@@ -37,12 +42,18 @@ class SingleStageDetector(BaseDetector):
self.bbox_head.init_weights()
def extract_feat(self, img):
"""Directly extract features from the backbone+neck
"""
x = self.backbone(img)
if self.with_neck:
x = self.neck(x)
return x
def forward_dummy(self, img):
"""Used for computing network flops.
See `mmdetection/tools/get_flops.py`.
"""
x = self.extract_feat(img)
outs = self.bbox_head(x)
return outs
......
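The forward_dummy path needs nothing but an image tensor, which is what makes it convenient for complexity profiling. The toy module below is a self-contained stand-in, not an mmdetection class, that mimics the same extract_feat / forward_dummy split.

import torch
import torch.nn as nn

class TinyDetector(nn.Module):
    """Made-up toy illustrating the forward_dummy pattern."""

    def __init__(self):
        super().__init__()
        self.backbone = nn.Conv2d(3, 8, 3, stride=2, padding=1)
        self.bbox_head = nn.Conv2d(8, 4, 1)

    def extract_feat(self, img):
        return self.backbone(img)

    def forward_dummy(self, img):
        # no gt boxes or img_meta are needed on this path
        return self.bbox_head(self.extract_feat(img))

model = TinyDetector()
with torch.no_grad():
    outs = model.forward_dummy(torch.randn(1, 3, 64, 64))
print(outs.shape)                                   # raw head output shape
print(sum(p.numel() for p in model.parameters()))   # parameter count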
@@ -11,6 +11,11 @@ from .test_mixins import BBoxTestMixin, MaskTestMixin, RPNTestMixin
@DETECTORS.register_module
class TwoStageDetector(BaseDetector, RPNTestMixin, BBoxTestMixin,
MaskTestMixin):
"""Base class for two-stage detectors.
Two-stage detectors typically consist of a region proposal network and a
task-specific regression head.
"""
def __init__(self,
backbone,
@@ -82,12 +87,18 @@ class TwoStageDetector(BaseDetector, RPNTestMixin, BBoxTestMixin,
self.mask_roi_extractor.init_weights()
def extract_feat(self, img):
"""Directly extract features from the backbone+neck
"""
x = self.backbone(img)
if self.with_neck:
x = self.neck(x)
return x
def forward_dummy(self, img):
"""Used for computing network flops.
See `mmdetection/tools/get_flops.py`.
"""
outs = ()
# backbone
x = self.extract_feat(img)
@@ -124,6 +135,34 @@ class TwoStageDetector(BaseDetector, RPNTestMixin, BBoxTestMixin,
gt_bboxes_ignore=None,
gt_masks=None,
proposals=None):
"""
Args:
img (Tensor): of shape (B, C, H, W) encoding input images.
Typically these should be mean centered and std scaled.
img_meta (list[dict]): list of image info dicts, where each dict has:
'img_shape', 'scale_factor', 'flip', and may also contain
'filename', 'ori_shape', 'pad_shape', and 'img_norm_cfg'.
For details on the values of these keys see
`mmdet/datasets/pipelines/formatting.py:Collect`.
gt_bboxes (list[Tensor]): each item is the ground truth boxes for one
image, in [tl_x, tl_y, br_x, br_y] format.
gt_labels (list[Tensor]): class indices corresponding to each box.
gt_bboxes_ignore (None | list[Tensor]): specify which bounding
boxes can be ignored when computing the loss.
gt_masks (None | Tensor): true segmentation masks for each box
used if the architecture supports a segmentation task.
proposals: override rpn proposals with custom proposals. Use when
`with_rpn` is False.
Returns:
dict[str, Tensor]: a dictionary of loss components
"""
x = self.extract_feat(img)
losses = dict()
......
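For context on the dict[str, Tensor] returned by forward_train, a training loop eventually reduces it to one scalar before calling backward(); the key names below are illustrative only, the real keys depend on which heads are enabled, and mmdetection performs this reduction itself during training, so the snippet is just a sketch of the idea.

import torch

losses = {
    'loss_rpn_cls': torch.tensor(0.21),   # illustrative values only
    'loss_rpn_bbox': torch.tensor(0.05),
    'loss_cls': torch.tensor(0.48),
    'loss_bbox': torch.tensor(0.13),
}
total_loss = sum(v for key, v in losses.items() if 'loss' in key)
print(total_loss)  # the single scalar a training step would backpropagate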