From 5a0e2edc3b7a378e92ac9e4e111105140b512319 Mon Sep 17 00:00:00 2001
From: Kai Chen <chenkaidev@gmail.com>
Date: Sun, 3 May 2020 22:01:25 +0800
Subject: [PATCH] Update v2.0 docs (#2590)

* add docs

* reorg tutorials and a minor fix

* add docs on adding new necks
---
 docs/conf.py                  |   4 +-
 docs/getting_started.md       |  48 +++++++-
 docs/index.rst                |  10 +-
 docs/install.md               |  78 ++++--------
 docs/model_zoo.md             |  44 ++++---
 .../data_pipeline.md}         | 116 ++++----------
 docs/tutorials/finetune.md    |   2 +-
 docs/tutorials/new_dataset.md |   8 +-
 docs/tutorials/new_modules.md |  44 ++++++-
 mmdet/datasets/__init__.py    |   4 +-
 10 files changed, 178 insertions(+), 180 deletions(-)
 rename docs/{technical_details.md => tutorials/data_pipeline.md} (52%)

diff --git a/docs/conf.py b/docs/conf.py
index aad51b6a..3bc10a86 100644
--- a/docs/conf.py
+++ b/docs/conf.py
@@ -18,7 +18,7 @@
 project = 'MMDetection'
 copyright = '2018-2020, OpenMMLab'
-author = 'OpenMMLab'
+author = 'MMDetection Authors'

 # The full version, including alpha/beta/rc tags
 release = '1.0.0'
@@ -36,7 +36,7 @@ extensions = [
     'sphinx_markdown_tables',
 ]

-autodoc_mock_imports = ['torch', 'torchvision', 'mmcv']
+autodoc_mock_imports = ['torch', 'torchvision', 'mmcv', 'numpy', 'pycocotools']

 # Add any paths that contain templates here, relative to this directory.
 templates_path = ['_templates']
diff --git a/docs/getting_started.md b/docs/getting_started.md
index 6a4c71bc..4c5fc8fc 100644
--- a/docs/getting_started.md
+++ b/docs/getting_started.md
@@ -3,6 +3,48 @@
 This page provides basic tutorials about the usage of MMDetection.
 For installation instructions, please see [install.md](install.md).

+## Prepare datasets
+
+It is recommended to symlink the dataset root to `$MMDETECTION/data`.
+If your folder structure is different, you may need to change the corresponding paths in config files.
+
+```
+mmdetection
+├── mmdet
+├── tools
+├── configs
+├── data
+│   ├── coco
+│   │   ├── annotations
+│   │   ├── train2017
+│   │   ├── val2017
+│   │   ├── test2017
+│   ├── cityscapes
+│   │   ├── annotations
+│   │   ├── leftImg8bit
+│   │   │   ├── train
+│   │   │   ├── val
+│   │   ├── gtFine
+│   │   │   ├── train
+│   │   │   ├── val
+│   ├── VOCdevkit
+│   │   ├── VOC2007
+│   │   ├── VOC2012
+
+```
+
+The cityscapes annotations have to be converted into the coco format using `tools/convert_datasets/cityscapes.py`:
+
+```shell
+pip install cityscapesscripts
+python tools/convert_datasets/cityscapes.py ./data/cityscapes --nproc 8 --out_dir ./data/cityscapes/annotations
+```
+
+Currently the config files in `cityscapes` use COCO pre-trained weights to initialize.
+You could download the pre-trained models in advance if your network is unavailable or slow; otherwise errors will occur at the beginning of training.
+
+For using custom datasets, please refer to [Tutorial 2: Adding New Dataset](tutorials/new_dataset.md).
+
 ## Inference with pretrained models

 We provide testing scripts to evaluate a whole dataset (COCO, PASCAL VOC, Cityscapes, etc.),
@@ -10,9 +52,9 @@ and also some high-level apis for easier integration to other projects.

 ### Test a dataset

-- [x] single GPU testing
-- [x] multiple GPU testing
-- [x] visualize detection results
+- single GPU
+- single node with multiple GPUs
+- multiple nodes

 You can use the following commands to test a dataset.
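A typical invocation looks like the following (a sketch; the config file, checkpoint, and GPU count are placeholders, and the `--eval` metrics depend on the dataset):

```shell
# single-GPU testing
python tools/test.py ${CONFIG_FILE} ${CHECKPOINT_FILE} --eval bbox

# single-node multi-GPU testing
./tools/dist_test.sh ${CONFIG_FILE} ${CHECKPOINT_FILE} ${GPU_NUM} --eval bbox
```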
diff --git a/docs/index.rst b/docs/index.rst
index 7d5aef62..c7a9cbed 100644
--- a/docs/index.rst
+++ b/docs/index.rst
@@ -6,13 +6,13 @@ Welcome to MMDetection's documentation!

    install.md
    getting_started.md
-   model_zoo.md
-   technical_details.md
-   compatibility.md
    config.md
-   tutorials/new_modules.md
-   tutorials/new_dataset.md
+   model_zoo.md
    tutorials/finetune.md
+   tutorials/new_dataset.md
+   tutorials/data_pipeline.md
+   tutorials/new_modules.md
+   compatibility.md
    changelog.md

diff --git a/docs/install.md b/docs/install.md
index f8d1fc33..a08b6529 100644
--- a/docs/install.md
+++ b/docs/install.md
@@ -3,19 +3,12 @@
 ### Requirements

 - Linux (Windows is not officially supported)
-- Python 3.5+
-- PyTorch 1.1 or higher
-- CUDA 9.0 or higher
-- NCCL 2
-- GCC 4.9 or higher
+- Python 3.6+
+- PyTorch 1.3+
+- CUDA 9.2+ (If you build PyTorch from source, CUDA 9.0 is also compatible)
+- GCC 4.9+
 - [mmcv](https://github.com/open-mmlab/mmcv)

-We have tested the following versions of OS and softwares:
-
-- OS: Ubuntu 16.04/18.04 and CentOS 7.2
-- CUDA: 9.0/9.2/10.0/10.1
-- NCCL: 2.1.15/2.2.13/2.3.7/2.4.2
-- GCC(G++): 4.9/5.3/5.4/7.3

 ### Install mmdetection

b. Install PyTorch and torchvision following the [official instructions](https://pytorch.org/), e.g.,

 ```shell
 conda install pytorch torchvision -c pytorch
 ```

+Note: Make sure that your compilation CUDA version and runtime CUDA version match.
+You can check the supported CUDA version for precompiled packages on the [PyTorch website](https://pytorch.org/).
+
+`E.g. 1` If you have CUDA 10.1 installed under `/usr/local/cuda` and would like to install
+PyTorch 1.5, you need to install the prebuilt PyTorch with CUDA 10.1.
+
+```shell
+conda install pytorch cudatoolkit=10.1 torchvision -c pytorch
+```
+
+`E.g. 2` If you have CUDA 9.2 installed under `/usr/local/cuda` and would like to install
+PyTorch 1.3.1, you need to install the prebuilt PyTorch with CUDA 9.2.
+
+```shell
+conda install pytorch=1.3.1 cudatoolkit=9.2 torchvision=0.4.2 -c pytorch
+```
+
+If you build PyTorch from source instead of installing the prebuilt package,
+you can use more CUDA versions such as 9.0.
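As a quick sanity check, you can inspect which CUDA version the installed PyTorch binary was compiled with (a sketch; the printed values are only examples):

```python
import torch

print(torch.__version__)          # e.g. 1.5.0
print(torch.version.cuda)         # CUDA version the binary was built with, e.g. '10.1'
print(torch.cuda.is_available())  # True if the GPU setup works at runtime
```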
 c. Clone the mmdetection repository.

 ```shell
 git clone https://github.com/open-mmlab/mmdetection.git
 cd mmdetection
 ```

@@ -82,60 +95,21 @@ We provide a [Dockerfile](https://github.com/open-mmlab/mmdetection/blob/master/

 docker build -t mmdetection docker/
 ```

-### Prepare datasets
-
-It is recommended to symlink the dataset root to `$MMDETECTION/data`.
-If your folder structure is different, you may need to change the corresponding paths in config files.
-
-```
-mmdetection
-├── mmdet
-├── tools
-├── configs
-├── data
-│   ├── coco
-│   │   ├── annotations
-│   │   ├── train2017
-│   │   ├── val2017
-│   │   ├── test2017
-│   ├── cityscapes
-│   │   ├── annotations
-│   │   ├── leftImg8bit
-│   │   │   ├── train
-│   │   │   ├── val
-│   │   ├── gtFine
-│   │   │   ├── train
-│   │   │   ├── val
-│   ├── VOCdevkit
-│   │   ├── VOC2007
-│   │   ├── VOC2012
-
-```
-The cityscapes annotations have to be converted into the coco format using `tools/convert_datasets/cityscapes.py`:
-```shell
-pip install cityscapesscripts
-python tools/convert_datasets/cityscapes.py ./data/cityscapes --nproc 8 --out_dir ./data/cityscapes/annotations
-```
-Current the config files in `cityscapes` use COCO pre-trained weights to initialize.
-You could download the pre-trained models in advance if network is unavailable or slow, otherwise it would cause errors at the beginning of training.
-
 ### A from-scratch setup script

-Here is a full script for setting up mmdetection with conda and link the dataset path (supposing that your COCO dataset path is $COCO_ROOT).
+Here is a full script for setting up mmdetection with conda.

 ```shell
 conda create -n open-mmlab python=3.7 -y
 conda activate open-mmlab

+# install the latest PyTorch prebuilt with the default CUDA version (usually the latest)
 conda install -c pytorch pytorch torchvision -y
 git clone https://github.com/open-mmlab/mmdetection.git
 cd mmdetection
 pip install -r requirements/build.txt
 pip install "git+https://github.com/cocodataset/cocoapi.git#subdirectory=PythonAPI"
 pip install -v -e .
-
-mkdir data
-ln -s $COCO_ROOT data
 ```
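After installation, a quick smoke test (a sketch) confirms that mmdet is importable and shows which version is active:

```shell
python -c "import mmdet; print(mmdet.__version__)"
```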
 ### Using multiple MMDetection versions

diff --git a/docs/model_zoo.md b/docs/model_zoo.md
index a2295c18..fb0f38a0 100644
--- a/docs/model_zoo.md
+++ b/docs/model_zoo.md
@@ -18,10 +18,10 @@ You can replace `https://s3.ap-northeast-2.amazonaws.com/open-mmlab` with `https

 ## Baselines

-More models with different backbones will be added to the model zoo.
-
 ### RPN

+Please refer to [RPN](https://github.com/open-mmlab/mmdetection/blob/master/configs/rpn) for details.
+
 ### Faster R-CNN

 Please refer to [Faster R-CNN](https://github.com/open-mmlab/mmdetection/blob/master/configs/faster_rcnn) for details.
@@ -38,14 +38,10 @@ Please refer to [Fast R-CNN](https://github.com/open-mmlab/mmdetection/blob/mast

 Please refer to [RetinaNet](https://github.com/open-mmlab/mmdetection/blob/master/configs/retinanet) for details.

-### Cascade R-CNN
+### Cascade R-CNN and Cascade Mask R-CNN

 Please refer to [Cascade R-CNN](https://github.com/open-mmlab/mmdetection/blob/master/configs/cascade_rcnn) for details.

-### Cascade Mask R-CNN
-
-Please refer to [Cascade Mask R-CNN](https://github.com/open-mmlab/mmdetection/blob/master/configs/cascade_rcnn) for details.
-
 ### Hybrid Task Cascade (HTC)

 Please refer to [HTC](https://github.com/open-mmlab/mmdetection/blob/master/configs/htc) for details.
@@ -133,13 +129,14 @@ We also benchmark some methods on [PASCAL VOC](https://github.com/open-mmlab/mmd

 ## Comparison with Detectron2

-We compare mmdetection with [Detectron2](https://github.com/facebookresearch/detectron2.git).
-The backbone used is R-50-FPN.
+We compare mmdetection with [Detectron2](https://github.com/facebookresearch/detectron2.git) in terms of speed and performance.
+We use the commit id [185c27e](https://github.com/facebookresearch/detectron2/tree/185c27e4b4d2d4c68b5627b3765420c6d7f5a659) (30/4/2020) of detectron2.
+For a fair comparison, we install and run both frameworks on the same machine.

 ### Hardware

-- 8 NVIDIA Tesla V100 GPUs
-- Intel Xeon 4114 CPU @ 2.20GHz
+- 8 NVIDIA Tesla V100 (32G) GPUs
+- Intel(R) Xeon(R) Gold 6148 CPU @ 2.40GHz

 ### Software environment

@@ -151,7 +148,7 @@

 ### Performance

-<table>
+<table border="1">
   <tr>
     <th>Type</th>
     <th>Lr schd</th>
@@ -197,11 +194,11 @@

 The training speed is measured with s/iter. The lower, the better.

-<table>
+<table border="1">
   <tr>
     <th>Type</th>
-    <th>Detectron2 (V100)</th>
-    <th>mmdetection (V100)</th>
+    <th>Detectron2</th>
+    <th>mmdetection</th>
   </tr>
   <tr>
     <td>Faster R-CNN</td>
@@ -226,34 +223,35 @@ The training speed is measure with s/iter. The lower, the better.
 The inference speed is measured with fps (img/s) on a single GPU, the higher, the better.
 To be consistent with Detectron2, we report the pure inference speed (without the time of data loading).
 For Mask R-CNN, we exclude the time of RLE encoding in post-processing.
-The speed in the brackets of detectron2 is tested using our own server, which is slightly slower than the official speed.
+We also include the officially reported speed in the parentheses, which is slightly higher
+than the results tested on our server due to differences in hardware.

-<table>
+<table border="1">
   <tr>
     <th>Type</th>
-    <th>Detectron2 (V100)</th>
-    <th>mmdetection (V100)</th>
+    <th>Detectron2</th>
+    <th>mmdetection</th>
   </tr>
   <tr>
     <td>Faster R-CNN</td>
-    <td>26.3 (25.6)</td>
+    <td>25.6 (26.3)</td>
     <td>22.2</td>
   </tr>
   <tr>
     <td>Mask R-CNN</td>
-    <td>23.3 (22.5)</td>
+    <td>22.5 (23.3)</td>
     <td>19.6</td>
   </tr>
   <tr>
     <td>Retinanet</td>
-    <td>18.2 (17.8)</td>
+    <td>17.8 (18.2)</td>
     <td>20.6</td>
   </tr>
 </table>

 ### Training memory

-<table>
+<table border="1">
   <tr>
     <th>Type</th>
     <th>Detectron2</th>
diff --git a/docs/technical_details.md b/docs/tutorials/data_pipeline.md
similarity index 52%
rename from docs/technical_details.md
rename to docs/tutorials/data_pipeline.md
index 91b0cfb9..05fe4395 100644
--- a/docs/technical_details.md
+++ b/docs/tutorials/data_pipeline.md
@@ -1,9 +1,6 @@
-# Technical Details
+# Tutorial 3: Custom Data Pipelines

-In this section, we will introduce the main units of training a detector:
-data pipeline, model and iteration pipeline.
-
-## Data pipeline
+## Design of Data Pipelines

 Following typical conventions, we use `Dataset` and `DataLoader` for data loading
 with multiple workers. `Dataset` returns a dict of data items corresponding
@@ -18,7 +15,7 @@ defines how to process the annotations and a data pipeline defines all the steps

 A pipeline consists of a sequence of operations. Each operation takes a dict as input and also outputs a dict for the next transform.
 We present a classical pipeline in the following figure. The blue blocks are pipeline operations. With the pipeline going on, each operator can add new keys (marked as green) to the result dict or update the existing keys (marked as orange).
-
+

 The operations are categorized into data loading, pre-processing, formatting and test-time augmentation.
@@ -127,100 +124,41 @@ For each operation, we list the related dict fields that are added/updated/remov

 `MultiScaleFlipAug`

-## Model
-
-In MMDetection, model components are basically categorized as 4 types.
-
-- backbone: usually a FCN network to extract feature maps, e.g., ResNet.
-- neck: the part between backbones and heads, e.g., FPN, ASPP.
-- head: the part for specific tasks, e.g., bbox prediction and mask prediction.
-- roi extractor: the part for extracting features from feature maps, e.g., RoI Align.
-
-We also write implement some general detection pipelines with the above components,
-such as `SingleStageDetector` and `TwoStageDetector`.
-
-### Build a model with basic components
+## Extend and use custom pipelines

-Following some basic pipelines (e.g., two-stage detectors), the model structure
-can be customized through config files with no pains.
-
-If we want to implement some new components, e.g, the path aggregation
-FPN structure in [Path Aggregation Network for Instance Segmentation](https://arxiv.org/abs/1803.01534), there are two things to do.
-
-1. create a new file in `mmdet/models/necks/pafpn.py`.
+1. Write a new pipeline in any file, e.g., `my_pipeline.py`. It takes a dict as input and returns a dict. (A quick way to check the transform is shown after the steps below.)

    ```python
-   from ..registry import NECKS
-
-   @NECKS.register
-   class PAFPN(nn.Module):
-
-       def __init__(self,
-                    in_channels,
-                    out_channels,
-                    num_outs,
-                    start_level=0,
-                    end_level=-1,
-                    add_extra_convs=False):
-           pass
-
-       def forward(self, inputs):
-           # implementation is ignored
-           pass
-   ```
+   from mmdet.datasets import PIPELINES

-2. Import the module in `mmdet/models/necks/__init__.py`.
+   @PIPELINES.register_module()
+   class MyTransform:

-   ```python
-   from .pafpn import PAFPN
+       def __call__(self, results):
+           results['dummy'] = True
+           return results
    ```

-2. modify the config file from
+2. Import the new class.

    ```python
-   neck=dict(
-       type='FPN',
-       in_channels=[256, 512, 1024, 2048],
-       out_channels=256,
-       num_outs=5)
+   from .my_pipeline import MyTransform
    ```

-   to
+3. Use it in config files.

    ```python
-   neck=dict(
-       type='PAFPN',
-       in_channels=[256, 512, 1024, 2048],
-       out_channels=256,
-       num_outs=5)
+   img_norm_cfg = dict(
+       mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
+   train_pipeline = [
+       dict(type='LoadImageFromFile'),
+       dict(type='LoadAnnotations', with_bbox=True),
+       dict(type='Resize', img_scale=(1333, 800), keep_ratio=True),
+       dict(type='RandomFlip', flip_ratio=0.5),
+       dict(type='Normalize', **img_norm_cfg),
+       dict(type='Pad', size_divisor=32),
+       dict(type='MyTransform'),
+       dict(type='DefaultFormatBundle'),
+       dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']),
+   ]
    ```
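To debug a custom transform quickly, you can build and run a pipeline outside of training with `Compose` (a sketch; the image path is a placeholder, and `MyTransform` must already be imported and registered as in step 2):

```python
from mmdet.datasets.pipelines import Compose

# a minimal pipeline: load an image, then apply the custom transform
pipeline = Compose([
    dict(type='LoadImageFromFile'),
    dict(type='MyTransform'),
])
results = dict(img_prefix='data/coco/train2017', img_info=dict(filename='xxx.jpg'))
results = pipeline(results)
print(results['dummy'])  # True if MyTransform ran
```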
-
-We will release more components (backbones, necks, heads) for research purpose.
-
-### Write a new model
-
-To write a new detection pipeline, you need to inherit from `BaseDetector`,
-which defines the following abstract methods.
-
-- `extract_feat()`: given an image batch of shape (n, c, h, w), extract the feature map(s).
-- `forward_train()`: forward method of the training mode
-- `simple_test()`: single scale testing without augmentation
-- `aug_test()`: testing with augmentation (multi-scale, flip, etc.)
-
-[TwoStageDetector](https://github.com/hellock/mmdetection/blob/master/mmdet/models/detectors/two_stage.py)
-is a good example which shows how to do that.
-
-## Iteration pipeline
-
-We adopt distributed training for both single machine and multiple machines.
-Supposing that the server has 8 GPUs, 8 processes will be started and each process runs on a single GPU.
-
-Each process keeps an isolated model, data loader, and optimizer.
-Model parameters are only synchronized once at the beginning.
-After a forward and backward pass, gradients will be allreduced among all GPUs,
-and the optimizer will update model parameters.
-Since the gradients are allreduced, the model parameter stays the same for all processes after the iteration.
-
-## Other information
-
-For more information, please refer to our [technical report](https://arxiv.org/abs/1906.07155).
diff --git a/docs/tutorials/finetune.md b/docs/tutorials/finetune.md
index b082eccb..17bc5d4c 100644
--- a/docs/tutorials/finetune.md
+++ b/docs/tutorials/finetune.md
@@ -1,4 +1,4 @@
-# Tutorial 3: Finetuning Models
+# Tutorial 1: Finetuning Models

 Detectors pre-trained on the COCO dataset can serve as good pre-trained models for other datasets, e.g., CityScapes and KITTI Dataset.
 This tutorial provides instructions for using the models provided in the [Model Zoo](../model_zoo.md) on other datasets to obtain better performance.
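For example, a minimal finetuning config might inherit from an existing COCO config, shrink the heads to the new dataset's classes, lower the learning rate, and load the pre-trained checkpoint (a sketch; the base config, class count, learning rate, and checkpoint path are all placeholders):

```python
_base_ = './mask_rcnn_r50_fpn_1x_coco.py'  # hypothetical base config

# Cityscapes has 8 instance classes
model = dict(
    roi_head=dict(
        bbox_head=dict(num_classes=8),
        mask_head=dict(num_classes=8)))

# use a smaller learning rate than training from scratch
optimizer = dict(type='SGD', lr=0.0025, momentum=0.9, weight_decay=0.0001)

# initialize from a COCO pre-trained checkpoint (placeholder path)
load_from = 'checkpoints/mask_rcnn_r50_fpn_1x_coco.pth'
```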
diff --git a/docs/tutorials/new_dataset.md b/docs/tutorials/new_dataset.md
index c7428e60..87f89c1c 100644
--- a/docs/tutorials/new_dataset.md
+++ b/docs/tutorials/new_dataset.md
@@ -1,4 +1,4 @@
-# Tutorials 2: Adding New Dataset
+# Tutorial 2: Adding New Dataset

 ## Customize datasets by reorganizing data

@@ -146,8 +146,11 @@ dataset_A_train = dict(
 ```

 ### Concatenate dataset
-There two ways to concaenate the dataset.
+
+There are two ways to concatenate the dataset.
+
 1. If the datasets you want to concatenate are of the same type but have different annotation files, you can concatenate the dataset configs like the following.
+
    ```python
    dataset_A_train = dict(
        type='Dataset_A',
        ann_file=['anno_file_1', 'anno_file_2'],
        pipeline=train_pipeline
    )
    ```

 2. If the datasets you want to concatenate are of different types, you can concatenate the dataset configs like the following.
+
    ```python
    dataset_A_train = dict()
    dataset_B_train = dict()
diff --git a/docs/tutorials/new_modules.md b/docs/tutorials/new_modules.md
index b23b8437..a23dc81b 100644
--- a/docs/tutorials/new_modules.md
+++ b/docs/tutorials/new_modules.md
@@ -1,4 +1,4 @@
-# Tutorials 1: Adding New Modules
+# Tutorial 4: Adding New Modules

 ## Customize optimizer

@@ -121,6 +121,48 @@ model = dict(
     ...

+### Add new necks
+
+Here we take PAFPN as an example.
+
+1. Create a new file in `mmdet/models/necks/pafpn.py`.
+
+   ```python
+   import torch.nn as nn
+
+   from ..builder import NECKS
+
+   @NECKS.register_module()
+   class PAFPN(nn.Module):
+
+       def __init__(self,
+                    in_channels,
+                    out_channels,
+                    num_outs,
+                    start_level=0,
+                    end_level=-1,
+                    add_extra_convs=False):
+           pass
+
+       def forward(self, inputs):
+           # implementation is ignored
+           pass
+   ```
+
+2. Import the module in `mmdet/models/necks/__init__.py`.
+
+   ```python
+   from .pafpn import PAFPN
+   ```
+
+3. Modify the config file.
+
+   ```python
+   neck=dict(
+       type='PAFPN',
+       in_channels=[256, 512, 1024, 2048],
+       out_channels=256,
+       num_outs=5)
+   ```
+
 ### Add new heads

 Here we show how to develop a new head with the example of [Double Head R-CNN](https://arxiv.org/abs/1904.06493) as the following.
diff --git a/mmdet/datasets/__init__.py b/mmdet/datasets/__init__.py
index f1500ca8..a162d65a 100644
--- a/mmdet/datasets/__init__.py
+++ b/mmdet/datasets/__init__.py
@@ -1,4 +1,4 @@
-from .builder import DATASETS, build_dataloader, build_dataset
+from .builder import DATASETS, PIPELINES, build_dataloader, build_dataset
 from .cityscapes import CityscapesDataset
 from .coco import CocoDataset
 from .custom import CustomDataset
@@ -12,5 +12,5 @@ __all__ = [
     'CustomDataset', 'XMLDataset', 'CocoDataset', 'VOCDataset',
     'CityscapesDataset', 'GroupSampler', 'DistributedGroupSampler',
     'DistributedSampler', 'build_dataloader', 'ConcatDataset', 'RepeatDataset',
-    'WIDERFaceDataset', 'DATASETS', 'build_dataset'
+    'WIDERFaceDataset', 'DATASETS', 'PIPELINES', 'build_dataset'
 ]
--
GitLab