From a805d4c2aec79d0350b12ecaee45f34f6baf7051 Mon Sep 17 00:00:00 2001 From: Kai Chen <chenkaidev@gmail.com> Date: Thu, 17 Jan 2019 18:59:30 +0800 Subject: [PATCH] add gn configs and models --- MODEL_ZOO.md | 16 ++ configs/mask_rcnn_r101_fpn_gn_2x.py | 177 +++++++++++++++++++++ configs/mask_rcnn_r50_fpn_gn_2x.py | 9 +- configs/mask_rcnn_r50_fpn_gn_contrib_2x.py | 177 +++++++++++++++++++++ 4 files changed, 373 insertions(+), 6 deletions(-) create mode 100644 configs/mask_rcnn_r101_fpn_gn_2x.py create mode 100644 configs/mask_rcnn_r50_fpn_gn_contrib_2x.py diff --git a/MODEL_ZOO.md b/MODEL_ZOO.md index c429922b..da6f4026 100644 --- a/MODEL_ZOO.md +++ b/MODEL_ZOO.md @@ -168,6 +168,22 @@ We released RPN, Faster R-CNN and Mask R-CNN models in the first version. More m - Inference time is reported for batch size = 1 and batch size = 8. - The speed difference between VOC and COCO is caused by model parameters and nms. +### Group Normalization (GN) + +| Backbone | model | Lr schd | Mem (GB) | Train time (s/iter) | Inf time (fps) | box AP | mask AP | Download | +|:-------------:|:----------:|:-------:|:--------:|:-------------------:|:--------------:|:------:|:-------:|:--------:| +| R-50-FPN (d) | Mask R-CNN | 2x | 7.2 | 0.806 | 5.4 | 39.9 | 36.1 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/mask_rcnn_r50_fpn_gn_2x_20180113-86832cf2.pth) | +| R-50-FPN (d) | Mask R-CNN | 3x | 7.2 | 0.806 | 5.4 | 40.2 | 36.5 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/mask_rcnn_r50_fpn_gn_3x_20180113-8e82f48d.pth) | +| R-101-FPN (d) | Mask R-CNN | 2x | 9.9 | 0.970 | 4.8 | 41.6 | 37.1 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/mask_rcnn_r101_fpn_gn_2x_20180113-9598649c.pth) | +| R-101-FPN (d) | Mask R-CNN | 3x | 9.9 | 0.970 | 4.8 | 41.7 | 37.3 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/mask_rcnn_r101_fpn_gn_3x_20180113-a14ffb96.pth) | +| R-50-FPN (c) | Mask R-CNN | 2x | 7.2 | 0.806 | 5.4 | 39.7 | 35.9 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/mask_rcnn_r50_fpn_gn_contrib_2x_20180113-ec93305c.pth) | +| R-50-FPN (c) | Mask R-CNN | 3x | 7.2 | 0.806 | 5.4 | 40.1 | 36.2 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/mask_rcnn_r50_fpn_gn_contrib_3x_20180113-9d230cab.pth) | + +**Notes:** +- (d) means pretrained model converted from Detectron, and (c) means the contributed model pretrained by [@thangvubk](https://github.com/thangvubk). +- The `3x` schedule is epoch [28, 34, 36]. +- The memory is measured with `torch.cuda.max_memory_allocated()` instead of `torch.cuda.max_memory_cached()`. We will update the memory usage of other models in the future. + ## Comparison with Detectron diff --git a/configs/mask_rcnn_r101_fpn_gn_2x.py b/configs/mask_rcnn_r101_fpn_gn_2x.py new file mode 100644 index 00000000..d15d1c0a --- /dev/null +++ b/configs/mask_rcnn_r101_fpn_gn_2x.py @@ -0,0 +1,177 @@ +# model settings +normalize = dict(type='GN', num_groups=32, frozen=False) + +model = dict( + type='MaskRCNN', + pretrained='open-mmlab://detectron/resnet101_gn', + backbone=dict( + type='ResNet', + depth=101, + num_stages=4, + out_indices=(0, 1, 2, 3), + frozen_stages=1, + style='pytorch', + normalize=normalize), + neck=dict( + type='FPN', + in_channels=[256, 512, 1024, 2048], + out_channels=256, + num_outs=5, + normalize=normalize), + rpn_head=dict( + type='RPNHead', + in_channels=256, + feat_channels=256, + anchor_scales=[8], + anchor_ratios=[0.5, 1.0, 2.0], + anchor_strides=[4, 8, 16, 32, 64], + target_means=[.0, .0, .0, .0], + target_stds=[1.0, 1.0, 1.0, 1.0], + use_sigmoid_cls=True), + bbox_roi_extractor=dict( + type='SingleRoIExtractor', + roi_layer=dict(type='RoIAlign', out_size=7, sample_num=2), + out_channels=256, + featmap_strides=[4, 8, 16, 32]), + bbox_head=dict( + type='ConvFCBBoxHead', + num_shared_convs=4, + num_shared_fcs=1, + in_channels=256, + conv_out_channels=256, + fc_out_channels=1024, + roi_feat_size=7, + num_classes=81, + target_means=[0., 0., 0., 0.], + target_stds=[0.1, 0.1, 0.2, 0.2], + reg_class_agnostic=False, + normalize=normalize), + mask_roi_extractor=dict( + type='SingleRoIExtractor', + roi_layer=dict(type='RoIAlign', out_size=14, sample_num=2), + out_channels=256, + featmap_strides=[4, 8, 16, 32]), + mask_head=dict( + type='FCNMaskHead', + num_convs=4, + in_channels=256, + conv_out_channels=256, + num_classes=81, + normalize=normalize)) + +# model training and testing settings +train_cfg = dict( + rpn=dict( + assigner=dict( + type='MaxIoUAssigner', + pos_iou_thr=0.7, + neg_iou_thr=0.3, + min_pos_iou=0.3, + ignore_iof_thr=-1), + sampler=dict( + type='RandomSampler', + num=256, + pos_fraction=0.5, + neg_pos_ub=-1, + add_gt_as_proposals=False), + allowed_border=0, + pos_weight=-1, + smoothl1_beta=1 / 9.0, + debug=False), + rcnn=dict( + assigner=dict( + type='MaxIoUAssigner', + pos_iou_thr=0.5, + neg_iou_thr=0.5, + min_pos_iou=0.5, + ignore_iof_thr=-1), + sampler=dict( + type='RandomSampler', + num=512, + pos_fraction=0.25, + neg_pos_ub=-1, + add_gt_as_proposals=True), + mask_size=28, + pos_weight=-1, + debug=False)) +test_cfg = dict( + rpn=dict( + nms_across_levels=False, + nms_pre=2000, + nms_post=2000, + max_num=2000, + nms_thr=0.7, + min_bbox_size=0), + rcnn=dict( + score_thr=0.05, + nms=dict(type='nms', iou_thr=0.5), + max_per_img=100, + mask_thr_binary=0.5)) +# dataset settings +dataset_type = 'CocoDataset' +data_root = 'data/coco/' +img_norm_cfg = dict( + mean=[102.9801, 115.9465, 122.7717], std=[1.0, 1.0, 1.0], to_rgb=False) +data = dict( + imgs_per_gpu=2, + workers_per_gpu=2, + train=dict( + type=dataset_type, + ann_file=data_root + 'annotations/instances_train2017.json', + img_prefix=data_root + 'train2017/', + img_scale=(1333, 800), + img_norm_cfg=img_norm_cfg, + size_divisor=32, + flip_ratio=0.5, + with_mask=True, + with_crowd=True, + with_label=True), + val=dict( + type=dataset_type, + ann_file=data_root + 'annotations/instances_val2017.json', + img_prefix=data_root + 'val2017/', + img_scale=(1333, 800), + img_norm_cfg=img_norm_cfg, + size_divisor=32, + flip_ratio=0, + with_mask=True, + with_crowd=True, + with_label=True), + test=dict( + type=dataset_type, + ann_file=data_root + 'annotations/instances_val2017.json', + img_prefix=data_root + 'val2017/', + img_scale=(1333, 800), + img_norm_cfg=img_norm_cfg, + size_divisor=32, + flip_ratio=0, + with_mask=False, + with_label=False, + test_mode=True)) +# optimizer +optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001) +optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) +# learning policy +lr_config = dict( + policy='step', + warmup='linear', + warmup_iters=500, + warmup_ratio=1.0 / 3, + step=[16, 22]) +checkpoint_config = dict(interval=1) +# yapf:disable +log_config = dict( + interval=50, + hooks=[ + dict(type='TextLoggerHook'), + # dict(type='TensorboardLoggerHook') + ]) +# yapf:enable +# runtime settings +total_epochs = 24 +dist_params = dict(backend='nccl') +log_level = 'INFO' +work_dir = './work_dirs/mask_rcnn_r101_fpn_gn_2x' +load_from = None +resume_from = None +workflow = [('train', 1)] diff --git a/configs/mask_rcnn_r50_fpn_gn_2x.py b/configs/mask_rcnn_r50_fpn_gn_2x.py index c21bc3b3..da07ce8d 100644 --- a/configs/mask_rcnn_r50_fpn_gn_2x.py +++ b/configs/mask_rcnn_r50_fpn_gn_2x.py @@ -1,12 +1,9 @@ # model settings -normalize = dict( - type='GN', - num_groups=32, - frozen=False) +normalize = dict(type='GN', num_groups=32, frozen=False) model = dict( type='MaskRCNN', - pretrained='open-mmlab://contrib/resnet50_gn', + pretrained='open-mmlab://detectron/resnet50_gn', backbone=dict( type='ResNet', depth=50, @@ -114,7 +111,7 @@ test_cfg = dict( dataset_type = 'CocoDataset' data_root = 'data/coco/' img_norm_cfg = dict( - mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) + mean=[102.9801, 115.9465, 122.7717], std=[1.0, 1.0, 1.0], to_rgb=False) data = dict( imgs_per_gpu=2, workers_per_gpu=2, diff --git a/configs/mask_rcnn_r50_fpn_gn_contrib_2x.py b/configs/mask_rcnn_r50_fpn_gn_contrib_2x.py new file mode 100644 index 00000000..bffb778b --- /dev/null +++ b/configs/mask_rcnn_r50_fpn_gn_contrib_2x.py @@ -0,0 +1,177 @@ +# model settings +normalize = dict(type='GN', num_groups=32, frozen=False) + +model = dict( + type='MaskRCNN', + pretrained='open-mmlab://contrib/resnet50_gn', + backbone=dict( + type='ResNet', + depth=50, + num_stages=4, + out_indices=(0, 1, 2, 3), + frozen_stages=1, + style='pytorch', + normalize=normalize), + neck=dict( + type='FPN', + in_channels=[256, 512, 1024, 2048], + out_channels=256, + num_outs=5, + normalize=normalize), + rpn_head=dict( + type='RPNHead', + in_channels=256, + feat_channels=256, + anchor_scales=[8], + anchor_ratios=[0.5, 1.0, 2.0], + anchor_strides=[4, 8, 16, 32, 64], + target_means=[.0, .0, .0, .0], + target_stds=[1.0, 1.0, 1.0, 1.0], + use_sigmoid_cls=True), + bbox_roi_extractor=dict( + type='SingleRoIExtractor', + roi_layer=dict(type='RoIAlign', out_size=7, sample_num=2), + out_channels=256, + featmap_strides=[4, 8, 16, 32]), + bbox_head=dict( + type='ConvFCBBoxHead', + num_shared_convs=4, + num_shared_fcs=1, + in_channels=256, + conv_out_channels=256, + fc_out_channels=1024, + roi_feat_size=7, + num_classes=81, + target_means=[0., 0., 0., 0.], + target_stds=[0.1, 0.1, 0.2, 0.2], + reg_class_agnostic=False, + normalize=normalize), + mask_roi_extractor=dict( + type='SingleRoIExtractor', + roi_layer=dict(type='RoIAlign', out_size=14, sample_num=2), + out_channels=256, + featmap_strides=[4, 8, 16, 32]), + mask_head=dict( + type='FCNMaskHead', + num_convs=4, + in_channels=256, + conv_out_channels=256, + num_classes=81, + normalize=normalize)) + +# model training and testing settings +train_cfg = dict( + rpn=dict( + assigner=dict( + type='MaxIoUAssigner', + pos_iou_thr=0.7, + neg_iou_thr=0.3, + min_pos_iou=0.3, + ignore_iof_thr=-1), + sampler=dict( + type='RandomSampler', + num=256, + pos_fraction=0.5, + neg_pos_ub=-1, + add_gt_as_proposals=False), + allowed_border=0, + pos_weight=-1, + smoothl1_beta=1 / 9.0, + debug=False), + rcnn=dict( + assigner=dict( + type='MaxIoUAssigner', + pos_iou_thr=0.5, + neg_iou_thr=0.5, + min_pos_iou=0.5, + ignore_iof_thr=-1), + sampler=dict( + type='RandomSampler', + num=512, + pos_fraction=0.25, + neg_pos_ub=-1, + add_gt_as_proposals=True), + mask_size=28, + pos_weight=-1, + debug=False)) +test_cfg = dict( + rpn=dict( + nms_across_levels=False, + nms_pre=2000, + nms_post=2000, + max_num=2000, + nms_thr=0.7, + min_bbox_size=0), + rcnn=dict( + score_thr=0.05, + nms=dict(type='nms', iou_thr=0.5), + max_per_img=100, + mask_thr_binary=0.5)) +# dataset settings +dataset_type = 'CocoDataset' +data_root = 'data/coco/' +img_norm_cfg = dict( + mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) +data = dict( + imgs_per_gpu=2, + workers_per_gpu=2, + train=dict( + type=dataset_type, + ann_file=data_root + 'annotations/instances_train2017.json', + img_prefix=data_root + 'train2017/', + img_scale=(1333, 800), + img_norm_cfg=img_norm_cfg, + size_divisor=32, + flip_ratio=0.5, + with_mask=True, + with_crowd=True, + with_label=True), + val=dict( + type=dataset_type, + ann_file=data_root + 'annotations/instances_val2017.json', + img_prefix=data_root + 'val2017/', + img_scale=(1333, 800), + img_norm_cfg=img_norm_cfg, + size_divisor=32, + flip_ratio=0, + with_mask=True, + with_crowd=True, + with_label=True), + test=dict( + type=dataset_type, + ann_file=data_root + 'annotations/instances_val2017.json', + img_prefix=data_root + 'val2017/', + img_scale=(1333, 800), + img_norm_cfg=img_norm_cfg, + size_divisor=32, + flip_ratio=0, + with_mask=False, + with_label=False, + test_mode=True)) +# optimizer +optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001) +optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) +# learning policy +lr_config = dict( + policy='step', + warmup='linear', + warmup_iters=500, + warmup_ratio=1.0 / 3, + step=[16, 22]) +checkpoint_config = dict(interval=1) +# yapf:disable +log_config = dict( + interval=50, + hooks=[ + dict(type='TextLoggerHook'), + # dict(type='TensorboardLoggerHook') + ]) +# yapf:enable +# runtime settings +total_epochs = 24 +dist_params = dict(backend='nccl') +log_level = 'INFO' +work_dir = './work_dirs/mask_rcnn_r50_fpn_gn_contrib_2x' +load_from = None +resume_from = None +workflow = [('train', 1)] -- GitLab