diff --git a/README.md b/README.md index 4a5c5d299e8bff1afd3aa34c4b56cd0f32169e0e..b441e7bbeffb3ef6a181b2e829666fce0be8173a 100644 --- a/README.md +++ b/README.md @@ -76,7 +76,7 @@ Other features - [x] [DCNv2](configs/dcn/README.md) - [x] [Group Normalization](configs/gn/README.md) - [x] [Weight Standardization](configs/gn+ws/README.md) -- [x] [OHEM](configs/faster_rcnn_ohem_r50_fpn_1x.py) +- [x] [OHEM](configs/faster_rcnn_ohem_r50_fpn_1x_coco.py) - [x] Soft-NMS - [x] [Generalized Attention](configs/empirical_attention/README.md) - [x] [GCNet](configs/gcnet/README.md) diff --git a/configs/_base_/datasets/cityscapes_detection.py b/configs/_base_/datasets/cityscapes_detection.py new file mode 100644 index 0000000000000000000000000000000000000000..abbc082c46be7118206003855f0359a7492b974c --- /dev/null +++ b/configs/_base_/datasets/cityscapes_detection.py @@ -0,0 +1,55 @@ +dataset_type = 'CityscapesDataset' +data_root = 'data/cityscapes/' +img_norm_cfg = dict( + mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations', with_bbox=True), + dict( + type='Resize', img_scale=[(2048, 800), (2048, 1024)], keep_ratio=True), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(2048, 1024), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] +data = dict( + imgs_per_gpu=1, + workers_per_gpu=2, + train=dict( + type='RepeatDataset', + times=8, + dataset=dict( + type=dataset_type, + ann_file=data_root + + 
'annotations/instancesonly_filtered_gtFine_train.json', + img_prefix=data_root + 'leftImg8bit/train/', + pipeline=train_pipeline)), + val=dict( + type=dataset_type, + ann_file=data_root + + 'annotations/instancesonly_filtered_gtFine_val.json', + img_prefix=data_root + 'leftImg8bit/val/', + pipeline=test_pipeline), + test=dict( + type=dataset_type, + ann_file=data_root + + 'annotations/instancesonly_filtered_gtFine_test.json', + img_prefix=data_root + 'leftImg8bit/test/', + pipeline=test_pipeline)) +evaluation = dict(interval=1, metric='bbox') diff --git a/configs/_base_/datasets/cityscapes_instance.py b/configs/_base_/datasets/cityscapes_instance.py new file mode 100644 index 0000000000000000000000000000000000000000..669132ebca25a524bff678df4623c42d8819bbb2 --- /dev/null +++ b/configs/_base_/datasets/cityscapes_instance.py @@ -0,0 +1,16 @@ +_base_ = './cityscapes_detection.py' +img_norm_cfg = dict( + mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations', with_bbox=True, with_mask=True), + dict( + type='Resize', img_scale=[(2048, 800), (2048, 1024)], keep_ratio=True), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']), +] +data = dict(train=dict(dataset=dict(pipeline=train_pipeline))) +evaluation = dict(metric=['bbox', 'segm']) diff --git a/configs/_base_/datasets/coco_detection.py b/configs/_base_/datasets/coco_detection.py new file mode 100644 index 0000000000000000000000000000000000000000..116a660a9c01c9c83857ea3a1ad258ef56800b99 --- /dev/null +++ b/configs/_base_/datasets/coco_detection.py @@ -0,0 +1,48 @@ +dataset_type = 'CocoDataset' +data_root = 'data/coco/' +img_norm_cfg = dict( + mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) +train_pipeline = [ + 
dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations', with_bbox=True), + dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(1333, 800), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] +data = dict( + imgs_per_gpu=2, + workers_per_gpu=2, + train=dict( + type=dataset_type, + ann_file=data_root + 'annotations/instances_train2017.json', + img_prefix=data_root + 'train2017/', + pipeline=train_pipeline), + val=dict( + type=dataset_type, + ann_file=data_root + 'annotations/instances_val2017.json', + img_prefix=data_root + 'val2017/', + pipeline=test_pipeline), + test=dict( + type=dataset_type, + ann_file=data_root + 'annotations/instances_val2017.json', + img_prefix=data_root + 'val2017/', + pipeline=test_pipeline)) +evaluation = dict(interval=1, metric='bbox') diff --git a/configs/_base_/datasets/coco_instance.py b/configs/_base_/datasets/coco_instance.py new file mode 100644 index 0000000000000000000000000000000000000000..3d3f0102ce68b6ad0f1b81604f7397bb5310dda1 --- /dev/null +++ b/configs/_base_/datasets/coco_instance.py @@ -0,0 +1,15 @@ +_base_ = 'coco_detection.py' +img_norm_cfg = dict( + mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations', with_bbox=True, with_mask=True), + dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='Normalize', 
**img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']), +] +data = dict(train=dict(pipeline=train_pipeline)) +evaluation = dict(metric=['bbox', 'segm']) diff --git a/configs/_base_/datasets/coco_instance_semantic.py b/configs/_base_/datasets/coco_instance_semantic.py new file mode 100644 index 0000000000000000000000000000000000000000..f4d124ba2956d7cc84f803fd8bf4ab53aa124d05 --- /dev/null +++ b/configs/_base_/datasets/coco_instance_semantic.py @@ -0,0 +1,40 @@ +_base_ = 'coco_detection.py' +data_root = 'data/coco/' +img_norm_cfg = dict( + mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='LoadAnnotations', with_bbox=True, with_mask=True, with_seg=True), + dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='SegRescale', scale_factor=1 / 8), + dict(type='DefaultFormatBundle'), + dict( + type='Collect', + keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks', 'gt_semantic_seg']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(1333, 800), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] +data = dict( + train=dict( + seg_prefix=data_root + 'stuffthingmaps/train2017/', + pipeline=train_pipeline), + val=dict(pipeline=test_pipeline), + test=dict(pipeline=test_pipeline)) +evaluation = dict(metric=['bbox', 'segm']) diff --git a/configs/_base_/datasets/voc0712.py b/configs/_base_/datasets/voc0712.py new file mode 100644 index 
0000000000000000000000000000000000000000..fbb7ec820f096ec4f639332e5cc25ef669e3bb4a --- /dev/null +++ b/configs/_base_/datasets/voc0712.py @@ -0,0 +1,55 @@ +# dataset settings +dataset_type = 'VOCDataset' +data_root = 'data/VOCdevkit/' +img_norm_cfg = dict( + mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations', with_bbox=True), + dict(type='Resize', img_scale=(1000, 600), keep_ratio=True), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(1000, 600), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] +data = dict( + imgs_per_gpu=2, + workers_per_gpu=2, + train=dict( + type='RepeatDataset', + times=3, + dataset=dict( + type=dataset_type, + ann_file=[ + data_root + 'VOC2007/ImageSets/Main/trainval.txt', + data_root + 'VOC2012/ImageSets/Main/trainval.txt' + ], + img_prefix=[data_root + 'VOC2007/', data_root + 'VOC2012/'], + pipeline=train_pipeline)), + val=dict( + type=dataset_type, + ann_file=data_root + 'VOC2007/ImageSets/Main/test.txt', + img_prefix=data_root + 'VOC2007/', + pipeline=test_pipeline), + test=dict( + type=dataset_type, + ann_file=data_root + 'VOC2007/ImageSets/Main/test.txt', + img_prefix=data_root + 'VOC2007/', + pipeline=test_pipeline)) +evaluation = dict(interval=1, metric='mAP') diff --git a/configs/_base_/datasets/wider_face.py b/configs/_base_/datasets/wider_face.py new file mode 100644 index 
0000000000000000000000000000000000000000..c651c2cebebb2b142b35afe32875b04ac2efc226 --- /dev/null +++ b/configs/_base_/datasets/wider_face.py @@ -0,0 +1,63 @@ +# dataset settings +dataset_type = 'WIDERFaceDataset' +data_root = 'data/WIDERFace/' +img_norm_cfg = dict(mean=[123.675, 116.28, 103.53], std=[1, 1, 1], to_rgb=True) +train_pipeline = [ + dict(type='LoadImageFromFile', to_float32=True), + dict(type='LoadAnnotations', with_bbox=True), + dict( + type='PhotoMetricDistortion', + brightness_delta=32, + contrast_range=(0.5, 1.5), + saturation_range=(0.5, 1.5), + hue_delta=18), + dict( + type='Expand', + mean=img_norm_cfg['mean'], + to_rgb=img_norm_cfg['to_rgb'], + ratio_range=(1, 4)), + dict( + type='MinIoURandomCrop', + min_ious=(0.1, 0.3, 0.5, 0.7, 0.9), + min_crop_size=0.3), + dict(type='Resize', img_scale=(300, 300), keep_ratio=False), + dict(type='Normalize', **img_norm_cfg), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(300, 300), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=False), + dict(type='Normalize', **img_norm_cfg), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] +data = dict( + imgs_per_gpu=60, + workers_per_gpu=2, + train=dict( + type='RepeatDataset', + times=2, + dataset=dict( + type=dataset_type, + ann_file=data_root + 'train.txt', + img_prefix=data_root + 'WIDER_train/', + min_size=17, + pipeline=train_pipeline)), + val=dict( + type=dataset_type, + ann_file=data_root + 'val.txt', + img_prefix=data_root + 'WIDER_val/', + pipeline=test_pipeline), + test=dict( + type=dataset_type, + ann_file=data_root + 'val.txt', + img_prefix=data_root + 'WIDER_val/', + pipeline=test_pipeline)) diff --git a/configs/_base_/default_runtime.py b/configs/_base_/default_runtime.py new file mode 100644 index 
0000000000000000000000000000000000000000..594de8dcc99b9e4fc0208f327a05910a95a1793c --- /dev/null +++ b/configs/_base_/default_runtime.py @@ -0,0 +1,14 @@ +checkpoint_config = dict(interval=1) +# yapf:disable +log_config = dict( + interval=50, + hooks=[ + dict(type='TextLoggerHook'), + # dict(type='TensorboardLoggerHook') + ]) +# yapf:enable +dist_params = dict(backend='nccl') +log_level = 'INFO' +load_from = None +resume_from = None +workflow = [('train', 1)] diff --git a/configs/cascade_mask_rcnn_r50_fpn_1x.py b/configs/_base_/models/cascade_mask_rcnn_r50_fpn.py similarity index 67% rename from configs/cascade_mask_rcnn_r50_fpn_1x.py rename to configs/_base_/models/cascade_mask_rcnn_r50_fpn.py index 459eb17ae6d97804df623ad81aab777d8e06724c..3e86275a46cef8c59d5828645b0da99a02fb0ef3 100644 --- a/configs/cascade_mask_rcnn_r50_fpn_1x.py +++ b/configs/_base_/models/cascade_mask_rcnn_r50_fpn.py @@ -35,8 +35,7 @@ model = dict( featmap_strides=[4, 8, 16, 32]), bbox_head=[ dict( - type='SharedFCBBoxHead', - num_fcs=2, + type='Shared2FCBBoxHead', in_channels=256, fc_out_channels=1024, roi_feat_size=7, @@ -48,8 +47,7 @@ model = dict( type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0)), dict( - type='SharedFCBBoxHead', - num_fcs=2, + type='Shared2FCBBoxHead', in_channels=256, fc_out_channels=1024, roi_feat_size=7, @@ -61,8 +59,7 @@ model = dict( type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0)), dict( - type='SharedFCBBoxHead', - num_fcs=2, + type='Shared2FCBBoxHead', in_channels=256, fc_out_channels=1024, roi_feat_size=7, @@ -176,79 +173,3 @@ test_cfg = dict( nms=dict(type='nms', iou_thr=0.5), max_per_img=100, mask_thr_binary=0.5)) -# dataset settings -dataset_type = 'CocoDataset' -data_root = 'data/coco/' -img_norm_cfg = dict( - mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) -train_pipeline = [ - 
dict(type='LoadImageFromFile'), - dict(type='LoadAnnotations', with_bbox=True, with_mask=True), - dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), - dict(type='RandomFlip', flip_ratio=0.5), - dict(type='Normalize', **img_norm_cfg), - dict(type='Pad', size_divisor=32), - dict(type='DefaultFormatBundle'), - dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']), -] -test_pipeline = [ - dict(type='LoadImageFromFile'), - dict( - type='MultiScaleFlipAug', - img_scale=(1333, 800), - flip=False, - transforms=[ - dict(type='Resize', keep_ratio=True), - dict(type='RandomFlip'), - dict(type='Normalize', **img_norm_cfg), - dict(type='Pad', size_divisor=32), - dict(type='ImageToTensor', keys=['img']), - dict(type='Collect', keys=['img']), - ]) -] -data = dict( - imgs_per_gpu=2, - workers_per_gpu=2, - train=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_train2017.json', - img_prefix=data_root + 'train2017/', - pipeline=train_pipeline), - val=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_val2017.json', - img_prefix=data_root + 'val2017/', - pipeline=test_pipeline), - test=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_val2017.json', - img_prefix=data_root + 'val2017/', - pipeline=test_pipeline)) -evaluation = dict(interval=1, metric=['bbox', 'segm']) -# optimizer -optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001) -optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) -# learning policy -lr_config = dict( - policy='step', - warmup='linear', - warmup_iters=500, - warmup_ratio=1.0 / 3, - step=[8, 11]) -checkpoint_config = dict(interval=1) -# yapf:disable -log_config = dict( - interval=50, - hooks=[ - dict(type='TextLoggerHook'), - # dict(type='TensorboardLoggerHook') - ]) -# yapf:enable -# runtime settings -total_epochs = 12 -dist_params = dict(backend='nccl') -log_level = 'INFO' -work_dir = './work_dirs/cascade_mask_rcnn_r50_fpn_1x' 
-load_from = None -resume_from = None -workflow = [('train', 1)] diff --git a/configs/cascade_rcnn_r50_fpn_1x.py b/configs/_base_/models/cascade_rcnn_r50_fpn.py similarity index 65% rename from configs/cascade_rcnn_r50_fpn_1x.py rename to configs/_base_/models/cascade_rcnn_r50_fpn.py index 6f7940307a70eee63822be18b85dc8a94e8dd4f6..274f8cbd7ce2d98dd3b33cc6716d90c40260ce63 100644 --- a/configs/cascade_rcnn_r50_fpn_1x.py +++ b/configs/_base_/models/cascade_rcnn_r50_fpn.py @@ -35,8 +35,7 @@ model = dict( featmap_strides=[4, 8, 16, 32]), bbox_head=[ dict( - type='SharedFCBBoxHead', - num_fcs=2, + type='Shared2FCBBoxHead', in_channels=256, fc_out_channels=1024, roi_feat_size=7, @@ -48,8 +47,7 @@ model = dict( type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0)), dict( - type='SharedFCBBoxHead', - num_fcs=2, + type='Shared2FCBBoxHead', in_channels=256, fc_out_channels=1024, roi_feat_size=7, @@ -61,8 +59,7 @@ model = dict( type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0)), dict( - type='SharedFCBBoxHead', - num_fcs=2, + type='Shared2FCBBoxHead', in_channels=256, fc_out_channels=1024, roi_feat_size=7, @@ -157,79 +154,3 @@ test_cfg = dict( min_bbox_size=0), rcnn=dict( score_thr=0.05, nms=dict(type='nms', iou_thr=0.5), max_per_img=100)) -# dataset settings -dataset_type = 'CocoDataset' -data_root = 'data/coco/' -img_norm_cfg = dict( - mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) -train_pipeline = [ - dict(type='LoadImageFromFile'), - dict(type='LoadAnnotations', with_bbox=True), - dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), - dict(type='RandomFlip', flip_ratio=0.5), - dict(type='Normalize', **img_norm_cfg), - dict(type='Pad', size_divisor=32), - dict(type='DefaultFormatBundle'), - dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']), -] -test_pipeline = [ - 
dict(type='LoadImageFromFile'), - dict( - type='MultiScaleFlipAug', - img_scale=(1333, 800), - flip=False, - transforms=[ - dict(type='Resize', keep_ratio=True), - dict(type='RandomFlip'), - dict(type='Normalize', **img_norm_cfg), - dict(type='Pad', size_divisor=32), - dict(type='ImageToTensor', keys=['img']), - dict(type='Collect', keys=['img']), - ]) -] -data = dict( - imgs_per_gpu=2, - workers_per_gpu=2, - train=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_train2017.json', - img_prefix=data_root + 'train2017/', - pipeline=train_pipeline), - val=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_val2017.json', - img_prefix=data_root + 'val2017/', - pipeline=test_pipeline), - test=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_val2017.json', - img_prefix=data_root + 'val2017/', - pipeline=test_pipeline)) -evaluation = dict(interval=1, metric='bbox') -# optimizer -optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001) -optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) -# learning policy -lr_config = dict( - policy='step', - warmup='linear', - warmup_iters=500, - warmup_ratio=1.0 / 3, - step=[8, 11]) -checkpoint_config = dict(interval=1) -# yapf:disable -log_config = dict( - interval=50, - hooks=[ - dict(type='TextLoggerHook'), - # dict(type='TensorboardLoggerHook') - ]) -# yapf:enable -# runtime settings -total_epochs = 12 -dist_params = dict(backend='nccl') -log_level = 'INFO' -work_dir = './work_dirs/cascade_rcnn_r50_fpn_1x' -load_from = None -resume_from = None -workflow = [('train', 1)] diff --git a/configs/_base_/models/fast_rcnn_r50_fpn.py b/configs/_base_/models/fast_rcnn_r50_fpn.py new file mode 100644 index 0000000000000000000000000000000000000000..a6a7b17a0543ba6b52a349c9bb7f647af77c582e --- /dev/null +++ b/configs/_base_/models/fast_rcnn_r50_fpn.py @@ -0,0 +1,54 @@ +# model settings +model = dict( + type='FastRCNN', + 
pretrained='torchvision://resnet50', + backbone=dict( + type='ResNet', + depth=50, + num_stages=4, + out_indices=(0, 1, 2, 3), + frozen_stages=1, + norm_cfg=dict(type='BN', requires_grad=True), + style='pytorch'), + neck=dict( + type='FPN', + in_channels=[256, 512, 1024, 2048], + out_channels=256, + num_outs=5), + bbox_roi_extractor=dict( + type='SingleRoIExtractor', + roi_layer=dict(type='RoIAlign', out_size=7, sample_num=2), + out_channels=256, + featmap_strides=[4, 8, 16, 32]), + bbox_head=dict( + type='Shared2FCBBoxHead', + in_channels=256, + fc_out_channels=1024, + roi_feat_size=7, + num_classes=81, + target_means=[0., 0., 0., 0.], + target_stds=[0.1, 0.1, 0.2, 0.2], + reg_class_agnostic=False, + loss_cls=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), + loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0))) +# model training and testing settings +train_cfg = dict( + rcnn=dict( + assigner=dict( + type='MaxIoUAssigner', + pos_iou_thr=0.5, + neg_iou_thr=0.5, + min_pos_iou=0.5, + ignore_iof_thr=-1), + sampler=dict( + type='RandomSampler', + num=512, + pos_fraction=0.25, + neg_pos_ub=-1, + add_gt_as_proposals=True), + pos_weight=-1, + debug=False)) +test_cfg = dict( + rcnn=dict( + score_thr=0.05, nms=dict(type='nms', iou_thr=0.5), max_per_img=100)) diff --git a/configs/faster_rcnn_r50_caffe_c4_1x.py b/configs/_base_/models/faster_rcnn_r50_caffe_c4.py similarity index 55% rename from configs/faster_rcnn_r50_caffe_c4_1x.py rename to configs/_base_/models/faster_rcnn_r50_caffe_c4.py index fcac778c97eaca9a1d9dfd1c10a7a9bd10d32c53..14ca1ab1811c12f8b890a163675bd61fa281b2b4 100644 --- a/configs/faster_rcnn_r50_caffe_c4_1x.py +++ b/configs/_base_/models/faster_rcnn_r50_caffe_c4.py @@ -102,79 +102,3 @@ test_cfg = dict( min_bbox_size=0), rcnn=dict( score_thr=0.05, nms=dict(type='nms', iou_thr=0.5), max_per_img=100)) -# dataset settings -dataset_type = 'CocoDataset' -data_root = 'data/coco/' -img_norm_cfg = dict( - mean=[102.9801, 115.9465, 
122.7717], std=[1.0, 1.0, 1.0], to_rgb=False) -train_pipeline = [ - dict(type='LoadImageFromFile'), - dict(type='LoadAnnotations', with_bbox=True), - dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), - dict(type='RandomFlip', flip_ratio=0.5), - dict(type='Normalize', **img_norm_cfg), - dict(type='Pad', size_divisor=32), - dict(type='DefaultFormatBundle'), - dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']), -] -test_pipeline = [ - dict(type='LoadImageFromFile'), - dict( - type='MultiScaleFlipAug', - img_scale=(1333, 800), - flip=False, - transforms=[ - dict(type='Resize', keep_ratio=True), - dict(type='RandomFlip'), - dict(type='Normalize', **img_norm_cfg), - dict(type='Pad', size_divisor=32), - dict(type='ImageToTensor', keys=['img']), - dict(type='Collect', keys=['img']), - ]) -] -data = dict( - imgs_per_gpu=2, - workers_per_gpu=2, - train=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_train2017.json', - img_prefix=data_root + 'train2017/', - pipeline=train_pipeline), - val=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_val2017.json', - img_prefix=data_root + 'val2017/', - pipeline=test_pipeline), - test=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_val2017.json', - img_prefix=data_root + 'val2017/', - pipeline=test_pipeline)) -evaluation = dict(interval=1, metric='bbox') -# optimizer -optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001) -optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) -# learning policy -lr_config = dict( - policy='step', - warmup='linear', - warmup_iters=500, - warmup_ratio=1.0 / 3, - step=[8, 11]) -checkpoint_config = dict(interval=1) -# yapf:disable -log_config = dict( - interval=50, - hooks=[ - dict(type='TextLoggerHook'), - # dict(type='TensorboardLoggerHook') - ]) -# yapf:enable -# runtime settings -total_epochs = 12 -dist_params = dict(backend='nccl') -log_level = 'INFO' -work_dir = 
'./work_dirs/faster_rcnn_r50_caffe_c4_1x' -load_from = None -resume_from = None -workflow = [('train', 1)] diff --git a/configs/faster_rcnn_r50_fpn_1x.py b/configs/_base_/models/faster_rcnn_r50_fpn.py similarity index 54% rename from configs/faster_rcnn_r50_fpn_1x.py rename to configs/_base_/models/faster_rcnn_r50_fpn.py index e1659fedb6138e2f9e7877a2948c52bc7ff24cff..05c6ad105c4cc4239a049b39ea4907df316a87a1 100644 --- a/configs/faster_rcnn_r50_fpn_1x.py +++ b/configs/_base_/models/faster_rcnn_r50_fpn.py @@ -1,4 +1,3 @@ -# model settings model = dict( type='FasterRCNN', pretrained='torchvision://resnet50', @@ -33,8 +32,7 @@ model = dict( out_channels=256, featmap_strides=[4, 8, 16, 32]), bbox_head=dict( - type='SharedFCBBoxHead', - num_fcs=2, + type='Shared2FCBBoxHead', in_channels=256, fc_out_channels=1024, roi_feat_size=7, @@ -98,79 +96,3 @@ test_cfg = dict( # soft-nms is also supported for rcnn testing # e.g., nms=dict(type='soft_nms', iou_thr=0.5, min_score=0.05) ) -# dataset settings -dataset_type = 'CocoDataset' -data_root = 'data/coco/' -img_norm_cfg = dict( - mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) -train_pipeline = [ - dict(type='LoadImageFromFile'), - dict(type='LoadAnnotations', with_bbox=True), - dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), - dict(type='RandomFlip', flip_ratio=0.5), - dict(type='Normalize', **img_norm_cfg), - dict(type='Pad', size_divisor=32), - dict(type='DefaultFormatBundle'), - dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']), -] -test_pipeline = [ - dict(type='LoadImageFromFile'), - dict( - type='MultiScaleFlipAug', - img_scale=(1333, 800), - flip=False, - transforms=[ - dict(type='Resize', keep_ratio=True), - dict(type='RandomFlip'), - dict(type='Normalize', **img_norm_cfg), - dict(type='Pad', size_divisor=32), - dict(type='ImageToTensor', keys=['img']), - dict(type='Collect', keys=['img']), - ]) -] -data = dict( - imgs_per_gpu=2, - workers_per_gpu=2, - train=dict( - 
type=dataset_type, - ann_file=data_root + 'annotations/instances_train2017.json', - img_prefix=data_root + 'train2017/', - pipeline=train_pipeline), - val=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_val2017.json', - img_prefix=data_root + 'val2017/', - pipeline=test_pipeline), - test=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_val2017.json', - img_prefix=data_root + 'val2017/', - pipeline=test_pipeline)) -evaluation = dict(interval=1, metric='bbox') -# optimizer -optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001) -optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) -# learning policy -lr_config = dict( - policy='step', - warmup='linear', - warmup_iters=500, - warmup_ratio=1.0 / 3, - step=[8, 11]) -checkpoint_config = dict(interval=1) -# yapf:disable -log_config = dict( - interval=50, - hooks=[ - dict(type='TextLoggerHook'), - # dict(type='TensorboardLoggerHook') - ]) -# yapf:enable -# runtime settings -total_epochs = 12 -dist_params = dict(backend='nccl') -log_level = 'INFO' -work_dir = './work_dirs/faster_rcnn_r50_fpn_1x' -load_from = None -resume_from = None -workflow = [('train', 1)] diff --git a/configs/mask_rcnn_r50_caffe_c4_1x.py b/configs/_base_/models/mask_rcnn_r50_caffe_c4.py similarity index 57% rename from configs/mask_rcnn_r50_caffe_c4_1x.py rename to configs/_base_/models/mask_rcnn_r50_caffe_c4.py index b5077715f2e6f7e3dfd1a08fd89b2a4068f82cb8..3f1c07bb8593c64da346e3c44ea4aef2fd04c36c 100644 --- a/configs/mask_rcnn_r50_caffe_c4_1x.py +++ b/configs/_base_/models/mask_rcnn_r50_caffe_c4.py @@ -2,7 +2,7 @@ norm_cfg = dict(type='BN', requires_grad=False) model = dict( type='MaskRCNN', - # pretrained='open-mmlab://resnet50_caffe', + pretrained='open-mmlab://resnet50_caffe', backbone=dict( type='ResNet', depth=50, @@ -115,79 +115,3 @@ test_cfg = dict( nms=dict(type='nms', iou_thr=0.5), max_per_img=100, mask_thr_binary=0.5)) -# dataset settings -dataset_type = 
'CocoDataset' -data_root = 'data/coco/' -img_norm_cfg = dict( - mean=[102.9801, 115.9465, 122.7717], std=[1.0, 1.0, 1.0], to_rgb=False) -train_pipeline = [ - dict(type='LoadImageFromFile'), - dict(type='LoadAnnotations', with_bbox=True, with_mask=True), - dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), - dict(type='RandomFlip', flip_ratio=0.5), - dict(type='Normalize', **img_norm_cfg), - dict(type='Pad', size_divisor=32), - dict(type='DefaultFormatBundle'), - dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']), -] -test_pipeline = [ - dict(type='LoadImageFromFile'), - dict( - type='MultiScaleFlipAug', - img_scale=(1333, 800), - flip=False, - transforms=[ - dict(type='Resize', keep_ratio=True), - dict(type='RandomFlip'), - dict(type='Normalize', **img_norm_cfg), - dict(type='Pad', size_divisor=32), - dict(type='ImageToTensor', keys=['img']), - dict(type='Collect', keys=['img']), - ]) -] -data = dict( - imgs_per_gpu=2, - workers_per_gpu=2, - train=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_train2017.json', - img_prefix=data_root + 'train2017/', - pipeline=train_pipeline), - val=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_val2017.json', - img_prefix=data_root + 'val2017/', - pipeline=test_pipeline), - test=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_val2017.json', - img_prefix=data_root + 'val2017/', - pipeline=test_pipeline)) -evaluation = dict(interval=1, metric=['bbox', 'segm']) -# optimizer -optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001) -optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) -# learning policy -lr_config = dict( - policy='step', - warmup='linear', - warmup_iters=500, - warmup_ratio=1.0 / 3, - step=[8, 11]) -checkpoint_config = dict(interval=1) -# yapf:disable -log_config = dict( - interval=50, - hooks=[ - dict(type='TextLoggerHook'), - # dict(type='TensorboardLoggerHook') - ]) -# 
yapf:enable -# runtime settings -total_epochs = 12 -dist_params = dict(backend='nccl') -log_level = 'INFO' -work_dir = './work_dirs/mask_rcnn_r50_caffe_c4_1x' -load_from = None -resume_from = None -workflow = [('train', 1)] diff --git a/configs/mask_rcnn_r50_fpn_1x.py b/configs/_base_/models/mask_rcnn_r50_fpn.py similarity index 57% rename from configs/mask_rcnn_r50_fpn_1x.py rename to configs/_base_/models/mask_rcnn_r50_fpn.py index b3a0e66a091f4c9a8f9f15504f15260c11cdb13f..dfa1a3465d0d432acb784b563089f2fb3d873630 100644 --- a/configs/mask_rcnn_r50_fpn_1x.py +++ b/configs/_base_/models/mask_rcnn_r50_fpn.py @@ -33,8 +33,7 @@ model = dict( out_channels=256, featmap_strides=[4, 8, 16, 32]), bbox_head=dict( - type='SharedFCBBoxHead', - num_fcs=2, + type='Shared2FCBBoxHead', in_channels=256, fc_out_channels=1024, roi_feat_size=7, @@ -112,79 +111,3 @@ test_cfg = dict( nms=dict(type='nms', iou_thr=0.5), max_per_img=100, mask_thr_binary=0.5)) -# dataset settings -dataset_type = 'CocoDataset' -data_root = 'data/coco/' -img_norm_cfg = dict( - mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) -train_pipeline = [ - dict(type='LoadImageFromFile'), - dict(type='LoadAnnotations', with_bbox=True, with_mask=True), - dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), - dict(type='RandomFlip', flip_ratio=0.5), - dict(type='Normalize', **img_norm_cfg), - dict(type='Pad', size_divisor=32), - dict(type='DefaultFormatBundle'), - dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']), -] -test_pipeline = [ - dict(type='LoadImageFromFile'), - dict( - type='MultiScaleFlipAug', - img_scale=(1333, 800), - flip=False, - transforms=[ - dict(type='Resize', keep_ratio=True), - dict(type='RandomFlip'), - dict(type='Normalize', **img_norm_cfg), - dict(type='Pad', size_divisor=32), - dict(type='ImageToTensor', keys=['img']), - dict(type='Collect', keys=['img']), - ]) -] -data = dict( - imgs_per_gpu=2, - workers_per_gpu=2, - train=dict( - 
type=dataset_type, - ann_file=data_root + 'annotations/instances_train2017.json', - img_prefix=data_root + 'train2017/', - pipeline=train_pipeline), - val=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_val2017.json', - img_prefix=data_root + 'val2017/', - pipeline=test_pipeline), - test=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_val2017.json', - img_prefix=data_root + 'val2017/', - pipeline=test_pipeline)) -evaluation = dict(interval=1, metric=['bbox', 'segm']) -# optimizer -optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001) -optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) -# learning policy -lr_config = dict( - policy='step', - warmup='linear', - warmup_iters=500, - warmup_ratio=1.0 / 3, - step=[8, 11]) -checkpoint_config = dict(interval=1) -# yapf:disable -log_config = dict( - interval=50, - hooks=[ - dict(type='TextLoggerHook'), - # dict(type='TensorboardLoggerHook') - ]) -# yapf:enable -# runtime settings -total_epochs = 12 -dist_params = dict(backend='nccl') -log_level = 'INFO' -work_dir = './work_dirs/mask_rcnn_r50_fpn_1x' -load_from = None -resume_from = None -workflow = [('train', 1)] diff --git a/configs/_base_/models/retinanet_r50_fpn.py b/configs/_base_/models/retinanet_r50_fpn.py new file mode 100644 index 0000000000000000000000000000000000000000..f94c27fe8ba6375b9af99fd445784dde626f7e73 --- /dev/null +++ b/configs/_base_/models/retinanet_r50_fpn.py @@ -0,0 +1,55 @@ +# model settings +model = dict( + type='RetinaNet', + pretrained='torchvision://resnet50', + backbone=dict( + type='ResNet', + depth=50, + num_stages=4, + out_indices=(0, 1, 2, 3), + frozen_stages=1, + norm_cfg=dict(type='BN', requires_grad=True), + style='pytorch'), + neck=dict( + type='FPN', + in_channels=[256, 512, 1024, 2048], + out_channels=256, + start_level=1, + add_extra_convs=True, + num_outs=5), + bbox_head=dict( + type='RetinaHead', + num_classes=81, + in_channels=256, + 
stacked_convs=4, + feat_channels=256, + octave_base_scale=4, + scales_per_octave=3, + anchor_ratios=[0.5, 1.0, 2.0], + anchor_strides=[8, 16, 32, 64, 128], + target_means=[.0, .0, .0, .0], + target_stds=[1.0, 1.0, 1.0, 1.0], + loss_cls=dict( + type='FocalLoss', + use_sigmoid=True, + gamma=2.0, + alpha=0.25, + loss_weight=1.0), + loss_bbox=dict(type='SmoothL1Loss', beta=0.11, loss_weight=1.0))) +# training and testing settings +train_cfg = dict( + assigner=dict( + type='MaxIoUAssigner', + pos_iou_thr=0.5, + neg_iou_thr=0.4, + min_pos_iou=0, + ignore_iof_thr=-1), + allowed_border=-1, + pos_weight=-1, + debug=False) +test_cfg = dict( + nms_pre=1000, + min_bbox_size=0, + score_thr=0.05, + nms=dict(type='nms', iou_thr=0.5), + max_per_img=100) diff --git a/configs/_base_/models/rpn_r50_caffe_c4.py b/configs/_base_/models/rpn_r50_caffe_c4.py new file mode 100644 index 0000000000000000000000000000000000000000..ca9009e803f9f4d85ff6d576dbe35e4ced9cc204 --- /dev/null +++ b/configs/_base_/models/rpn_r50_caffe_c4.py @@ -0,0 +1,54 @@ +# model settings +model = dict( + type='RPN', + pretrained='open-mmlab://resnet50_caffe', + backbone=dict( + type='ResNet', + depth=50, + num_stages=3, + strides=(1, 2, 2), + dilations=(1, 1, 1), + out_indices=(2, ), + frozen_stages=1, + norm_cfg=dict(type='BN', requires_grad=False), + norm_eval=True, + style='caffe'), + neck=None, + rpn_head=dict( + type='RPNHead', + in_channels=1024, + feat_channels=1024, + anchor_scales=[2, 4, 8, 16, 32], + anchor_ratios=[0.5, 1.0, 2.0], + anchor_strides=[16], + target_means=[.0, .0, .0, .0], + target_stds=[1.0, 1.0, 1.0, 1.0], + loss_cls=dict( + type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0), + loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0))) +# model training and testing settings +train_cfg = dict( + rpn=dict( + assigner=dict( + type='MaxIoUAssigner', + pos_iou_thr=0.7, + neg_iou_thr=0.3, + min_pos_iou=0.3, + ignore_iof_thr=-1), + sampler=dict( + type='RandomSampler', + 
num=256, + pos_fraction=0.5, + neg_pos_ub=-1, + add_gt_as_proposals=False), + allowed_border=0, + pos_weight=-1, + debug=False)) +test_cfg = dict( + rpn=dict( + nms_across_levels=False, + nms_pre=12000, + nms_post=2000, + max_num=2000, + nms_thr=0.7, + min_bbox_size=0)) diff --git a/configs/_base_/models/rpn_r50_fpn.py b/configs/_base_/models/rpn_r50_fpn.py new file mode 100644 index 0000000000000000000000000000000000000000..cd9503f9caebea19d41820f264a857c7c0c20b9e --- /dev/null +++ b/configs/_base_/models/rpn_r50_fpn.py @@ -0,0 +1,55 @@ +# model settings +model = dict( + type='RPN', + pretrained='torchvision://resnet50', + backbone=dict( + type='ResNet', + depth=50, + num_stages=4, + out_indices=(0, 1, 2, 3), + frozen_stages=1, + norm_cfg=dict(type='BN', requires_grad=True), + style='pytorch'), + neck=dict( + type='FPN', + in_channels=[256, 512, 1024, 2048], + out_channels=256, + num_outs=5), + rpn_head=dict( + type='RPNHead', + in_channels=256, + feat_channels=256, + anchor_scales=[8], + anchor_ratios=[0.5, 1.0, 2.0], + anchor_strides=[4, 8, 16, 32, 64], + target_means=[.0, .0, .0, .0], + target_stds=[1.0, 1.0, 1.0, 1.0], + loss_cls=dict( + type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0), + loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0))) +# model training and testing settings +train_cfg = dict( + rpn=dict( + assigner=dict( + type='MaxIoUAssigner', + pos_iou_thr=0.7, + neg_iou_thr=0.3, + min_pos_iou=0.3, + ignore_iof_thr=-1), + sampler=dict( + type='RandomSampler', + num=256, + pos_fraction=0.5, + neg_pos_ub=-1, + add_gt_as_proposals=False), + allowed_border=0, + pos_weight=-1, + debug=False)) +test_cfg = dict( + rpn=dict( + nms_across_levels=False, + nms_pre=2000, + nms_post=2000, + max_num=2000, + nms_thr=0.7, + min_bbox_size=0)) diff --git a/configs/_base_/models/ssd300.py b/configs/_base_/models/ssd300.py new file mode 100644 index 0000000000000000000000000000000000000000..66a8dc6ac3578c7ee3203e221f49f55e6f3c78d1 --- 
/dev/null +++ b/configs/_base_/models/ssd300.py @@ -0,0 +1,44 @@ +# model settings +input_size = 300 +model = dict( + type='SingleStageDetector', + pretrained='open-mmlab://vgg16_caffe', + backbone=dict( + type='SSDVGG', + input_size=input_size, + depth=16, + with_last_pool=False, + ceil_mode=True, + out_indices=(3, 4), + out_feature_indices=(22, 34), + l2_norm_scale=20), + neck=None, + bbox_head=dict( + type='SSDHead', + input_size=input_size, + in_channels=(512, 1024, 512, 256, 256, 256), + num_classes=81, + anchor_strides=(8, 16, 32, 64, 100, 300), + basesize_ratio_range=(0.15, 0.9), + anchor_ratios=([2], [2, 3], [2, 3], [2, 3], [2], [2]), + target_means=(.0, .0, .0, .0), + target_stds=(0.1, 0.1, 0.2, 0.2))) +cudnn_benchmark = True +train_cfg = dict( + assigner=dict( + type='MaxIoUAssigner', + pos_iou_thr=0.5, + neg_iou_thr=0.5, + min_pos_iou=0., + ignore_iof_thr=-1, + gt_max_assign_all=False), + smoothl1_beta=1., + allowed_border=-1, + pos_weight=-1, + neg_pos_ratio=3, + debug=False) +test_cfg = dict( + nms=dict(type='nms', iou_thr=0.45), + min_bbox_size=0, + score_thr=0.02, + max_per_img=200) diff --git a/configs/_base_/schedules/schedule_1x.py b/configs/_base_/schedules/schedule_1x.py new file mode 100644 index 0000000000000000000000000000000000000000..d559ec1d04f2f0e40d5e2c41e3ae763b7befde39 --- /dev/null +++ b/configs/_base_/schedules/schedule_1x.py @@ -0,0 +1,11 @@ +# optimizer +optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001) +optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) +# learning policy +lr_config = dict( + policy='step', + warmup='linear', + warmup_iters=500, + warmup_ratio=1.0 / 3, + step=[8, 11]) +total_epochs = 12 diff --git a/configs/_base_/schedules/schedule_20e.py b/configs/_base_/schedules/schedule_20e.py new file mode 100644 index 0000000000000000000000000000000000000000..e6ca2b24df28c4752c84dc76a2b2d45cdfbc200e --- /dev/null +++ b/configs/_base_/schedules/schedule_20e.py @@ -0,0 +1,4 @@ +_base_ = 
'./schedule_1x.py' +# learning policy +lr_config = dict(step=[16, 19]) +total_epochs = 20 diff --git a/configs/_base_/schedules/schedule_2x.py b/configs/_base_/schedules/schedule_2x.py new file mode 100644 index 0000000000000000000000000000000000000000..72b4135c3d74a79dbad55b9ff24d2bddc88956db --- /dev/null +++ b/configs/_base_/schedules/schedule_2x.py @@ -0,0 +1,4 @@ +_base_ = './schedule_1x.py' +# learning policy +lr_config = dict(step=[16, 22]) +total_epochs = 24 diff --git a/configs/albu_example/mask_rcnn_r50_fpn_1x.py b/configs/albu_example/mask_rcnn_r50_fpn_1x.py deleted file mode 100644 index da3e23ae1ac6bdcac81863df91ab8d809e2ae321..0000000000000000000000000000000000000000 --- a/configs/albu_example/mask_rcnn_r50_fpn_1x.py +++ /dev/null @@ -1,249 +0,0 @@ -# model settings -model = dict( - type='MaskRCNN', - pretrained='torchvision://resnet50', - backbone=dict( - type='ResNet', - depth=50, - num_stages=4, - out_indices=(0, 1, 2, 3), - frozen_stages=1, - norm_cfg=dict(type='BN', requires_grad=True), - style='pytorch'), - neck=dict( - type='FPN', - in_channels=[256, 512, 1024, 2048], - out_channels=256, - num_outs=5), - rpn_head=dict( - type='RPNHead', - in_channels=256, - feat_channels=256, - anchor_scales=[8], - anchor_ratios=[0.5, 1.0, 2.0], - anchor_strides=[4, 8, 16, 32, 64], - target_means=[.0, .0, .0, .0], - target_stds=[1.0, 1.0, 1.0, 1.0], - loss_cls=dict( - type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0), - loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0)), - bbox_roi_extractor=dict( - type='SingleRoIExtractor', - roi_layer=dict(type='RoIAlign', out_size=7, sample_num=2), - out_channels=256, - featmap_strides=[4, 8, 16, 32]), - bbox_head=dict( - type='SharedFCBBoxHead', - num_fcs=2, - in_channels=256, - fc_out_channels=1024, - roi_feat_size=7, - num_classes=81, - target_means=[0., 0., 0., 0.], - target_stds=[0.1, 0.1, 0.2, 0.2], - reg_class_agnostic=False, - loss_cls=dict( - type='CrossEntropyLoss', use_sigmoid=False, 
loss_weight=1.0), - loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0)), - mask_roi_extractor=dict( - type='SingleRoIExtractor', - roi_layer=dict(type='RoIAlign', out_size=14, sample_num=2), - out_channels=256, - featmap_strides=[4, 8, 16, 32]), - mask_head=dict( - type='FCNMaskHead', - num_convs=4, - in_channels=256, - conv_out_channels=256, - num_classes=81, - loss_mask=dict( - type='CrossEntropyLoss', use_mask=True, loss_weight=1.0))) -# model training and testing settings -train_cfg = dict( - rpn=dict( - assigner=dict( - type='MaxIoUAssigner', - pos_iou_thr=0.7, - neg_iou_thr=0.3, - min_pos_iou=0.3, - ignore_iof_thr=-1), - sampler=dict( - type='RandomSampler', - num=256, - pos_fraction=0.5, - neg_pos_ub=-1, - add_gt_as_proposals=False), - allowed_border=0, - pos_weight=-1, - debug=False), - rpn_proposal=dict( - nms_across_levels=False, - nms_pre=2000, - nms_post=2000, - max_num=2000, - nms_thr=0.7, - min_bbox_size=0), - rcnn=dict( - assigner=dict( - type='MaxIoUAssigner', - pos_iou_thr=0.5, - neg_iou_thr=0.5, - min_pos_iou=0.5, - ignore_iof_thr=-1), - sampler=dict( - type='RandomSampler', - num=512, - pos_fraction=0.25, - neg_pos_ub=-1, - add_gt_as_proposals=True), - mask_size=28, - pos_weight=-1, - debug=False)) -test_cfg = dict( - rpn=dict( - nms_across_levels=False, - nms_pre=1000, - nms_post=1000, - max_num=1000, - nms_thr=0.7, - min_bbox_size=0), - rcnn=dict( - score_thr=0.05, - nms=dict(type='nms', iou_thr=0.5), - max_per_img=100, - mask_thr_binary=0.5)) -# dataset settings -dataset_type = 'CocoDataset' -data_root = 'data/coco/' -img_norm_cfg = dict( - mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) -albu_train_transforms = [ - dict( - type='ShiftScaleRotate', - shift_limit=0.0625, - scale_limit=0.0, - rotate_limit=0, - interpolation=1, - p=0.5), - dict( - type='RandomBrightnessContrast', - brightness_limit=[0.1, 0.3], - contrast_limit=[0.1, 0.3], - p=0.2), - dict( - type='OneOf', - transforms=[ - dict( - 
type='RGBShift', - r_shift_limit=10, - g_shift_limit=10, - b_shift_limit=10, - p=1.0), - dict( - type='HueSaturationValue', - hue_shift_limit=20, - sat_shift_limit=30, - val_shift_limit=20, - p=1.0) - ], - p=0.1), - dict(type='JpegCompression', quality_lower=85, quality_upper=95, p=0.2), - dict(type='ChannelShuffle', p=0.1), - dict( - type='OneOf', - transforms=[ - dict(type='Blur', blur_limit=3, p=1.0), - dict(type='MedianBlur', blur_limit=3, p=1.0) - ], - p=0.1), -] -train_pipeline = [ - dict(type='LoadImageFromFile'), - dict(type='LoadAnnotations', with_bbox=True, with_mask=True), - dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), - dict(type='Pad', size_divisor=32), - dict( - type='Albu', - transforms=albu_train_transforms, - bbox_params=dict( - type='BboxParams', - format='pascal_voc', - label_fields=['gt_labels'], - min_visibility=0.0, - filter_lost_elements=True), - keymap={ - 'img': 'image', - 'gt_masks': 'masks', - 'gt_bboxes': 'bboxes' - }, - update_pad_shape=False, - skip_img_without_anno=True), - dict(type='Normalize', **img_norm_cfg), - dict(type='DefaultFormatBundle'), - dict( - type='Collect', - keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks'], - meta_keys=('filename', 'ori_shape', 'img_shape', 'img_norm_cfg', - 'pad_shape', 'scale_factor')) -] -test_pipeline = [ - dict(type='LoadImageFromFile'), - dict( - type='MultiScaleFlipAug', - img_scale=(1333, 800), - flip=False, - transforms=[ - dict(type='Resize', keep_ratio=True), - dict(type='RandomFlip'), - dict(type='Normalize', **img_norm_cfg), - dict(type='Pad', size_divisor=32), - dict(type='ImageToTensor', keys=['img']), - dict(type='Collect', keys=['img']), - ]) -] -data = dict( - imgs_per_gpu=2, - workers_per_gpu=2, - train=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_train2017.json', - img_prefix=data_root + 'train2017/', - pipeline=train_pipeline), - val=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_val2017.json', - 
img_prefix=data_root + 'val2017/', - pipeline=test_pipeline), - test=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_val2017.json', - img_prefix=data_root + 'val2017/', - pipeline=test_pipeline)) -evaluation = dict(interval=1, metric=['bbox', 'segm']) -# optimizer -optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001) -optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) -# learning policy -lr_config = dict( - policy='step', - warmup='linear', - warmup_iters=500, - warmup_ratio=1.0 / 3, - step=[8, 11]) -checkpoint_config = dict(interval=1) -# yapf:disable -log_config = dict( - interval=50, - hooks=[ - dict(type='TextLoggerHook'), - # dict(type='TensorboardLoggerHook') - ]) -# yapf:enable -# runtime settings -total_epochs = 12 -dist_params = dict(backend='nccl') -log_level = 'INFO' -work_dir = './work_dirs/mask_rcnn_r50_fpn_1x' -load_from = None -resume_from = None -workflow = [('train', 1)] diff --git a/configs/albu_example/mask_rcnn_r50_fpn_albu_1x_coco.py b/configs/albu_example/mask_rcnn_r50_fpn_albu_1x_coco.py new file mode 100644 index 0000000000000000000000000000000000000000..610dadd1574f8af30294b906b5fa34c9d1a77f5b --- /dev/null +++ b/configs/albu_example/mask_rcnn_r50_fpn_albu_1x_coco.py @@ -0,0 +1,74 @@ +_base_ = '../mask_rcnn/mask_rcnn_r50_fpn_1x_coco.py' +img_norm_cfg = dict( + mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) +albu_train_transforms = [ + dict( + type='ShiftScaleRotate', + shift_limit=0.0625, + scale_limit=0.0, + rotate_limit=0, + interpolation=1, + p=0.5), + dict( + type='RandomBrightnessContrast', + brightness_limit=[0.1, 0.3], + contrast_limit=[0.1, 0.3], + p=0.2), + dict( + type='OneOf', + transforms=[ + dict( + type='RGBShift', + r_shift_limit=10, + g_shift_limit=10, + b_shift_limit=10, + p=1.0), + dict( + type='HueSaturationValue', + hue_shift_limit=20, + sat_shift_limit=30, + val_shift_limit=20, + p=1.0) + ], + p=0.1), + dict(type='JpegCompression', 
quality_lower=85, quality_upper=95, p=0.2), + dict(type='ChannelShuffle', p=0.1), + dict( + type='OneOf', + transforms=[ + dict(type='Blur', blur_limit=3, p=1.0), + dict(type='MedianBlur', blur_limit=3, p=1.0) + ], + p=0.1), +] +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations', with_bbox=True, with_mask=True), + dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), + dict(type='Pad', size_divisor=32), + dict( + type='Albu', + transforms=albu_train_transforms, + bbox_params=dict( + type='BboxParams', + format='pascal_voc', + label_fields=['gt_labels'], + min_visibility=0.0, + filter_lost_elements=True), + keymap={ + 'img': 'image', + 'gt_masks': 'masks', + 'gt_bboxes': 'bboxes' + }, + update_pad_shape=False, + skip_img_without_anno=True), + dict(type='Normalize', **img_norm_cfg), + dict(type='DefaultFormatBundle'), + dict( + type='Collect', + keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks'], + meta_keys=('filename', 'ori_shape', 'img_shape', 'img_norm_cfg', + 'pad_shape', 'scale_factor')) +] +data = dict(train=dict(pipeline=train_pipeline)) +work_dir = './work_dirs/mask_rcnn_r50_fpn_1x' diff --git a/configs/atss/atss_r50_fpn.py b/configs/atss/atss_r50_fpn.py new file mode 100644 index 0000000000000000000000000000000000000000..67b514789ca00c52bdb9251446058955de7739a2 --- /dev/null +++ b/configs/atss/atss_r50_fpn.py @@ -0,0 +1,52 @@ +model = dict( + type='ATSS', + pretrained='torchvision://resnet50', + backbone=dict( + type='ResNet', + depth=50, + num_stages=4, + out_indices=(0, 1, 2, 3), + frozen_stages=1, + norm_cfg=dict(type='BN', requires_grad=True), + style='pytorch'), + neck=dict( + type='FPN', + in_channels=[256, 512, 1024, 2048], + out_channels=256, + start_level=1, + add_extra_convs=True, + extra_convs_on_inputs=False, + num_outs=5), + bbox_head=dict( + type='ATSSHead', + num_classes=81, + in_channels=256, + stacked_convs=4, + feat_channels=256, + octave_base_scale=8, + scales_per_octave=1, + anchor_ratios=[1.0], + 
anchor_strides=[8, 16, 32, 64, 128], + target_means=[.0, .0, .0, .0], + target_stds=[0.1, 0.1, 0.2, 0.2], + loss_cls=dict( + type='FocalLoss', + use_sigmoid=True, + gamma=2.0, + alpha=0.25, + loss_weight=1.0), + loss_bbox=dict(type='GIoULoss', loss_weight=2.0), + loss_centerness=dict( + type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0))) +# training and testing settings +train_cfg = dict( + assigner=dict(type='ATSSAssigner', topk=9), + allowed_border=-1, + pos_weight=-1, + debug=False) +test_cfg = dict( + nms_pre=1000, + min_bbox_size=0, + score_thr=0.05, + nms=dict(type='nms', iou_thr=0.6), + max_per_img=100) diff --git a/configs/atss/atss_r50_fpn_1x.py b/configs/atss/atss_r50_fpn_1x.py deleted file mode 100644 index f65e7ef386b28f1f44085d29be734892374d23d6..0000000000000000000000000000000000000000 --- a/configs/atss/atss_r50_fpn_1x.py +++ /dev/null @@ -1,129 +0,0 @@ -# model settings -model = dict( - type='ATSS', - pretrained='torchvision://resnet50', - backbone=dict( - type='ResNet', - depth=50, - num_stages=4, - out_indices=(0, 1, 2, 3), - frozen_stages=1, - norm_cfg=dict(type='BN', requires_grad=True), - style='pytorch'), - neck=dict( - type='FPN', - in_channels=[256, 512, 1024, 2048], - out_channels=256, - start_level=1, - add_extra_convs=True, - extra_convs_on_inputs=False, - num_outs=5), - bbox_head=dict( - type='ATSSHead', - num_classes=81, - in_channels=256, - stacked_convs=4, - feat_channels=256, - octave_base_scale=8, - scales_per_octave=1, - anchor_ratios=[1.0], - anchor_strides=[8, 16, 32, 64, 128], - target_means=[.0, .0, .0, .0], - target_stds=[0.1, 0.1, 0.2, 0.2], - loss_cls=dict( - type='FocalLoss', - use_sigmoid=True, - gamma=2.0, - alpha=0.25, - loss_weight=1.0), - loss_bbox=dict(type='GIoULoss', loss_weight=2.0), - loss_centerness=dict( - type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0))) -# training and testing settings -train_cfg = dict( - assigner=dict(type='ATSSAssigner', topk=9), - allowed_border=-1, - pos_weight=-1, - 
debug=False) -test_cfg = dict( - nms_pre=1000, - min_bbox_size=0, - score_thr=0.05, - nms=dict(type='nms', iou_thr=0.6), - max_per_img=100) -# dataset settings -dataset_type = 'CocoDataset' -data_root = 'data/coco/' -img_norm_cfg = dict( - mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) -train_pipeline = [ - dict(type='LoadImageFromFile'), - dict(type='LoadAnnotations', with_bbox=True), - dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), - dict(type='RandomFlip', flip_ratio=0.5), - dict(type='Normalize', **img_norm_cfg), - dict(type='Pad', size_divisor=32), - dict(type='DefaultFormatBundle'), - dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']), -] -test_pipeline = [ - dict(type='LoadImageFromFile'), - dict( - type='MultiScaleFlipAug', - img_scale=(1333, 800), - flip=False, - transforms=[ - dict(type='Resize', keep_ratio=True), - dict(type='RandomFlip'), - dict(type='Normalize', **img_norm_cfg), - dict(type='Pad', size_divisor=32), - dict(type='ImageToTensor', keys=['img']), - dict(type='Collect', keys=['img']), - ]) -] -data = dict( - imgs_per_gpu=2, - workers_per_gpu=2, - train=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_train2017.json', - img_prefix=data_root + 'train2017/', - pipeline=train_pipeline), - val=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_val2017.json', - img_prefix=data_root + 'val2017/', - pipeline=test_pipeline), - test=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_val2017.json', - img_prefix=data_root + 'val2017/', - pipeline=test_pipeline)) -evaluation = dict(interval=1, metric='bbox') -# optimizer -optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001) -optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) -# learning policy -lr_config = dict( - policy='step', - warmup='linear', - warmup_iters=500, - warmup_ratio=1.0 / 3, - step=[8, 11]) -checkpoint_config = dict(interval=1) -# 
yapf:disable -log_config = dict( - interval=50, - hooks=[ - dict(type='TextLoggerHook'), - # dict(type='TensorboardLoggerHook') - ]) -# yapf:enable -# runtime settings -total_epochs = 12 -dist_params = dict(backend='nccl') -log_level = 'INFO' -work_dir = './work_dirs/atss_r50_fpn_1x' -load_from = None -resume_from = None -workflow = [('train', 1)] diff --git a/configs/atss/atss_r50_fpn_1x_coco.py b/configs/atss/atss_r50_fpn_1x_coco.py new file mode 100644 index 0000000000000000000000000000000000000000..4e7ae0d07126ada20d80b20893100c263d21c7bc --- /dev/null +++ b/configs/atss/atss_r50_fpn_1x_coco.py @@ -0,0 +1,7 @@ +_base_ = [ + 'atss_r50_fpn.py', '../_base_/datasets/coco_detection.py', + '../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py' +] +# optimizer +optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001) +work_dir = './work_dirs/atss_r50_fpn_1x' diff --git a/configs/carafe/faster_rcnn_r50_fpn_carafe_1x.py b/configs/carafe/faster_rcnn_r50_fpn_carafe_1x.py deleted file mode 100644 index 111bc4e6ef39e430c236b2180e270b3d1ba9cded..0000000000000000000000000000000000000000 --- a/configs/carafe/faster_rcnn_r50_fpn_carafe_1x.py +++ /dev/null @@ -1,188 +0,0 @@ -# model settings -model = dict( - type='FasterRCNN', - pretrained='torchvision://resnet50', - backbone=dict( - type='ResNet', - depth=50, - num_stages=4, - out_indices=(0, 1, 2, 3), - frozen_stages=1, - norm_cfg=dict(type='BN', requires_grad=True), - style='pytorch'), - neck=dict( - type='FPN_CARAFE', - in_channels=[256, 512, 1024, 2048], - out_channels=256, - num_outs=5, - start_level=0, - end_level=-1, - norm_cfg=None, - act_cfg=None, - order=('conv', 'norm', 'act'), - upsample_cfg=dict( - type='carafe', - up_kernel=5, - up_group=1, - encoder_kernel=3, - encoder_dilation=1, - compressed_channels=64)), - rpn_head=dict( - type='RPNHead', - in_channels=256, - feat_channels=256, - anchor_scales=[8], - anchor_ratios=[0.5, 1.0, 2.0], - anchor_strides=[4, 8, 16, 32, 64], - 
target_means=[.0, .0, .0, .0], - target_stds=[1.0, 1.0, 1.0, 1.0], - loss_cls=dict( - type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0), - loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0)), - bbox_roi_extractor=dict( - type='SingleRoIExtractor', - roi_layer=dict(type='RoIAlign', out_size=7, sample_num=2), - out_channels=256, - featmap_strides=[4, 8, 16, 32]), - bbox_head=dict( - type='SharedFCBBoxHead', - num_fcs=2, - in_channels=256, - fc_out_channels=1024, - roi_feat_size=7, - num_classes=81, - target_means=[0., 0., 0., 0.], - target_stds=[0.1, 0.1, 0.2, 0.2], - reg_class_agnostic=False, - loss_cls=dict( - type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), - loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0))) -# model training and testing settings -train_cfg = dict( - rpn=dict( - assigner=dict( - type='MaxIoUAssigner', - pos_iou_thr=0.7, - neg_iou_thr=0.3, - min_pos_iou=0.3, - ignore_iof_thr=-1), - sampler=dict( - type='RandomSampler', - num=256, - pos_fraction=0.5, - neg_pos_ub=-1, - add_gt_as_proposals=False), - allowed_border=0, - pos_weight=-1, - debug=False), - rpn_proposal=dict( - nms_across_levels=False, - nms_pre=2000, - nms_post=2000, - max_num=2000, - nms_thr=0.7, - min_bbox_size=0), - rcnn=dict( - assigner=dict( - type='MaxIoUAssigner', - pos_iou_thr=0.5, - neg_iou_thr=0.5, - min_pos_iou=0.5, - ignore_iof_thr=-1), - sampler=dict( - type='RandomSampler', - num=512, - pos_fraction=0.25, - neg_pos_ub=-1, - add_gt_as_proposals=True), - pos_weight=-1, - debug=False)) -test_cfg = dict( - rpn=dict( - nms_across_levels=False, - nms_pre=1000, - nms_post=1000, - max_num=1000, - nms_thr=0.7, - min_bbox_size=0), - rcnn=dict( - score_thr=0.05, nms=dict(type='nms', iou_thr=0.5), max_per_img=100) - # soft-nms is also supported for rcnn testing - # e.g., nms=dict(type='soft_nms', iou_thr=0.5, min_score=0.05) -) -# dataset settings -dataset_type = 'CocoDataset' -data_root = 'data/coco/' -img_norm_cfg = dict( - 
mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) -train_pipeline = [ - dict(type='LoadImageFromFile'), - dict(type='LoadAnnotations', with_bbox=True), - dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), - dict(type='RandomFlip', flip_ratio=0.5), - dict(type='Normalize', **img_norm_cfg), - dict(type='Pad', size_divisor=64), - dict(type='DefaultFormatBundle'), - dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']), -] -test_pipeline = [ - dict(type='LoadImageFromFile'), - dict( - type='MultiScaleFlipAug', - img_scale=(1333, 800), - flip=False, - transforms=[ - dict(type='Resize', keep_ratio=True), - dict(type='RandomFlip'), - dict(type='Normalize', **img_norm_cfg), - dict(type='Pad', size_divisor=64), - dict(type='ImageToTensor', keys=['img']), - dict(type='Collect', keys=['img']), - ]) -] -data = dict( - imgs_per_gpu=2, - workers_per_gpu=2, - train=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_train2017.json', - img_prefix=data_root + 'train2017/', - pipeline=train_pipeline), - val=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_val2017.json', - img_prefix=data_root + 'val2017/', - pipeline=test_pipeline), - test=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_val2017.json', - img_prefix=data_root + 'val2017/', - pipeline=test_pipeline)) -evaluation = dict(interval=1, metric='bbox') -# optimizer -optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001) -optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) -# learning policy -lr_config = dict( - policy='step', - warmup='linear', - warmup_iters=500, - warmup_ratio=1.0 / 3, - step=[8, 11]) -checkpoint_config = dict(interval=1) -# yapf:disable -log_config = dict( - interval=50, - hooks=[ - dict(type='TextLoggerHook'), - # dict(type='TensorboardLoggerHook') - ]) -# yapf:enable -# runtime settings -total_epochs = 12 -dist_params = dict(backend='nccl') -log_level = 'INFO' 
-work_dir = './work_dirs/faster_rcnn_r50_fpn_carafe_1x' -load_from = None -resume_from = None -workflow = [('train', 1)] diff --git a/configs/carafe/faster_rcnn_r50_fpn_carafe_1x_coco.py b/configs/carafe/faster_rcnn_r50_fpn_carafe_1x_coco.py new file mode 100644 index 0000000000000000000000000000000000000000..77814b5a2219e20ec50e3a48190bb4cb8f93fe24 --- /dev/null +++ b/configs/carafe/faster_rcnn_r50_fpn_carafe_1x_coco.py @@ -0,0 +1,51 @@ +_base_ = '../faster_rcnn/faster_rcnn_r50_fpn_1x_coco.py' +model = dict( + neck=dict( + type='FPN_CARAFE', + in_channels=[256, 512, 1024, 2048], + out_channels=256, + num_outs=5, + start_level=0, + end_level=-1, + norm_cfg=None, + act_cfg=None, + order=('conv', 'norm', 'act'), + upsample_cfg=dict( + type='carafe', + up_kernel=5, + up_group=1, + encoder_kernel=3, + encoder_dilation=1, + compressed_channels=64))) +img_norm_cfg = dict( + mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations', with_bbox=True), + dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=64), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(1333, 800), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=64), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] +data = dict( + train=dict(pipeline=train_pipeline), + val=dict(pipeline=test_pipeline), + test=dict(pipeline=test_pipeline)) +work_dir = './work_dirs/faster_rcnn_r50_fpn_carafe_1x' diff --git a/configs/carafe/mask_rcnn_r50_fpn_carafe_1x.py 
b/configs/carafe/mask_rcnn_r50_fpn_carafe_1x.py deleted file mode 100644 index 67e3319514c3a4db0169c7bec313c116375a1cda..0000000000000000000000000000000000000000 --- a/configs/carafe/mask_rcnn_r50_fpn_carafe_1x.py +++ /dev/null @@ -1,210 +0,0 @@ -# model settings -model = dict( - type='MaskRCNN', - pretrained='torchvision://resnet50', - backbone=dict( - type='ResNet', - depth=50, - num_stages=4, - out_indices=(0, 1, 2, 3), - frozen_stages=1, - norm_cfg=dict(type='BN', requires_grad=True), - style='pytorch'), - neck=dict( - type='FPN_CARAFE', - in_channels=[256, 512, 1024, 2048], - out_channels=256, - num_outs=5, - start_level=0, - end_level=-1, - norm_cfg=None, - act_cfg=None, - order=('conv', 'norm', 'act'), - upsample_cfg=dict( - type='carafe', - up_kernel=5, - up_group=1, - encoder_kernel=3, - encoder_dilation=1, - compressed_channels=64)), - rpn_head=dict( - type='RPNHead', - in_channels=256, - feat_channels=256, - anchor_scales=[8], - anchor_ratios=[0.5, 1.0, 2.0], - anchor_strides=[4, 8, 16, 32, 64], - target_means=[.0, .0, .0, .0], - target_stds=[1.0, 1.0, 1.0, 1.0], - loss_cls=dict( - type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0), - loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0)), - bbox_roi_extractor=dict( - type='SingleRoIExtractor', - roi_layer=dict(type='RoIAlign', out_size=7, sample_num=2), - out_channels=256, - featmap_strides=[4, 8, 16, 32]), - bbox_head=dict( - type='SharedFCBBoxHead', - num_fcs=2, - in_channels=256, - fc_out_channels=1024, - roi_feat_size=7, - num_classes=81, - target_means=[0., 0., 0., 0.], - target_stds=[0.1, 0.1, 0.2, 0.2], - reg_class_agnostic=False, - loss_cls=dict( - type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), - loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0)), - mask_roi_extractor=dict( - type='SingleRoIExtractor', - roi_layer=dict(type='RoIAlign', out_size=14, sample_num=2), - out_channels=256, - featmap_strides=[4, 8, 16, 32]), - mask_head=dict( - 
type='FCNMaskHead', - num_convs=4, - in_channels=256, - conv_out_channels=256, - num_classes=81, - upsample_cfg=dict( - type='carafe', - scale_factor=2, - up_kernel=5, - up_group=1, - encoder_kernel=3, - encoder_dilation=1, - compressed_channels=64), - loss_mask=dict( - type='CrossEntropyLoss', use_mask=True, loss_weight=1.0))) -# model training and testing settings -train_cfg = dict( - rpn=dict( - assigner=dict( - type='MaxIoUAssigner', - pos_iou_thr=0.7, - neg_iou_thr=0.3, - min_pos_iou=0.3, - ignore_iof_thr=-1), - sampler=dict( - type='RandomSampler', - num=256, - pos_fraction=0.5, - neg_pos_ub=-1, - add_gt_as_proposals=False), - allowed_border=0, - pos_weight=-1, - debug=False), - rpn_proposal=dict( - nms_across_levels=False, - nms_pre=2000, - nms_post=2000, - max_num=2000, - nms_thr=0.7, - min_bbox_size=0), - rcnn=dict( - assigner=dict( - type='MaxIoUAssigner', - pos_iou_thr=0.5, - neg_iou_thr=0.5, - min_pos_iou=0.5, - ignore_iof_thr=-1), - sampler=dict( - type='RandomSampler', - num=512, - pos_fraction=0.25, - neg_pos_ub=-1, - add_gt_as_proposals=True), - mask_size=28, - pos_weight=-1, - debug=False)) -test_cfg = dict( - rpn=dict( - nms_across_levels=False, - nms_pre=1000, - nms_post=1000, - max_num=1000, - nms_thr=0.7, - min_bbox_size=0), - rcnn=dict( - score_thr=0.05, - nms=dict(type='nms', iou_thr=0.5), - max_per_img=100, - mask_thr_binary=0.5)) -# dataset settings -dataset_type = 'CocoDataset' -data_root = 'data/coco/' -img_norm_cfg = dict( - mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) -train_pipeline = [ - dict(type='LoadImageFromFile'), - dict(type='LoadAnnotations', with_bbox=True, with_mask=True), - dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), - dict(type='RandomFlip', flip_ratio=0.5), - dict(type='Normalize', **img_norm_cfg), - dict(type='Pad', size_divisor=64), - dict(type='DefaultFormatBundle'), - dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']), -] -test_pipeline = [ - 
dict(type='LoadImageFromFile'), - dict( - type='MultiScaleFlipAug', - img_scale=(1333, 800), - flip=False, - transforms=[ - dict(type='Resize', keep_ratio=True), - dict(type='RandomFlip'), - dict(type='Normalize', **img_norm_cfg), - dict(type='Pad', size_divisor=64), - dict(type='ImageToTensor', keys=['img']), - dict(type='Collect', keys=['img']), - ]) -] -data = dict( - imgs_per_gpu=2, - workers_per_gpu=2, - train=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_train2017.json', - img_prefix=data_root + 'train2017/', - pipeline=train_pipeline), - val=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_val2017.json', - img_prefix=data_root + 'val2017/', - pipeline=test_pipeline), - test=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_val2017.json', - img_prefix=data_root + 'val2017/', - pipeline=test_pipeline)) -evaluation = dict(interval=1, metric=['bbox', 'segm']) -# optimizer -optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001) -optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) -# learning policy -lr_config = dict( - policy='step', - warmup='linear', - warmup_iters=500, - warmup_ratio=1.0 / 3, - step=[8, 11]) -checkpoint_config = dict(interval=1) -# yapf:disable -log_config = dict( - interval=50, - hooks=[ - dict(type='TextLoggerHook'), - # dict(type='TensorboardLoggerHook') - ]) -# yapf:enable -# runtime settings -total_epochs = 12 -dist_params = dict(backend='nccl') -log_level = 'INFO' -work_dir = './work_dirs/mask_rcnn_r50_fpn_carafe_1x' -load_from = None -resume_from = None -workflow = [('train', 1)] diff --git a/configs/carafe/mask_rcnn_r50_fpn_carafe_1x_coco.py b/configs/carafe/mask_rcnn_r50_fpn_carafe_1x_coco.py new file mode 100644 index 0000000000000000000000000000000000000000..27ac8e77997f485f43ccea314e51140728f028ef --- /dev/null +++ b/configs/carafe/mask_rcnn_r50_fpn_carafe_1x_coco.py @@ -0,0 +1,60 @@ +_base_ = 
'../mask_rcnn/mask_rcnn_r50_fpn_1x_coco.py' +model = dict( + neck=dict( + type='FPN_CARAFE', + in_channels=[256, 512, 1024, 2048], + out_channels=256, + num_outs=5, + start_level=0, + end_level=-1, + norm_cfg=None, + act_cfg=None, + order=('conv', 'norm', 'act'), + upsample_cfg=dict( + type='carafe', + up_kernel=5, + up_group=1, + encoder_kernel=3, + encoder_dilation=1, + compressed_channels=64)), + mask_head=dict( + upsample_cfg=dict( + type='carafe', + scale_factor=2, + up_kernel=5, + up_group=1, + encoder_kernel=3, + encoder_dilation=1, + compressed_channels=64))) +img_norm_cfg = dict( + mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations', with_bbox=True, with_mask=True), + dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=64), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(1333, 800), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=64), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] +data = dict( + train=dict(pipeline=train_pipeline), + val=dict(pipeline=test_pipeline), + test=dict(pipeline=test_pipeline)) +work_dir = './work_dirs/mask_rcnn_r50_fpn_carafe_1x' diff --git a/configs/cascade_mask_rcnn_r101_fpn_1x.py b/configs/cascade_mask_rcnn_r101_fpn_1x.py deleted file mode 100644 index d5de4a56e7721ed013c7a1c0285e044a5ed136c7..0000000000000000000000000000000000000000 --- a/configs/cascade_mask_rcnn_r101_fpn_1x.py +++ /dev/null @@ -1,254 +0,0 @@ -# model settings -model = dict( - type='CascadeRCNN', - 
num_stages=3, - pretrained='torchvision://resnet101', - backbone=dict( - type='ResNet', - depth=101, - num_stages=4, - out_indices=(0, 1, 2, 3), - frozen_stages=1, - norm_cfg=dict(type='BN', requires_grad=True), - style='pytorch'), - neck=dict( - type='FPN', - in_channels=[256, 512, 1024, 2048], - out_channels=256, - num_outs=5), - rpn_head=dict( - type='RPNHead', - in_channels=256, - feat_channels=256, - anchor_scales=[8], - anchor_ratios=[0.5, 1.0, 2.0], - anchor_strides=[4, 8, 16, 32, 64], - target_means=[.0, .0, .0, .0], - target_stds=[1.0, 1.0, 1.0, 1.0], - loss_cls=dict( - type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0), - loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0)), - bbox_roi_extractor=dict( - type='SingleRoIExtractor', - roi_layer=dict(type='RoIAlign', out_size=7, sample_num=2), - out_channels=256, - featmap_strides=[4, 8, 16, 32]), - bbox_head=[ - dict( - type='SharedFCBBoxHead', - num_fcs=2, - in_channels=256, - fc_out_channels=1024, - roi_feat_size=7, - num_classes=81, - target_means=[0., 0., 0., 0.], - target_stds=[0.1, 0.1, 0.2, 0.2], - reg_class_agnostic=True, - loss_cls=dict( - type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), - loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0)), - dict( - type='SharedFCBBoxHead', - num_fcs=2, - in_channels=256, - fc_out_channels=1024, - roi_feat_size=7, - num_classes=81, - target_means=[0., 0., 0., 0.], - target_stds=[0.05, 0.05, 0.1, 0.1], - reg_class_agnostic=True, - loss_cls=dict( - type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), - loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0)), - dict( - type='SharedFCBBoxHead', - num_fcs=2, - in_channels=256, - fc_out_channels=1024, - roi_feat_size=7, - num_classes=81, - target_means=[0., 0., 0., 0.], - target_stds=[0.033, 0.033, 0.067, 0.067], - reg_class_agnostic=True, - loss_cls=dict( - type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), - 
loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0)) - ], - mask_roi_extractor=dict( - type='SingleRoIExtractor', - roi_layer=dict(type='RoIAlign', out_size=14, sample_num=2), - out_channels=256, - featmap_strides=[4, 8, 16, 32]), - mask_head=dict( - type='FCNMaskHead', - num_convs=4, - in_channels=256, - conv_out_channels=256, - num_classes=81, - loss_mask=dict( - type='CrossEntropyLoss', use_mask=True, loss_weight=1.0))) -# model training and testing settings -train_cfg = dict( - rpn=dict( - assigner=dict( - type='MaxIoUAssigner', - pos_iou_thr=0.7, - neg_iou_thr=0.3, - min_pos_iou=0.3, - ignore_iof_thr=-1), - sampler=dict( - type='RandomSampler', - num=256, - pos_fraction=0.5, - neg_pos_ub=-1, - add_gt_as_proposals=False), - allowed_border=0, - pos_weight=-1, - debug=False), - rpn_proposal=dict( - nms_across_levels=False, - nms_pre=2000, - nms_post=2000, - max_num=2000, - nms_thr=0.7, - min_bbox_size=0), - rcnn=[ - dict( - assigner=dict( - type='MaxIoUAssigner', - pos_iou_thr=0.5, - neg_iou_thr=0.5, - min_pos_iou=0.5, - ignore_iof_thr=-1), - sampler=dict( - type='RandomSampler', - num=512, - pos_fraction=0.25, - neg_pos_ub=-1, - add_gt_as_proposals=True), - mask_size=28, - pos_weight=-1, - debug=False), - dict( - assigner=dict( - type='MaxIoUAssigner', - pos_iou_thr=0.6, - neg_iou_thr=0.6, - min_pos_iou=0.6, - ignore_iof_thr=-1), - sampler=dict( - type='RandomSampler', - num=512, - pos_fraction=0.25, - neg_pos_ub=-1, - add_gt_as_proposals=True), - mask_size=28, - pos_weight=-1, - debug=False), - dict( - assigner=dict( - type='MaxIoUAssigner', - pos_iou_thr=0.7, - neg_iou_thr=0.7, - min_pos_iou=0.7, - ignore_iof_thr=-1), - sampler=dict( - type='RandomSampler', - num=512, - pos_fraction=0.25, - neg_pos_ub=-1, - add_gt_as_proposals=True), - mask_size=28, - pos_weight=-1, - debug=False) - ], - stage_loss_weights=[1, 0.5, 0.25]) -test_cfg = dict( - rpn=dict( - nms_across_levels=False, - nms_pre=1000, - nms_post=1000, - max_num=1000, - nms_thr=0.7, - 
min_bbox_size=0), - rcnn=dict( - score_thr=0.05, - nms=dict(type='nms', iou_thr=0.5), - max_per_img=100, - mask_thr_binary=0.5)) -# dataset settings -dataset_type = 'CocoDataset' -data_root = 'data/coco/' -img_norm_cfg = dict( - mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) -train_pipeline = [ - dict(type='LoadImageFromFile'), - dict(type='LoadAnnotations', with_bbox=True, with_mask=True), - dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), - dict(type='RandomFlip', flip_ratio=0.5), - dict(type='Normalize', **img_norm_cfg), - dict(type='Pad', size_divisor=32), - dict(type='DefaultFormatBundle'), - dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']), -] -test_pipeline = [ - dict(type='LoadImageFromFile'), - dict( - type='MultiScaleFlipAug', - img_scale=(1333, 800), - flip=False, - transforms=[ - dict(type='Resize', keep_ratio=True), - dict(type='RandomFlip'), - dict(type='Normalize', **img_norm_cfg), - dict(type='Pad', size_divisor=32), - dict(type='ImageToTensor', keys=['img']), - dict(type='Collect', keys=['img']), - ]) -] -data = dict( - imgs_per_gpu=2, - workers_per_gpu=2, - train=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_train2017.json', - img_prefix=data_root + 'train2017/', - pipeline=train_pipeline), - val=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_val2017.json', - img_prefix=data_root + 'val2017/', - pipeline=test_pipeline), - test=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_val2017.json', - img_prefix=data_root + 'val2017/', - pipeline=test_pipeline)) -evaluation = dict(interval=1, metric=['bbox', 'segm']) -# optimizer -optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001) -optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) -# learning policy -lr_config = dict( - policy='step', - warmup='linear', - warmup_iters=500, - warmup_ratio=1.0 / 3, - step=[8, 11]) -checkpoint_config = 
dict(interval=1) -# yapf:disable -log_config = dict( - interval=50, - hooks=[ - dict(type='TextLoggerHook'), - # dict(type='TensorboardLoggerHook') - ]) -# yapf:enable -# runtime settings -total_epochs = 12 -dist_params = dict(backend='nccl') -log_level = 'INFO' -work_dir = './work_dirs/cascade_mask_rcnn_r101_fpn_1x' -load_from = None -resume_from = None -workflow = [('train', 1)] diff --git a/configs/cascade_mask_rcnn_r50_caffe_c4_1x.py b/configs/cascade_mask_rcnn_r50_caffe_c4_1x.py deleted file mode 100644 index 867ef7cb56aab017202c2facf676b8686957795d..0000000000000000000000000000000000000000 --- a/configs/cascade_mask_rcnn_r50_caffe_c4_1x.py +++ /dev/null @@ -1,255 +0,0 @@ -# model settings -norm_cfg = dict(type='BN', requires_grad=False) -model = dict( - type='CascadeRCNN', - num_stages=3, - pretrained='open-mmlab://resnet50_caffe', - backbone=dict( - type='ResNet', - depth=50, - num_stages=3, - strides=(1, 2, 2), - dilations=(1, 1, 1), - out_indices=(2, ), - frozen_stages=1, - norm_cfg=norm_cfg, - norm_eval=True, - style='caffe'), - shared_head=dict( - type='ResLayer', - depth=50, - stage=3, - stride=2, - dilation=1, - style='caffe', - norm_cfg=norm_cfg, - norm_eval=True), - rpn_head=dict( - type='RPNHead', - in_channels=1024, - feat_channels=1024, - anchor_scales=[2, 4, 8, 16, 32], - anchor_ratios=[0.5, 1.0, 2.0], - anchor_strides=[16], - target_means=[.0, .0, .0, .0], - target_stds=[1.0, 1.0, 1.0, 1.0], - loss_cls=dict( - type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0), - loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0)), - bbox_roi_extractor=dict( - type='SingleRoIExtractor', - roi_layer=dict(type='RoIAlign', out_size=14, sample_num=2), - out_channels=1024, - featmap_strides=[16]), - bbox_head=[ - dict( - type='BBoxHead', - with_avg_pool=True, - roi_feat_size=7, - in_channels=2048, - num_classes=81, - target_means=[0., 0., 0., 0.], - target_stds=[0.1, 0.1, 0.2, 0.2], - reg_class_agnostic=True, - loss_cls=dict( - 
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), - loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0)), - dict( - type='BBoxHead', - with_avg_pool=True, - roi_feat_size=7, - in_channels=2048, - num_classes=81, - target_means=[0., 0., 0., 0.], - target_stds=[0.05, 0.05, 0.1, 0.1], - reg_class_agnostic=True, - loss_cls=dict( - type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), - loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0)), - dict( - type='BBoxHead', - with_avg_pool=True, - roi_feat_size=7, - in_channels=2048, - num_classes=81, - target_means=[0., 0., 0., 0.], - target_stds=[0.033, 0.033, 0.067, 0.067], - reg_class_agnostic=True, - loss_cls=dict( - type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), - loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0)) - ], - mask_roi_extractor=None, - mask_head=dict( - type='FCNMaskHead', - num_convs=0, - in_channels=2048, - conv_out_channels=256, - num_classes=81, - loss_mask=dict( - type='CrossEntropyLoss', use_mask=True, loss_weight=1.0))) -# model training and testing settings -train_cfg = dict( - rpn=dict( - assigner=dict( - type='MaxIoUAssigner', - pos_iou_thr=0.7, - neg_iou_thr=0.3, - min_pos_iou=0.3, - ignore_iof_thr=-1), - sampler=dict( - type='RandomSampler', - num=256, - pos_fraction=0.5, - neg_pos_ub=-1, - add_gt_as_proposals=False), - allowed_border=0, - pos_weight=-1, - debug=False), - rpn_proposal=dict( - nms_across_levels=False, - nms_pre=12000, - nms_post=2000, - max_num=2000, - nms_thr=0.7, - min_bbox_size=0), - rcnn=[ - dict( - assigner=dict( - type='MaxIoUAssigner', - pos_iou_thr=0.5, - neg_iou_thr=0.5, - min_pos_iou=0.5, - ignore_iof_thr=-1), - sampler=dict( - type='RandomSampler', - num=512, - pos_fraction=0.25, - neg_pos_ub=-1, - add_gt_as_proposals=True), - mask_size=14, - pos_weight=-1, - debug=False), - dict( - assigner=dict( - type='MaxIoUAssigner', - pos_iou_thr=0.6, - neg_iou_thr=0.6, - min_pos_iou=0.6, - ignore_iof_thr=-1), - 
sampler=dict( - type='RandomSampler', - num=512, - pos_fraction=0.25, - neg_pos_ub=-1, - add_gt_as_proposals=True), - mask_size=14, - pos_weight=-1, - debug=False), - dict( - assigner=dict( - type='MaxIoUAssigner', - pos_iou_thr=0.7, - neg_iou_thr=0.7, - min_pos_iou=0.7, - ignore_iof_thr=-1), - sampler=dict( - type='RandomSampler', - num=512, - pos_fraction=0.25, - neg_pos_ub=-1, - add_gt_as_proposals=True), - mask_size=14, - pos_weight=-1, - debug=False) - ], - stage_loss_weights=[1, 0.5, 0.25]) -test_cfg = dict( - rpn=dict( - nms_across_levels=False, - nms_pre=6000, - nms_post=1000, - max_num=1000, - nms_thr=0.7, - min_bbox_size=0), - rcnn=dict( - score_thr=0.05, - nms=dict(type='nms', iou_thr=0.5), - max_per_img=100, - mask_thr_binary=0.5)) -# dataset settings -dataset_type = 'CocoDataset' -data_root = 'data/coco/' -img_norm_cfg = dict( - mean=[102.9801, 115.9465, 122.7717], std=[1.0, 1.0, 1.0], to_rgb=False) -train_pipeline = [ - dict(type='LoadImageFromFile'), - dict(type='LoadAnnotations', with_bbox=True, with_mask=True), - dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), - dict(type='RandomFlip', flip_ratio=0.5), - dict(type='Normalize', **img_norm_cfg), - dict(type='Pad', size_divisor=32), - dict(type='DefaultFormatBundle'), - dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']), -] -test_pipeline = [ - dict(type='LoadImageFromFile'), - dict( - type='MultiScaleFlipAug', - img_scale=(1333, 800), - flip=False, - transforms=[ - dict(type='Resize', keep_ratio=True), - dict(type='RandomFlip'), - dict(type='Normalize', **img_norm_cfg), - dict(type='Pad', size_divisor=32), - dict(type='ImageToTensor', keys=['img']), - dict(type='Collect', keys=['img']), - ]) -] -data = dict( - imgs_per_gpu=2, - workers_per_gpu=2, - train=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_train2017.json', - img_prefix=data_root + 'train2017/', - pipeline=train_pipeline), - val=dict( - type=dataset_type, - ann_file=data_root + 
'annotations/instances_val2017.json', - img_prefix=data_root + 'val2017/', - pipeline=test_pipeline), - test=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_val2017.json', - img_prefix=data_root + 'val2017/', - pipeline=test_pipeline)) -evaluation = dict(interval=1, metric=['bbox', 'segm']) -# optimizer -optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001) -optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) -# learning policy -lr_config = dict( - policy='step', - warmup='linear', - warmup_iters=500, - warmup_ratio=1.0 / 3, - step=[8, 11]) -checkpoint_config = dict(interval=1) -# yapf:disable -log_config = dict( - interval=50, - hooks=[ - dict(type='TextLoggerHook'), - # dict(type='TensorboardLoggerHook') - ]) -# yapf:enable -# runtime settings -total_epochs = 12 -dist_params = dict(backend='nccl') -log_level = 'INFO' -work_dir = './work_dirs/cascade_mask_rcnn_r50_caffe_c4_1x' -load_from = None -resume_from = None -workflow = [('train', 1)] diff --git a/configs/cascade_mask_rcnn_x101_32x4d_fpn_1x.py b/configs/cascade_mask_rcnn_x101_32x4d_fpn_1x.py deleted file mode 100644 index 4a1edbfd0a3fee44c30a321122252b23d97c0299..0000000000000000000000000000000000000000 --- a/configs/cascade_mask_rcnn_x101_32x4d_fpn_1x.py +++ /dev/null @@ -1,256 +0,0 @@ -# model settings -model = dict( - type='CascadeRCNN', - num_stages=3, - pretrained='open-mmlab://resnext101_32x4d', - backbone=dict( - type='ResNeXt', - depth=101, - groups=32, - base_width=4, - num_stages=4, - out_indices=(0, 1, 2, 3), - frozen_stages=1, - norm_cfg=dict(type='BN', requires_grad=True), - style='pytorch'), - neck=dict( - type='FPN', - in_channels=[256, 512, 1024, 2048], - out_channels=256, - num_outs=5), - rpn_head=dict( - type='RPNHead', - in_channels=256, - feat_channels=256, - anchor_scales=[8], - anchor_ratios=[0.5, 1.0, 2.0], - anchor_strides=[4, 8, 16, 32, 64], - target_means=[.0, .0, .0, .0], - target_stds=[1.0, 1.0, 1.0, 1.0], - loss_cls=dict( 
- type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0), - loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0)), - bbox_roi_extractor=dict( - type='SingleRoIExtractor', - roi_layer=dict(type='RoIAlign', out_size=7, sample_num=2), - out_channels=256, - featmap_strides=[4, 8, 16, 32]), - bbox_head=[ - dict( - type='SharedFCBBoxHead', - num_fcs=2, - in_channels=256, - fc_out_channels=1024, - roi_feat_size=7, - num_classes=81, - target_means=[0., 0., 0., 0.], - target_stds=[0.1, 0.1, 0.2, 0.2], - reg_class_agnostic=True, - loss_cls=dict( - type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), - loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0)), - dict( - type='SharedFCBBoxHead', - num_fcs=2, - in_channels=256, - fc_out_channels=1024, - roi_feat_size=7, - num_classes=81, - target_means=[0., 0., 0., 0.], - target_stds=[0.05, 0.05, 0.1, 0.1], - reg_class_agnostic=True, - loss_cls=dict( - type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), - loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0)), - dict( - type='SharedFCBBoxHead', - num_fcs=2, - in_channels=256, - fc_out_channels=1024, - roi_feat_size=7, - num_classes=81, - target_means=[0., 0., 0., 0.], - target_stds=[0.033, 0.033, 0.067, 0.067], - reg_class_agnostic=True, - loss_cls=dict( - type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), - loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0)) - ], - mask_roi_extractor=dict( - type='SingleRoIExtractor', - roi_layer=dict(type='RoIAlign', out_size=14, sample_num=2), - out_channels=256, - featmap_strides=[4, 8, 16, 32]), - mask_head=dict( - type='FCNMaskHead', - num_convs=4, - in_channels=256, - conv_out_channels=256, - num_classes=81, - loss_mask=dict( - type='CrossEntropyLoss', use_mask=True, loss_weight=1.0))) -# model training and testing settings -train_cfg = dict( - rpn=dict( - assigner=dict( - type='MaxIoUAssigner', - pos_iou_thr=0.7, - neg_iou_thr=0.3, - min_pos_iou=0.3, - 
ignore_iof_thr=-1), - sampler=dict( - type='RandomSampler', - num=256, - pos_fraction=0.5, - neg_pos_ub=-1, - add_gt_as_proposals=False), - allowed_border=0, - pos_weight=-1, - debug=False), - rpn_proposal=dict( - nms_across_levels=False, - nms_pre=2000, - nms_post=2000, - max_num=2000, - nms_thr=0.7, - min_bbox_size=0), - rcnn=[ - dict( - assigner=dict( - type='MaxIoUAssigner', - pos_iou_thr=0.5, - neg_iou_thr=0.5, - min_pos_iou=0.5, - ignore_iof_thr=-1), - sampler=dict( - type='RandomSampler', - num=512, - pos_fraction=0.25, - neg_pos_ub=-1, - add_gt_as_proposals=True), - mask_size=28, - pos_weight=-1, - debug=False), - dict( - assigner=dict( - type='MaxIoUAssigner', - pos_iou_thr=0.6, - neg_iou_thr=0.6, - min_pos_iou=0.6, - ignore_iof_thr=-1), - sampler=dict( - type='RandomSampler', - num=512, - pos_fraction=0.25, - neg_pos_ub=-1, - add_gt_as_proposals=True), - mask_size=28, - pos_weight=-1, - debug=False), - dict( - assigner=dict( - type='MaxIoUAssigner', - pos_iou_thr=0.7, - neg_iou_thr=0.7, - min_pos_iou=0.7, - ignore_iof_thr=-1), - sampler=dict( - type='RandomSampler', - num=512, - pos_fraction=0.25, - neg_pos_ub=-1, - add_gt_as_proposals=True), - mask_size=28, - pos_weight=-1, - debug=False) - ], - stage_loss_weights=[1, 0.5, 0.25]) -test_cfg = dict( - rpn=dict( - nms_across_levels=False, - nms_pre=1000, - nms_post=1000, - max_num=1000, - nms_thr=0.7, - min_bbox_size=0), - rcnn=dict( - score_thr=0.05, - nms=dict(type='nms', iou_thr=0.5), - max_per_img=100, - mask_thr_binary=0.5)) -# dataset settings -dataset_type = 'CocoDataset' -data_root = 'data/coco/' -img_norm_cfg = dict( - mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) -train_pipeline = [ - dict(type='LoadImageFromFile'), - dict(type='LoadAnnotations', with_bbox=True, with_mask=True), - dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), - dict(type='RandomFlip', flip_ratio=0.5), - dict(type='Normalize', **img_norm_cfg), - dict(type='Pad', size_divisor=32), - 
dict(type='DefaultFormatBundle'), - dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']), -] -test_pipeline = [ - dict(type='LoadImageFromFile'), - dict( - type='MultiScaleFlipAug', - img_scale=(1333, 800), - flip=False, - transforms=[ - dict(type='Resize', keep_ratio=True), - dict(type='RandomFlip'), - dict(type='Normalize', **img_norm_cfg), - dict(type='Pad', size_divisor=32), - dict(type='ImageToTensor', keys=['img']), - dict(type='Collect', keys=['img']), - ]) -] -data = dict( - imgs_per_gpu=2, - workers_per_gpu=2, - train=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_train2017.json', - img_prefix=data_root + 'train2017/', - pipeline=train_pipeline), - val=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_val2017.json', - img_prefix=data_root + 'val2017/', - pipeline=test_pipeline), - test=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_val2017.json', - img_prefix=data_root + 'val2017/', - pipeline=test_pipeline)) -evaluation = dict(interval=1, metric=['bbox', 'segm']) -# optimizer -optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001) -optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) -# learning policy -lr_config = dict( - policy='step', - warmup='linear', - warmup_iters=500, - warmup_ratio=1.0 / 3, - step=[8, 11]) -checkpoint_config = dict(interval=1) -# yapf:disable -log_config = dict( - interval=50, - hooks=[ - dict(type='TextLoggerHook'), - # dict(type='TensorboardLoggerHook') - ]) -# yapf:enable -# runtime settings -total_epochs = 12 -dist_params = dict(backend='nccl') -log_level = 'INFO' -work_dir = './work_dirs/cascade_mask_rcnn_x101_32x4d_fpn_1x' -load_from = None -resume_from = None -workflow = [('train', 1)] diff --git a/configs/cascade_mask_rcnn_x101_64x4d_fpn_1x.py b/configs/cascade_mask_rcnn_x101_64x4d_fpn_1x.py deleted file mode 100644 index 
d4d1f09bb90396c36e21ef4f8ac47a3921f42396..0000000000000000000000000000000000000000 --- a/configs/cascade_mask_rcnn_x101_64x4d_fpn_1x.py +++ /dev/null @@ -1,256 +0,0 @@ -# model settings -model = dict( - type='CascadeRCNN', - num_stages=3, - pretrained='open-mmlab://resnext101_64x4d', - backbone=dict( - type='ResNeXt', - depth=101, - groups=64, - base_width=4, - num_stages=4, - out_indices=(0, 1, 2, 3), - frozen_stages=1, - norm_cfg=dict(type='BN', requires_grad=True), - style='pytorch'), - neck=dict( - type='FPN', - in_channels=[256, 512, 1024, 2048], - out_channels=256, - num_outs=5), - rpn_head=dict( - type='RPNHead', - in_channels=256, - feat_channels=256, - anchor_scales=[8], - anchor_ratios=[0.5, 1.0, 2.0], - anchor_strides=[4, 8, 16, 32, 64], - target_means=[.0, .0, .0, .0], - target_stds=[1.0, 1.0, 1.0, 1.0], - loss_cls=dict( - type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0), - loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0)), - bbox_roi_extractor=dict( - type='SingleRoIExtractor', - roi_layer=dict(type='RoIAlign', out_size=7, sample_num=2), - out_channels=256, - featmap_strides=[4, 8, 16, 32]), - bbox_head=[ - dict( - type='SharedFCBBoxHead', - num_fcs=2, - in_channels=256, - fc_out_channels=1024, - roi_feat_size=7, - num_classes=81, - target_means=[0., 0., 0., 0.], - target_stds=[0.1, 0.1, 0.2, 0.2], - reg_class_agnostic=True, - loss_cls=dict( - type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), - loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0)), - dict( - type='SharedFCBBoxHead', - num_fcs=2, - in_channels=256, - fc_out_channels=1024, - roi_feat_size=7, - num_classes=81, - target_means=[0., 0., 0., 0.], - target_stds=[0.05, 0.05, 0.1, 0.1], - reg_class_agnostic=True, - loss_cls=dict( - type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), - loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0)), - dict( - type='SharedFCBBoxHead', - num_fcs=2, - in_channels=256, - 
fc_out_channels=1024, - roi_feat_size=7, - num_classes=81, - target_means=[0., 0., 0., 0.], - target_stds=[0.033, 0.033, 0.067, 0.067], - reg_class_agnostic=True, - loss_cls=dict( - type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), - loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0)) - ], - mask_roi_extractor=dict( - type='SingleRoIExtractor', - roi_layer=dict(type='RoIAlign', out_size=14, sample_num=2), - out_channels=256, - featmap_strides=[4, 8, 16, 32]), - mask_head=dict( - type='FCNMaskHead', - num_convs=4, - in_channels=256, - conv_out_channels=256, - num_classes=81, - loss_mask=dict( - type='CrossEntropyLoss', use_mask=True, loss_weight=1.0))) -# model training and testing settings -train_cfg = dict( - rpn=dict( - assigner=dict( - type='MaxIoUAssigner', - pos_iou_thr=0.7, - neg_iou_thr=0.3, - min_pos_iou=0.3, - ignore_iof_thr=-1), - sampler=dict( - type='RandomSampler', - num=256, - pos_fraction=0.5, - neg_pos_ub=-1, - add_gt_as_proposals=False), - allowed_border=0, - pos_weight=-1, - debug=False), - rpn_proposal=dict( - nms_across_levels=False, - nms_pre=2000, - nms_post=2000, - max_num=2000, - nms_thr=0.7, - min_bbox_size=0), - rcnn=[ - dict( - assigner=dict( - type='MaxIoUAssigner', - pos_iou_thr=0.5, - neg_iou_thr=0.5, - min_pos_iou=0.5, - ignore_iof_thr=-1), - sampler=dict( - type='RandomSampler', - num=512, - pos_fraction=0.25, - neg_pos_ub=-1, - add_gt_as_proposals=True), - mask_size=28, - pos_weight=-1, - debug=False), - dict( - assigner=dict( - type='MaxIoUAssigner', - pos_iou_thr=0.6, - neg_iou_thr=0.6, - min_pos_iou=0.6, - ignore_iof_thr=-1), - sampler=dict( - type='RandomSampler', - num=512, - pos_fraction=0.25, - neg_pos_ub=-1, - add_gt_as_proposals=True), - mask_size=28, - pos_weight=-1, - debug=False), - dict( - assigner=dict( - type='MaxIoUAssigner', - pos_iou_thr=0.7, - neg_iou_thr=0.7, - min_pos_iou=0.7, - ignore_iof_thr=-1), - sampler=dict( - type='RandomSampler', - num=512, - pos_fraction=0.25, - neg_pos_ub=-1, - 
add_gt_as_proposals=True), - mask_size=28, - pos_weight=-1, - debug=False) - ], - stage_loss_weights=[1, 0.5, 0.25]) -test_cfg = dict( - rpn=dict( - nms_across_levels=False, - nms_pre=1000, - nms_post=1000, - max_num=1000, - nms_thr=0.7, - min_bbox_size=0), - rcnn=dict( - score_thr=0.05, - nms=dict(type='nms', iou_thr=0.5), - max_per_img=100, - mask_thr_binary=0.5)) -# dataset settings -dataset_type = 'CocoDataset' -data_root = 'data/coco/' -img_norm_cfg = dict( - mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) -train_pipeline = [ - dict(type='LoadImageFromFile'), - dict(type='LoadAnnotations', with_bbox=True, with_mask=True), - dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), - dict(type='RandomFlip', flip_ratio=0.5), - dict(type='Normalize', **img_norm_cfg), - dict(type='Pad', size_divisor=32), - dict(type='DefaultFormatBundle'), - dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']), -] -test_pipeline = [ - dict(type='LoadImageFromFile'), - dict( - type='MultiScaleFlipAug', - img_scale=(1333, 800), - flip=False, - transforms=[ - dict(type='Resize', keep_ratio=True), - dict(type='RandomFlip'), - dict(type='Normalize', **img_norm_cfg), - dict(type='Pad', size_divisor=32), - dict(type='ImageToTensor', keys=['img']), - dict(type='Collect', keys=['img']), - ]) -] -data = dict( - imgs_per_gpu=2, - workers_per_gpu=2, - train=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_train2017.json', - img_prefix=data_root + 'train2017/', - pipeline=train_pipeline), - val=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_val2017.json', - img_prefix=data_root + 'val2017/', - pipeline=test_pipeline), - test=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_val2017.json', - img_prefix=data_root + 'val2017/', - pipeline=test_pipeline)) -evaluation = dict(interval=1, metric=['bbox', 'segm']) -# optimizer -optimizer = dict(type='SGD', lr=0.02, momentum=0.9, 
weight_decay=0.0001) -optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) -# learning policy -lr_config = dict( - policy='step', - warmup='linear', - warmup_iters=500, - warmup_ratio=1.0 / 3, - step=[8, 11]) -checkpoint_config = dict(interval=1) -# yapf:disable -log_config = dict( - interval=50, - hooks=[ - dict(type='TextLoggerHook'), - # dict(type='TensorboardLoggerHook') - ]) -# yapf:enable -# runtime settings -total_epochs = 12 -dist_params = dict(backend='nccl') -log_level = 'INFO' -work_dir = './work_dirs/cascade_mask_rcnn_x101_64x4d_fpn_1x' -load_from = None -resume_from = None -workflow = [('train', 1)] diff --git a/configs/cascade_rcnn/cascade_mask_rcnn_r101_fpn_1x_coco.py b/configs/cascade_rcnn/cascade_mask_rcnn_r101_fpn_1x_coco.py new file mode 100644 index 0000000000000000000000000000000000000000..888b70b3f08062dfce9809f9e1f24268fa6ed290 --- /dev/null +++ b/configs/cascade_rcnn/cascade_mask_rcnn_r101_fpn_1x_coco.py @@ -0,0 +1,3 @@ +_base_ = './cascade_mask_rcnn_r50_fpn_1x_coco.py' +model = dict(pretrained='torchvision://resnet101', backbone=dict(depth=101)) +work_dir = './work_dirs/cascade_mask_rcnn_r101_fpn_1x' diff --git a/configs/cascade_rcnn/cascade_mask_rcnn_r50_fpn_1x_coco.py b/configs/cascade_rcnn/cascade_mask_rcnn_r50_fpn_1x_coco.py new file mode 100644 index 0000000000000000000000000000000000000000..dd17860f1fc7545ead5ba1cd7a3712578dc9f108 --- /dev/null +++ b/configs/cascade_rcnn/cascade_mask_rcnn_r50_fpn_1x_coco.py @@ -0,0 +1,6 @@ +_base_ = [ + '../_base_/models/cascade_mask_rcnn_r50_fpn.py', + '../_base_/datasets/coco_instance.py', + '../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py' +] +work_dir = './work_dirs/cascade_mask_rcnn_r50_fpn_1x' diff --git a/configs/cascade_rcnn/cascade_mask_rcnn_x101_32x4d_fpn_1x_coco.py b/configs/cascade_rcnn/cascade_mask_rcnn_x101_32x4d_fpn_1x_coco.py new file mode 100644 index 0000000000000000000000000000000000000000..0b0df7594050a4560e8b94f77804d6044981fc1d --- /dev/null +++ 
b/configs/cascade_rcnn/cascade_mask_rcnn_x101_32x4d_fpn_1x_coco.py @@ -0,0 +1,14 @@ +_base_ = './cascade_mask_rcnn_r50_fpn_1x_coco.py' +model = dict( + pretrained='open-mmlab://resnext101_32x4d', + backbone=dict( + type='ResNeXt', + depth=101, + groups=32, + base_width=4, + num_stages=4, + out_indices=(0, 1, 2, 3), + frozen_stages=1, + norm_cfg=dict(type='BN', requires_grad=True), + style='pytorch')) +work_dir = './work_dirs/cascade_mask_rcnn_x101_32x4d_fpn_1x' diff --git a/configs/cascade_rcnn/cascade_mask_rcnn_x101_64x4d_fpn_1x_coco.py b/configs/cascade_rcnn/cascade_mask_rcnn_x101_64x4d_fpn_1x_coco.py new file mode 100644 index 0000000000000000000000000000000000000000..7b4829bda32fa0e2dc48b062f5ab61ebe4bc5dbc --- /dev/null +++ b/configs/cascade_rcnn/cascade_mask_rcnn_x101_64x4d_fpn_1x_coco.py @@ -0,0 +1,14 @@ +_base_ = './cascade_mask_rcnn_r50_fpn_1x_coco.py' +model = dict( + pretrained='open-mmlab://resnext101_64x4d', + backbone=dict( + type='ResNeXt', + depth=101, + groups=64, + base_width=4, + num_stages=4, + out_indices=(0, 1, 2, 3), + frozen_stages=1, + norm_cfg=dict(type='BN', requires_grad=True), + style='pytorch')) +work_dir = './work_dirs/cascade_mask_rcnn_x101_64x4d_fpn_1x' diff --git a/configs/cascade_rcnn/cascade_rcnn_r101_fpn_1x_coco.py b/configs/cascade_rcnn/cascade_rcnn_r101_fpn_1x_coco.py new file mode 100644 index 0000000000000000000000000000000000000000..65093207fb0531da29cae6f357bd4cd9d2e794be --- /dev/null +++ b/configs/cascade_rcnn/cascade_rcnn_r101_fpn_1x_coco.py @@ -0,0 +1,3 @@ +_base_ = './cascade_rcnn_r50_fpn_1x_coco.py' +model = dict(pretrained='torchvision://resnet101', backbone=dict(depth=101)) +work_dir = './work_dirs/cascade_rcnn_r101_fpn_1x' diff --git a/configs/cascade_rcnn/cascade_rcnn_r50_fpn_1x_coco.py b/configs/cascade_rcnn/cascade_rcnn_r50_fpn_1x_coco.py new file mode 100644 index 0000000000000000000000000000000000000000..e6f01cb4364ef77a340510141d00d5a8b19140bb --- /dev/null +++ 
b/configs/cascade_rcnn/cascade_rcnn_r50_fpn_1x_coco.py @@ -0,0 +1,6 @@ +_base_ = [ + '../_base_/models/cascade_rcnn_r50_fpn.py', + '../_base_/datasets/coco_detection.py', + '../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py' +] +work_dir = './work_dirs/cascade_rcnn_r50_fpn_1x' diff --git a/configs/cascade_rcnn/cascade_rcnn_x101_32x4d_fpn_1x_coco.py b/configs/cascade_rcnn/cascade_rcnn_x101_32x4d_fpn_1x_coco.py new file mode 100644 index 0000000000000000000000000000000000000000..c39afb299112eea7d22103706c77e081268c942d --- /dev/null +++ b/configs/cascade_rcnn/cascade_rcnn_x101_32x4d_fpn_1x_coco.py @@ -0,0 +1,14 @@ +_base_ = './cascade_rcnn_r50_fpn_1x_coco.py' +model = dict( + pretrained='open-mmlab://resnext101_32x4d', + backbone=dict( + type='ResNeXt', + depth=101, + groups=32, + base_width=4, + num_stages=4, + out_indices=(0, 1, 2, 3), + frozen_stages=1, + norm_cfg=dict(type='BN', requires_grad=True), + style='pytorch')) +work_dir = './work_dirs/cascade_rcnn_x101_32x4d_fpn_1x' diff --git a/configs/cascade_rcnn/cascade_rcnn_x101_64x4d_fpn_1x_coco.py b/configs/cascade_rcnn/cascade_rcnn_x101_64x4d_fpn_1x_coco.py new file mode 100644 index 0000000000000000000000000000000000000000..496509496be524a9ddfadf8f5435dde2396a54eb --- /dev/null +++ b/configs/cascade_rcnn/cascade_rcnn_x101_64x4d_fpn_1x_coco.py @@ -0,0 +1,16 @@ +_base_ = './cascade_rcnn_r50_fpn_1x_coco.py' +model = dict( + type='CascadeRCNN', + num_stages=3, + pretrained='open-mmlab://resnext101_64x4d', + backbone=dict( + type='ResNeXt', + depth=101, + groups=64, + base_width=4, + num_stages=4, + out_indices=(0, 1, 2, 3), + frozen_stages=1, + norm_cfg=dict(type='BN', requires_grad=True), + style='pytorch')) +work_dir = './work_dirs/cascade_rcnn_x101_64x4d_fpn_1x' diff --git a/configs/cascade_rcnn_r101_fpn_1x.py b/configs/cascade_rcnn_r101_fpn_1x.py deleted file mode 100644 index 29fd1b7076eb8a8fecd5d12a7e4be107aa214cfe..0000000000000000000000000000000000000000 --- 
a/configs/cascade_rcnn_r101_fpn_1x.py +++ /dev/null @@ -1,235 +0,0 @@ -# model settings -model = dict( - type='CascadeRCNN', - num_stages=3, - pretrained='torchvision://resnet101', - backbone=dict( - type='ResNet', - depth=101, - num_stages=4, - out_indices=(0, 1, 2, 3), - frozen_stages=1, - norm_cfg=dict(type='BN', requires_grad=True), - style='pytorch'), - neck=dict( - type='FPN', - in_channels=[256, 512, 1024, 2048], - out_channels=256, - num_outs=5), - rpn_head=dict( - type='RPNHead', - in_channels=256, - feat_channels=256, - anchor_scales=[8], - anchor_ratios=[0.5, 1.0, 2.0], - anchor_strides=[4, 8, 16, 32, 64], - target_means=[.0, .0, .0, .0], - target_stds=[1.0, 1.0, 1.0, 1.0], - loss_cls=dict( - type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0), - loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0)), - bbox_roi_extractor=dict( - type='SingleRoIExtractor', - roi_layer=dict(type='RoIAlign', out_size=7, sample_num=2), - out_channels=256, - featmap_strides=[4, 8, 16, 32]), - bbox_head=[ - dict( - type='SharedFCBBoxHead', - num_fcs=2, - in_channels=256, - fc_out_channels=1024, - roi_feat_size=7, - num_classes=81, - target_means=[0., 0., 0., 0.], - target_stds=[0.1, 0.1, 0.2, 0.2], - reg_class_agnostic=True, - loss_cls=dict( - type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), - loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0)), - dict( - type='SharedFCBBoxHead', - num_fcs=2, - in_channels=256, - fc_out_channels=1024, - roi_feat_size=7, - num_classes=81, - target_means=[0., 0., 0., 0.], - target_stds=[0.05, 0.05, 0.1, 0.1], - reg_class_agnostic=True, - loss_cls=dict( - type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), - loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0)), - dict( - type='SharedFCBBoxHead', - num_fcs=2, - in_channels=256, - fc_out_channels=1024, - roi_feat_size=7, - num_classes=81, - target_means=[0., 0., 0., 0.], - target_stds=[0.033, 0.033, 0.067, 0.067], - 
reg_class_agnostic=True, - loss_cls=dict( - type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), - loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0)) - ]) -# model training and testing settings -train_cfg = dict( - rpn=dict( - assigner=dict( - type='MaxIoUAssigner', - pos_iou_thr=0.7, - neg_iou_thr=0.3, - min_pos_iou=0.3, - ignore_iof_thr=-1), - sampler=dict( - type='RandomSampler', - num=256, - pos_fraction=0.5, - neg_pos_ub=-1, - add_gt_as_proposals=False), - allowed_border=0, - pos_weight=-1, - debug=False), - rpn_proposal=dict( - nms_across_levels=False, - nms_pre=2000, - nms_post=2000, - max_num=2000, - nms_thr=0.7, - min_bbox_size=0), - rcnn=[ - dict( - assigner=dict( - type='MaxIoUAssigner', - pos_iou_thr=0.5, - neg_iou_thr=0.5, - min_pos_iou=0.5, - ignore_iof_thr=-1), - sampler=dict( - type='RandomSampler', - num=512, - pos_fraction=0.25, - neg_pos_ub=-1, - add_gt_as_proposals=True), - pos_weight=-1, - debug=False), - dict( - assigner=dict( - type='MaxIoUAssigner', - pos_iou_thr=0.6, - neg_iou_thr=0.6, - min_pos_iou=0.6, - ignore_iof_thr=-1), - sampler=dict( - type='RandomSampler', - num=512, - pos_fraction=0.25, - neg_pos_ub=-1, - add_gt_as_proposals=True), - pos_weight=-1, - debug=False), - dict( - assigner=dict( - type='MaxIoUAssigner', - pos_iou_thr=0.7, - neg_iou_thr=0.7, - min_pos_iou=0.7, - ignore_iof_thr=-1), - sampler=dict( - type='RandomSampler', - num=512, - pos_fraction=0.25, - neg_pos_ub=-1, - add_gt_as_proposals=True), - pos_weight=-1, - debug=False) - ], - stage_loss_weights=[1, 0.5, 0.25]) -test_cfg = dict( - rpn=dict( - nms_across_levels=False, - nms_pre=1000, - nms_post=1000, - max_num=1000, - nms_thr=0.7, - min_bbox_size=0), - rcnn=dict( - score_thr=0.05, nms=dict(type='nms', iou_thr=0.5), max_per_img=100)) -# dataset settings -dataset_type = 'CocoDataset' -data_root = 'data/coco/' -img_norm_cfg = dict( - mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) -train_pipeline = [ - 
dict(type='LoadImageFromFile'), - dict(type='LoadAnnotations', with_bbox=True), - dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), - dict(type='RandomFlip', flip_ratio=0.5), - dict(type='Normalize', **img_norm_cfg), - dict(type='Pad', size_divisor=32), - dict(type='DefaultFormatBundle'), - dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']), -] -test_pipeline = [ - dict(type='LoadImageFromFile'), - dict( - type='MultiScaleFlipAug', - img_scale=(1333, 800), - flip=False, - transforms=[ - dict(type='Resize', keep_ratio=True), - dict(type='RandomFlip'), - dict(type='Normalize', **img_norm_cfg), - dict(type='Pad', size_divisor=32), - dict(type='ImageToTensor', keys=['img']), - dict(type='Collect', keys=['img']), - ]) -] -data = dict( - imgs_per_gpu=2, - workers_per_gpu=2, - train=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_train2017.json', - img_prefix=data_root + 'train2017/', - pipeline=train_pipeline), - val=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_val2017.json', - img_prefix=data_root + 'val2017/', - pipeline=test_pipeline), - test=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_val2017.json', - img_prefix=data_root + 'val2017/', - pipeline=test_pipeline)) -evaluation = dict(interval=1, metric='bbox') -# optimizer -optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001) -optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) -# learning policy -lr_config = dict( - policy='step', - warmup='linear', - warmup_iters=500, - warmup_ratio=1.0 / 3, - step=[8, 11]) -checkpoint_config = dict(interval=1) -# yapf:disable -log_config = dict( - interval=50, - hooks=[ - dict(type='TextLoggerHook'), - # dict(type='TensorboardLoggerHook') - ]) -# yapf:enable -# runtime settings -total_epochs = 12 -dist_params = dict(backend='nccl') -log_level = 'INFO' -work_dir = './work_dirs/cascade_rcnn_r101_fpn_1x' -load_from = None -resume_from = None -workflow 
= [('train', 1)] diff --git a/configs/cascade_rcnn_r50_caffe_c4_1x.py b/configs/cascade_rcnn_r50_caffe_c4_1x.py deleted file mode 100644 index 926310e27f7e14e40206b88e762d3c56864e4fbb..0000000000000000000000000000000000000000 --- a/configs/cascade_rcnn_r50_caffe_c4_1x.py +++ /dev/null @@ -1,243 +0,0 @@ -# model settings -norm_cfg = dict(type='BN', requires_grad=False) -model = dict( - type='CascadeRCNN', - num_stages=3, - pretrained='open-mmlab://resnet50_caffe', - backbone=dict( - type='ResNet', - depth=50, - num_stages=3, - strides=(1, 2, 2), - dilations=(1, 1, 1), - out_indices=(2, ), - frozen_stages=1, - norm_cfg=norm_cfg, - norm_eval=True, - style='caffe'), - shared_head=dict( - type='ResLayer', - depth=50, - stage=3, - stride=2, - dilation=1, - style='caffe', - norm_cfg=norm_cfg, - norm_eval=True), - rpn_head=dict( - type='RPNHead', - in_channels=1024, - feat_channels=1024, - anchor_scales=[2, 4, 8, 16, 32], - anchor_ratios=[0.5, 1.0, 2.0], - anchor_strides=[16], - target_means=[.0, .0, .0, .0], - target_stds=[1.0, 1.0, 1.0, 1.0], - loss_cls=dict( - type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0), - loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0)), - bbox_roi_extractor=dict( - type='SingleRoIExtractor', - roi_layer=dict(type='RoIAlign', out_size=14, sample_num=2), - out_channels=1024, - featmap_strides=[16]), - bbox_head=[ - dict( - type='BBoxHead', - with_avg_pool=True, - roi_feat_size=7, - in_channels=2048, - num_classes=81, - target_means=[0., 0., 0., 0.], - target_stds=[0.1, 0.1, 0.2, 0.2], - reg_class_agnostic=True, - loss_cls=dict( - type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), - loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0)), - dict( - type='BBoxHead', - with_avg_pool=True, - roi_feat_size=7, - in_channels=2048, - num_classes=81, - target_means=[0., 0., 0., 0.], - target_stds=[0.05, 0.05, 0.1, 0.1], - reg_class_agnostic=True, - loss_cls=dict( - type='CrossEntropyLoss', 
use_sigmoid=False, loss_weight=1.0), - loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0)), - dict( - type='BBoxHead', - with_avg_pool=True, - roi_feat_size=7, - in_channels=2048, - num_classes=81, - target_means=[0., 0., 0., 0.], - target_stds=[0.033, 0.033, 0.067, 0.067], - reg_class_agnostic=True, - loss_cls=dict( - type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), - loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0)), - ]) -# model training and testing settings -train_cfg = dict( - rpn=dict( - assigner=dict( - type='MaxIoUAssigner', - pos_iou_thr=0.7, - neg_iou_thr=0.3, - min_pos_iou=0.3, - ignore_iof_thr=-1), - sampler=dict( - type='RandomSampler', - num=256, - pos_fraction=0.5, - neg_pos_ub=-1, - add_gt_as_proposals=False), - allowed_border=0, - pos_weight=-1, - debug=False), - rpn_proposal=dict( - nms_across_levels=False, - nms_pre=12000, - nms_post=2000, - max_num=2000, - nms_thr=0.7, - min_bbox_size=0), - rcnn=[ - dict( - assigner=dict( - type='MaxIoUAssigner', - pos_iou_thr=0.5, - neg_iou_thr=0.5, - min_pos_iou=0.5, - ignore_iof_thr=-1), - sampler=dict( - type='RandomSampler', - num=512, - pos_fraction=0.25, - neg_pos_ub=-1, - add_gt_as_proposals=True), - mask_size=14, - pos_weight=-1, - debug=False), - dict( - assigner=dict( - type='MaxIoUAssigner', - pos_iou_thr=0.6, - neg_iou_thr=0.6, - min_pos_iou=0.6, - ignore_iof_thr=-1), - sampler=dict( - type='RandomSampler', - num=512, - pos_fraction=0.25, - neg_pos_ub=-1, - add_gt_as_proposals=True), - mask_size=14, - pos_weight=-1, - debug=False), - dict( - assigner=dict( - type='MaxIoUAssigner', - pos_iou_thr=0.7, - neg_iou_thr=0.7, - min_pos_iou=0.7, - ignore_iof_thr=-1), - sampler=dict( - type='RandomSampler', - num=512, - pos_fraction=0.25, - neg_pos_ub=-1, - add_gt_as_proposals=True), - mask_size=14, - pos_weight=-1, - debug=False) - ], - stage_loss_weights=[1, 0.5, 0.25]) -test_cfg = dict( - rpn=dict( - nms_across_levels=False, - nms_pre=6000, - nms_post=1000, - 
max_num=1000, - nms_thr=0.7, - min_bbox_size=0), - rcnn=dict( - score_thr=0.05, nms=dict(type='nms', iou_thr=0.5), max_per_img=100)) -# dataset settings -dataset_type = 'CocoDataset' -data_root = 'data/coco/' -img_norm_cfg = dict( - mean=[102.9801, 115.9465, 122.7717], std=[1.0, 1.0, 1.0], to_rgb=False) -train_pipeline = [ - dict(type='LoadImageFromFile'), - dict(type='LoadAnnotations', with_bbox=True), - dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), - dict(type='RandomFlip', flip_ratio=0.5), - dict(type='Normalize', **img_norm_cfg), - dict(type='Pad', size_divisor=32), - dict(type='DefaultFormatBundle'), - dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']), -] -test_pipeline = [ - dict(type='LoadImageFromFile'), - dict( - type='MultiScaleFlipAug', - img_scale=(1333, 800), - flip=False, - transforms=[ - dict(type='Resize', keep_ratio=True), - dict(type='RandomFlip'), - dict(type='Normalize', **img_norm_cfg), - dict(type='Pad', size_divisor=32), - dict(type='ImageToTensor', keys=['img']), - dict(type='Collect', keys=['img']), - ]) -] -data = dict( - imgs_per_gpu=2, - workers_per_gpu=2, - train=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_train2017.json', - img_prefix=data_root + 'train2017/', - pipeline=train_pipeline), - val=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_val2017.json', - img_prefix=data_root + 'val2017/', - pipeline=test_pipeline), - test=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_val2017.json', - img_prefix=data_root + 'val2017/', - pipeline=test_pipeline)) -evaluation = dict(interval=1, metric='bbox') -# optimizer -optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001) -optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) -# learning policy -lr_config = dict( - policy='step', - warmup='linear', - warmup_iters=500, - warmup_ratio=1.0 / 3, - step=[8, 11]) -checkpoint_config = dict(interval=1) -# yapf:disable 
-log_config = dict( - interval=50, - hooks=[ - dict(type='TextLoggerHook'), - # dict(type='TensorboardLoggerHook') - ]) -# yapf:enable -# runtime settings -total_epochs = 12 -dist_params = dict(backend='nccl') -log_level = 'INFO' -work_dir = './work_dirs/cascade_rcnn_r50_c4_1x' -load_from = None -resume_from = None -workflow = [('train', 1)] diff --git a/configs/cascade_rcnn_x101_32x4d_fpn_1x.py b/configs/cascade_rcnn_x101_32x4d_fpn_1x.py deleted file mode 100644 index 4806dea90b7a3e1bbeeafc52f1b7529d82b4bee1..0000000000000000000000000000000000000000 --- a/configs/cascade_rcnn_x101_32x4d_fpn_1x.py +++ /dev/null @@ -1,237 +0,0 @@ -# model settings -model = dict( - type='CascadeRCNN', - num_stages=3, - pretrained='open-mmlab://resnext101_32x4d', - backbone=dict( - type='ResNeXt', - depth=101, - groups=32, - base_width=4, - num_stages=4, - out_indices=(0, 1, 2, 3), - frozen_stages=1, - norm_cfg=dict(type='BN', requires_grad=True), - style='pytorch'), - neck=dict( - type='FPN', - in_channels=[256, 512, 1024, 2048], - out_channels=256, - num_outs=5), - rpn_head=dict( - type='RPNHead', - in_channels=256, - feat_channels=256, - anchor_scales=[8], - anchor_ratios=[0.5, 1.0, 2.0], - anchor_strides=[4, 8, 16, 32, 64], - target_means=[.0, .0, .0, .0], - target_stds=[1.0, 1.0, 1.0, 1.0], - loss_cls=dict( - type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0), - loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0)), - bbox_roi_extractor=dict( - type='SingleRoIExtractor', - roi_layer=dict(type='RoIAlign', out_size=7, sample_num=2), - out_channels=256, - featmap_strides=[4, 8, 16, 32]), - bbox_head=[ - dict( - type='SharedFCBBoxHead', - num_fcs=2, - in_channels=256, - fc_out_channels=1024, - roi_feat_size=7, - num_classes=81, - target_means=[0., 0., 0., 0.], - target_stds=[0.1, 0.1, 0.2, 0.2], - reg_class_agnostic=True, - loss_cls=dict( - type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), - loss_bbox=dict(type='SmoothL1Loss', beta=1.0, 
loss_weight=1.0)), - dict( - type='SharedFCBBoxHead', - num_fcs=2, - in_channels=256, - fc_out_channels=1024, - roi_feat_size=7, - num_classes=81, - target_means=[0., 0., 0., 0.], - target_stds=[0.05, 0.05, 0.1, 0.1], - reg_class_agnostic=True, - loss_cls=dict( - type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), - loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0)), - dict( - type='SharedFCBBoxHead', - num_fcs=2, - in_channels=256, - fc_out_channels=1024, - roi_feat_size=7, - num_classes=81, - target_means=[0., 0., 0., 0.], - target_stds=[0.033, 0.033, 0.067, 0.067], - reg_class_agnostic=True, - loss_cls=dict( - type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), - loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0)) - ]) -# model training and testing settings -train_cfg = dict( - rpn=dict( - assigner=dict( - type='MaxIoUAssigner', - pos_iou_thr=0.7, - neg_iou_thr=0.3, - min_pos_iou=0.3, - ignore_iof_thr=-1), - sampler=dict( - type='RandomSampler', - num=256, - pos_fraction=0.5, - neg_pos_ub=-1, - add_gt_as_proposals=False), - allowed_border=0, - pos_weight=-1, - debug=False), - rpn_proposal=dict( - nms_across_levels=False, - nms_pre=2000, - nms_post=2000, - max_num=2000, - nms_thr=0.7, - min_bbox_size=0), - rcnn=[ - dict( - assigner=dict( - type='MaxIoUAssigner', - pos_iou_thr=0.5, - neg_iou_thr=0.5, - min_pos_iou=0.5, - ignore_iof_thr=-1), - sampler=dict( - type='RandomSampler', - num=512, - pos_fraction=0.25, - neg_pos_ub=-1, - add_gt_as_proposals=True), - pos_weight=-1, - debug=False), - dict( - assigner=dict( - type='MaxIoUAssigner', - pos_iou_thr=0.6, - neg_iou_thr=0.6, - min_pos_iou=0.6, - ignore_iof_thr=-1), - sampler=dict( - type='RandomSampler', - num=512, - pos_fraction=0.25, - neg_pos_ub=-1, - add_gt_as_proposals=True), - pos_weight=-1, - debug=False), - dict( - assigner=dict( - type='MaxIoUAssigner', - pos_iou_thr=0.7, - neg_iou_thr=0.7, - min_pos_iou=0.7, - ignore_iof_thr=-1), - sampler=dict( - 
type='RandomSampler', - num=512, - pos_fraction=0.25, - neg_pos_ub=-1, - add_gt_as_proposals=True), - pos_weight=-1, - debug=False) - ], - stage_loss_weights=[1, 0.5, 0.25]) -test_cfg = dict( - rpn=dict( - nms_across_levels=False, - nms_pre=1000, - nms_post=1000, - max_num=1000, - nms_thr=0.7, - min_bbox_size=0), - rcnn=dict( - score_thr=0.05, nms=dict(type='nms', iou_thr=0.5), max_per_img=100)) -# dataset settings -dataset_type = 'CocoDataset' -data_root = 'data/coco/' -img_norm_cfg = dict( - mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) -train_pipeline = [ - dict(type='LoadImageFromFile'), - dict(type='LoadAnnotations', with_bbox=True), - dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), - dict(type='RandomFlip', flip_ratio=0.5), - dict(type='Normalize', **img_norm_cfg), - dict(type='Pad', size_divisor=32), - dict(type='DefaultFormatBundle'), - dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']), -] -test_pipeline = [ - dict(type='LoadImageFromFile'), - dict( - type='MultiScaleFlipAug', - img_scale=(1333, 800), - flip=False, - transforms=[ - dict(type='Resize', keep_ratio=True), - dict(type='RandomFlip'), - dict(type='Normalize', **img_norm_cfg), - dict(type='Pad', size_divisor=32), - dict(type='ImageToTensor', keys=['img']), - dict(type='Collect', keys=['img']), - ]) -] -data = dict( - imgs_per_gpu=2, - workers_per_gpu=2, - train=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_train2017.json', - img_prefix=data_root + 'train2017/', - pipeline=train_pipeline), - val=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_val2017.json', - img_prefix=data_root + 'val2017/', - pipeline=test_pipeline), - test=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_val2017.json', - img_prefix=data_root + 'val2017/', - pipeline=test_pipeline)) -evaluation = dict(interval=1, metric='bbox') -# optimizer -optimizer = dict(type='SGD', lr=0.02, momentum=0.9, 
weight_decay=0.0001) -optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) -# learning policy -lr_config = dict( - policy='step', - warmup='linear', - warmup_iters=500, - warmup_ratio=1.0 / 3, - step=[8, 11]) -checkpoint_config = dict(interval=1) -# yapf:disable -log_config = dict( - interval=50, - hooks=[ - dict(type='TextLoggerHook'), - # dict(type='TensorboardLoggerHook') - ]) -# yapf:enable -# runtime settings -total_epochs = 12 -dist_params = dict(backend='nccl') -log_level = 'INFO' -work_dir = './work_dirs/cascade_rcnn_x101_32x4d_fpn_1x' -load_from = None -resume_from = None -workflow = [('train', 1)] diff --git a/configs/cascade_rcnn_x101_64x4d_fpn_1x.py b/configs/cascade_rcnn_x101_64x4d_fpn_1x.py deleted file mode 100644 index 1d2667a85bc773122b3a3464e87f0a1b4259a4ea..0000000000000000000000000000000000000000 --- a/configs/cascade_rcnn_x101_64x4d_fpn_1x.py +++ /dev/null @@ -1,237 +0,0 @@ -# model settings -model = dict( - type='CascadeRCNN', - num_stages=3, - pretrained='open-mmlab://resnext101_64x4d', - backbone=dict( - type='ResNeXt', - depth=101, - groups=64, - base_width=4, - num_stages=4, - out_indices=(0, 1, 2, 3), - frozen_stages=1, - norm_cfg=dict(type='BN', requires_grad=True), - style='pytorch'), - neck=dict( - type='FPN', - in_channels=[256, 512, 1024, 2048], - out_channels=256, - num_outs=5), - rpn_head=dict( - type='RPNHead', - in_channels=256, - feat_channels=256, - anchor_scales=[8], - anchor_ratios=[0.5, 1.0, 2.0], - anchor_strides=[4, 8, 16, 32, 64], - target_means=[.0, .0, .0, .0], - target_stds=[1.0, 1.0, 1.0, 1.0], - loss_cls=dict( - type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0), - loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0)), - bbox_roi_extractor=dict( - type='SingleRoIExtractor', - roi_layer=dict(type='RoIAlign', out_size=7, sample_num=2), - out_channels=256, - featmap_strides=[4, 8, 16, 32]), - bbox_head=[ - dict( - type='SharedFCBBoxHead', - num_fcs=2, - in_channels=256, - 
fc_out_channels=1024, - roi_feat_size=7, - num_classes=81, - target_means=[0., 0., 0., 0.], - target_stds=[0.1, 0.1, 0.2, 0.2], - reg_class_agnostic=True, - loss_cls=dict( - type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), - loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0)), - dict( - type='SharedFCBBoxHead', - num_fcs=2, - in_channels=256, - fc_out_channels=1024, - roi_feat_size=7, - num_classes=81, - target_means=[0., 0., 0., 0.], - target_stds=[0.05, 0.05, 0.1, 0.1], - reg_class_agnostic=True, - loss_cls=dict( - type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), - loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0)), - dict( - type='SharedFCBBoxHead', - num_fcs=2, - in_channels=256, - fc_out_channels=1024, - roi_feat_size=7, - num_classes=81, - target_means=[0., 0., 0., 0.], - target_stds=[0.033, 0.033, 0.067, 0.067], - reg_class_agnostic=True, - loss_cls=dict( - type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), - loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0)) - ]) -# model training and testing settings -train_cfg = dict( - rpn=dict( - assigner=dict( - type='MaxIoUAssigner', - pos_iou_thr=0.7, - neg_iou_thr=0.3, - min_pos_iou=0.3, - ignore_iof_thr=-1), - sampler=dict( - type='RandomSampler', - num=256, - pos_fraction=0.5, - neg_pos_ub=-1, - add_gt_as_proposals=False), - allowed_border=0, - pos_weight=-1, - debug=False), - rpn_proposal=dict( - nms_across_levels=False, - nms_pre=2000, - nms_post=2000, - max_num=2000, - nms_thr=0.7, - min_bbox_size=0), - rcnn=[ - dict( - assigner=dict( - type='MaxIoUAssigner', - pos_iou_thr=0.5, - neg_iou_thr=0.5, - min_pos_iou=0.5, - ignore_iof_thr=-1), - sampler=dict( - type='RandomSampler', - num=512, - pos_fraction=0.25, - neg_pos_ub=-1, - add_gt_as_proposals=True), - pos_weight=-1, - debug=False), - dict( - assigner=dict( - type='MaxIoUAssigner', - pos_iou_thr=0.6, - neg_iou_thr=0.6, - min_pos_iou=0.6, - ignore_iof_thr=-1), - sampler=dict( - 
type='RandomSampler', - num=512, - pos_fraction=0.25, - neg_pos_ub=-1, - add_gt_as_proposals=True), - pos_weight=-1, - debug=False), - dict( - assigner=dict( - type='MaxIoUAssigner', - pos_iou_thr=0.7, - neg_iou_thr=0.7, - min_pos_iou=0.7, - ignore_iof_thr=-1), - sampler=dict( - type='RandomSampler', - num=512, - pos_fraction=0.25, - neg_pos_ub=-1, - add_gt_as_proposals=True), - pos_weight=-1, - debug=False) - ], - stage_loss_weights=[1, 0.5, 0.25]) -test_cfg = dict( - rpn=dict( - nms_across_levels=False, - nms_pre=1000, - nms_post=1000, - max_num=1000, - nms_thr=0.7, - min_bbox_size=0), - rcnn=dict( - score_thr=0.05, nms=dict(type='nms', iou_thr=0.5), max_per_img=100)) -# dataset settings -dataset_type = 'CocoDataset' -data_root = 'data/coco/' -img_norm_cfg = dict( - mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) -train_pipeline = [ - dict(type='LoadImageFromFile'), - dict(type='LoadAnnotations', with_bbox=True), - dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), - dict(type='RandomFlip', flip_ratio=0.5), - dict(type='Normalize', **img_norm_cfg), - dict(type='Pad', size_divisor=32), - dict(type='DefaultFormatBundle'), - dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']), -] -test_pipeline = [ - dict(type='LoadImageFromFile'), - dict( - type='MultiScaleFlipAug', - img_scale=(1333, 800), - flip=False, - transforms=[ - dict(type='Resize', keep_ratio=True), - dict(type='RandomFlip'), - dict(type='Normalize', **img_norm_cfg), - dict(type='Pad', size_divisor=32), - dict(type='ImageToTensor', keys=['img']), - dict(type='Collect', keys=['img']), - ]) -] -data = dict( - imgs_per_gpu=2, - workers_per_gpu=2, - train=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_train2017.json', - img_prefix=data_root + 'train2017/', - pipeline=train_pipeline), - val=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_val2017.json', - img_prefix=data_root + 'val2017/', - pipeline=test_pipeline), - 
test=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_val2017.json', - img_prefix=data_root + 'val2017/', - pipeline=test_pipeline)) -evaluation = dict(interval=1, metric='bbox') -# optimizer -optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001) -optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) -# learning policy -lr_config = dict( - policy='step', - warmup='linear', - warmup_iters=500, - warmup_ratio=1.0 / 3, - step=[8, 11]) -checkpoint_config = dict(interval=1) -# yapf:disable -log_config = dict( - interval=50, - hooks=[ - dict(type='TextLoggerHook'), - # dict(type='TensorboardLoggerHook') - ]) -# yapf:enable -# runtime settings -total_epochs = 12 -dist_params = dict(backend='nccl') -log_level = 'INFO' -work_dir = './work_dirs/cascade_rcnn_x101_64x4d_fpn_1x' -load_from = None -resume_from = None -workflow = [('train', 1)] diff --git a/configs/cityscapes/faster_rcnn_r50_fpn_1x_cityscapes.py b/configs/cityscapes/faster_rcnn_r50_fpn_1x_cityscapes.py index 4fd17e302187f5d6524a16354c6114a4b84ac469..82a5ec2a906e9738812eb1af9afdbda332807173 100644 --- a/configs/cityscapes/faster_rcnn_r50_fpn_1x_cityscapes.py +++ b/configs/cityscapes/faster_rcnn_r50_fpn_1x_cityscapes.py @@ -1,40 +1,12 @@ -# model settings +_base_ = [ + '../_base_/models/faster_rcnn_r50_fpn.py', + '../_base_/datasets/cityscapes_detection.py', + '../_base_/default_runtime.py' +] model = dict( - type='FasterRCNN', pretrained=None, - backbone=dict( - type='ResNet', - depth=50, - num_stages=4, - out_indices=(0, 1, 2, 3), - frozen_stages=1, - norm_cfg=dict(type='BN', requires_grad=True), - style='pytorch'), - neck=dict( - type='FPN', - in_channels=[256, 512, 1024, 2048], - out_channels=256, - num_outs=5), - rpn_head=dict( - type='RPNHead', - in_channels=256, - feat_channels=256, - anchor_scales=[8], - anchor_ratios=[0.5, 1.0, 2.0], - anchor_strides=[4, 8, 16, 32, 64], - target_means=[.0, .0, .0, .0], - target_stds=[1.0, 1.0, 1.0, 1.0], - 
loss_cls=dict( - type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0), - loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0)), - bbox_roi_extractor=dict( - type='SingleRoIExtractor', - roi_layer=dict(type='RoIAlign', out_size=7, sample_num=2), - out_channels=256, - featmap_strides=[4, 8, 16, 32]), bbox_head=dict( - type='SharedFCBBoxHead', - num_fcs=2, + type='Shared2FCBBoxHead', in_channels=256, fc_out_channels=1024, roi_feat_size=7, @@ -45,115 +17,6 @@ model = dict( loss_cls=dict( type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0))) -# model training and testing settings -train_cfg = dict( - rpn=dict( - assigner=dict( - type='MaxIoUAssigner', - pos_iou_thr=0.7, - neg_iou_thr=0.3, - min_pos_iou=0.3, - ignore_iof_thr=-1), - sampler=dict( - type='RandomSampler', - num=256, - pos_fraction=0.5, - neg_pos_ub=-1, - add_gt_as_proposals=False), - allowed_border=0, - pos_weight=-1, - debug=False), - rpn_proposal=dict( - nms_across_levels=False, - nms_pre=2000, - nms_post=2000, - max_num=2000, - nms_thr=0.7, - min_bbox_size=0), - rcnn=dict( - assigner=dict( - type='MaxIoUAssigner', - pos_iou_thr=0.5, - neg_iou_thr=0.5, - min_pos_iou=0.5, - ignore_iof_thr=-1), - sampler=dict( - type='RandomSampler', - num=512, - pos_fraction=0.25, - neg_pos_ub=-1, - add_gt_as_proposals=True), - pos_weight=-1, - debug=False)) -test_cfg = dict( - rpn=dict( - nms_across_levels=False, - nms_pre=1000, - nms_post=1000, - max_num=1000, - nms_thr=0.7, - min_bbox_size=0), - rcnn=dict( - score_thr=0.05, nms=dict(type='nms', iou_thr=0.5), max_per_img=100) - # soft-nms is also supported for rcnn testing - # e.g., nms=dict(type='soft_nms', iou_thr=0.5, min_score=0.05) -) -# dataset settings -dataset_type = 'CityscapesDataset' -data_root = 'data/cityscapes/' -img_norm_cfg = dict( - mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) -train_pipeline = [ - dict(type='LoadImageFromFile'), - 
dict(type='LoadAnnotations', with_bbox=True), - dict( - type='Resize', img_scale=[(2048, 800), (2048, 1024)], keep_ratio=True), - dict(type='RandomFlip', flip_ratio=0.5), - dict(type='Normalize', **img_norm_cfg), - dict(type='Pad', size_divisor=32), - dict(type='DefaultFormatBundle'), - dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']), -] -test_pipeline = [ - dict(type='LoadImageFromFile'), - dict( - type='MultiScaleFlipAug', - img_scale=(2048, 1024), - flip=False, - transforms=[ - dict(type='Resize', keep_ratio=True), - dict(type='RandomFlip'), - dict(type='Normalize', **img_norm_cfg), - dict(type='Pad', size_divisor=32), - dict(type='ImageToTensor', keys=['img']), - dict(type='Collect', keys=['img']), - ]) -] -data = dict( - imgs_per_gpu=1, - workers_per_gpu=2, - train=dict( - type='RepeatDataset', - times=8, - dataset=dict( - type=dataset_type, - ann_file=data_root + - 'annotations/instancesonly_filtered_gtFine_train.json', - img_prefix=data_root + 'leftImg8bit/train/', - pipeline=train_pipeline)), - val=dict( - type=dataset_type, - ann_file=data_root + - 'annotations/instancesonly_filtered_gtFine_val.json', - img_prefix=data_root + 'leftImg8bit/val/', - pipeline=test_pipeline), - test=dict( - type=dataset_type, - ann_file=data_root + - 'annotations/instancesonly_filtered_gtFine_test.json', - img_prefix=data_root + 'leftImg8bit/test/', - pipeline=test_pipeline)) -evaluation = dict(interval=1, metric='bbox') # optimizer # lr is set for a batch size of 8 optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001) @@ -166,21 +29,8 @@ lr_config = dict( warmup_ratio=1.0 / 3, # [7] yields higher performance than [6] step=[7]) -checkpoint_config = dict(interval=1) -# yapf:disable -log_config = dict( - interval=100, - hooks=[ - dict(type='TextLoggerHook'), - # dict(type='TensorboardLoggerHook') - ]) -# yapf:enable -# runtime settings total_epochs = 8 # actual epoch = 8 * 8 = 64 -dist_params = dict(backend='nccl') -log_level = 'INFO' +log_config 
= dict(interval=100) work_dir = './work_dirs/cityscapes/faster_rcnn_r50_fpn_1x_cityscapes' # For better, more stable performance initialize from COCO load_from = 'https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/faster_rcnn_r50_fpn_2x_20181010-443129e1.pth' # noqa -resume_from = None -workflow = [('train', 1)] diff --git a/configs/cityscapes/mask_rcnn_r50_fpn_1x_cityscapes.py b/configs/cityscapes/mask_rcnn_r50_fpn_1x_cityscapes.py index e6481c7aa8ec5b1e6db0563310a628e8a3ccc731..39501a43e55cc81c55c7b5b59a6d46e7c05c0ad2 100644 --- a/configs/cityscapes/mask_rcnn_r50_fpn_1x_cityscapes.py +++ b/configs/cityscapes/mask_rcnn_r50_fpn_1x_cityscapes.py @@ -1,40 +1,11 @@ -# model settings +_base_ = [ + '../_base_/models/mask_rcnn_r50_fpn.py', + '../_base_/datasets/cityscapes_instance.py', '../_base_/default_runtime.py' +] model = dict( - type='MaskRCNN', pretrained=None, - backbone=dict( - type='ResNet', - depth=50, - num_stages=4, - out_indices=(0, 1, 2, 3), - frozen_stages=1, - norm_cfg=dict(type='BN', requires_grad=True), - style='pytorch'), - neck=dict( - type='FPN', - in_channels=[256, 512, 1024, 2048], - out_channels=256, - num_outs=5), - rpn_head=dict( - type='RPNHead', - in_channels=256, - feat_channels=256, - anchor_scales=[8], - anchor_ratios=[0.5, 1.0, 2.0], - anchor_strides=[4, 8, 16, 32, 64], - target_means=[.0, .0, .0, .0], - target_stds=[1.0, 1.0, 1.0, 1.0], - loss_cls=dict( - type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0), - loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0)), - bbox_roi_extractor=dict( - type='SingleRoIExtractor', - roi_layer=dict(type='RoIAlign', out_size=7, sample_num=2), - out_channels=256, - featmap_strides=[4, 8, 16, 32]), bbox_head=dict( - type='SharedFCBBoxHead', - num_fcs=2, + type='Shared2FCBBoxHead', in_channels=256, fc_out_channels=1024, roi_feat_size=7, @@ -45,11 +16,6 @@ model = dict( loss_cls=dict( type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), 
loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0)), - mask_roi_extractor=dict( - type='SingleRoIExtractor', - roi_layer=dict(type='RoIAlign', out_size=14, sample_num=2), - out_channels=256, - featmap_strides=[4, 8, 16, 32]), mask_head=dict( type='FCNMaskHead', num_convs=4, @@ -58,116 +24,6 @@ model = dict( num_classes=9, loss_mask=dict( type='CrossEntropyLoss', use_mask=True, loss_weight=1.0))) -# model training and testing settings -train_cfg = dict( - rpn=dict( - assigner=dict( - type='MaxIoUAssigner', - pos_iou_thr=0.7, - neg_iou_thr=0.3, - min_pos_iou=0.3, - ignore_iof_thr=-1), - sampler=dict( - type='RandomSampler', - num=256, - pos_fraction=0.5, - neg_pos_ub=-1, - add_gt_as_proposals=False), - allowed_border=0, - pos_weight=-1, - debug=False), - rpn_proposal=dict( - nms_across_levels=False, - nms_pre=2000, - nms_post=2000, - max_num=2000, - nms_thr=0.7, - min_bbox_size=0), - rcnn=dict( - assigner=dict( - type='MaxIoUAssigner', - pos_iou_thr=0.5, - neg_iou_thr=0.5, - min_pos_iou=0.5, - ignore_iof_thr=-1), - sampler=dict( - type='RandomSampler', - num=512, - pos_fraction=0.25, - neg_pos_ub=-1, - add_gt_as_proposals=True), - mask_size=28, - pos_weight=-1, - debug=False)) -test_cfg = dict( - rpn=dict( - nms_across_levels=False, - nms_pre=1000, - nms_post=1000, - max_num=1000, - nms_thr=0.7, - min_bbox_size=0), - rcnn=dict( - score_thr=0.05, - nms=dict(type='nms', iou_thr=0.5), - max_per_img=100, - mask_thr_binary=0.5)) -# dataset settings -dataset_type = 'CityscapesDataset' -data_root = 'data/cityscapes/' -img_norm_cfg = dict( - mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) -train_pipeline = [ - dict(type='LoadImageFromFile'), - dict(type='LoadAnnotations', with_bbox=True, with_mask=True), - dict( - type='Resize', img_scale=[(2048, 800), (2048, 1024)], keep_ratio=True), - dict(type='RandomFlip', flip_ratio=0.5), - dict(type='Normalize', **img_norm_cfg), - dict(type='Pad', size_divisor=32), - 
dict(type='DefaultFormatBundle'), - dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']), -] -test_pipeline = [ - dict(type='LoadImageFromFile'), - dict( - type='MultiScaleFlipAug', - img_scale=(2048, 1024), - flip=False, - transforms=[ - dict(type='Resize', keep_ratio=True), - dict(type='RandomFlip'), - dict(type='Normalize', **img_norm_cfg), - dict(type='Pad', size_divisor=32), - dict(type='ImageToTensor', keys=['img']), - dict(type='Collect', keys=['img']), - ]) -] -data = dict( - imgs_per_gpu=1, - workers_per_gpu=2, - train=dict( - type='RepeatDataset', - times=8, - dataset=dict( - type=dataset_type, - ann_file=data_root + - 'annotations/instancesonly_filtered_gtFine_train.json', - img_prefix=data_root + 'leftImg8bit/train/', - pipeline=train_pipeline)), - val=dict( - type=dataset_type, - ann_file=data_root + - 'annotations/instancesonly_filtered_gtFine_val.json', - img_prefix=data_root + 'leftImg8bit/val/', - pipeline=test_pipeline), - test=dict( - type=dataset_type, - ann_file=data_root + - 'annotations/instancesonly_filtered_gtFine_test.json', - img_prefix=data_root + 'leftImg8bit/test/', - pipeline=test_pipeline)) -evaluation = dict(interval=1, metric=['bbox', 'segm']) # optimizer # lr is set for a batch size of 8 optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001) @@ -180,21 +36,8 @@ lr_config = dict( warmup_ratio=1.0 / 3, # [7] yields higher performance than [6] step=[7]) -checkpoint_config = dict(interval=1) -# yapf:disable -log_config = dict( - interval=100, - hooks=[ - dict(type='TextLoggerHook'), - # dict(type='TensorboardLoggerHook') - ]) -# yapf:enable -# runtime settings total_epochs = 8 # actual epoch = 8 * 8 = 64 -dist_params = dict(backend='nccl') -log_level = 'INFO' +log_config = dict(interval=100) work_dir = './work_dirs/cityscapes/mask_rcnn_r50_fpn_1x_cityscapes' # For better, more stable performance initialize from COCO load_from = 
'https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/mask_rcnn_r50_fpn_2x_20181010-41d35c05.pth' # noqa -resume_from = None -workflow = [('train', 1)] diff --git a/configs/dcn/cascade_mask_rcnn_dconv_c3-c5_r50_fpn_1x.py b/configs/dcn/cascade_mask_rcnn_dconv_c3-c5_r50_fpn_1x.py deleted file mode 100644 index 7d82c6b2b989f4599fbe1b59793052dade1a8e1d..0000000000000000000000000000000000000000 --- a/configs/dcn/cascade_mask_rcnn_dconv_c3-c5_r50_fpn_1x.py +++ /dev/null @@ -1,256 +0,0 @@ -# model settings -model = dict( - type='CascadeRCNN', - num_stages=3, - pretrained='torchvision://resnet50', - backbone=dict( - type='ResNet', - depth=50, - num_stages=4, - out_indices=(0, 1, 2, 3), - frozen_stages=1, - norm_cfg=dict(type='BN', requires_grad=True), - style='pytorch', - dcn=dict(type='DCN', deformable_groups=1, fallback_on_stride=False), - stage_with_dcn=(False, True, True, True)), - neck=dict( - type='FPN', - in_channels=[256, 512, 1024, 2048], - out_channels=256, - num_outs=5), - rpn_head=dict( - type='RPNHead', - in_channels=256, - feat_channels=256, - anchor_scales=[8], - anchor_ratios=[0.5, 1.0, 2.0], - anchor_strides=[4, 8, 16, 32, 64], - target_means=[.0, .0, .0, .0], - target_stds=[1.0, 1.0, 1.0, 1.0], - loss_cls=dict( - type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0), - loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0)), - bbox_roi_extractor=dict( - type='SingleRoIExtractor', - roi_layer=dict(type='RoIAlign', out_size=7, sample_num=2), - out_channels=256, - featmap_strides=[4, 8, 16, 32]), - bbox_head=[ - dict( - type='SharedFCBBoxHead', - num_fcs=2, - in_channels=256, - fc_out_channels=1024, - roi_feat_size=7, - num_classes=81, - target_means=[0., 0., 0., 0.], - target_stds=[0.1, 0.1, 0.2, 0.2], - reg_class_agnostic=True, - loss_cls=dict( - type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), - loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0)), - dict( - type='SharedFCBBoxHead', - num_fcs=2, 
- in_channels=256, - fc_out_channels=1024, - roi_feat_size=7, - num_classes=81, - target_means=[0., 0., 0., 0.], - target_stds=[0.05, 0.05, 0.1, 0.1], - reg_class_agnostic=True, - loss_cls=dict( - type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), - loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0)), - dict( - type='SharedFCBBoxHead', - num_fcs=2, - in_channels=256, - fc_out_channels=1024, - roi_feat_size=7, - num_classes=81, - target_means=[0., 0., 0., 0.], - target_stds=[0.033, 0.033, 0.067, 0.067], - reg_class_agnostic=True, - loss_cls=dict( - type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), - loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0)) - ], - mask_roi_extractor=dict( - type='SingleRoIExtractor', - roi_layer=dict(type='RoIAlign', out_size=14, sample_num=2), - out_channels=256, - featmap_strides=[4, 8, 16, 32]), - mask_head=dict( - type='FCNMaskHead', - num_convs=4, - in_channels=256, - conv_out_channels=256, - num_classes=81, - loss_mask=dict( - type='CrossEntropyLoss', use_mask=True, loss_weight=1.0))) -# model training and testing settings -train_cfg = dict( - rpn=dict( - assigner=dict( - type='MaxIoUAssigner', - pos_iou_thr=0.7, - neg_iou_thr=0.3, - min_pos_iou=0.3, - ignore_iof_thr=-1), - sampler=dict( - type='RandomSampler', - num=256, - pos_fraction=0.5, - neg_pos_ub=-1, - add_gt_as_proposals=False), - allowed_border=0, - pos_weight=-1, - debug=False), - rpn_proposal=dict( - nms_across_levels=False, - nms_pre=2000, - nms_post=2000, - max_num=2000, - nms_thr=0.7, - min_bbox_size=0), - rcnn=[ - dict( - assigner=dict( - type='MaxIoUAssigner', - pos_iou_thr=0.5, - neg_iou_thr=0.5, - min_pos_iou=0.5, - ignore_iof_thr=-1), - sampler=dict( - type='RandomSampler', - num=512, - pos_fraction=0.25, - neg_pos_ub=-1, - add_gt_as_proposals=True), - mask_size=28, - pos_weight=-1, - debug=False), - dict( - assigner=dict( - type='MaxIoUAssigner', - pos_iou_thr=0.6, - neg_iou_thr=0.6, - min_pos_iou=0.6, - 
ignore_iof_thr=-1), - sampler=dict( - type='RandomSampler', - num=512, - pos_fraction=0.25, - neg_pos_ub=-1, - add_gt_as_proposals=True), - mask_size=28, - pos_weight=-1, - debug=False), - dict( - assigner=dict( - type='MaxIoUAssigner', - pos_iou_thr=0.7, - neg_iou_thr=0.7, - min_pos_iou=0.7, - ignore_iof_thr=-1), - sampler=dict( - type='RandomSampler', - num=512, - pos_fraction=0.25, - neg_pos_ub=-1, - add_gt_as_proposals=True), - mask_size=28, - pos_weight=-1, - debug=False) - ], - stage_loss_weights=[1, 0.5, 0.25]) -test_cfg = dict( - rpn=dict( - nms_across_levels=False, - nms_pre=1000, - nms_post=1000, - max_num=1000, - nms_thr=0.7, - min_bbox_size=0), - rcnn=dict( - score_thr=0.05, - nms=dict(type='nms', iou_thr=0.5), - max_per_img=100, - mask_thr_binary=0.5)) -# dataset settings -dataset_type = 'CocoDataset' -data_root = 'data/coco/' -img_norm_cfg = dict( - mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) -train_pipeline = [ - dict(type='LoadImageFromFile'), - dict(type='LoadAnnotations', with_bbox=True, with_mask=True), - dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), - dict(type='RandomFlip', flip_ratio=0.5), - dict(type='Normalize', **img_norm_cfg), - dict(type='Pad', size_divisor=32), - dict(type='DefaultFormatBundle'), - dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']), -] -test_pipeline = [ - dict(type='LoadImageFromFile'), - dict( - type='MultiScaleFlipAug', - img_scale=(1333, 800), - flip=False, - transforms=[ - dict(type='Resize', keep_ratio=True), - dict(type='RandomFlip'), - dict(type='Normalize', **img_norm_cfg), - dict(type='Pad', size_divisor=32), - dict(type='ImageToTensor', keys=['img']), - dict(type='Collect', keys=['img']), - ]) -] -data = dict( - imgs_per_gpu=2, - workers_per_gpu=2, - train=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_train2017.json', - img_prefix=data_root + 'train2017/', - pipeline=train_pipeline), - val=dict( - type=dataset_type, - 
ann_file=data_root + 'annotations/instances_val2017.json', - img_prefix=data_root + 'val2017/', - pipeline=test_pipeline), - test=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_val2017.json', - img_prefix=data_root + 'val2017/', - pipeline=test_pipeline)) -evaluation = dict(interval=1, metric=['bbox', 'segm']) -# optimizer -optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001) -optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) -# learning policy -lr_config = dict( - policy='step', - warmup='linear', - warmup_iters=500, - warmup_ratio=1.0 / 3, - step=[8, 11]) -checkpoint_config = dict(interval=1) -# yapf:disable -log_config = dict( - interval=50, - hooks=[ - dict(type='TextLoggerHook'), - # dict(type='TensorboardLoggerHook') - ]) -# yapf:enable -# runtime settings -total_epochs = 12 -dist_params = dict(backend='nccl') -log_level = 'INFO' -work_dir = './work_dirs/cascade_mask_rcnn_dconv_c3-c5_r50_fpn_1x' -load_from = None -resume_from = None -workflow = [('train', 1)] diff --git a/configs/dcn/cascade_mask_rcnn_r50_fpn_dconv_c3-c5_1x_coco.py b/configs/dcn/cascade_mask_rcnn_r50_fpn_dconv_c3-c5_1x_coco.py new file mode 100644 index 0000000000000000000000000000000000000000..04346844f2cf0d788a575b36979a3ed83b9d0333 --- /dev/null +++ b/configs/dcn/cascade_mask_rcnn_r50_fpn_dconv_c3-c5_1x_coco.py @@ -0,0 +1,6 @@ +_base_ = '../cascade_rcnn/cascade_mask_rcnn_r50_fpn_1x_coco.py' +model = dict( + backbone=dict( + dcn=dict(type='DCN', deformable_groups=1, fallback_on_stride=False), + stage_with_dcn=(False, True, True, True))) +work_dir = './work_dirs/cascade_mask_rcnn_dconv_c3-c5_r50_fpn_1x' diff --git a/configs/dcn/cascade_rcnn_dconv_c3-c5_r50_fpn_1x.py b/configs/dcn/cascade_rcnn_dconv_c3-c5_r50_fpn_1x.py deleted file mode 100644 index 9ac1f985fa1e64407ae0883d9752d1ca3fc2f3bb..0000000000000000000000000000000000000000 --- a/configs/dcn/cascade_rcnn_dconv_c3-c5_r50_fpn_1x.py +++ /dev/null @@ -1,237 +0,0 @@ -# model 
settings -model = dict( - type='CascadeRCNN', - num_stages=3, - pretrained='torchvision://resnet50', - backbone=dict( - type='ResNet', - depth=50, - num_stages=4, - out_indices=(0, 1, 2, 3), - frozen_stages=1, - norm_cfg=dict(type='BN', requires_grad=True), - style='pytorch', - dcn=dict(type='DCN', deformable_groups=1, fallback_on_stride=False), - stage_with_dcn=(False, True, True, True)), - neck=dict( - type='FPN', - in_channels=[256, 512, 1024, 2048], - out_channels=256, - num_outs=5), - rpn_head=dict( - type='RPNHead', - in_channels=256, - feat_channels=256, - anchor_scales=[8], - anchor_ratios=[0.5, 1.0, 2.0], - anchor_strides=[4, 8, 16, 32, 64], - target_means=[.0, .0, .0, .0], - target_stds=[1.0, 1.0, 1.0, 1.0], - loss_cls=dict( - type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0), - loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0)), - bbox_roi_extractor=dict( - type='SingleRoIExtractor', - roi_layer=dict(type='RoIAlign', out_size=7, sample_num=2), - out_channels=256, - featmap_strides=[4, 8, 16, 32]), - bbox_head=[ - dict( - type='SharedFCBBoxHead', - num_fcs=2, - in_channels=256, - fc_out_channels=1024, - roi_feat_size=7, - num_classes=81, - target_means=[0., 0., 0., 0.], - target_stds=[0.1, 0.1, 0.2, 0.2], - reg_class_agnostic=True, - loss_cls=dict( - type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), - loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0)), - dict( - type='SharedFCBBoxHead', - num_fcs=2, - in_channels=256, - fc_out_channels=1024, - roi_feat_size=7, - num_classes=81, - target_means=[0., 0., 0., 0.], - target_stds=[0.05, 0.05, 0.1, 0.1], - reg_class_agnostic=True, - loss_cls=dict( - type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), - loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0)), - dict( - type='SharedFCBBoxHead', - num_fcs=2, - in_channels=256, - fc_out_channels=1024, - roi_feat_size=7, - num_classes=81, - target_means=[0., 0., 0., 0.], - target_stds=[0.033, 
0.033, 0.067, 0.067], - reg_class_agnostic=True, - loss_cls=dict( - type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), - loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0)) - ]) -# model training and testing settings -train_cfg = dict( - rpn=dict( - assigner=dict( - type='MaxIoUAssigner', - pos_iou_thr=0.7, - neg_iou_thr=0.3, - min_pos_iou=0.3, - ignore_iof_thr=-1), - sampler=dict( - type='RandomSampler', - num=256, - pos_fraction=0.5, - neg_pos_ub=-1, - add_gt_as_proposals=False), - allowed_border=0, - pos_weight=-1, - debug=False), - rpn_proposal=dict( - nms_across_levels=False, - nms_pre=2000, - nms_post=2000, - max_num=2000, - nms_thr=0.7, - min_bbox_size=0), - rcnn=[ - dict( - assigner=dict( - type='MaxIoUAssigner', - pos_iou_thr=0.5, - neg_iou_thr=0.5, - min_pos_iou=0.5, - ignore_iof_thr=-1), - sampler=dict( - type='RandomSampler', - num=512, - pos_fraction=0.25, - neg_pos_ub=-1, - add_gt_as_proposals=True), - pos_weight=-1, - debug=False), - dict( - assigner=dict( - type='MaxIoUAssigner', - pos_iou_thr=0.6, - neg_iou_thr=0.6, - min_pos_iou=0.6, - ignore_iof_thr=-1), - sampler=dict( - type='RandomSampler', - num=512, - pos_fraction=0.25, - neg_pos_ub=-1, - add_gt_as_proposals=True), - pos_weight=-1, - debug=False), - dict( - assigner=dict( - type='MaxIoUAssigner', - pos_iou_thr=0.7, - neg_iou_thr=0.7, - min_pos_iou=0.7, - ignore_iof_thr=-1), - sampler=dict( - type='RandomSampler', - num=512, - pos_fraction=0.25, - neg_pos_ub=-1, - add_gt_as_proposals=True), - pos_weight=-1, - debug=False) - ], - stage_loss_weights=[1, 0.5, 0.25]) -test_cfg = dict( - rpn=dict( - nms_across_levels=False, - nms_pre=1000, - nms_post=1000, - max_num=1000, - nms_thr=0.7, - min_bbox_size=0), - rcnn=dict( - score_thr=0.05, nms=dict(type='nms', iou_thr=0.5), max_per_img=100)) -# dataset settings -dataset_type = 'CocoDataset' -data_root = 'data/coco/' -img_norm_cfg = dict( - mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) -train_pipeline 
= [ - dict(type='LoadImageFromFile'), - dict(type='LoadAnnotations', with_bbox=True), - dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), - dict(type='RandomFlip', flip_ratio=0.5), - dict(type='Normalize', **img_norm_cfg), - dict(type='Pad', size_divisor=32), - dict(type='DefaultFormatBundle'), - dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']), -] -test_pipeline = [ - dict(type='LoadImageFromFile'), - dict( - type='MultiScaleFlipAug', - img_scale=(1333, 800), - flip=False, - transforms=[ - dict(type='Resize', keep_ratio=True), - dict(type='RandomFlip'), - dict(type='Normalize', **img_norm_cfg), - dict(type='Pad', size_divisor=32), - dict(type='ImageToTensor', keys=['img']), - dict(type='Collect', keys=['img']), - ]) -] -data = dict( - imgs_per_gpu=2, - workers_per_gpu=2, - train=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_train2017.json', - img_prefix=data_root + 'train2017/', - pipeline=train_pipeline), - val=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_val2017.json', - img_prefix=data_root + 'val2017/', - pipeline=test_pipeline), - test=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_val2017.json', - img_prefix=data_root + 'val2017/', - pipeline=test_pipeline)) -evaluation = dict(interval=1, metric='bbox') -# optimizer -optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001) -optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) -# learning policy -lr_config = dict( - policy='step', - warmup='linear', - warmup_iters=500, - warmup_ratio=1.0 / 3, - step=[8, 11]) -checkpoint_config = dict(interval=1) -# yapf:disable -log_config = dict( - interval=50, - hooks=[ - dict(type='TextLoggerHook'), - # dict(type='TensorboardLoggerHook') - ]) -# yapf:enable -# runtime settings -total_epochs = 12 -dist_params = dict(backend='nccl') -log_level = 'INFO' -work_dir = './work_dirs/cascade_rcnn_dconv_c3-c5_r50_fpn_1x' -load_from = None -resume_from 
= None -workflow = [('train', 1)] diff --git a/configs/dcn/cascade_rcnn_r50_fpn_dconv_c3-c5_1x_coco.py b/configs/dcn/cascade_rcnn_r50_fpn_dconv_c3-c5_1x_coco.py new file mode 100644 index 0000000000000000000000000000000000000000..6337f08efb70f2a98d23de140e072c03b1da781b --- /dev/null +++ b/configs/dcn/cascade_rcnn_r50_fpn_dconv_c3-c5_1x_coco.py @@ -0,0 +1,6 @@ +_base_ = '../cascade_rcnn/cascade_rcnn_r50_fpn_1x_coco.py' +model = dict( + backbone=dict( + dcn=dict(type='DCN', deformable_groups=1, fallback_on_stride=False), + stage_with_dcn=(False, True, True, True))) +work_dir = './work_dirs/cascade_rcnn_dconv_c3-c5_r50_fpn_1x' diff --git a/configs/dcn/faster_rcnn_dconv_c3-c5_r50_fpn_1x.py b/configs/dcn/faster_rcnn_dconv_c3-c5_r50_fpn_1x.py deleted file mode 100644 index b252251f435cffe7de031af2d4ebe2f614461fee..0000000000000000000000000000000000000000 --- a/configs/dcn/faster_rcnn_dconv_c3-c5_r50_fpn_1x.py +++ /dev/null @@ -1,178 +0,0 @@ -# model settings -model = dict( - type='FasterRCNN', - pretrained='torchvision://resnet50', - backbone=dict( - type='ResNet', - depth=50, - num_stages=4, - out_indices=(0, 1, 2, 3), - frozen_stages=1, - norm_cfg=dict(type='BN', requires_grad=True), - style='pytorch', - dcn=dict(type='DCN', deformable_groups=1, fallback_on_stride=False), - stage_with_dcn=(False, True, True, True)), - neck=dict( - type='FPN', - in_channels=[256, 512, 1024, 2048], - out_channels=256, - num_outs=5), - rpn_head=dict( - type='RPNHead', - in_channels=256, - feat_channels=256, - anchor_scales=[8], - anchor_ratios=[0.5, 1.0, 2.0], - anchor_strides=[4, 8, 16, 32, 64], - target_means=[.0, .0, .0, .0], - target_stds=[1.0, 1.0, 1.0, 1.0], - loss_cls=dict( - type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0), - loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0)), - bbox_roi_extractor=dict( - type='SingleRoIExtractor', - roi_layer=dict(type='RoIAlign', out_size=7, sample_num=2), - out_channels=256, - featmap_strides=[4, 8, 16, 32]), - 
bbox_head=dict( - type='SharedFCBBoxHead', - num_fcs=2, - in_channels=256, - fc_out_channels=1024, - roi_feat_size=7, - num_classes=81, - target_means=[0., 0., 0., 0.], - target_stds=[0.1, 0.1, 0.2, 0.2], - reg_class_agnostic=False, - loss_cls=dict( - type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), - loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0))) -# model training and testing settings -train_cfg = dict( - rpn=dict( - assigner=dict( - type='MaxIoUAssigner', - pos_iou_thr=0.7, - neg_iou_thr=0.3, - min_pos_iou=0.3, - ignore_iof_thr=-1), - sampler=dict( - type='RandomSampler', - num=256, - pos_fraction=0.5, - neg_pos_ub=-1, - add_gt_as_proposals=False), - allowed_border=0, - pos_weight=-1, - debug=False), - rpn_proposal=dict( - nms_across_levels=False, - nms_pre=2000, - nms_post=2000, - max_num=2000, - nms_thr=0.7, - min_bbox_size=0), - rcnn=dict( - assigner=dict( - type='MaxIoUAssigner', - pos_iou_thr=0.5, - neg_iou_thr=0.5, - min_pos_iou=0.5, - ignore_iof_thr=-1), - sampler=dict( - type='RandomSampler', - num=512, - pos_fraction=0.25, - neg_pos_ub=-1, - add_gt_as_proposals=True), - pos_weight=-1, - debug=False)) -test_cfg = dict( - rpn=dict( - nms_across_levels=False, - nms_pre=1000, - nms_post=1000, - max_num=1000, - nms_thr=0.7, - min_bbox_size=0), - rcnn=dict( - score_thr=0.05, nms=dict(type='nms', iou_thr=0.5), max_per_img=100) - # soft-nms is also supported for rcnn testing - # e.g., nms=dict(type='soft_nms', iou_thr=0.5, min_score=0.05) -) -# dataset settings -dataset_type = 'CocoDataset' -data_root = 'data/coco/' -img_norm_cfg = dict( - mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) -train_pipeline = [ - dict(type='LoadImageFromFile'), - dict(type='LoadAnnotations', with_bbox=True), - dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), - dict(type='RandomFlip', flip_ratio=0.5), - dict(type='Normalize', **img_norm_cfg), - dict(type='Pad', size_divisor=32), - dict(type='DefaultFormatBundle'), - 
dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']), -] -test_pipeline = [ - dict(type='LoadImageFromFile'), - dict( - type='MultiScaleFlipAug', - img_scale=(1333, 800), - flip=False, - transforms=[ - dict(type='Resize', keep_ratio=True), - dict(type='RandomFlip'), - dict(type='Normalize', **img_norm_cfg), - dict(type='Pad', size_divisor=32), - dict(type='ImageToTensor', keys=['img']), - dict(type='Collect', keys=['img']), - ]) -] -data = dict( - imgs_per_gpu=2, - workers_per_gpu=2, - train=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_train2017.json', - img_prefix=data_root + 'train2017/', - pipeline=train_pipeline), - val=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_val2017.json', - img_prefix=data_root + 'val2017/', - pipeline=test_pipeline), - test=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_val2017.json', - img_prefix=data_root + 'val2017/', - pipeline=test_pipeline)) -evaluation = dict(interval=1, metric='bbox') -# optimizer -optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001) -optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) -# learning policy -lr_config = dict( - policy='step', - warmup='linear', - warmup_iters=500, - warmup_ratio=1.0 / 3, - step=[8, 11]) -checkpoint_config = dict(interval=1) -# yapf:disable -log_config = dict( - interval=50, - hooks=[ - dict(type='TextLoggerHook'), - # dict(type='TensorboardLoggerHook') - ]) -# yapf:enable -# runtime settings -total_epochs = 12 -dist_params = dict(backend='nccl') -log_level = 'INFO' -work_dir = './work_dirs/faster_rcnn_dconv_c3-c5_r50_fpn_1x' -load_from = None -resume_from = None -workflow = [('train', 1)] diff --git a/configs/dcn/faster_rcnn_dconv_c3-c5_x101_32x4d_fpn_1x.py b/configs/dcn/faster_rcnn_dconv_c3-c5_x101_32x4d_fpn_1x.py deleted file mode 100644 index 53a07c3345ce9383559c92736ea7d7b776abcb97..0000000000000000000000000000000000000000 --- 
a/configs/dcn/faster_rcnn_dconv_c3-c5_x101_32x4d_fpn_1x.py +++ /dev/null @@ -1,180 +0,0 @@ -# model settings -model = dict( - type='FasterRCNN', - pretrained='open-mmlab://resnext101_32x4d', - backbone=dict( - type='ResNeXt', - depth=101, - groups=32, - base_width=4, - num_stages=4, - out_indices=(0, 1, 2, 3), - frozen_stages=1, - norm_cfg=dict(type='BN', requires_grad=True), - style='pytorch', - dcn=dict(type='DCN', deformable_groups=1, fallback_on_stride=False), - stage_with_dcn=(False, True, True, True)), - neck=dict( - type='FPN', - in_channels=[256, 512, 1024, 2048], - out_channels=256, - num_outs=5), - rpn_head=dict( - type='RPNHead', - in_channels=256, - feat_channels=256, - anchor_scales=[8], - anchor_ratios=[0.5, 1.0, 2.0], - anchor_strides=[4, 8, 16, 32, 64], - target_means=[.0, .0, .0, .0], - target_stds=[1.0, 1.0, 1.0, 1.0], - loss_cls=dict( - type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0), - loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0)), - bbox_roi_extractor=dict( - type='SingleRoIExtractor', - roi_layer=dict(type='RoIAlign', out_size=7, sample_num=2), - out_channels=256, - featmap_strides=[4, 8, 16, 32]), - bbox_head=dict( - type='SharedFCBBoxHead', - num_fcs=2, - in_channels=256, - fc_out_channels=1024, - roi_feat_size=7, - num_classes=81, - target_means=[0., 0., 0., 0.], - target_stds=[0.1, 0.1, 0.2, 0.2], - reg_class_agnostic=False, - loss_cls=dict( - type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), - loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0))) -# model training and testing settings -train_cfg = dict( - rpn=dict( - assigner=dict( - type='MaxIoUAssigner', - pos_iou_thr=0.7, - neg_iou_thr=0.3, - min_pos_iou=0.3, - ignore_iof_thr=-1), - sampler=dict( - type='RandomSampler', - num=256, - pos_fraction=0.5, - neg_pos_ub=-1, - add_gt_as_proposals=False), - allowed_border=0, - pos_weight=-1, - debug=False), - rpn_proposal=dict( - nms_across_levels=False, - nms_pre=2000, - nms_post=2000, 
- max_num=2000, - nms_thr=0.7, - min_bbox_size=0), - rcnn=dict( - assigner=dict( - type='MaxIoUAssigner', - pos_iou_thr=0.5, - neg_iou_thr=0.5, - min_pos_iou=0.5, - ignore_iof_thr=-1), - sampler=dict( - type='RandomSampler', - num=512, - pos_fraction=0.25, - neg_pos_ub=-1, - add_gt_as_proposals=True), - pos_weight=-1, - debug=False)) -test_cfg = dict( - rpn=dict( - nms_across_levels=False, - nms_pre=1000, - nms_post=1000, - max_num=1000, - nms_thr=0.7, - min_bbox_size=0), - rcnn=dict( - score_thr=0.05, nms=dict(type='nms', iou_thr=0.5), max_per_img=100) - # soft-nms is also supported for rcnn testing - # e.g., nms=dict(type='soft_nms', iou_thr=0.5, min_score=0.05) -) -# dataset settings -dataset_type = 'CocoDataset' -data_root = 'data/coco/' -img_norm_cfg = dict( - mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) -train_pipeline = [ - dict(type='LoadImageFromFile'), - dict(type='LoadAnnotations', with_bbox=True), - dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), - dict(type='RandomFlip', flip_ratio=0.5), - dict(type='Normalize', **img_norm_cfg), - dict(type='Pad', size_divisor=32), - dict(type='DefaultFormatBundle'), - dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']), -] -test_pipeline = [ - dict(type='LoadImageFromFile'), - dict( - type='MultiScaleFlipAug', - img_scale=(1333, 800), - flip=False, - transforms=[ - dict(type='Resize', keep_ratio=True), - dict(type='RandomFlip'), - dict(type='Normalize', **img_norm_cfg), - dict(type='Pad', size_divisor=32), - dict(type='ImageToTensor', keys=['img']), - dict(type='Collect', keys=['img']), - ]) -] -data = dict( - imgs_per_gpu=2, - workers_per_gpu=2, - train=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_train2017.json', - img_prefix=data_root + 'train2017/', - pipeline=train_pipeline), - val=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_val2017.json', - img_prefix=data_root + 'val2017/', - pipeline=test_pipeline), - 
test=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_val2017.json', - img_prefix=data_root + 'val2017/', - pipeline=test_pipeline)) -evaluation = dict(interval=1, metric='bbox') -# optimizer -optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001) -optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) -# learning policy -lr_config = dict( - policy='step', - warmup='linear', - warmup_iters=500, - warmup_ratio=1.0 / 3, - step=[8, 11]) -checkpoint_config = dict(interval=1) -# yapf:disable -log_config = dict( - interval=50, - hooks=[ - dict(type='TextLoggerHook'), - # dict(type='TensorboardLoggerHook') - ]) -# yapf:enable -# runtime settings -total_epochs = 12 -dist_params = dict(backend='nccl') -log_level = 'INFO' -work_dir = './work_dirs/faster_rcnn_dconv_c3-c5_x101_32x4d_fpn_1x' -load_from = None -resume_from = None -workflow = [('train', 1)] diff --git a/configs/dcn/faster_rcnn_dpool_r50_fpn_1x.py b/configs/dcn/faster_rcnn_dpool_r50_fpn_1x.py deleted file mode 100644 index 124ba04653f2b0c5abf6dc6c73c7ad55a6b488f0..0000000000000000000000000000000000000000 --- a/configs/dcn/faster_rcnn_dpool_r50_fpn_1x.py +++ /dev/null @@ -1,182 +0,0 @@ -# model settings -model = dict( - type='FasterRCNN', - pretrained='torchvision://resnet50', - backbone=dict( - type='ResNet', - depth=50, - num_stages=4, - out_indices=(0, 1, 2, 3), - frozen_stages=1, - norm_cfg=dict(type='BN', requires_grad=True), - style='pytorch'), - neck=dict( - type='FPN', - in_channels=[256, 512, 1024, 2048], - out_channels=256, - num_outs=5), - rpn_head=dict( - type='RPNHead', - in_channels=256, - feat_channels=256, - anchor_scales=[8], - anchor_ratios=[0.5, 1.0, 2.0], - anchor_strides=[4, 8, 16, 32, 64], - target_means=[.0, .0, .0, .0], - target_stds=[1.0, 1.0, 1.0, 1.0], - loss_cls=dict( - type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0), - loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0)), - bbox_roi_extractor=dict( - 
type='SingleRoIExtractor', - roi_layer=dict( - type='DeformRoIPoolingPack', - out_size=7, - out_channels=256, - no_trans=False, - group_size=1, - trans_std=0.1), - out_channels=256, - featmap_strides=[4, 8, 16, 32]), - bbox_head=dict( - type='SharedFCBBoxHead', - num_fcs=2, - in_channels=256, - fc_out_channels=1024, - roi_feat_size=7, - num_classes=81, - target_means=[0., 0., 0., 0.], - target_stds=[0.1, 0.1, 0.2, 0.2], - reg_class_agnostic=False, - loss_cls=dict( - type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), - loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0))) -# model training and testing settings -train_cfg = dict( - rpn=dict( - assigner=dict( - type='MaxIoUAssigner', - pos_iou_thr=0.7, - neg_iou_thr=0.3, - min_pos_iou=0.3, - ignore_iof_thr=-1), - sampler=dict( - type='RandomSampler', - num=256, - pos_fraction=0.5, - neg_pos_ub=-1, - add_gt_as_proposals=False), - allowed_border=0, - pos_weight=-1, - debug=False), - rpn_proposal=dict( - nms_across_levels=False, - nms_pre=2000, - nms_post=2000, - max_num=2000, - nms_thr=0.7, - min_bbox_size=0), - rcnn=dict( - assigner=dict( - type='MaxIoUAssigner', - pos_iou_thr=0.5, - neg_iou_thr=0.5, - min_pos_iou=0.5, - ignore_iof_thr=-1), - sampler=dict( - type='RandomSampler', - num=512, - pos_fraction=0.25, - neg_pos_ub=-1, - add_gt_as_proposals=True), - pos_weight=-1, - debug=False)) -test_cfg = dict( - rpn=dict( - nms_across_levels=False, - nms_pre=1000, - nms_post=1000, - max_num=1000, - nms_thr=0.7, - min_bbox_size=0), - rcnn=dict( - score_thr=0.05, nms=dict(type='nms', iou_thr=0.5), max_per_img=100) - # soft-nms is also supported for rcnn testing - # e.g., nms=dict(type='soft_nms', iou_thr=0.5, min_score=0.05) -) -# dataset settings -dataset_type = 'CocoDataset' -data_root = 'data/coco/' -img_norm_cfg = dict( - mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) -train_pipeline = [ - dict(type='LoadImageFromFile'), - dict(type='LoadAnnotations', with_bbox=True), - 
dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), - dict(type='RandomFlip', flip_ratio=0.5), - dict(type='Normalize', **img_norm_cfg), - dict(type='Pad', size_divisor=32), - dict(type='DefaultFormatBundle'), - dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']), -] -test_pipeline = [ - dict(type='LoadImageFromFile'), - dict( - type='MultiScaleFlipAug', - img_scale=(1333, 800), - flip=False, - transforms=[ - dict(type='Resize', keep_ratio=True), - dict(type='RandomFlip'), - dict(type='Normalize', **img_norm_cfg), - dict(type='Pad', size_divisor=32), - dict(type='ImageToTensor', keys=['img']), - dict(type='Collect', keys=['img']), - ]) -] -data = dict( - imgs_per_gpu=2, - workers_per_gpu=2, - train=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_train2017.json', - img_prefix=data_root + 'train2017/', - pipeline=train_pipeline), - val=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_val2017.json', - img_prefix=data_root + 'val2017/', - pipeline=test_pipeline), - test=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_val2017.json', - img_prefix=data_root + 'val2017/', - pipeline=test_pipeline)) -evaluation = dict(interval=1, metric='bbox') -# optimizer -optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001) -optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) -# learning policy -lr_config = dict( - policy='step', - warmup='linear', - warmup_iters=500, - warmup_ratio=1.0 / 3, - step=[8, 11]) -checkpoint_config = dict(interval=1) -# yapf:disable -log_config = dict( - interval=50, - hooks=[ - dict(type='TextLoggerHook'), - # dict(type='TensorboardLoggerHook') - ]) -# yapf:enable -# runtime settings -total_epochs = 12 -dist_params = dict(backend='nccl') -log_level = 'INFO' -work_dir = './work_dirs/faster_rcnn_dpool_r50_fpn_1x' -load_from = None -resume_from = None -workflow = [('train', 1)] diff --git 
a/configs/dcn/faster_rcnn_mdconv_c3-c5_group4_r50_fpn_1x.py b/configs/dcn/faster_rcnn_mdconv_c3-c5_group4_r50_fpn_1x.py deleted file mode 100644 index e1d2cddcb142ab3f5792041e233a951b1352da90..0000000000000000000000000000000000000000 --- a/configs/dcn/faster_rcnn_mdconv_c3-c5_group4_r50_fpn_1x.py +++ /dev/null @@ -1,178 +0,0 @@ -# model settings -model = dict( - type='FasterRCNN', - pretrained='torchvision://resnet50', - backbone=dict( - type='ResNet', - depth=50, - num_stages=4, - out_indices=(0, 1, 2, 3), - frozen_stages=1, - norm_cfg=dict(type='BN', requires_grad=True), - style='pytorch', - dcn=dict(type='DCNv2', deformable_groups=4, fallback_on_stride=False), - stage_with_dcn=(False, True, True, True)), - neck=dict( - type='FPN', - in_channels=[256, 512, 1024, 2048], - out_channels=256, - num_outs=5), - rpn_head=dict( - type='RPNHead', - in_channels=256, - feat_channels=256, - anchor_scales=[8], - anchor_ratios=[0.5, 1.0, 2.0], - anchor_strides=[4, 8, 16, 32, 64], - target_means=[.0, .0, .0, .0], - target_stds=[1.0, 1.0, 1.0, 1.0], - loss_cls=dict( - type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0), - loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0)), - bbox_roi_extractor=dict( - type='SingleRoIExtractor', - roi_layer=dict(type='RoIAlign', out_size=7, sample_num=2), - out_channels=256, - featmap_strides=[4, 8, 16, 32]), - bbox_head=dict( - type='SharedFCBBoxHead', - num_fcs=2, - in_channels=256, - fc_out_channels=1024, - roi_feat_size=7, - num_classes=81, - target_means=[0., 0., 0., 0.], - target_stds=[0.1, 0.1, 0.2, 0.2], - reg_class_agnostic=False, - loss_cls=dict( - type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), - loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0))) -# model training and testing settings -train_cfg = dict( - rpn=dict( - assigner=dict( - type='MaxIoUAssigner', - pos_iou_thr=0.7, - neg_iou_thr=0.3, - min_pos_iou=0.3, - ignore_iof_thr=-1), - sampler=dict( - type='RandomSampler', - 
num=256, - pos_fraction=0.5, - neg_pos_ub=-1, - add_gt_as_proposals=False), - allowed_border=0, - pos_weight=-1, - debug=False), - rpn_proposal=dict( - nms_across_levels=False, - nms_pre=2000, - nms_post=2000, - max_num=2000, - nms_thr=0.7, - min_bbox_size=0), - rcnn=dict( - assigner=dict( - type='MaxIoUAssigner', - pos_iou_thr=0.5, - neg_iou_thr=0.5, - min_pos_iou=0.5, - ignore_iof_thr=-1), - sampler=dict( - type='RandomSampler', - num=512, - pos_fraction=0.25, - neg_pos_ub=-1, - add_gt_as_proposals=True), - pos_weight=-1, - debug=False)) -test_cfg = dict( - rpn=dict( - nms_across_levels=False, - nms_pre=1000, - nms_post=1000, - max_num=1000, - nms_thr=0.7, - min_bbox_size=0), - rcnn=dict( - score_thr=0.05, nms=dict(type='nms', iou_thr=0.5), max_per_img=100) - # soft-nms is also supported for rcnn testing - # e.g., nms=dict(type='soft_nms', iou_thr=0.5, min_score=0.05) -) -# dataset settings -dataset_type = 'CocoDataset' -data_root = 'data/coco/' -img_norm_cfg = dict( - mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) -train_pipeline = [ - dict(type='LoadImageFromFile'), - dict(type='LoadAnnotations', with_bbox=True), - dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), - dict(type='RandomFlip', flip_ratio=0.5), - dict(type='Normalize', **img_norm_cfg), - dict(type='Pad', size_divisor=32), - dict(type='DefaultFormatBundle'), - dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']), -] -test_pipeline = [ - dict(type='LoadImageFromFile'), - dict( - type='MultiScaleFlipAug', - img_scale=(1333, 800), - flip=False, - transforms=[ - dict(type='Resize', keep_ratio=True), - dict(type='RandomFlip'), - dict(type='Normalize', **img_norm_cfg), - dict(type='Pad', size_divisor=32), - dict(type='ImageToTensor', keys=['img']), - dict(type='Collect', keys=['img']), - ]) -] -data = dict( - imgs_per_gpu=2, - workers_per_gpu=2, - train=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_train2017.json', - 
img_prefix=data_root + 'train2017/', - pipeline=train_pipeline), - val=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_val2017.json', - img_prefix=data_root + 'val2017/', - pipeline=test_pipeline), - test=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_val2017.json', - img_prefix=data_root + 'val2017/', - pipeline=test_pipeline)) -evaluation = dict(interval=1, metric='bbox') -# optimizer -optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001) -optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) -# learning policy -lr_config = dict( - policy='step', - warmup='linear', - warmup_iters=500, - warmup_ratio=1.0 / 3, - step=[8, 11]) -checkpoint_config = dict(interval=1) -# yapf:disable -log_config = dict( - interval=50, - hooks=[ - dict(type='TextLoggerHook'), - # dict(type='TensorboardLoggerHook') - ]) -# yapf:enable -# runtime settings -total_epochs = 12 -dist_params = dict(backend='nccl') -log_level = 'INFO' -work_dir = './work_dirs/faster_rcnn_mdconv_c3-c5_group4_r50_fpn_1x' -load_from = None -resume_from = None -workflow = [('train', 1)] diff --git a/configs/dcn/faster_rcnn_mdconv_c3-c5_r50_fpn_1x.py b/configs/dcn/faster_rcnn_mdconv_c3-c5_r50_fpn_1x.py deleted file mode 100644 index e22dbd4391212f3113cc1e98b25af66198411f35..0000000000000000000000000000000000000000 --- a/configs/dcn/faster_rcnn_mdconv_c3-c5_r50_fpn_1x.py +++ /dev/null @@ -1,178 +0,0 @@ -# model settings -model = dict( - type='FasterRCNN', - pretrained='torchvision://resnet50', - backbone=dict( - type='ResNet', - depth=50, - num_stages=4, - out_indices=(0, 1, 2, 3), - frozen_stages=1, - norm_cfg=dict(type='BN', requires_grad=True), - style='pytorch', - dcn=dict(type='DCNv2', deformable_groups=1, fallback_on_stride=False), - stage_with_dcn=(False, True, True, True)), - neck=dict( - type='FPN', - in_channels=[256, 512, 1024, 2048], - out_channels=256, - num_outs=5), - rpn_head=dict( - type='RPNHead', - in_channels=256, - 
feat_channels=256, - anchor_scales=[8], - anchor_ratios=[0.5, 1.0, 2.0], - anchor_strides=[4, 8, 16, 32, 64], - target_means=[.0, .0, .0, .0], - target_stds=[1.0, 1.0, 1.0, 1.0], - loss_cls=dict( - type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0), - loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0)), - bbox_roi_extractor=dict( - type='SingleRoIExtractor', - roi_layer=dict(type='RoIAlign', out_size=7, sample_num=2), - out_channels=256, - featmap_strides=[4, 8, 16, 32]), - bbox_head=dict( - type='SharedFCBBoxHead', - num_fcs=2, - in_channels=256, - fc_out_channels=1024, - roi_feat_size=7, - num_classes=81, - target_means=[0., 0., 0., 0.], - target_stds=[0.1, 0.1, 0.2, 0.2], - reg_class_agnostic=False, - loss_cls=dict( - type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), - loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0))) -# model training and testing settings -train_cfg = dict( - rpn=dict( - assigner=dict( - type='MaxIoUAssigner', - pos_iou_thr=0.7, - neg_iou_thr=0.3, - min_pos_iou=0.3, - ignore_iof_thr=-1), - sampler=dict( - type='RandomSampler', - num=256, - pos_fraction=0.5, - neg_pos_ub=-1, - add_gt_as_proposals=False), - allowed_border=0, - pos_weight=-1, - debug=False), - rpn_proposal=dict( - nms_across_levels=False, - nms_pre=2000, - nms_post=2000, - max_num=2000, - nms_thr=0.7, - min_bbox_size=0), - rcnn=dict( - assigner=dict( - type='MaxIoUAssigner', - pos_iou_thr=0.5, - neg_iou_thr=0.5, - min_pos_iou=0.5, - ignore_iof_thr=-1), - sampler=dict( - type='RandomSampler', - num=512, - pos_fraction=0.25, - neg_pos_ub=-1, - add_gt_as_proposals=True), - pos_weight=-1, - debug=False)) -test_cfg = dict( - rpn=dict( - nms_across_levels=False, - nms_pre=1000, - nms_post=1000, - max_num=1000, - nms_thr=0.7, - min_bbox_size=0), - rcnn=dict( - score_thr=0.05, nms=dict(type='nms', iou_thr=0.5), max_per_img=100) - # soft-nms is also supported for rcnn testing - # e.g., nms=dict(type='soft_nms', iou_thr=0.5, 
min_score=0.05) -) -# dataset settings -dataset_type = 'CocoDataset' -data_root = 'data/coco/' -img_norm_cfg = dict( - mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) -train_pipeline = [ - dict(type='LoadImageFromFile'), - dict(type='LoadAnnotations', with_bbox=True), - dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), - dict(type='RandomFlip', flip_ratio=0.5), - dict(type='Normalize', **img_norm_cfg), - dict(type='Pad', size_divisor=32), - dict(type='DefaultFormatBundle'), - dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']), -] -test_pipeline = [ - dict(type='LoadImageFromFile'), - dict( - type='MultiScaleFlipAug', - img_scale=(1333, 800), - flip=False, - transforms=[ - dict(type='Resize', keep_ratio=True), - dict(type='RandomFlip'), - dict(type='Normalize', **img_norm_cfg), - dict(type='Pad', size_divisor=32), - dict(type='ImageToTensor', keys=['img']), - dict(type='Collect', keys=['img']), - ]) -] -data = dict( - imgs_per_gpu=2, - workers_per_gpu=2, - train=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_train2017.json', - img_prefix=data_root + 'train2017/', - pipeline=train_pipeline), - val=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_val2017.json', - img_prefix=data_root + 'val2017/', - pipeline=test_pipeline), - test=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_val2017.json', - img_prefix=data_root + 'val2017/', - pipeline=test_pipeline)) -evaluation = dict(interval=1, metric='bbox') -# optimizer -optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001) -optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) -# learning policy -lr_config = dict( - policy='step', - warmup='linear', - warmup_iters=500, - warmup_ratio=1.0 / 3, - step=[8, 11]) -checkpoint_config = dict(interval=1) -# yapf:disable -log_config = dict( - interval=50, - hooks=[ - dict(type='TextLoggerHook'), - # dict(type='TensorboardLoggerHook') 
- ]) -# yapf:enable -# runtime settings -total_epochs = 12 -dist_params = dict(backend='nccl') -log_level = 'INFO' -work_dir = './work_dirs/faster_rcnn_mdconv_c3-c5_r50_fpn_1x' -load_from = None -resume_from = None -workflow = [('train', 1)] diff --git a/configs/dcn/faster_rcnn_mdpool_r50_fpn_1x.py b/configs/dcn/faster_rcnn_mdpool_r50_fpn_1x.py deleted file mode 100644 index c36e65c499314fc787336749a1d6e5602906ee7b..0000000000000000000000000000000000000000 --- a/configs/dcn/faster_rcnn_mdpool_r50_fpn_1x.py +++ /dev/null @@ -1,182 +0,0 @@ -# model settings -model = dict( - type='FasterRCNN', - pretrained='torchvision://resnet50', - backbone=dict( - type='ResNet', - depth=50, - num_stages=4, - out_indices=(0, 1, 2, 3), - frozen_stages=1, - norm_cfg=dict(type='BN', requires_grad=True), - style='pytorch'), - neck=dict( - type='FPN', - in_channels=[256, 512, 1024, 2048], - out_channels=256, - num_outs=5), - rpn_head=dict( - type='RPNHead', - in_channels=256, - feat_channels=256, - anchor_scales=[8], - anchor_ratios=[0.5, 1.0, 2.0], - anchor_strides=[4, 8, 16, 32, 64], - target_means=[.0, .0, .0, .0], - target_stds=[1.0, 1.0, 1.0, 1.0], - loss_cls=dict( - type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0), - loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0)), - bbox_roi_extractor=dict( - type='SingleRoIExtractor', - roi_layer=dict( - type='ModulatedDeformRoIPoolingPack', - out_size=7, - out_channels=256, - no_trans=False, - group_size=1, - trans_std=0.1), - out_channels=256, - featmap_strides=[4, 8, 16, 32]), - bbox_head=dict( - type='SharedFCBBoxHead', - num_fcs=2, - in_channels=256, - fc_out_channels=1024, - roi_feat_size=7, - num_classes=81, - target_means=[0., 0., 0., 0.], - target_stds=[0.1, 0.1, 0.2, 0.2], - reg_class_agnostic=False, - loss_cls=dict( - type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), - loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0))) -# model training and testing settings -train_cfg = dict( - 
rpn=dict( - assigner=dict( - type='MaxIoUAssigner', - pos_iou_thr=0.7, - neg_iou_thr=0.3, - min_pos_iou=0.3, - ignore_iof_thr=-1), - sampler=dict( - type='RandomSampler', - num=256, - pos_fraction=0.5, - neg_pos_ub=-1, - add_gt_as_proposals=False), - allowed_border=0, - pos_weight=-1, - debug=False), - rpn_proposal=dict( - nms_across_levels=False, - nms_pre=2000, - nms_post=2000, - max_num=2000, - nms_thr=0.7, - min_bbox_size=0), - rcnn=dict( - assigner=dict( - type='MaxIoUAssigner', - pos_iou_thr=0.5, - neg_iou_thr=0.5, - min_pos_iou=0.5, - ignore_iof_thr=-1), - sampler=dict( - type='RandomSampler', - num=512, - pos_fraction=0.25, - neg_pos_ub=-1, - add_gt_as_proposals=True), - pos_weight=-1, - debug=False)) -test_cfg = dict( - rpn=dict( - nms_across_levels=False, - nms_pre=1000, - nms_post=1000, - max_num=1000, - nms_thr=0.7, - min_bbox_size=0), - rcnn=dict( - score_thr=0.05, nms=dict(type='nms', iou_thr=0.5), max_per_img=100) - # soft-nms is also supported for rcnn testing - # e.g., nms=dict(type='soft_nms', iou_thr=0.5, min_score=0.05) -) -# dataset settings -dataset_type = 'CocoDataset' -data_root = 'data/coco/' -img_norm_cfg = dict( - mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) -train_pipeline = [ - dict(type='LoadImageFromFile'), - dict(type='LoadAnnotations', with_bbox=True), - dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), - dict(type='RandomFlip', flip_ratio=0.5), - dict(type='Normalize', **img_norm_cfg), - dict(type='Pad', size_divisor=32), - dict(type='DefaultFormatBundle'), - dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']), -] -test_pipeline = [ - dict(type='LoadImageFromFile'), - dict( - type='MultiScaleFlipAug', - img_scale=(1333, 800), - flip=False, - transforms=[ - dict(type='Resize', keep_ratio=True), - dict(type='RandomFlip'), - dict(type='Normalize', **img_norm_cfg), - dict(type='Pad', size_divisor=32), - dict(type='ImageToTensor', keys=['img']), - dict(type='Collect', keys=['img']), - ]) 
-] -data = dict( - imgs_per_gpu=2, - workers_per_gpu=2, - train=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_train2017.json', - img_prefix=data_root + 'train2017/', - pipeline=train_pipeline), - val=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_val2017.json', - img_prefix=data_root + 'val2017/', - pipeline=test_pipeline), - test=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_val2017.json', - img_prefix=data_root + 'val2017/', - pipeline=test_pipeline)) -evaluation = dict(interval=1, metric='bbox') -# optimizer -optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001) -optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) -# learning policy -lr_config = dict( - policy='step', - warmup='linear', - warmup_iters=500, - warmup_ratio=1.0 / 3, - step=[8, 11]) -checkpoint_config = dict(interval=1) -# yapf:disable -log_config = dict( - interval=50, - hooks=[ - dict(type='TextLoggerHook'), - # dict(type='TensorboardLoggerHook') - ]) -# yapf:enable -# runtime settings -total_epochs = 12 -dist_params = dict(backend='nccl') -log_level = 'INFO' -work_dir = './work_dirs/faster_rcnn_mdpool_r50_fpn_1x' -load_from = None -resume_from = None -workflow = [('train', 1)] diff --git a/configs/dcn/faster_rcnn_r50_fpn_dconv_c3-c5_1x_coco.py b/configs/dcn/faster_rcnn_r50_fpn_dconv_c3-c5_1x_coco.py new file mode 100644 index 0000000000000000000000000000000000000000..08f07fb62e7ec091663d92a3634179a6d6b02aa0 --- /dev/null +++ b/configs/dcn/faster_rcnn_r50_fpn_dconv_c3-c5_1x_coco.py @@ -0,0 +1,6 @@ +_base_ = '../faster_rcnn/faster_rcnn_r50_fpn_1x_coco.py' +model = dict( + backbone=dict( + dcn=dict(type='DCN', deformable_groups=1, fallback_on_stride=False), + stage_with_dcn=(False, True, True, True))) +work_dir = './work_dirs/faster_rcnn_dconv_c3-c5_r50_fpn_1x' diff --git a/configs/dcn/faster_rcnn_r50_fpn_dpool_1x_coco.py b/configs/dcn/faster_rcnn_r50_fpn_dpool_1x_coco.py new file mode 
100644 index 0000000000000000000000000000000000000000..5a2ed70238a43fcfd056d4d8a3e7d12d8ac0197f --- /dev/null +++ b/configs/dcn/faster_rcnn_r50_fpn_dpool_1x_coco.py @@ -0,0 +1,15 @@ +_base_ = '../faster_rcnn/faster_rcnn_r50_fpn_1x_coco.py' +model = dict( + bbox_roi_extractor=dict( + type='SingleRoIExtractor', + roi_layer=dict( + _delete_=True, + type='DeformRoIPoolingPack', + out_size=7, + out_channels=256, + no_trans=False, + group_size=1, + trans_std=0.1), + out_channels=256, + featmap_strides=[4, 8, 16, 32])) +work_dir = './work_dirs/faster_rcnn_dpool_r50_fpn_1x' diff --git a/configs/dcn/faster_rcnn_r50_fpn_mdconv_c3-c5_1x_coco.py b/configs/dcn/faster_rcnn_r50_fpn_mdconv_c3-c5_1x_coco.py new file mode 100644 index 0000000000000000000000000000000000000000..7a42f0803af23be25f54a7df799f0be3914f91d6 --- /dev/null +++ b/configs/dcn/faster_rcnn_r50_fpn_mdconv_c3-c5_1x_coco.py @@ -0,0 +1,6 @@ +_base_ = '../faster_rcnn/faster_rcnn_r50_fpn_1x_coco.py' +model = dict( + backbone=dict( + dcn=dict(type='DCNv2', deformable_groups=1, fallback_on_stride=False), + stage_with_dcn=(False, True, True, True))) +work_dir = './work_dirs/faster_rcnn_mdconv_c3-c5_r50_fpn_1x' diff --git a/configs/dcn/faster_rcnn_r50_fpn_mdconv_c3-c5_group4_1x_coco.py b/configs/dcn/faster_rcnn_r50_fpn_mdconv_c3-c5_group4_1x_coco.py new file mode 100644 index 0000000000000000000000000000000000000000..2e22302aefbee5b40c80dfbbb90caeeb1c49c5ab --- /dev/null +++ b/configs/dcn/faster_rcnn_r50_fpn_mdconv_c3-c5_group4_1x_coco.py @@ -0,0 +1,6 @@ +_base_ = '../faster_rcnn/faster_rcnn_r50_fpn_1x_coco.py' +model = dict( + backbone=dict( + dcn=dict(type='DCNv2', deformable_groups=4, fallback_on_stride=False), + stage_with_dcn=(False, True, True, True))) +work_dir = './work_dirs/faster_rcnn_mdconv_c3-c5_group4_r50_fpn_1x' diff --git a/configs/dcn/faster_rcnn_r50_fpn_mdpool_1x_coco.py b/configs/dcn/faster_rcnn_r50_fpn_mdpool_1x_coco.py new file mode 100644 index 
0000000000000000000000000000000000000000..2bf1bf68d4da350953107128f71782e7ef5d4773 --- /dev/null +++ b/configs/dcn/faster_rcnn_r50_fpn_mdpool_1x_coco.py @@ -0,0 +1,15 @@ +_base_ = '../faster_rcnn/faster_rcnn_r50_fpn_1x_coco.py' +model = dict( + bbox_roi_extractor=dict( + type='SingleRoIExtractor', + roi_layer=dict( + _delete_=True, + type='ModulatedDeformRoIPoolingPack', + out_size=7, + out_channels=256, + no_trans=False, + group_size=1, + trans_std=0.1), + out_channels=256, + featmap_strides=[4, 8, 16, 32])) +work_dir = './work_dirs/faster_rcnn_mdpool_r50_fpn_1x' diff --git a/configs/dcn/faster_rcnn_x101_32x4d_fpn_dconv_c3-c5_1x_coco.py b/configs/dcn/faster_rcnn_x101_32x4d_fpn_dconv_c3-c5_1x_coco.py new file mode 100644 index 0000000000000000000000000000000000000000..b09dc5c0e622585aeb2c373503ab9f48ba087335 --- /dev/null +++ b/configs/dcn/faster_rcnn_x101_32x4d_fpn_dconv_c3-c5_1x_coco.py @@ -0,0 +1,16 @@ +_base_ = '../faster_rcnn/faster_rcnn_r50_fpn_1x_coco.py' +model = dict( + pretrained='open-mmlab://resnext101_32x4d', + backbone=dict( + type='ResNeXt', + depth=101, + groups=32, + base_width=4, + num_stages=4, + out_indices=(0, 1, 2, 3), + frozen_stages=1, + norm_cfg=dict(type='BN', requires_grad=True), + style='pytorch', + dcn=dict(type='DCN', deformable_groups=1, fallback_on_stride=False), + stage_with_dcn=(False, True, True, True))) +work_dir = './work_dirs/faster_rcnn_dconv_c3-c5_x101_32x4d_fpn_1x' diff --git a/configs/dcn/mask_rcnn_dconv_c3-c5_r50_fpn_1x.py b/configs/dcn/mask_rcnn_dconv_c3-c5_r50_fpn_1x.py deleted file mode 100644 index 802f4e79049f39265f3af4fc997e1fc378e994cd..0000000000000000000000000000000000000000 --- a/configs/dcn/mask_rcnn_dconv_c3-c5_r50_fpn_1x.py +++ /dev/null @@ -1,192 +0,0 @@ -# model settings -model = dict( - type='MaskRCNN', - pretrained='torchvision://resnet50', - backbone=dict( - type='ResNet', - depth=50, - num_stages=4, - out_indices=(0, 1, 2, 3), - frozen_stages=1, - norm_cfg=dict(type='BN', requires_grad=True), - 
style='pytorch', - dcn=dict(type='DCN', deformable_groups=1, fallback_on_stride=False), - stage_with_dcn=(False, True, True, True)), - neck=dict( - type='FPN', - in_channels=[256, 512, 1024, 2048], - out_channels=256, - num_outs=5), - rpn_head=dict( - type='RPNHead', - in_channels=256, - feat_channels=256, - anchor_scales=[8], - anchor_ratios=[0.5, 1.0, 2.0], - anchor_strides=[4, 8, 16, 32, 64], - target_means=[.0, .0, .0, .0], - target_stds=[1.0, 1.0, 1.0, 1.0], - loss_cls=dict( - type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0), - loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0)), - bbox_roi_extractor=dict( - type='SingleRoIExtractor', - roi_layer=dict(type='RoIAlign', out_size=7, sample_num=2), - out_channels=256, - featmap_strides=[4, 8, 16, 32]), - bbox_head=dict( - type='SharedFCBBoxHead', - num_fcs=2, - in_channels=256, - fc_out_channels=1024, - roi_feat_size=7, - num_classes=81, - target_means=[0., 0., 0., 0.], - target_stds=[0.1, 0.1, 0.2, 0.2], - reg_class_agnostic=False, - loss_cls=dict( - type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), - loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0)), - mask_roi_extractor=dict( - type='SingleRoIExtractor', - roi_layer=dict(type='RoIAlign', out_size=14, sample_num=2), - out_channels=256, - featmap_strides=[4, 8, 16, 32]), - mask_head=dict( - type='FCNMaskHead', - num_convs=4, - in_channels=256, - conv_out_channels=256, - num_classes=81, - loss_mask=dict( - type='CrossEntropyLoss', use_mask=True, loss_weight=1.0))) -# model training and testing settings -train_cfg = dict( - rpn=dict( - assigner=dict( - type='MaxIoUAssigner', - pos_iou_thr=0.7, - neg_iou_thr=0.3, - min_pos_iou=0.3, - ignore_iof_thr=-1), - sampler=dict( - type='RandomSampler', - num=256, - pos_fraction=0.5, - neg_pos_ub=-1, - add_gt_as_proposals=False), - allowed_border=0, - pos_weight=-1, - debug=False), - rpn_proposal=dict( - nms_across_levels=False, - nms_pre=2000, - nms_post=2000, - 
max_num=2000, - nms_thr=0.7, - min_bbox_size=0), - rcnn=dict( - assigner=dict( - type='MaxIoUAssigner', - pos_iou_thr=0.5, - neg_iou_thr=0.5, - min_pos_iou=0.5, - ignore_iof_thr=-1), - sampler=dict( - type='RandomSampler', - num=512, - pos_fraction=0.25, - neg_pos_ub=-1, - add_gt_as_proposals=True), - mask_size=28, - pos_weight=-1, - debug=False)) -test_cfg = dict( - rpn=dict( - nms_across_levels=False, - nms_pre=1000, - nms_post=1000, - max_num=1000, - nms_thr=0.7, - min_bbox_size=0), - rcnn=dict( - score_thr=0.05, - nms=dict(type='nms', iou_thr=0.5), - max_per_img=100, - mask_thr_binary=0.5)) -# dataset settings -dataset_type = 'CocoDataset' -data_root = 'data/coco/' -img_norm_cfg = dict( - mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) -train_pipeline = [ - dict(type='LoadImageFromFile'), - dict(type='LoadAnnotations', with_bbox=True, with_mask=True), - dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), - dict(type='RandomFlip', flip_ratio=0.5), - dict(type='Normalize', **img_norm_cfg), - dict(type='Pad', size_divisor=32), - dict(type='DefaultFormatBundle'), - dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']), -] -test_pipeline = [ - dict(type='LoadImageFromFile'), - dict( - type='MultiScaleFlipAug', - img_scale=(1333, 800), - flip=False, - transforms=[ - dict(type='Resize', keep_ratio=True), - dict(type='RandomFlip'), - dict(type='Normalize', **img_norm_cfg), - dict(type='Pad', size_divisor=32), - dict(type='ImageToTensor', keys=['img']), - dict(type='Collect', keys=['img']), - ]) -] -data = dict( - imgs_per_gpu=2, - workers_per_gpu=2, - train=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_train2017.json', - img_prefix=data_root + 'train2017/', - pipeline=train_pipeline), - val=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_val2017.json', - img_prefix=data_root + 'val2017/', - pipeline=test_pipeline), - test=dict( - type=dataset_type, - 
ann_file=data_root + 'annotations/instances_val2017.json', - img_prefix=data_root + 'val2017/', - pipeline=test_pipeline)) -evaluation = dict(interval=1, metric=['bbox', 'segm']) -# optimizer -optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001) -optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) -# learning policy -lr_config = dict( - policy='step', - warmup='linear', - warmup_iters=500, - warmup_ratio=1.0 / 3, - step=[8, 11]) -checkpoint_config = dict(interval=1) -# yapf:disable -log_config = dict( - interval=50, - hooks=[ - dict(type='TextLoggerHook'), - # dict(type='TensorboardLoggerHook') - ]) -# yapf:enable -# runtime settings -total_epochs = 12 -dist_params = dict(backend='nccl') -log_level = 'INFO' -work_dir = './work_dirs/mask_rcnn_dconv_c3-c5_r50_fpn_1x' -load_from = None -resume_from = None -workflow = [('train', 1)] diff --git a/configs/dcn/mask_rcnn_mdconv_c3-c5_r50_fpn_1x.py b/configs/dcn/mask_rcnn_mdconv_c3-c5_r50_fpn_1x.py deleted file mode 100644 index 1f2c703b3b174f1ac738da22468ae2ea7f70f232..0000000000000000000000000000000000000000 --- a/configs/dcn/mask_rcnn_mdconv_c3-c5_r50_fpn_1x.py +++ /dev/null @@ -1,192 +0,0 @@ -# model settings -model = dict( - type='MaskRCNN', - pretrained='torchvision://resnet50', - backbone=dict( - type='ResNet', - depth=50, - num_stages=4, - out_indices=(0, 1, 2, 3), - frozen_stages=1, - norm_cfg=dict(type='BN', requires_grad=True), - style='pytorch', - dcn=dict(type='DCNv2', deformable_groups=1, fallback_on_stride=False), - stage_with_dcn=(False, True, True, True)), - neck=dict( - type='FPN', - in_channels=[256, 512, 1024, 2048], - out_channels=256, - num_outs=5), - rpn_head=dict( - type='RPNHead', - in_channels=256, - feat_channels=256, - anchor_scales=[8], - anchor_ratios=[0.5, 1.0, 2.0], - anchor_strides=[4, 8, 16, 32, 64], - target_means=[.0, .0, .0, .0], - target_stds=[1.0, 1.0, 1.0, 1.0], - loss_cls=dict( - type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0), - 
loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0)), - bbox_roi_extractor=dict( - type='SingleRoIExtractor', - roi_layer=dict(type='RoIAlign', out_size=7, sample_num=2), - out_channels=256, - featmap_strides=[4, 8, 16, 32]), - bbox_head=dict( - type='SharedFCBBoxHead', - num_fcs=2, - in_channels=256, - fc_out_channels=1024, - roi_feat_size=7, - num_classes=81, - target_means=[0., 0., 0., 0.], - target_stds=[0.1, 0.1, 0.2, 0.2], - reg_class_agnostic=False, - loss_cls=dict( - type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), - loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0)), - mask_roi_extractor=dict( - type='SingleRoIExtractor', - roi_layer=dict(type='RoIAlign', out_size=14, sample_num=2), - out_channels=256, - featmap_strides=[4, 8, 16, 32]), - mask_head=dict( - type='FCNMaskHead', - num_convs=4, - in_channels=256, - conv_out_channels=256, - num_classes=81, - loss_mask=dict( - type='CrossEntropyLoss', use_mask=True, loss_weight=1.0))) -# model training and testing settings -train_cfg = dict( - rpn=dict( - assigner=dict( - type='MaxIoUAssigner', - pos_iou_thr=0.7, - neg_iou_thr=0.3, - min_pos_iou=0.3, - ignore_iof_thr=-1), - sampler=dict( - type='RandomSampler', - num=256, - pos_fraction=0.5, - neg_pos_ub=-1, - add_gt_as_proposals=False), - allowed_border=0, - pos_weight=-1, - debug=False), - rpn_proposal=dict( - nms_across_levels=False, - nms_pre=2000, - nms_post=2000, - max_num=2000, - nms_thr=0.7, - min_bbox_size=0), - rcnn=dict( - assigner=dict( - type='MaxIoUAssigner', - pos_iou_thr=0.5, - neg_iou_thr=0.5, - min_pos_iou=0.5, - ignore_iof_thr=-1), - sampler=dict( - type='RandomSampler', - num=512, - pos_fraction=0.25, - neg_pos_ub=-1, - add_gt_as_proposals=True), - mask_size=28, - pos_weight=-1, - debug=False)) -test_cfg = dict( - rpn=dict( - nms_across_levels=False, - nms_pre=1000, - nms_post=1000, - max_num=1000, - nms_thr=0.7, - min_bbox_size=0), - rcnn=dict( - score_thr=0.05, - nms=dict(type='nms', iou_thr=0.5), 
- max_per_img=100, - mask_thr_binary=0.5)) -# dataset settings -dataset_type = 'CocoDataset' -data_root = 'data/coco/' -img_norm_cfg = dict( - mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) -train_pipeline = [ - dict(type='LoadImageFromFile'), - dict(type='LoadAnnotations', with_bbox=True, with_mask=True), - dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), - dict(type='RandomFlip', flip_ratio=0.5), - dict(type='Normalize', **img_norm_cfg), - dict(type='Pad', size_divisor=32), - dict(type='DefaultFormatBundle'), - dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']), -] -test_pipeline = [ - dict(type='LoadImageFromFile'), - dict( - type='MultiScaleFlipAug', - img_scale=(1333, 800), - flip=False, - transforms=[ - dict(type='Resize', keep_ratio=True), - dict(type='RandomFlip'), - dict(type='Normalize', **img_norm_cfg), - dict(type='Pad', size_divisor=32), - dict(type='ImageToTensor', keys=['img']), - dict(type='Collect', keys=['img']), - ]) -] -data = dict( - imgs_per_gpu=2, - workers_per_gpu=2, - train=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_train2017.json', - img_prefix=data_root + 'train2017/', - pipeline=train_pipeline), - val=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_val2017.json', - img_prefix=data_root + 'val2017/', - pipeline=test_pipeline), - test=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_val2017.json', - img_prefix=data_root + 'val2017/', - pipeline=test_pipeline)) -evaluation = dict(interval=1, metric=['bbox', 'segm']) -# optimizer -optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001) -optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) -# learning policy -lr_config = dict( - policy='step', - warmup='linear', - warmup_iters=500, - warmup_ratio=1.0 / 3, - step=[8, 11]) -checkpoint_config = dict(interval=1) -# yapf:disable -log_config = dict( - interval=50, - hooks=[ - 
dict(type='TextLoggerHook'), - # dict(type='TensorboardLoggerHook') - ]) -# yapf:enable -# runtime settings -total_epochs = 12 -dist_params = dict(backend='nccl') -log_level = 'INFO' -work_dir = './work_dirs/mask_rcnn_dconv_c3-c5_r50_fpn_1x' -load_from = None -resume_from = None -workflow = [('train', 1)] diff --git a/configs/dcn/mask_rcnn_r50_fpn_dconv_c3-c5_1x_coco.py b/configs/dcn/mask_rcnn_r50_fpn_dconv_c3-c5_1x_coco.py new file mode 100644 index 0000000000000000000000000000000000000000..1f7b9a87e3031c824dab02bf18e319c4b308c79d --- /dev/null +++ b/configs/dcn/mask_rcnn_r50_fpn_dconv_c3-c5_1x_coco.py @@ -0,0 +1,6 @@ +_base_ = '../mask_rcnn/mask_rcnn_r50_fpn_1x_coco.py' +model = dict( + backbone=dict( + dcn=dict(type='DCN', deformable_groups=1, fallback_on_stride=False), + stage_with_dcn=(False, True, True, True))) +work_dir = './work_dirs/mask_rcnn_dconv_c3-c5_r50_fpn_1x' diff --git a/configs/dcn/mask_rcnn_r50_fpn_mdconv_c3-c5_1x_coco.py b/configs/dcn/mask_rcnn_r50_fpn_mdconv_c3-c5_1x_coco.py new file mode 100644 index 0000000000000000000000000000000000000000..8518cebf7a68f39c40a6801a445d2c2b688ee368 --- /dev/null +++ b/configs/dcn/mask_rcnn_r50_fpn_mdconv_c3-c5_1x_coco.py @@ -0,0 +1,6 @@ +_base_ = '../mask_rcnn/mask_rcnn_r50_fpn_1x_coco.py' +model = dict( + backbone=dict( + dcn=dict(type='DCNv2', deformable_groups=1, fallback_on_stride=False), + stage_with_dcn=(False, True, True, True))) +work_dir = './work_dirs/mask_rcnn_dconv_c3-c5_r50_fpn_1x' diff --git a/configs/double_heads/dh_faster_rcnn_r50_fpn_1x.py b/configs/double_heads/dh_faster_rcnn_r50_fpn_1x.py deleted file mode 100644 index 0f3143687373be9636020888ceb9a5f0549f4812..0000000000000000000000000000000000000000 --- a/configs/double_heads/dh_faster_rcnn_r50_fpn_1x.py +++ /dev/null @@ -1,179 +0,0 @@ -# model settings -model = dict( - type='DoubleHeadRCNN', - pretrained='torchvision://resnet50', - backbone=dict( - type='ResNet', - depth=50, - num_stages=4, - out_indices=(0, 1, 2, 3), - frozen_stages=1, - 
norm_cfg=dict(type='BN', requires_grad=True), - style='pytorch'), - neck=dict( - type='FPN', - in_channels=[256, 512, 1024, 2048], - out_channels=256, - num_outs=5), - rpn_head=dict( - type='RPNHead', - in_channels=256, - feat_channels=256, - anchor_scales=[8], - anchor_ratios=[0.5, 1.0, 2.0], - anchor_strides=[4, 8, 16, 32, 64], - target_means=[.0, .0, .0, .0], - target_stds=[1.0, 1.0, 1.0, 1.0], - loss_cls=dict( - type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0), - loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0)), - bbox_roi_extractor=dict( - type='SingleRoIExtractor', - roi_layer=dict(type='RoIAlign', out_size=7, sample_num=2), - out_channels=256, - featmap_strides=[4, 8, 16, 32]), - reg_roi_scale_factor=1.3, - bbox_head=dict( - type='DoubleConvFCBBoxHead', - num_convs=4, - num_fcs=2, - in_channels=256, - conv_out_channels=1024, - fc_out_channels=1024, - roi_feat_size=7, - num_classes=81, - target_means=[0., 0., 0., 0.], - target_stds=[0.1, 0.1, 0.2, 0.2], - reg_class_agnostic=False, - loss_cls=dict( - type='CrossEntropyLoss', use_sigmoid=False, loss_weight=2.0), - loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=2.0))) -# model training and testing settings -train_cfg = dict( - rpn=dict( - assigner=dict( - type='MaxIoUAssigner', - pos_iou_thr=0.7, - neg_iou_thr=0.3, - min_pos_iou=0.3, - ignore_iof_thr=-1), - sampler=dict( - type='RandomSampler', - num=256, - pos_fraction=0.5, - neg_pos_ub=-1, - add_gt_as_proposals=False), - allowed_border=0, - pos_weight=-1, - debug=False), - rpn_proposal=dict( - nms_across_levels=False, - nms_pre=2000, - nms_post=2000, - max_num=2000, - nms_thr=0.7, - min_bbox_size=0), - rcnn=dict( - assigner=dict( - type='MaxIoUAssigner', - pos_iou_thr=0.5, - neg_iou_thr=0.5, - min_pos_iou=0.5, - ignore_iof_thr=-1), - sampler=dict( - type='RandomSampler', - num=512, - pos_fraction=0.25, - neg_pos_ub=-1, - add_gt_as_proposals=True), - pos_weight=-1, - debug=False)) -test_cfg = dict( - rpn=dict( - 
nms_across_levels=False, - nms_pre=1000, - nms_post=1000, - max_num=1000, - nms_thr=0.7, - min_bbox_size=0), - rcnn=dict( - score_thr=0.05, nms=dict(type='nms', iou_thr=0.5), max_per_img=100) - # soft-nms is also supported for rcnn testing - # e.g., nms=dict(type='soft_nms', iou_thr=0.5, min_score=0.05) -) -# dataset settings -dataset_type = 'CocoDataset' -data_root = 'data/coco/' -img_norm_cfg = dict( - mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) -train_pipeline = [ - dict(type='LoadImageFromFile'), - dict(type='LoadAnnotations', with_bbox=True), - dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), - dict(type='RandomFlip', flip_ratio=0.5), - dict(type='Normalize', **img_norm_cfg), - dict(type='Pad', size_divisor=32), - dict(type='DefaultFormatBundle'), - dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']), -] -test_pipeline = [ - dict(type='LoadImageFromFile'), - dict( - type='MultiScaleFlipAug', - img_scale=(1333, 800), - flip=False, - transforms=[ - dict(type='Resize', keep_ratio=True), - dict(type='RandomFlip'), - dict(type='Normalize', **img_norm_cfg), - dict(type='Pad', size_divisor=32), - dict(type='ImageToTensor', keys=['img']), - dict(type='Collect', keys=['img']), - ]) -] -data = dict( - imgs_per_gpu=2, - workers_per_gpu=2, - train=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_train2017.json', - img_prefix=data_root + 'train2017/', - pipeline=train_pipeline), - val=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_val2017.json', - img_prefix=data_root + 'val2017/', - pipeline=test_pipeline), - test=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_val2017.json', - img_prefix=data_root + 'val2017/', - pipeline=test_pipeline)) -evaluation = dict(interval=1, metric='bbox') -# optimizer -optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001) -optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) -# learning 
policy -lr_config = dict( - policy='step', - warmup='linear', - warmup_iters=500, - warmup_ratio=1.0 / 3, - step=[8, 11]) -checkpoint_config = dict(interval=1) -# yapf:disable -log_config = dict( - interval=50, - hooks=[ - dict(type='TextLoggerHook'), - # dict(type='TensorboardLoggerHook') - ]) -# yapf:enable -# runtime settings -total_epochs = 12 -dist_params = dict(backend='nccl') -log_level = 'INFO' -work_dir = './work_dirs/dh_faster_rcnn_r50_fpn_1x' -load_from = None -resume_from = None -workflow = [('train', 1)] diff --git a/configs/double_heads/dh_faster_rcnn_r50_fpn_1x_coco.py b/configs/double_heads/dh_faster_rcnn_r50_fpn_1x_coco.py new file mode 100644 index 0000000000000000000000000000000000000000..6f3fa49e6cf8eec005bbbe0d864e6db9857c4217 --- /dev/null +++ b/configs/double_heads/dh_faster_rcnn_r50_fpn_1x_coco.py @@ -0,0 +1,21 @@ +_base_ = '../faster_rcnn/faster_rcnn_r50_fpn_1x_coco.py' +model = dict( + type='DoubleHeadRCNN', + reg_roi_scale_factor=1.3, + bbox_head=dict( + _delete_=True, + type='DoubleConvFCBBoxHead', + num_convs=4, + num_fcs=2, + in_channels=256, + conv_out_channels=1024, + fc_out_channels=1024, + roi_feat_size=7, + num_classes=81, + target_means=[0., 0., 0., 0.], + target_stds=[0.1, 0.1, 0.2, 0.2], + reg_class_agnostic=False, + loss_cls=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=2.0), + loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=2.0))) +work_dir = './work_dirs/dh_faster_rcnn_r50_fpn_1x' diff --git a/configs/empirical_attention/faster_rcnn_r50_fpn_attention_0010_1x.py b/configs/empirical_attention/faster_rcnn_r50_fpn_attention_0010_1x.py deleted file mode 100644 index 9e196b24d28886e82cf1152d63eab6f00f651d50..0000000000000000000000000000000000000000 --- a/configs/empirical_attention/faster_rcnn_r50_fpn_attention_0010_1x.py +++ /dev/null @@ -1,180 +0,0 @@ -# model settings -model = dict( - type='FasterRCNN', - pretrained='torchvision://resnet50', - backbone=dict( - type='ResNet', - depth=50, - 
num_stages=4, - out_indices=(0, 1, 2, 3), - frozen_stages=1, - norm_cfg=dict(type='BN', requires_grad=True), - style='pytorch', - gen_attention=dict( - spatial_range=-1, num_heads=8, attention_type='0010', kv_stride=2), - stage_with_gen_attention=[[], [], [0, 1, 2, 3, 4, 5], [0, 1, 2]], - ), - neck=dict( - type='FPN', - in_channels=[256, 512, 1024, 2048], - out_channels=256, - num_outs=5), - rpn_head=dict( - type='RPNHead', - in_channels=256, - feat_channels=256, - anchor_scales=[8], - anchor_ratios=[0.5, 1.0, 2.0], - anchor_strides=[4, 8, 16, 32, 64], - target_means=[.0, .0, .0, .0], - target_stds=[1.0, 1.0, 1.0, 1.0], - loss_cls=dict( - type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0), - loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0)), - bbox_roi_extractor=dict( - type='SingleRoIExtractor', - roi_layer=dict(type='RoIAlign', out_size=7, sample_num=2), - out_channels=256, - featmap_strides=[4, 8, 16, 32]), - bbox_head=dict( - type='SharedFCBBoxHead', - num_fcs=2, - in_channels=256, - fc_out_channels=1024, - roi_feat_size=7, - num_classes=81, - target_means=[0., 0., 0., 0.], - target_stds=[0.1, 0.1, 0.2, 0.2], - reg_class_agnostic=False, - loss_cls=dict( - type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), - loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0))) -# model training and testing settings -train_cfg = dict( - rpn=dict( - assigner=dict( - type='MaxIoUAssigner', - pos_iou_thr=0.7, - neg_iou_thr=0.3, - min_pos_iou=0.3, - ignore_iof_thr=-1), - sampler=dict( - type='RandomSampler', - num=256, - pos_fraction=0.5, - neg_pos_ub=-1, - add_gt_as_proposals=False), - allowed_border=0, - pos_weight=-1, - debug=False), - rpn_proposal=dict( - nms_across_levels=False, - nms_pre=2000, - nms_post=2000, - max_num=2000, - nms_thr=0.7, - min_bbox_size=0), - rcnn=dict( - assigner=dict( - type='MaxIoUAssigner', - pos_iou_thr=0.5, - neg_iou_thr=0.5, - min_pos_iou=0.5, - ignore_iof_thr=-1), - sampler=dict( - 
type='RandomSampler', - num=512, - pos_fraction=0.25, - neg_pos_ub=-1, - add_gt_as_proposals=True), - pos_weight=-1, - debug=False)) -test_cfg = dict( - rpn=dict( - nms_across_levels=False, - nms_pre=1000, - nms_post=1000, - max_num=1000, - nms_thr=0.7, - min_bbox_size=0), - rcnn=dict( - score_thr=0.05, nms=dict(type='nms', iou_thr=0.5), max_per_img=100) - # soft-nms is also supported for rcnn testing - # e.g., nms=dict(type='soft_nms', iou_thr=0.5, min_score=0.05) -) -# dataset settings -dataset_type = 'CocoDataset' -data_root = 'data/coco/' -img_norm_cfg = dict( - mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) -train_pipeline = [ - dict(type='LoadImageFromFile'), - dict(type='LoadAnnotations', with_bbox=True), - dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), - dict(type='RandomFlip', flip_ratio=0.5), - dict(type='Normalize', **img_norm_cfg), - dict(type='Pad', size_divisor=32), - dict(type='DefaultFormatBundle'), - dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']), -] -test_pipeline = [ - dict(type='LoadImageFromFile'), - dict( - type='MultiScaleFlipAug', - img_scale=(1333, 800), - flip=False, - transforms=[ - dict(type='Resize', keep_ratio=True), - dict(type='RandomFlip'), - dict(type='Normalize', **img_norm_cfg), - dict(type='Pad', size_divisor=32), - dict(type='ImageToTensor', keys=['img']), - dict(type='Collect', keys=['img']), - ]) -] -data = dict( - imgs_per_gpu=2, - workers_per_gpu=2, - train=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_train2017.json', - img_prefix=data_root + 'train2017/', - pipeline=train_pipeline), - val=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_val2017.json', - img_prefix=data_root + 'val2017/', - pipeline=test_pipeline), - test=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_val2017.json', - img_prefix=data_root + 'val2017/', - pipeline=test_pipeline)) -evaluation = dict(interval=1, metric='bbox') -# 
optimizer -optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001) -optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) -# learning policy -lr_config = dict( - policy='step', - warmup='linear', - warmup_iters=500, - warmup_ratio=1.0 / 3, - step=[8, 11]) -checkpoint_config = dict(interval=1) -# yapf:disable -log_config = dict( - interval=50, - hooks=[ - dict(type='TextLoggerHook'), - # dict(type='TensorboardLoggerHook') - ]) -# yapf:enable -# runtime settings -total_epochs = 12 -dist_params = dict(backend='nccl') -log_level = 'INFO' -work_dir = './work_dirs/faster_rcnn_r50_fpn_attention_0010_1x' -load_from = None -resume_from = None -workflow = [('train', 1)] diff --git a/configs/empirical_attention/faster_rcnn_r50_fpn_attention_0010_1x_coco.py b/configs/empirical_attention/faster_rcnn_r50_fpn_attention_0010_1x_coco.py new file mode 100644 index 0000000000000000000000000000000000000000..5f618301c47d54b5f2e07135a21100c98dc94762 --- /dev/null +++ b/configs/empirical_attention/faster_rcnn_r50_fpn_attention_0010_1x_coco.py @@ -0,0 +1,8 @@ +_base_ = '../faster_rcnn/faster_rcnn_r50_fpn_1x_coco.py' +model = dict( + backbone=dict( + gen_attention=dict( + spatial_range=-1, num_heads=8, attention_type='0010', kv_stride=2), + stage_with_gen_attention=[[], [], [0, 1, 2, 3, 4, 5], [0, 1, 2]], + )) +work_dir = './work_dirs/faster_rcnn_r50_fpn_attention_0010_1x' diff --git a/configs/empirical_attention/faster_rcnn_r50_fpn_attention_0010_dcn_1x.py b/configs/empirical_attention/faster_rcnn_r50_fpn_attention_0010_dcn_1x.py deleted file mode 100644 index 51827b404947446732b36899d2a86dc813a33a5a..0000000000000000000000000000000000000000 --- a/configs/empirical_attention/faster_rcnn_r50_fpn_attention_0010_dcn_1x.py +++ /dev/null @@ -1,182 +0,0 @@ -# model settings -model = dict( - type='FasterRCNN', - pretrained='torchvision://resnet50', - backbone=dict( - type='ResNet', - depth=50, - num_stages=4, - out_indices=(0, 1, 2, 3), - frozen_stages=1, - 
norm_cfg=dict(type='BN', requires_grad=True), - style='pytorch', - gen_attention=dict( - spatial_range=-1, num_heads=8, attention_type='0010', kv_stride=2), - stage_with_gen_attention=[[], [], [0, 1, 2, 3, 4, 5], [0, 1, 2]], - dcn=dict(type='DCN', deformable_groups=1, fallback_on_stride=False), - stage_with_dcn=(False, True, True, True), - ), - neck=dict( - type='FPN', - in_channels=[256, 512, 1024, 2048], - out_channels=256, - num_outs=5), - rpn_head=dict( - type='RPNHead', - in_channels=256, - feat_channels=256, - anchor_scales=[8], - anchor_ratios=[0.5, 1.0, 2.0], - anchor_strides=[4, 8, 16, 32, 64], - target_means=[.0, .0, .0, .0], - target_stds=[1.0, 1.0, 1.0, 1.0], - loss_cls=dict( - type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0), - loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0)), - bbox_roi_extractor=dict( - type='SingleRoIExtractor', - roi_layer=dict(type='RoIAlign', out_size=7, sample_num=2), - out_channels=256, - featmap_strides=[4, 8, 16, 32]), - bbox_head=dict( - type='SharedFCBBoxHead', - num_fcs=2, - in_channels=256, - fc_out_channels=1024, - roi_feat_size=7, - num_classes=81, - target_means=[0., 0., 0., 0.], - target_stds=[0.1, 0.1, 0.2, 0.2], - reg_class_agnostic=False, - loss_cls=dict( - type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), - loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0))) -# model training and testing settings -train_cfg = dict( - rpn=dict( - assigner=dict( - type='MaxIoUAssigner', - pos_iou_thr=0.7, - neg_iou_thr=0.3, - min_pos_iou=0.3, - ignore_iof_thr=-1), - sampler=dict( - type='RandomSampler', - num=256, - pos_fraction=0.5, - neg_pos_ub=-1, - add_gt_as_proposals=False), - allowed_border=0, - pos_weight=-1, - debug=False), - rpn_proposal=dict( - nms_across_levels=False, - nms_pre=2000, - nms_post=2000, - max_num=2000, - nms_thr=0.7, - min_bbox_size=0), - rcnn=dict( - assigner=dict( - type='MaxIoUAssigner', - pos_iou_thr=0.5, - neg_iou_thr=0.5, - min_pos_iou=0.5, - 
ignore_iof_thr=-1), - sampler=dict( - type='RandomSampler', - num=512, - pos_fraction=0.25, - neg_pos_ub=-1, - add_gt_as_proposals=True), - pos_weight=-1, - debug=False)) -test_cfg = dict( - rpn=dict( - nms_across_levels=False, - nms_pre=1000, - nms_post=1000, - max_num=1000, - nms_thr=0.7, - min_bbox_size=0), - rcnn=dict( - score_thr=0.05, nms=dict(type='nms', iou_thr=0.5), max_per_img=100) - # soft-nms is also supported for rcnn testing - # e.g., nms=dict(type='soft_nms', iou_thr=0.5, min_score=0.05) -) -# dataset settings -dataset_type = 'CocoDataset' -data_root = 'data/coco/' -img_norm_cfg = dict( - mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) -train_pipeline = [ - dict(type='LoadImageFromFile'), - dict(type='LoadAnnotations', with_bbox=True), - dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), - dict(type='RandomFlip', flip_ratio=0.5), - dict(type='Normalize', **img_norm_cfg), - dict(type='Pad', size_divisor=32), - dict(type='DefaultFormatBundle'), - dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']), -] -test_pipeline = [ - dict(type='LoadImageFromFile'), - dict( - type='MultiScaleFlipAug', - img_scale=(1333, 800), - flip=False, - transforms=[ - dict(type='Resize', keep_ratio=True), - dict(type='RandomFlip'), - dict(type='Normalize', **img_norm_cfg), - dict(type='Pad', size_divisor=32), - dict(type='ImageToTensor', keys=['img']), - dict(type='Collect', keys=['img']), - ]) -] -data = dict( - imgs_per_gpu=2, - workers_per_gpu=2, - train=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_train2017.json', - img_prefix=data_root + 'train2017/', - pipeline=train_pipeline), - val=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_val2017.json', - img_prefix=data_root + 'val2017/', - pipeline=test_pipeline), - test=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_val2017.json', - img_prefix=data_root + 'val2017/', - pipeline=test_pipeline)) 
-evaluation = dict(interval=1, metric='bbox') -# optimizer -optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001) -optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) -# learning policy -lr_config = dict( - policy='step', - warmup='linear', - warmup_iters=500, - warmup_ratio=1.0 / 3, - step=[8, 11]) -checkpoint_config = dict(interval=1) -# yapf:disable -log_config = dict( - interval=50, - hooks=[ - dict(type='TextLoggerHook'), - # dict(type='TensorboardLoggerHook') - ]) -# yapf:enable -# runtime settings -total_epochs = 12 -dist_params = dict(backend='nccl') -log_level = 'INFO' -work_dir = './work_dirs/faster_rcnn_r50_fpn_attention_0010_dcn_1x' -load_from = None -resume_from = None -workflow = [('train', 1)] diff --git a/configs/empirical_attention/faster_rcnn_r50_fpn_attention_0010_dcn_1x_coco.py b/configs/empirical_attention/faster_rcnn_r50_fpn_attention_0010_dcn_1x_coco.py new file mode 100644 index 0000000000000000000000000000000000000000..ed54590a1090897617554ed2f1c3f64b4ad5ea7e --- /dev/null +++ b/configs/empirical_attention/faster_rcnn_r50_fpn_attention_0010_dcn_1x_coco.py @@ -0,0 +1,10 @@ +_base_ = '../faster_rcnn/faster_rcnn_r50_fpn_1x_coco.py' +model = dict( + backbone=dict( + gen_attention=dict( + spatial_range=-1, num_heads=8, attention_type='0010', kv_stride=2), + stage_with_gen_attention=[[], [], [0, 1, 2, 3, 4, 5], [0, 1, 2]], + dcn=dict(type='DCN', deformable_groups=1, fallback_on_stride=False), + stage_with_dcn=(False, True, True, True), + )) +work_dir = './work_dirs/faster_rcnn_r50_fpn_attention_0010_dcn_1x' diff --git a/configs/empirical_attention/faster_rcnn_r50_fpn_attention_1111_1x.py b/configs/empirical_attention/faster_rcnn_r50_fpn_attention_1111_1x.py deleted file mode 100644 index d76d599b2c40703a4857ddb0704eb9ce9395281a..0000000000000000000000000000000000000000 --- a/configs/empirical_attention/faster_rcnn_r50_fpn_attention_1111_1x.py +++ /dev/null @@ -1,180 +0,0 @@ -# model settings -model = dict( - 
type='FasterRCNN', - pretrained='torchvision://resnet50', - backbone=dict( - type='ResNet', - depth=50, - num_stages=4, - out_indices=(0, 1, 2, 3), - frozen_stages=1, - norm_cfg=dict(type='BN', requires_grad=True), - style='pytorch', - gen_attention=dict( - spatial_range=-1, num_heads=8, attention_type='1111', kv_stride=2), - stage_with_gen_attention=[[], [], [0, 1, 2, 3, 4, 5], [0, 1, 2]], - ), - neck=dict( - type='FPN', - in_channels=[256, 512, 1024, 2048], - out_channels=256, - num_outs=5), - rpn_head=dict( - type='RPNHead', - in_channels=256, - feat_channels=256, - anchor_scales=[8], - anchor_ratios=[0.5, 1.0, 2.0], - anchor_strides=[4, 8, 16, 32, 64], - target_means=[.0, .0, .0, .0], - target_stds=[1.0, 1.0, 1.0, 1.0], - loss_cls=dict( - type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0), - loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0)), - bbox_roi_extractor=dict( - type='SingleRoIExtractor', - roi_layer=dict(type='RoIAlign', out_size=7, sample_num=2), - out_channels=256, - featmap_strides=[4, 8, 16, 32]), - bbox_head=dict( - type='SharedFCBBoxHead', - num_fcs=2, - in_channels=256, - fc_out_channels=1024, - roi_feat_size=7, - num_classes=81, - target_means=[0., 0., 0., 0.], - target_stds=[0.1, 0.1, 0.2, 0.2], - reg_class_agnostic=False, - loss_cls=dict( - type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), - loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0))) -# model training and testing settings -train_cfg = dict( - rpn=dict( - assigner=dict( - type='MaxIoUAssigner', - pos_iou_thr=0.7, - neg_iou_thr=0.3, - min_pos_iou=0.3, - ignore_iof_thr=-1), - sampler=dict( - type='RandomSampler', - num=256, - pos_fraction=0.5, - neg_pos_ub=-1, - add_gt_as_proposals=False), - allowed_border=0, - pos_weight=-1, - debug=False), - rpn_proposal=dict( - nms_across_levels=False, - nms_pre=2000, - nms_post=2000, - max_num=2000, - nms_thr=0.7, - min_bbox_size=0), - rcnn=dict( - assigner=dict( - type='MaxIoUAssigner', - 
pos_iou_thr=0.5, - neg_iou_thr=0.5, - min_pos_iou=0.5, - ignore_iof_thr=-1), - sampler=dict( - type='RandomSampler', - num=512, - pos_fraction=0.25, - neg_pos_ub=-1, - add_gt_as_proposals=True), - pos_weight=-1, - debug=False)) -test_cfg = dict( - rpn=dict( - nms_across_levels=False, - nms_pre=1000, - nms_post=1000, - max_num=1000, - nms_thr=0.7, - min_bbox_size=0), - rcnn=dict( - score_thr=0.05, nms=dict(type='nms', iou_thr=0.5), max_per_img=100) - # soft-nms is also supported for rcnn testing - # e.g., nms=dict(type='soft_nms', iou_thr=0.5, min_score=0.05) -) -# dataset settings -dataset_type = 'CocoDataset' -data_root = 'data/coco/' -img_norm_cfg = dict( - mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) -train_pipeline = [ - dict(type='LoadImageFromFile'), - dict(type='LoadAnnotations', with_bbox=True), - dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), - dict(type='RandomFlip', flip_ratio=0.5), - dict(type='Normalize', **img_norm_cfg), - dict(type='Pad', size_divisor=32), - dict(type='DefaultFormatBundle'), - dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']), -] -test_pipeline = [ - dict(type='LoadImageFromFile'), - dict( - type='MultiScaleFlipAug', - img_scale=(1333, 800), - flip=False, - transforms=[ - dict(type='Resize', keep_ratio=True), - dict(type='RandomFlip'), - dict(type='Normalize', **img_norm_cfg), - dict(type='Pad', size_divisor=32), - dict(type='ImageToTensor', keys=['img']), - dict(type='Collect', keys=['img']), - ]) -] -data = dict( - imgs_per_gpu=2, - workers_per_gpu=2, - train=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_train2017.json', - img_prefix=data_root + 'train2017/', - pipeline=train_pipeline), - val=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_val2017.json', - img_prefix=data_root + 'val2017/', - pipeline=test_pipeline), - test=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_val2017.json', - 
img_prefix=data_root + 'val2017/', - pipeline=test_pipeline)) -evaluation = dict(interval=1, metric='bbox') -# optimizer -optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001) -optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) -# learning policy -lr_config = dict( - policy='step', - warmup='linear', - warmup_iters=500, - warmup_ratio=1.0 / 3, - step=[8, 11]) -checkpoint_config = dict(interval=1) -# yapf:disable -log_config = dict( - interval=50, - hooks=[ - dict(type='TextLoggerHook'), - # dict(type='TensorboardLoggerHook') - ]) -# yapf:enable -# runtime settings -total_epochs = 12 -dist_params = dict(backend='nccl') -log_level = 'INFO' -work_dir = './work_dirs/faster_rcnn_r50_fpn_attention_1111_1x' -load_from = None -resume_from = None -workflow = [('train', 1)] diff --git a/configs/empirical_attention/faster_rcnn_r50_fpn_attention_1111_1x_coco.py b/configs/empirical_attention/faster_rcnn_r50_fpn_attention_1111_1x_coco.py new file mode 100644 index 0000000000000000000000000000000000000000..62b5b05126e34a1bb6562602342429c6ade2bdaf --- /dev/null +++ b/configs/empirical_attention/faster_rcnn_r50_fpn_attention_1111_1x_coco.py @@ -0,0 +1,8 @@ +_base_ = '../faster_rcnn/faster_rcnn_r50_fpn_1x_coco.py' +model = dict( + backbone=dict( + gen_attention=dict( + spatial_range=-1, num_heads=8, attention_type='1111', kv_stride=2), + stage_with_gen_attention=[[], [], [0, 1, 2, 3, 4, 5], [0, 1, 2]], + )) +work_dir = './work_dirs/faster_rcnn_r50_fpn_attention_1111_1x' diff --git a/configs/empirical_attention/faster_rcnn_r50_fpn_attention_1111_dcn_1x.py b/configs/empirical_attention/faster_rcnn_r50_fpn_attention_1111_dcn_1x.py deleted file mode 100644 index 8132e696100cebae03b2ae554f3ec57ad47fb206..0000000000000000000000000000000000000000 --- a/configs/empirical_attention/faster_rcnn_r50_fpn_attention_1111_dcn_1x.py +++ /dev/null @@ -1,182 +0,0 @@ -# model settings -model = dict( - type='FasterRCNN', - pretrained='torchvision://resnet50', - 
backbone=dict( - type='ResNet', - depth=50, - num_stages=4, - out_indices=(0, 1, 2, 3), - frozen_stages=1, - norm_cfg=dict(type='BN', requires_grad=True), - style='pytorch', - gen_attention=dict( - spatial_range=-1, num_heads=8, attention_type='1111', kv_stride=2), - stage_with_gen_attention=[[], [], [0, 1, 2, 3, 4, 5], [0, 1, 2]], - dcn=dict(type='DCN', deformable_groups=1, fallback_on_stride=False), - stage_with_dcn=(False, True, True, True), - ), - neck=dict( - type='FPN', - in_channels=[256, 512, 1024, 2048], - out_channels=256, - num_outs=5), - rpn_head=dict( - type='RPNHead', - in_channels=256, - feat_channels=256, - anchor_scales=[8], - anchor_ratios=[0.5, 1.0, 2.0], - anchor_strides=[4, 8, 16, 32, 64], - target_means=[.0, .0, .0, .0], - target_stds=[1.0, 1.0, 1.0, 1.0], - loss_cls=dict( - type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0), - loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0)), - bbox_roi_extractor=dict( - type='SingleRoIExtractor', - roi_layer=dict(type='RoIAlign', out_size=7, sample_num=2), - out_channels=256, - featmap_strides=[4, 8, 16, 32]), - bbox_head=dict( - type='SharedFCBBoxHead', - num_fcs=2, - in_channels=256, - fc_out_channels=1024, - roi_feat_size=7, - num_classes=81, - target_means=[0., 0., 0., 0.], - target_stds=[0.1, 0.1, 0.2, 0.2], - reg_class_agnostic=False, - loss_cls=dict( - type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), - loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0))) -# model training and testing settings -train_cfg = dict( - rpn=dict( - assigner=dict( - type='MaxIoUAssigner', - pos_iou_thr=0.7, - neg_iou_thr=0.3, - min_pos_iou=0.3, - ignore_iof_thr=-1), - sampler=dict( - type='RandomSampler', - num=256, - pos_fraction=0.5, - neg_pos_ub=-1, - add_gt_as_proposals=False), - allowed_border=0, - pos_weight=-1, - debug=False), - rpn_proposal=dict( - nms_across_levels=False, - nms_pre=2000, - nms_post=2000, - max_num=2000, - nms_thr=0.7, - min_bbox_size=0), - 
rcnn=dict( - assigner=dict( - type='MaxIoUAssigner', - pos_iou_thr=0.5, - neg_iou_thr=0.5, - min_pos_iou=0.5, - ignore_iof_thr=-1), - sampler=dict( - type='RandomSampler', - num=512, - pos_fraction=0.25, - neg_pos_ub=-1, - add_gt_as_proposals=True), - pos_weight=-1, - debug=False)) -test_cfg = dict( - rpn=dict( - nms_across_levels=False, - nms_pre=1000, - nms_post=1000, - max_num=1000, - nms_thr=0.7, - min_bbox_size=0), - rcnn=dict( - score_thr=0.05, nms=dict(type='nms', iou_thr=0.5), max_per_img=100) - # soft-nms is also supported for rcnn testing - # e.g., nms=dict(type='soft_nms', iou_thr=0.5, min_score=0.05) -) -# dataset settings -dataset_type = 'CocoDataset' -data_root = 'data/coco/' -img_norm_cfg = dict( - mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) -train_pipeline = [ - dict(type='LoadImageFromFile'), - dict(type='LoadAnnotations', with_bbox=True), - dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), - dict(type='RandomFlip', flip_ratio=0.5), - dict(type='Normalize', **img_norm_cfg), - dict(type='Pad', size_divisor=32), - dict(type='DefaultFormatBundle'), - dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']), -] -test_pipeline = [ - dict(type='LoadImageFromFile'), - dict( - type='MultiScaleFlipAug', - img_scale=(1333, 800), - flip=False, - transforms=[ - dict(type='Resize', keep_ratio=True), - dict(type='RandomFlip'), - dict(type='Normalize', **img_norm_cfg), - dict(type='Pad', size_divisor=32), - dict(type='ImageToTensor', keys=['img']), - dict(type='Collect', keys=['img']), - ]) -] -data = dict( - imgs_per_gpu=2, - workers_per_gpu=2, - train=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_train2017.json', - img_prefix=data_root + 'train2017/', - pipeline=train_pipeline), - val=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_val2017.json', - img_prefix=data_root + 'val2017/', - pipeline=test_pipeline), - test=dict( - type=dataset_type, - ann_file=data_root + 
'annotations/instances_val2017.json', - img_prefix=data_root + 'val2017/', - pipeline=test_pipeline)) -evaluation = dict(interval=1, metric='bbox') -# optimizer -optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001) -optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) -# learning policy -lr_config = dict( - policy='step', - warmup='linear', - warmup_iters=500, - warmup_ratio=1.0 / 3, - step=[8, 11]) -checkpoint_config = dict(interval=1) -# yapf:disable -log_config = dict( - interval=50, - hooks=[ - dict(type='TextLoggerHook'), - # dict(type='TensorboardLoggerHook') - ]) -# yapf:enable -# runtime settings -total_epochs = 12 -dist_params = dict(backend='nccl') -log_level = 'INFO' -work_dir = './work_dirs/faster_rcnn_r50_fpn_attention_1111_dcn_1x' -load_from = None -resume_from = None -workflow = [('train', 1)] diff --git a/configs/empirical_attention/faster_rcnn_r50_fpn_attention_1111_dcn_1x_coco.py b/configs/empirical_attention/faster_rcnn_r50_fpn_attention_1111_dcn_1x_coco.py new file mode 100644 index 0000000000000000000000000000000000000000..7c27530977e31ec18d926e3f54efbf618eb0123f --- /dev/null +++ b/configs/empirical_attention/faster_rcnn_r50_fpn_attention_1111_dcn_1x_coco.py @@ -0,0 +1,10 @@ +_base_ = '../faster_rcnn/faster_rcnn_r50_fpn_1x_coco.py' +model = dict( + backbone=dict( + gen_attention=dict( + spatial_range=-1, num_heads=8, attention_type='1111', kv_stride=2), + stage_with_gen_attention=[[], [], [0, 1, 2, 3, 4, 5], [0, 1, 2]], + dcn=dict(type='DCN', deformable_groups=1, fallback_on_stride=False), + stage_with_dcn=(False, True, True, True), + )) +work_dir = './work_dirs/faster_rcnn_r50_fpn_attention_1111_dcn_1x' diff --git a/configs/fast_mask_rcnn_r101_fpn_1x.py b/configs/fast_mask_rcnn_r101_fpn_1x.py deleted file mode 100644 index 8f4079bc144a8f659453d5898383be948d923e63..0000000000000000000000000000000000000000 --- a/configs/fast_mask_rcnn_r101_fpn_1x.py +++ /dev/null @@ -1,159 +0,0 @@ -# model settings -model = 
dict( - type='FastRCNN', - pretrained='torchvision://resnet101', - backbone=dict( - type='ResNet', - depth=101, - num_stages=4, - out_indices=(0, 1, 2, 3), - frozen_stages=1, - norm_cfg=dict(type='BN', requires_grad=True), - style='pytorch'), - neck=dict( - type='FPN', - in_channels=[256, 512, 1024, 2048], - out_channels=256, - num_outs=5), - bbox_roi_extractor=dict( - type='SingleRoIExtractor', - roi_layer=dict(type='RoIAlign', out_size=7, sample_num=2), - out_channels=256, - featmap_strides=[4, 8, 16, 32]), - bbox_head=dict( - type='SharedFCBBoxHead', - num_fcs=2, - in_channels=256, - fc_out_channels=1024, - roi_feat_size=7, - num_classes=81, - target_means=[0., 0., 0., 0.], - target_stds=[0.1, 0.1, 0.2, 0.2], - reg_class_agnostic=False, - loss_cls=dict( - type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), - loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0)), - mask_roi_extractor=dict( - type='SingleRoIExtractor', - roi_layer=dict(type='RoIAlign', out_size=14, sample_num=2), - out_channels=256, - featmap_strides=[4, 8, 16, 32]), - mask_head=dict( - type='FCNMaskHead', - num_convs=4, - in_channels=256, - conv_out_channels=256, - num_classes=81, - loss_mask=dict( - type='CrossEntropyLoss', use_mask=True, loss_weight=1.0))) -# model training and testing settings -train_cfg = dict( - rcnn=dict( - assigner=dict( - type='MaxIoUAssigner', - pos_iou_thr=0.5, - neg_iou_thr=0.5, - min_pos_iou=0.5, - ignore_iof_thr=-1), - sampler=dict( - type='RandomSampler', - num=512, - pos_fraction=0.25, - neg_pos_ub=-1, - add_gt_as_proposals=True), - mask_size=28, - pos_weight=-1, - debug=False)) -test_cfg = dict( - rcnn=dict( - score_thr=0.05, - nms=dict(type='nms', iou_thr=0.5), - max_per_img=100, - mask_thr_binary=0.5)) -# dataset settings -dataset_type = 'CocoDataset' -data_root = 'data/coco/' -img_norm_cfg = dict( - mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) -train_pipeline = [ - dict(type='LoadImageFromFile'), - 
dict(type='LoadProposals', num_max_proposals=2000), - dict(type='LoadAnnotations', with_bbox=True, with_mask=True), - dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), - dict(type='RandomFlip', flip_ratio=0.5), - dict(type='Normalize', **img_norm_cfg), - dict(type='Pad', size_divisor=32), - dict(type='DefaultFormatBundle'), - dict( - type='Collect', - keys=['img', 'proposals', 'gt_bboxes', 'gt_labels', 'gt_masks']), -] -test_pipeline = [ - dict(type='LoadImageFromFile'), - dict(type='LoadProposals', num_max_proposals=None), - dict( - type='MultiScaleFlipAug', - img_scale=(1333, 800), - flip=False, - transforms=[ - dict(type='Resize', keep_ratio=True), - dict(type='RandomFlip'), - dict(type='Normalize', **img_norm_cfg), - dict(type='Pad', size_divisor=32), - dict(type='ImageToTensor', keys=['img']), - dict(type='ToTensor', keys=['proposals']), - dict( - type='ToDataContainer', - fields=[dict(key='proposals', stack=False)]), - dict(type='Collect', keys=['img', 'proposals']), - ]) -] -data = dict( - imgs_per_gpu=2, - workers_per_gpu=2, - train=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_train2017.json', - proposal_file=data_root + 'proposals/rpn_r50_fpn_1x_train2017.pkl', - img_prefix=data_root + 'train2017/', - pipeline=train_pipeline), - val=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_val2017.json', - proposal_file=data_root + 'proposals/rpn_r50_fpn_1x_val2017.pkl', - img_prefix=data_root + 'val2017/', - pipeline=test_pipeline), - test=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_val2017.json', - proposal_file=data_root + 'proposals/rpn_r50_fpn_1x_val2017.pkl', - img_prefix=data_root + 'val2017/', - pipeline=test_pipeline)) -evaluation = dict(interval=1, metric=['bbox', 'segm']) -# optimizer -optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001) -optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) -# learning policy -lr_config = dict( - 
policy='step', - warmup='linear', - warmup_iters=500, - warmup_ratio=1.0 / 3, - step=[8, 11]) -checkpoint_config = dict(interval=1) -# yapf:disable -log_config = dict( - interval=50, - hooks=[ - dict(type='TextLoggerHook'), - # dict(type='TensorboardLoggerHook') - ]) -# yapf:enable -# runtime settings -total_epochs = 12 -dist_params = dict(backend='nccl') -log_level = 'INFO' -work_dir = './work_dirs/fast_mask_rcnn_r101_fpn_1x' -load_from = None -resume_from = None -workflow = [('train', 1)] diff --git a/configs/fast_mask_rcnn_r50_caffe_c4_1x.py b/configs/fast_mask_rcnn_r50_caffe_c4_1x.py deleted file mode 100644 index 5d32f0e78d2325a4fc92b4ee735ac12bb95043ce..0000000000000000000000000000000000000000 --- a/configs/fast_mask_rcnn_r50_caffe_c4_1x.py +++ /dev/null @@ -1,157 +0,0 @@ -# model settings -norm_cfg = dict(type='BN', requires_grad=False) -model = dict( - type='FastRCNN', - pretrained='open-mmlab://resnet50_caffe', - backbone=dict( - type='ResNet', - depth=50, - num_stages=3, - strides=(1, 2, 2), - dilations=(1, 1, 1), - out_indices=(2, ), - frozen_stages=1, - norm_cfg=norm_cfg, - norm_eval=True, - style='caffe'), - shared_head=dict( - type='ResLayer', - depth=50, - stage=3, - stride=2, - dilation=1, - style='caffe', - norm_cfg=norm_cfg, - norm_eval=True), - bbox_roi_extractor=dict( - type='SingleRoIExtractor', - roi_layer=dict(type='RoIAlign', out_size=14, sample_num=2), - out_channels=1024, - featmap_strides=[16]), - bbox_head=dict( - type='BBoxHead', - with_avg_pool=True, - roi_feat_size=7, - in_channels=2048, - num_classes=81, - target_means=[0., 0., 0., 0.], - target_stds=[0.1, 0.1, 0.2, 0.2], - reg_class_agnostic=False), - mask_roi_extractor=None, - mask_head=dict( - type='FCNMaskHead', - num_convs=0, - in_channels=2048, - conv_out_channels=256, - num_classes=81)) -# model training and testing settings -train_cfg = dict( - rcnn=dict( - assigner=dict( - type='MaxIoUAssigner', - pos_iou_thr=0.5, - neg_iou_thr=0.5, - min_pos_iou=0.5, - ignore_iof_thr=-1), - 
sampler=dict( - type='RandomSampler', - num=512, - pos_fraction=0.25, - neg_pos_ub=-1, - add_gt_as_proposals=True), - mask_size=14, - pos_weight=-1, - debug=False)) -test_cfg = dict( - rcnn=dict( - score_thr=0.05, - nms=dict(type='nms', iou_thr=0.5), - max_per_img=100, - mask_thr_binary=0.5)) -# dataset settings -dataset_type = 'CocoDataset' -data_root = 'data/coco/' -img_norm_cfg = dict( - mean=[102.9801, 115.9465, 122.7717], std=[1.0, 1.0, 1.0], to_rgb=False) -train_pipeline = [ - dict(type='LoadImageFromFile'), - dict(type='LoadProposals', num_max_proposals=2000), - dict(type='LoadAnnotations', with_bbox=True, with_mask=True), - dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), - dict(type='RandomFlip', flip_ratio=0.5), - dict(type='Normalize', **img_norm_cfg), - dict(type='Pad', size_divisor=32), - dict(type='DefaultFormatBundle'), - dict( - type='Collect', - keys=['img', 'proposals', 'gt_bboxes', 'gt_labels', 'gt_masks']), -] -test_pipeline = [ - dict(type='LoadImageFromFile'), - dict(type='LoadProposals', num_max_proposals=None), - dict( - type='MultiScaleFlipAug', - img_scale=(1333, 800), - flip=False, - transforms=[ - dict(type='Resize', keep_ratio=True), - dict(type='RandomFlip'), - dict(type='Normalize', **img_norm_cfg), - dict(type='Pad', size_divisor=32), - dict(type='ImageToTensor', keys=['img']), - dict(type='ToTensor', keys=['proposals']), - dict( - type='ToDataContainer', - fields=[dict(key='proposals', stack=False)]), - dict(type='Collect', keys=['img', 'proposals']), - ]) -] -data = dict( - imgs_per_gpu=2, - workers_per_gpu=2, - train=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_train2017.json', - proposal_file=data_root + 'proposals/rpn_r50_c4_1x_train2017.pkl', - img_prefix=data_root + 'train2017/', - pipeline=train_pipeline), - val=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_val2017.json', - proposal_file=data_root + 'proposals/rpn_r50_c4_1x_val2017.pkl', - img_prefix=data_root 
+ 'val2017/', - pipeline=test_pipeline), - test=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_val2017.json', - proposal_file=data_root + 'proposals/rpn_r50_c4_1x_val2017.pkl', - img_prefix=data_root + 'val2017/', - pipeline=test_pipeline)) -evaluation = dict(interval=1, metric=['bbox', 'segm']) -# optimizer -optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001) -optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) -# learning policy -lr_config = dict( - policy='step', - warmup='linear', - warmup_iters=500, - warmup_ratio=1.0 / 3, - step=[8, 11]) -checkpoint_config = dict(interval=1) -# yapf:disable -log_config = dict( - interval=50, - hooks=[ - dict(type='TextLoggerHook'), - # dict(type='TensorboardLoggerHook') - ]) -# yapf:enable -# runtime settings -total_epochs = 12 -dist_params = dict(backend='nccl') -log_level = 'INFO' -work_dir = './work_dirs/fast_mask_rcnn_r50_caffe_c4_1x' -load_from = None -resume_from = None -workflow = [('train', 1)] diff --git a/configs/fast_mask_rcnn_r50_fpn_1x.py b/configs/fast_mask_rcnn_r50_fpn_1x.py deleted file mode 100644 index 315fa2c0216eace74b197f423decf45317db6c41..0000000000000000000000000000000000000000 --- a/configs/fast_mask_rcnn_r50_fpn_1x.py +++ /dev/null @@ -1,159 +0,0 @@ -# model settings -model = dict( - type='FastRCNN', - pretrained='torchvision://resnet50', - backbone=dict( - type='ResNet', - depth=50, - num_stages=4, - out_indices=(0, 1, 2, 3), - frozen_stages=1, - norm_cfg=dict(type='BN', requires_grad=True), - style='pytorch'), - neck=dict( - type='FPN', - in_channels=[256, 512, 1024, 2048], - out_channels=256, - num_outs=5), - bbox_roi_extractor=dict( - type='SingleRoIExtractor', - roi_layer=dict(type='RoIAlign', out_size=7, sample_num=2), - out_channels=256, - featmap_strides=[4, 8, 16, 32]), - bbox_head=dict( - type='SharedFCBBoxHead', - num_fcs=2, - in_channels=256, - fc_out_channels=1024, - roi_feat_size=7, - num_classes=81, - target_means=[0., 0., 
0., 0.], - target_stds=[0.1, 0.1, 0.2, 0.2], - reg_class_agnostic=False, - loss_cls=dict( - type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), - loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0)), - mask_roi_extractor=dict( - type='SingleRoIExtractor', - roi_layer=dict(type='RoIAlign', out_size=14, sample_num=2), - out_channels=256, - featmap_strides=[4, 8, 16, 32]), - mask_head=dict( - type='FCNMaskHead', - num_convs=4, - in_channels=256, - conv_out_channels=256, - num_classes=81, - loss_mask=dict( - type='CrossEntropyLoss', use_mask=True, loss_weight=1.0))) -# model training and testing settings -train_cfg = dict( - rcnn=dict( - assigner=dict( - type='MaxIoUAssigner', - pos_iou_thr=0.5, - neg_iou_thr=0.5, - min_pos_iou=0.5, - ignore_iof_thr=-1), - sampler=dict( - type='RandomSampler', - num=512, - pos_fraction=0.25, - neg_pos_ub=-1, - add_gt_as_proposals=True), - mask_size=28, - pos_weight=-1, - debug=False)) -test_cfg = dict( - rcnn=dict( - score_thr=0.05, - nms=dict(type='nms', iou_thr=0.5), - max_per_img=100, - mask_thr_binary=0.5)) -# dataset settings -dataset_type = 'CocoDataset' -data_root = 'data/coco/' -img_norm_cfg = dict( - mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) -train_pipeline = [ - dict(type='LoadImageFromFile'), - dict(type='LoadProposals', num_max_proposals=2000), - dict(type='LoadAnnotations', with_bbox=True, with_mask=True), - dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), - dict(type='RandomFlip', flip_ratio=0.5), - dict(type='Normalize', **img_norm_cfg), - dict(type='Pad', size_divisor=32), - dict(type='DefaultFormatBundle'), - dict( - type='Collect', - keys=['img', 'proposals', 'gt_bboxes', 'gt_labels', 'gt_masks']), -] -test_pipeline = [ - dict(type='LoadImageFromFile'), - dict(type='LoadProposals', num_max_proposals=None), - dict( - type='MultiScaleFlipAug', - img_scale=(1333, 800), - flip=False, - transforms=[ - dict(type='Resize', keep_ratio=True), - 
dict(type='RandomFlip'), - dict(type='Normalize', **img_norm_cfg), - dict(type='Pad', size_divisor=32), - dict(type='ImageToTensor', keys=['img']), - dict(type='ToTensor', keys=['proposals']), - dict( - type='ToDataContainer', - fields=[dict(key='proposals', stack=False)]), - dict(type='Collect', keys=['img', 'proposals']), - ]) -] -data = dict( - imgs_per_gpu=2, - workers_per_gpu=2, - train=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_train2017.json', - proposal_file=data_root + 'proposals/rpn_r50_fpn_1x_train2017.pkl', - img_prefix=data_root + 'train2017/', - pipeline=train_pipeline), - val=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_val2017.json', - proposal_file=data_root + 'proposals/rpn_r50_fpn_1x_val2017.pkl', - img_prefix=data_root + 'val2017/', - pipeline=test_pipeline), - test=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_val2017.json', - proposal_file=data_root + 'proposals/rpn_r50_fpn_1x_val2017.pkl', - img_prefix=data_root + 'val2017/', - pipeline=test_pipeline)) -evaluation = dict(interval=1, metric=['bbox', 'segm']) -# optimizer -optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001) -optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) -# learning policy -lr_config = dict( - policy='step', - warmup='linear', - warmup_iters=500, - warmup_ratio=1.0 / 3, - step=[8, 11]) -checkpoint_config = dict(interval=1) -# yapf:disable -log_config = dict( - interval=50, - hooks=[ - dict(type='TextLoggerHook'), - # dict(type='TensorboardLoggerHook') - ]) -# yapf:enable -# runtime settings -total_epochs = 12 -dist_params = dict(backend='nccl') -log_level = 'INFO' -work_dir = './work_dirs/fast_mask_rcnn_r50_fpn_1x' -load_from = None -resume_from = None -workflow = [('train', 1)] diff --git a/configs/fast_rcnn/fast_rcnn_r50_fpn_1x_coco.py b/configs/fast_rcnn/fast_rcnn_r50_fpn_1x_coco.py new file mode 100644 index 
0000000000000000000000000000000000000000..db6ea5dd551043ac7d3cd047eaf475bd5d06f56a --- /dev/null +++ b/configs/fast_rcnn/fast_rcnn_r50_fpn_1x_coco.py @@ -0,0 +1,53 @@ +_base_ = [ + '../_base_/models/fast_rcnn_r50_fpn.py', + '../_base_/datasets/coco_detection.py', + '../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py' +] +dataset_type = 'CocoDataset' +data_root = 'data/coco/' +img_norm_cfg = dict( + mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadProposals', num_max_proposals=2000), + dict(type='LoadAnnotations', with_bbox=True), + dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'proposals', 'gt_bboxes', 'gt_labels']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadProposals', num_max_proposals=None), + dict( + type='MultiScaleFlipAug', + img_scale=(1333, 800), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='ImageToTensor', keys=['img']), + dict(type='ToTensor', keys=['proposals']), + dict( + type='ToDataContainer', + fields=[dict(key='proposals', stack=False)]), + dict(type='Collect', keys=['img', 'proposals']), + ]) +] +data = dict( + imgs_per_gpu=2, + workers_per_gpu=2, + train=dict( + proposal_file=data_root + 'proposals/rpn_r50_fpn_1x_train2017.pkl', + pipeline=train_pipeline), + val=dict( + proposal_file=data_root + 'proposals/rpn_r50_fpn_1x_val2017.pkl', + pipeline=test_pipeline), + test=dict( + proposal_file=data_root + 'proposals/rpn_r50_fpn_1x_val2017.pkl', + pipeline=test_pipeline)) +work_dir = './work_dirs/fast_rcnn_r50_fpn_1x' diff --git a/configs/fast_rcnn_r101_fpn_1x.py 
b/configs/fast_rcnn_r101_fpn_1x.py deleted file mode 100644 index e46089320d844cb33cfdadddf4765680c861be51..0000000000000000000000000000000000000000 --- a/configs/fast_rcnn_r101_fpn_1x.py +++ /dev/null @@ -1,140 +0,0 @@ -# model settings -model = dict( - type='FastRCNN', - pretrained='torchvision://resnet101', - backbone=dict( - type='ResNet', - depth=101, - num_stages=4, - out_indices=(0, 1, 2, 3), - frozen_stages=1, - norm_cfg=dict(type='BN', requires_grad=True), - style='pytorch'), - neck=dict( - type='FPN', - in_channels=[256, 512, 1024, 2048], - out_channels=256, - num_outs=5), - bbox_roi_extractor=dict( - type='SingleRoIExtractor', - roi_layer=dict(type='RoIAlign', out_size=7, sample_num=2), - out_channels=256, - featmap_strides=[4, 8, 16, 32]), - bbox_head=dict( - type='SharedFCBBoxHead', - num_fcs=2, - in_channels=256, - fc_out_channels=1024, - roi_feat_size=7, - num_classes=81, - target_means=[0., 0., 0., 0.], - target_stds=[0.1, 0.1, 0.2, 0.2], - reg_class_agnostic=False, - loss_cls=dict( - type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), - loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0))) -# model training and testing settings -train_cfg = dict( - rcnn=dict( - assigner=dict( - type='MaxIoUAssigner', - pos_iou_thr=0.5, - neg_iou_thr=0.5, - min_pos_iou=0.5, - ignore_iof_thr=-1), - sampler=dict( - type='RandomSampler', - num=512, - pos_fraction=0.25, - neg_pos_ub=-1, - add_gt_as_proposals=True), - pos_weight=-1, - debug=False)) -test_cfg = dict( - rcnn=dict( - score_thr=0.05, nms=dict(type='nms', iou_thr=0.5), max_per_img=100)) -# dataset settings -dataset_type = 'CocoDataset' -data_root = 'data/coco/' -img_norm_cfg = dict( - mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) -train_pipeline = [ - dict(type='LoadImageFromFile'), - dict(type='LoadProposals', num_max_proposals=2000), - dict(type='LoadAnnotations', with_bbox=True), - dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), - 
dict(type='RandomFlip', flip_ratio=0.5), - dict(type='Normalize', **img_norm_cfg), - dict(type='Pad', size_divisor=32), - dict(type='DefaultFormatBundle'), - dict(type='Collect', keys=['img', 'proposals', 'gt_bboxes', 'gt_labels']), -] -test_pipeline = [ - dict(type='LoadImageFromFile'), - dict(type='LoadProposals', num_max_proposals=None), - dict( - type='MultiScaleFlipAug', - img_scale=(1333, 800), - flip=False, - transforms=[ - dict(type='Resize', keep_ratio=True), - dict(type='RandomFlip'), - dict(type='Normalize', **img_norm_cfg), - dict(type='Pad', size_divisor=32), - dict(type='ImageToTensor', keys=['img']), - dict(type='ToTensor', keys=['proposals']), - dict( - type='ToDataContainer', - fields=[dict(key='proposals', stack=False)]), - dict(type='Collect', keys=['img', 'proposals']), - ]) -] -data = dict( - imgs_per_gpu=2, - workers_per_gpu=2, - train=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_train2017.json', - proposal_file=data_root + 'proposals/rpn_r50_fpn_1x_train2017.pkl', - img_prefix=data_root + 'train2017/', - pipeline=train_pipeline), - val=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_val2017.json', - proposal_file=data_root + 'proposals/rpn_r50_fpn_1x_val2017.pkl', - img_prefix=data_root + 'val2017/', - pipeline=test_pipeline), - test=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_val2017.json', - proposal_file=data_root + 'proposals/rpn_r50_fpn_1x_val2017.pkl', - img_prefix=data_root + 'val2017/', - pipeline=test_pipeline)) -evaluation = dict(interval=1, metric='bbox') -# optimizer -optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001) -optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) -# learning policy -lr_config = dict( - policy='step', - warmup='linear', - warmup_iters=500, - warmup_ratio=1.0 / 3, - step=[8, 11]) -checkpoint_config = dict(interval=1) -# yapf:disable -log_config = dict( - interval=50, - hooks=[ - 
dict(type='TextLoggerHook'), - # dict(type='TensorboardLoggerHook') - ]) -# yapf:enable -# runtime settings -total_epochs = 12 -dist_params = dict(backend='nccl') -log_level = 'INFO' -work_dir = './work_dirs/fast_rcnn_r101_fpn_1x' -load_from = None -resume_from = None -workflow = [('train', 1)] diff --git a/configs/fast_rcnn_r50_caffe_c4_1x.py b/configs/fast_rcnn_r50_caffe_c4_1x.py deleted file mode 100644 index dec822a383f141020a2f32bcdf7c9f7476eff652..0000000000000000000000000000000000000000 --- a/configs/fast_rcnn_r50_caffe_c4_1x.py +++ /dev/null @@ -1,147 +0,0 @@ -# model settings -norm_cfg = dict(type='BN', requires_grad=False) -model = dict( - type='FastRCNN', - pretrained='open-mmlab://resnet50_caffe', - backbone=dict( - type='ResNet', - depth=50, - num_stages=3, - strides=(1, 2, 2), - dilations=(1, 1, 1), - out_indices=(2, ), - frozen_stages=1, - norm_cfg=norm_cfg, - norm_eval=True, - style='caffe'), - shared_head=dict( - type='ResLayer', - depth=50, - stage=3, - stride=2, - dilation=1, - style='caffe', - norm_cfg=norm_cfg, - norm_eval=True), - bbox_roi_extractor=dict( - type='SingleRoIExtractor', - roi_layer=dict(type='RoIAlign', out_size=14, sample_num=2), - out_channels=1024, - featmap_strides=[16]), - bbox_head=dict( - type='BBoxHead', - with_avg_pool=True, - roi_feat_size=7, - in_channels=2048, - num_classes=81, - target_means=[0., 0., 0., 0.], - target_stds=[0.1, 0.1, 0.2, 0.2], - reg_class_agnostic=False, - loss_cls=dict( - type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), - loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0))) -# model training and testing settings -train_cfg = dict( - rcnn=dict( - assigner=dict( - type='MaxIoUAssigner', - pos_iou_thr=0.5, - neg_iou_thr=0.5, - min_pos_iou=0.5, - ignore_iof_thr=-1), - sampler=dict( - type='RandomSampler', - num=512, - pos_fraction=0.25, - neg_pos_ub=-1, - add_gt_as_proposals=True), - pos_weight=-1, - debug=False)) -test_cfg = dict( - rcnn=dict( - score_thr=0.05, 
nms=dict(type='nms', iou_thr=0.5), max_per_img=100)) -# dataset settings -dataset_type = 'CocoDataset' -data_root = 'data/coco/' -img_norm_cfg = dict( - mean=[102.9801, 115.9465, 122.7717], std=[1.0, 1.0, 1.0], to_rgb=False) -train_pipeline = [ - dict(type='LoadImageFromFile'), - dict(type='LoadProposals', num_max_proposals=2000), - dict(type='LoadAnnotations', with_bbox=True), - dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), - dict(type='RandomFlip', flip_ratio=0.5), - dict(type='Normalize', **img_norm_cfg), - dict(type='Pad', size_divisor=32), - dict(type='DefaultFormatBundle'), - dict(type='Collect', keys=['img', 'proposals', 'gt_bboxes', 'gt_labels']), -] -test_pipeline = [ - dict(type='LoadImageFromFile'), - dict(type='LoadProposals', num_max_proposals=None), - dict( - type='MultiScaleFlipAug', - img_scale=(1333, 800), - flip=False, - transforms=[ - dict(type='Resize', keep_ratio=True), - dict(type='RandomFlip'), - dict(type='Normalize', **img_norm_cfg), - dict(type='Pad', size_divisor=32), - dict(type='ImageToTensor', keys=['img']), - dict(type='ToTensor', keys=['proposals']), - dict( - type='ToDataContainer', - fields=[dict(key='proposals', stack=False)]), - dict(type='Collect', keys=['img', 'proposals']), - ]) -] -data = dict( - imgs_per_gpu=2, - workers_per_gpu=2, - train=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_train2017.json', - proposal_file=data_root + 'proposals/rpn_r50_c4_1x_train2017.pkl', - img_prefix=data_root + 'train2017/', - pipeline=train_pipeline), - val=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_val2017.json', - proposal_file=data_root + 'proposals/rpn_r50_c4_1x_val2017.pkl', - img_prefix=data_root + 'val2017/', - pipeline=test_pipeline), - test=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_val2017.json', - proposal_file=data_root + 'proposals/rpn_r50_c4_1x_val2017.pkl', - img_prefix=data_root + 'val2017/', - pipeline=test_pipeline)) 
-evaluation = dict(interval=1, metric='bbox') -# optimizer -optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001) -optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) -# learning policy -lr_config = dict( - policy='step', - warmup='linear', - warmup_iters=500, - warmup_ratio=1.0 / 3, - step=[8, 11]) -checkpoint_config = dict(interval=1) -# yapf:disable -log_config = dict( - interval=50, - hooks=[ - dict(type='TextLoggerHook'), - # dict(type='TensorboardLoggerHook') - ]) -# yapf:enable -# runtime settings -total_epochs = 12 -dist_params = dict(backend='nccl') -log_level = 'INFO' -work_dir = './work_dirs/fast_rcnn_r50_caffe_c4_1x' -load_from = None -resume_from = None -workflow = [('train', 1)] diff --git a/configs/fast_rcnn_r50_fpn_1x.py b/configs/fast_rcnn_r50_fpn_1x.py deleted file mode 100644 index dbeab714fb661906e917bb0be83536926049e418..0000000000000000000000000000000000000000 --- a/configs/fast_rcnn_r50_fpn_1x.py +++ /dev/null @@ -1,140 +0,0 @@ -# model settings -model = dict( - type='FastRCNN', - pretrained='torchvision://resnet50', - backbone=dict( - type='ResNet', - depth=50, - num_stages=4, - out_indices=(0, 1, 2, 3), - frozen_stages=1, - norm_cfg=dict(type='BN', requires_grad=True), - style='pytorch'), - neck=dict( - type='FPN', - in_channels=[256, 512, 1024, 2048], - out_channels=256, - num_outs=5), - bbox_roi_extractor=dict( - type='SingleRoIExtractor', - roi_layer=dict(type='RoIAlign', out_size=7, sample_num=2), - out_channels=256, - featmap_strides=[4, 8, 16, 32]), - bbox_head=dict( - type='SharedFCBBoxHead', - num_fcs=2, - in_channels=256, - fc_out_channels=1024, - roi_feat_size=7, - num_classes=81, - target_means=[0., 0., 0., 0.], - target_stds=[0.1, 0.1, 0.2, 0.2], - reg_class_agnostic=False, - loss_cls=dict( - type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), - loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0))) -# model training and testing settings -train_cfg = dict( - rcnn=dict( - 
assigner=dict( - type='MaxIoUAssigner', - pos_iou_thr=0.5, - neg_iou_thr=0.5, - min_pos_iou=0.5, - ignore_iof_thr=-1), - sampler=dict( - type='RandomSampler', - num=512, - pos_fraction=0.25, - neg_pos_ub=-1, - add_gt_as_proposals=True), - pos_weight=-1, - debug=False)) -test_cfg = dict( - rcnn=dict( - score_thr=0.05, nms=dict(type='nms', iou_thr=0.5), max_per_img=100)) -# dataset settings -dataset_type = 'CocoDataset' -data_root = 'data/coco/' -img_norm_cfg = dict( - mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) -train_pipeline = [ - dict(type='LoadImageFromFile'), - dict(type='LoadProposals', num_max_proposals=2000), - dict(type='LoadAnnotations', with_bbox=True), - dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), - dict(type='RandomFlip', flip_ratio=0.5), - dict(type='Normalize', **img_norm_cfg), - dict(type='Pad', size_divisor=32), - dict(type='DefaultFormatBundle'), - dict(type='Collect', keys=['img', 'proposals', 'gt_bboxes', 'gt_labels']), -] -test_pipeline = [ - dict(type='LoadImageFromFile'), - dict(type='LoadProposals', num_max_proposals=None), - dict( - type='MultiScaleFlipAug', - img_scale=(1333, 800), - flip=False, - transforms=[ - dict(type='Resize', keep_ratio=True), - dict(type='RandomFlip'), - dict(type='Normalize', **img_norm_cfg), - dict(type='Pad', size_divisor=32), - dict(type='ImageToTensor', keys=['img']), - dict(type='ToTensor', keys=['proposals']), - dict( - type='ToDataContainer', - fields=[dict(key='proposals', stack=False)]), - dict(type='Collect', keys=['img', 'proposals']), - ]) -] -data = dict( - imgs_per_gpu=2, - workers_per_gpu=2, - train=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_train2017.json', - proposal_file=data_root + 'proposals/rpn_r50_fpn_1x_train2017.pkl', - img_prefix=data_root + 'train2017/', - pipeline=train_pipeline), - val=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_val2017.json', - proposal_file=data_root + 
'proposals/rpn_r50_fpn_1x_val2017.pkl', - img_prefix=data_root + 'val2017/', - pipeline=test_pipeline), - test=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_val2017.json', - proposal_file=data_root + 'proposals/rpn_r50_fpn_1x_val2017.pkl', - img_prefix=data_root + 'val2017/', - pipeline=test_pipeline)) -evaluation = dict(interval=1, metric='bbox') -# optimizer -optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001) -optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) -# learning policy -lr_config = dict( - policy='step', - warmup='linear', - warmup_iters=500, - warmup_ratio=1.0 / 3, - step=[8, 11]) -checkpoint_config = dict(interval=1) -# yapf:disable -log_config = dict( - interval=50, - hooks=[ - dict(type='TextLoggerHook'), - # dict(type='TensorboardLoggerHook') - ]) -# yapf:enable -# runtime settings -total_epochs = 12 -dist_params = dict(backend='nccl') -log_level = 'INFO' -work_dir = './work_dirs/fast_rcnn_r50_fpn_1x' -load_from = None -resume_from = None -workflow = [('train', 1)] diff --git a/configs/faster_rcnn/faster_rcnn_r101_fpn_1x_coco.py b/configs/faster_rcnn/faster_rcnn_r101_fpn_1x_coco.py new file mode 100644 index 0000000000000000000000000000000000000000..c0b51c59d11b2a6c388ce62c3f62e548fed0c035 --- /dev/null +++ b/configs/faster_rcnn/faster_rcnn_r101_fpn_1x_coco.py @@ -0,0 +1,3 @@ +_base_ = './faster_rcnn_r50_fpn_1x_coco.py' +model = dict(pretrained='torchvision://resnet101', backbone=dict(depth=101)) +work_dir = './work_dirs/faster_rcnn_r101_fpn_1x' diff --git a/configs/faster_rcnn/faster_rcnn_r50_caffe_c4_1x_coco.py b/configs/faster_rcnn/faster_rcnn_r50_caffe_c4_1x_coco.py new file mode 100644 index 0000000000000000000000000000000000000000..5a5b7ccd8cd968e433f6990238f5a71792a600f4 --- /dev/null +++ b/configs/faster_rcnn/faster_rcnn_r50_caffe_c4_1x_coco.py @@ -0,0 +1,40 @@ +_base_ = [ + '../_base_/models/faster_rcnn_r50_caffe_c4.py', + '../_base_/datasets/coco_detection.py', + 
'../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py' +] +# use caffe img_norm +img_norm_cfg = dict( + mean=[102.9801, 115.9465, 122.7717], std=[1.0, 1.0, 1.0], to_rgb=False) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations', with_bbox=True), + dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(1333, 800), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] +data = dict( + train=dict(pipeline=train_pipeline), + val=dict(pipeline=test_pipeline), + test=dict(pipeline=test_pipeline)) +# optimizer +optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001) +work_dir = './work_dirs/faster_rcnn_r50_caffe_c4_1x' diff --git a/configs/faster_rcnn/faster_rcnn_r50_caffe_fpn_1x_coco.py b/configs/faster_rcnn/faster_rcnn_r50_caffe_fpn_1x_coco.py new file mode 100644 index 0000000000000000000000000000000000000000..8fdc75b4d1a61477bb1066cd6fcd08d1fb99da5f --- /dev/null +++ b/configs/faster_rcnn/faster_rcnn_r50_caffe_fpn_1x_coco.py @@ -0,0 +1,37 @@ +_base_ = './faster_rcnn_r50_fpn_1x_coco.py' +model = dict( + pretrained='open-mmlab://resnet50_caffe', + backbone=dict(norm_cfg=dict(requires_grad=False), style='caffe')) +# use caffe img_norm +img_norm_cfg = dict( + mean=[102.9801, 115.9465, 122.7717], std=[1.0, 1.0, 1.0], to_rgb=False) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations', with_bbox=True), + dict(type='Resize', img_scale=(1333, 800), 
keep_ratio=True), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(1333, 800), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] +data = dict( + train=dict(pipeline=train_pipeline), + val=dict(pipeline=test_pipeline), + test=dict(pipeline=test_pipeline)) +work_dir = './work_dirs/faster_rcnn_r50_caffe_fpn_1x' diff --git a/configs/faster_rcnn/faster_rcnn_r50_fpn_1x_coco.py b/configs/faster_rcnn/faster_rcnn_r50_fpn_1x_coco.py new file mode 100644 index 0000000000000000000000000000000000000000..4df206625ee54bddfafd354d5c9bca05711334aa --- /dev/null +++ b/configs/faster_rcnn/faster_rcnn_r50_fpn_1x_coco.py @@ -0,0 +1,6 @@ +_base_ = [ + '../_base_/models/faster_rcnn_r50_fpn.py', + '../_base_/datasets/coco_detection.py', + '../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py' +] +work_dir = './work_dirs/faster_rcnn_r50_fpn_1x' diff --git a/configs/faster_rcnn/faster_rcnn_r50_fpn_ohem_1x_coco.py b/configs/faster_rcnn/faster_rcnn_r50_fpn_ohem_1x_coco.py new file mode 100644 index 0000000000000000000000000000000000000000..0a02fdf994ec70c48e4a206200d5668d8b900b1d --- /dev/null +++ b/configs/faster_rcnn/faster_rcnn_r50_fpn_ohem_1x_coco.py @@ -0,0 +1,3 @@ +_base_ = './faster_rcnn_r50_fpn_1x_coco.py' +train_cfg = dict(rcnn=dict(sampler=dict(type='OHEMSampler'))) +work_dir = './work_dirs/faster_rcnn_r50_fpn_ohem_1x' diff --git a/configs/faster_rcnn/faster_rcnn_x101_32x4d_fpn_1x_coco.py b/configs/faster_rcnn/faster_rcnn_x101_32x4d_fpn_1x_coco.py new file mode 100644 index 
0000000000000000000000000000000000000000..2dc66a86ed847d4b61018f24ffe50ae93fa19fff --- /dev/null +++ b/configs/faster_rcnn/faster_rcnn_x101_32x4d_fpn_1x_coco.py @@ -0,0 +1,14 @@ +_base_ = './faster_rcnn_r50_fpn_1x_coco.py' +model = dict( + pretrained='open-mmlab://resnext101_32x4d', + backbone=dict( + type='ResNeXt', + depth=101, + groups=32, + base_width=4, + num_stages=4, + out_indices=(0, 1, 2, 3), + frozen_stages=1, + norm_cfg=dict(type='BN', requires_grad=True), + style='pytorch')) +work_dir = './work_dirs/faster_rcnn_x101_32x4d_fpn_1x' diff --git a/configs/faster_rcnn/faster_rcnn_x101_64x4d_fpn_1x_coco.py b/configs/faster_rcnn/faster_rcnn_x101_64x4d_fpn_1x_coco.py new file mode 100644 index 0000000000000000000000000000000000000000..0e85ee5ea5db6b01267a8f175a37ca8ef15c85fb --- /dev/null +++ b/configs/faster_rcnn/faster_rcnn_x101_64x4d_fpn_1x_coco.py @@ -0,0 +1,14 @@ +_base_ = './faster_rcnn_r50_fpn_1x_coco.py' +model = dict( + pretrained='open-mmlab://resnext101_64x4d', + backbone=dict( + type='ResNeXt', + depth=101, + groups=64, + base_width=4, + num_stages=4, + out_indices=(0, 1, 2, 3), + frozen_stages=1, + norm_cfg=dict(type='BN', requires_grad=True), + style='pytorch')) +work_dir = './work_dirs/faster_rcnn_x101_64x4d_fpn_1x' diff --git a/configs/faster_rcnn_ohem_r50_fpn_1x.py b/configs/faster_rcnn_ohem_r50_fpn_1x.py deleted file mode 100644 index baad9c85736d66d60173a24d0ce59a59b589e9f8..0000000000000000000000000000000000000000 --- a/configs/faster_rcnn_ohem_r50_fpn_1x.py +++ /dev/null @@ -1,176 +0,0 @@ -# model settings -model = dict( - type='FasterRCNN', - pretrained='torchvision://resnet50', - backbone=dict( - type='ResNet', - depth=50, - num_stages=4, - out_indices=(0, 1, 2, 3), - frozen_stages=1, - norm_cfg=dict(type='BN', requires_grad=True), - style='pytorch'), - neck=dict( - type='FPN', - in_channels=[256, 512, 1024, 2048], - out_channels=256, - num_outs=5), - rpn_head=dict( - type='RPNHead', - in_channels=256, - feat_channels=256, - 
anchor_scales=[8], - anchor_ratios=[0.5, 1.0, 2.0], - anchor_strides=[4, 8, 16, 32, 64], - target_means=[.0, .0, .0, .0], - target_stds=[1.0, 1.0, 1.0, 1.0], - loss_cls=dict( - type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0), - loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0)), - bbox_roi_extractor=dict( - type='SingleRoIExtractor', - roi_layer=dict(type='RoIAlign', out_size=7, sample_num=2), - out_channels=256, - featmap_strides=[4, 8, 16, 32]), - bbox_head=dict( - type='SharedFCBBoxHead', - num_fcs=2, - in_channels=256, - fc_out_channels=1024, - roi_feat_size=7, - num_classes=81, - target_means=[0., 0., 0., 0.], - target_stds=[0.1, 0.1, 0.2, 0.2], - reg_class_agnostic=False, - loss_cls=dict( - type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), - loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0))) -# model training and testing settings -train_cfg = dict( - rpn=dict( - assigner=dict( - type='MaxIoUAssigner', - pos_iou_thr=0.7, - neg_iou_thr=0.3, - min_pos_iou=0.3, - ignore_iof_thr=-1), - sampler=dict( - type='RandomSampler', - num=256, - pos_fraction=0.5, - neg_pos_ub=-1, - add_gt_as_proposals=False), - allowed_border=0, - pos_weight=-1, - debug=False), - rpn_proposal=dict( - nms_across_levels=False, - nms_pre=2000, - nms_post=2000, - max_num=2000, - nms_thr=0.7, - min_bbox_size=0), - rcnn=dict( - assigner=dict( - type='MaxIoUAssigner', - pos_iou_thr=0.5, - neg_iou_thr=0.5, - min_pos_iou=0.5, - ignore_iof_thr=-1), - sampler=dict( - type='OHEMSampler', - num=512, - pos_fraction=0.25, - neg_pos_ub=-1, - add_gt_as_proposals=True), - pos_weight=-1, - debug=False)) -test_cfg = dict( - rpn=dict( - nms_across_levels=False, - nms_pre=1000, - nms_post=1000, - max_num=1000, - nms_thr=0.7, - min_bbox_size=0), - rcnn=dict( - score_thr=0.05, nms=dict(type='nms', iou_thr=0.5), max_per_img=100) - # soft-nms is also supported for rcnn testing - # e.g., nms=dict(type='soft_nms', iou_thr=0.5, min_score=0.05) -) -# dataset 
settings -dataset_type = 'CocoDataset' -data_root = 'data/coco/' -img_norm_cfg = dict( - mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) -train_pipeline = [ - dict(type='LoadImageFromFile'), - dict(type='LoadAnnotations', with_bbox=True), - dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), - dict(type='RandomFlip', flip_ratio=0.5), - dict(type='Normalize', **img_norm_cfg), - dict(type='Pad', size_divisor=32), - dict(type='DefaultFormatBundle'), - dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']), -] -test_pipeline = [ - dict(type='LoadImageFromFile'), - dict( - type='MultiScaleFlipAug', - img_scale=(1333, 800), - flip=False, - transforms=[ - dict(type='Resize', keep_ratio=True), - dict(type='RandomFlip'), - dict(type='Normalize', **img_norm_cfg), - dict(type='Pad', size_divisor=32), - dict(type='ImageToTensor', keys=['img']), - dict(type='Collect', keys=['img']), - ]) -] -data = dict( - imgs_per_gpu=2, - workers_per_gpu=2, - train=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_train2017.json', - img_prefix=data_root + 'train2017/', - pipeline=train_pipeline), - val=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_val2017.json', - img_prefix=data_root + 'val2017/', - pipeline=test_pipeline), - test=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_val2017.json', - img_prefix=data_root + 'val2017/', - pipeline=test_pipeline)) -evaluation = dict(interval=1, metric='bbox') -# optimizer -optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001) -optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) -# learning policy -lr_config = dict( - policy='step', - warmup='linear', - warmup_iters=500, - warmup_ratio=1.0 / 3, - step=[8, 11]) -checkpoint_config = dict(interval=1) -# yapf:disable -log_config = dict( - interval=50, - hooks=[ - dict(type='TextLoggerHook'), - # dict(type='TensorboardLoggerHook') - ]) -# yapf:enable -# 
runtime settings -total_epochs = 12 -dist_params = dict(backend='nccl') -log_level = 'INFO' -work_dir = './work_dirs/faster_rcnn_r50_fpn_1x' -load_from = None -resume_from = None -workflow = [('train', 1)] diff --git a/configs/faster_rcnn_r101_fpn_1x.py b/configs/faster_rcnn_r101_fpn_1x.py deleted file mode 100644 index d3735e5fdfb6cb055ae66c7d1e70d1565ce4d755..0000000000000000000000000000000000000000 --- a/configs/faster_rcnn_r101_fpn_1x.py +++ /dev/null @@ -1,176 +0,0 @@ -# model settings -model = dict( - type='FasterRCNN', - pretrained='torchvision://resnet101', - backbone=dict( - type='ResNet', - depth=101, - num_stages=4, - out_indices=(0, 1, 2, 3), - frozen_stages=1, - norm_cfg=dict(type='BN', requires_grad=True), - style='pytorch'), - neck=dict( - type='FPN', - in_channels=[256, 512, 1024, 2048], - out_channels=256, - num_outs=5), - rpn_head=dict( - type='RPNHead', - in_channels=256, - feat_channels=256, - anchor_scales=[8], - anchor_ratios=[0.5, 1.0, 2.0], - anchor_strides=[4, 8, 16, 32, 64], - target_means=[.0, .0, .0, .0], - target_stds=[1.0, 1.0, 1.0, 1.0], - loss_cls=dict( - type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0), - loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0)), - bbox_roi_extractor=dict( - type='SingleRoIExtractor', - roi_layer=dict(type='RoIAlign', out_size=7, sample_num=2), - out_channels=256, - featmap_strides=[4, 8, 16, 32]), - bbox_head=dict( - type='SharedFCBBoxHead', - num_fcs=2, - in_channels=256, - fc_out_channels=1024, - roi_feat_size=7, - num_classes=81, - target_means=[0., 0., 0., 0.], - target_stds=[0.1, 0.1, 0.2, 0.2], - reg_class_agnostic=False, - loss_cls=dict( - type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), - loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0))) -# model training and testing settings -train_cfg = dict( - rpn=dict( - assigner=dict( - type='MaxIoUAssigner', - pos_iou_thr=0.7, - neg_iou_thr=0.3, - min_pos_iou=0.3, - ignore_iof_thr=-1), - sampler=dict( 
- type='RandomSampler', - num=256, - pos_fraction=0.5, - neg_pos_ub=-1, - add_gt_as_proposals=False), - allowed_border=0, - pos_weight=-1, - debug=False), - rpn_proposal=dict( - nms_across_levels=False, - nms_pre=2000, - nms_post=2000, - max_num=2000, - nms_thr=0.7, - min_bbox_size=0), - rcnn=dict( - assigner=dict( - type='MaxIoUAssigner', - pos_iou_thr=0.5, - neg_iou_thr=0.5, - min_pos_iou=0.5, - ignore_iof_thr=-1), - sampler=dict( - type='RandomSampler', - num=512, - pos_fraction=0.25, - neg_pos_ub=-1, - add_gt_as_proposals=True), - pos_weight=-1, - debug=False)) -test_cfg = dict( - rpn=dict( - nms_across_levels=False, - nms_pre=1000, - nms_post=1000, - max_num=1000, - nms_thr=0.7, - min_bbox_size=0), - rcnn=dict( - score_thr=0.05, nms=dict(type='nms', iou_thr=0.5), max_per_img=100) - # soft-nms is also supported for rcnn testing - # e.g., nms=dict(type='soft_nms', iou_thr=0.5, min_score=0.05) -) -# dataset settings -dataset_type = 'CocoDataset' -data_root = 'data/coco/' -img_norm_cfg = dict( - mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) -train_pipeline = [ - dict(type='LoadImageFromFile'), - dict(type='LoadAnnotations', with_bbox=True), - dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), - dict(type='RandomFlip', flip_ratio=0.5), - dict(type='Normalize', **img_norm_cfg), - dict(type='Pad', size_divisor=32), - dict(type='DefaultFormatBundle'), - dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']), -] -test_pipeline = [ - dict(type='LoadImageFromFile'), - dict( - type='MultiScaleFlipAug', - img_scale=(1333, 800), - flip=False, - transforms=[ - dict(type='Resize', keep_ratio=True), - dict(type='RandomFlip'), - dict(type='Normalize', **img_norm_cfg), - dict(type='Pad', size_divisor=32), - dict(type='ImageToTensor', keys=['img']), - dict(type='Collect', keys=['img']), - ]) -] -data = dict( - imgs_per_gpu=2, - workers_per_gpu=2, - train=dict( - type=dataset_type, - ann_file=data_root + 
'annotations/instances_train2017.json', - img_prefix=data_root + 'train2017/', - pipeline=train_pipeline), - val=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_val2017.json', - img_prefix=data_root + 'val2017/', - pipeline=test_pipeline), - test=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_val2017.json', - img_prefix=data_root + 'val2017/', - pipeline=test_pipeline)) -evaluation = dict(interval=1, metric='bbox') -# optimizer -optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001) -optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) -# learning policy -lr_config = dict( - policy='step', - warmup='linear', - warmup_iters=500, - warmup_ratio=1.0 / 3, - step=[8, 11]) -checkpoint_config = dict(interval=1) -# yapf:disable -log_config = dict( - interval=50, - hooks=[ - dict(type='TextLoggerHook'), - # dict(type='TensorboardLoggerHook') - ]) -# yapf:enable -# runtime settings -total_epochs = 12 -dist_params = dict(backend='nccl') -log_level = 'INFO' -work_dir = './work_dirs/faster_rcnn_r101_fpn_1x' -load_from = None -resume_from = None -workflow = [('train', 1)] diff --git a/configs/faster_rcnn_x101_32x4d_fpn_1x.py b/configs/faster_rcnn_x101_32x4d_fpn_1x.py deleted file mode 100644 index 9e14dc9f5ea4d3ceeb07a9e3564ffbca6c5a3657..0000000000000000000000000000000000000000 --- a/configs/faster_rcnn_x101_32x4d_fpn_1x.py +++ /dev/null @@ -1,178 +0,0 @@ -# model settings -model = dict( - type='FasterRCNN', - pretrained='open-mmlab://resnext101_32x4d', - backbone=dict( - type='ResNeXt', - depth=101, - groups=32, - base_width=4, - num_stages=4, - out_indices=(0, 1, 2, 3), - frozen_stages=1, - norm_cfg=dict(type='BN', requires_grad=True), - style='pytorch'), - neck=dict( - type='FPN', - in_channels=[256, 512, 1024, 2048], - out_channels=256, - num_outs=5), - rpn_head=dict( - type='RPNHead', - in_channels=256, - feat_channels=256, - anchor_scales=[8], - anchor_ratios=[0.5, 1.0, 2.0], - 
anchor_strides=[4, 8, 16, 32, 64], - target_means=[.0, .0, .0, .0], - target_stds=[1.0, 1.0, 1.0, 1.0], - loss_cls=dict( - type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0), - loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0)), - bbox_roi_extractor=dict( - type='SingleRoIExtractor', - roi_layer=dict(type='RoIAlign', out_size=7, sample_num=2), - out_channels=256, - featmap_strides=[4, 8, 16, 32]), - bbox_head=dict( - type='SharedFCBBoxHead', - num_fcs=2, - in_channels=256, - fc_out_channels=1024, - roi_feat_size=7, - num_classes=81, - target_means=[0., 0., 0., 0.], - target_stds=[0.1, 0.1, 0.2, 0.2], - reg_class_agnostic=False, - loss_cls=dict( - type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), - loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0))) -# model training and testing settings -train_cfg = dict( - rpn=dict( - assigner=dict( - type='MaxIoUAssigner', - pos_iou_thr=0.7, - neg_iou_thr=0.3, - min_pos_iou=0.3, - ignore_iof_thr=-1), - sampler=dict( - type='RandomSampler', - num=256, - pos_fraction=0.5, - neg_pos_ub=-1, - add_gt_as_proposals=False), - allowed_border=0, - pos_weight=-1, - debug=False), - rpn_proposal=dict( - nms_across_levels=False, - nms_pre=2000, - nms_post=2000, - max_num=2000, - nms_thr=0.7, - min_bbox_size=0), - rcnn=dict( - assigner=dict( - type='MaxIoUAssigner', - pos_iou_thr=0.5, - neg_iou_thr=0.5, - min_pos_iou=0.5, - ignore_iof_thr=-1), - sampler=dict( - type='RandomSampler', - num=512, - pos_fraction=0.25, - neg_pos_ub=-1, - add_gt_as_proposals=True), - pos_weight=-1, - debug=False)) -test_cfg = dict( - rpn=dict( - nms_across_levels=False, - nms_pre=1000, - nms_post=1000, - max_num=1000, - nms_thr=0.7, - min_bbox_size=0), - rcnn=dict( - score_thr=0.05, nms=dict(type='nms', iou_thr=0.5), max_per_img=100) - # soft-nms is also supported for rcnn testing - # e.g., nms=dict(type='soft_nms', iou_thr=0.5, min_score=0.05) -) -# dataset settings -dataset_type = 'CocoDataset' -data_root = 
'data/coco/' -img_norm_cfg = dict( - mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) -train_pipeline = [ - dict(type='LoadImageFromFile'), - dict(type='LoadAnnotations', with_bbox=True), - dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), - dict(type='RandomFlip', flip_ratio=0.5), - dict(type='Normalize', **img_norm_cfg), - dict(type='Pad', size_divisor=32), - dict(type='DefaultFormatBundle'), - dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']), -] -test_pipeline = [ - dict(type='LoadImageFromFile'), - dict( - type='MultiScaleFlipAug', - img_scale=(1333, 800), - flip=False, - transforms=[ - dict(type='Resize', keep_ratio=True), - dict(type='RandomFlip'), - dict(type='Normalize', **img_norm_cfg), - dict(type='Pad', size_divisor=32), - dict(type='ImageToTensor', keys=['img']), - dict(type='Collect', keys=['img']), - ]) -] -data = dict( - imgs_per_gpu=2, - workers_per_gpu=2, - train=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_train2017.json', - img_prefix=data_root + 'train2017/', - pipeline=train_pipeline), - val=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_val2017.json', - img_prefix=data_root + 'val2017/', - pipeline=test_pipeline), - test=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_val2017.json', - img_prefix=data_root + 'val2017/', - pipeline=test_pipeline)) -evaluation = dict(interval=1, metric='bbox') -# optimizer -optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001) -optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) -# learning policy -lr_config = dict( - policy='step', - warmup='linear', - warmup_iters=500, - warmup_ratio=1.0 / 3, - step=[8, 11]) -checkpoint_config = dict(interval=1) -# yapf:disable -log_config = dict( - interval=50, - hooks=[ - dict(type='TextLoggerHook'), - # dict(type='TensorboardLoggerHook') - ]) -# yapf:enable -# runtime settings -total_epochs = 12 -dist_params = 
dict(backend='nccl') -log_level = 'INFO' -work_dir = './work_dirs/faster_rcnn_x101_32x4d_fpn_1x' -load_from = None -resume_from = None -workflow = [('train', 1)] diff --git a/configs/faster_rcnn_x101_64x4d_fpn_1x.py b/configs/faster_rcnn_x101_64x4d_fpn_1x.py deleted file mode 100644 index 5454cfe170583d310352798c1eb0ea6c67a2b1a9..0000000000000000000000000000000000000000 --- a/configs/faster_rcnn_x101_64x4d_fpn_1x.py +++ /dev/null @@ -1,178 +0,0 @@ -# model settings -model = dict( - type='FasterRCNN', - pretrained='open-mmlab://resnext101_64x4d', - backbone=dict( - type='ResNeXt', - depth=101, - groups=64, - base_width=4, - num_stages=4, - out_indices=(0, 1, 2, 3), - frozen_stages=1, - norm_cfg=dict(type='BN', requires_grad=True), - style='pytorch'), - neck=dict( - type='FPN', - in_channels=[256, 512, 1024, 2048], - out_channels=256, - num_outs=5), - rpn_head=dict( - type='RPNHead', - in_channels=256, - feat_channels=256, - anchor_scales=[8], - anchor_ratios=[0.5, 1.0, 2.0], - anchor_strides=[4, 8, 16, 32, 64], - target_means=[.0, .0, .0, .0], - target_stds=[1.0, 1.0, 1.0, 1.0], - loss_cls=dict( - type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0), - loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0)), - bbox_roi_extractor=dict( - type='SingleRoIExtractor', - roi_layer=dict(type='RoIAlign', out_size=7, sample_num=2), - out_channels=256, - featmap_strides=[4, 8, 16, 32]), - bbox_head=dict( - type='SharedFCBBoxHead', - num_fcs=2, - in_channels=256, - fc_out_channels=1024, - roi_feat_size=7, - num_classes=81, - target_means=[0., 0., 0., 0.], - target_stds=[0.1, 0.1, 0.2, 0.2], - reg_class_agnostic=False, - loss_cls=dict( - type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), - loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0))) -# model training and testing settings -train_cfg = dict( - rpn=dict( - assigner=dict( - type='MaxIoUAssigner', - pos_iou_thr=0.7, - neg_iou_thr=0.3, - min_pos_iou=0.3, - ignore_iof_thr=-1), - 
sampler=dict( - type='RandomSampler', - num=256, - pos_fraction=0.5, - neg_pos_ub=-1, - add_gt_as_proposals=False), - allowed_border=0, - pos_weight=-1, - debug=False), - rpn_proposal=dict( - nms_across_levels=False, - nms_pre=2000, - nms_post=2000, - max_num=2000, - nms_thr=0.7, - min_bbox_size=0), - rcnn=dict( - assigner=dict( - type='MaxIoUAssigner', - pos_iou_thr=0.5, - neg_iou_thr=0.5, - min_pos_iou=0.5, - ignore_iof_thr=-1), - sampler=dict( - type='RandomSampler', - num=512, - pos_fraction=0.25, - neg_pos_ub=-1, - add_gt_as_proposals=True), - pos_weight=-1, - debug=False)) -test_cfg = dict( - rpn=dict( - nms_across_levels=False, - nms_pre=1000, - nms_post=1000, - max_num=1000, - nms_thr=0.7, - min_bbox_size=0), - rcnn=dict( - score_thr=0.05, nms=dict(type='nms', iou_thr=0.5), max_per_img=100) - # soft-nms is also supported for rcnn testing - # e.g., nms=dict(type='soft_nms', iou_thr=0.5, min_score=0.05) -) -# dataset settings -dataset_type = 'CocoDataset' -data_root = 'data/coco/' -img_norm_cfg = dict( - mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) -train_pipeline = [ - dict(type='LoadImageFromFile'), - dict(type='LoadAnnotations', with_bbox=True), - dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), - dict(type='RandomFlip', flip_ratio=0.5), - dict(type='Normalize', **img_norm_cfg), - dict(type='Pad', size_divisor=32), - dict(type='DefaultFormatBundle'), - dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']), -] -test_pipeline = [ - dict(type='LoadImageFromFile'), - dict( - type='MultiScaleFlipAug', - img_scale=(1333, 800), - flip=False, - transforms=[ - dict(type='Resize', keep_ratio=True), - dict(type='RandomFlip'), - dict(type='Normalize', **img_norm_cfg), - dict(type='Pad', size_divisor=32), - dict(type='ImageToTensor', keys=['img']), - dict(type='Collect', keys=['img']), - ]) -] -data = dict( - imgs_per_gpu=2, - workers_per_gpu=2, - train=dict( - type=dataset_type, - ann_file=data_root + 
'annotations/instances_train2017.json', - img_prefix=data_root + 'train2017/', - pipeline=train_pipeline), - val=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_val2017.json', - img_prefix=data_root + 'val2017/', - pipeline=test_pipeline), - test=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_val2017.json', - img_prefix=data_root + 'val2017/', - pipeline=test_pipeline)) -evaluation = dict(interval=1, metric='bbox') -# optimizer -optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001) -optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) -# learning policy -lr_config = dict( - policy='step', - warmup='linear', - warmup_iters=500, - warmup_ratio=1.0 / 3, - step=[8, 11]) -checkpoint_config = dict(interval=1) -# yapf:disable -log_config = dict( - interval=50, - hooks=[ - dict(type='TextLoggerHook'), - # dict(type='TensorboardLoggerHook') - ]) -# yapf:enable -# runtime settings -total_epochs = 12 -dist_params = dict(backend='nccl') -log_level = 'INFO' -work_dir = './work_dirs/faster_rcnn_x101_64x4d_fpn_1x' -load_from = None -resume_from = None -workflow = [('train', 1)] diff --git a/configs/fcos/fcos_center_r50_caffe_fpn_gn-head_4x4_1x_coco.py b/configs/fcos/fcos_center_r50_caffe_fpn_gn-head_4x4_1x_coco.py new file mode 100644 index 0000000000000000000000000000000000000000..d655f854aa70217106addd33d238ff172919d5c3 --- /dev/null +++ b/configs/fcos/fcos_center_r50_caffe_fpn_gn-head_4x4_1x_coco.py @@ -0,0 +1,3 @@ +_base_ = './fcos_r50_caffe_fpn_gn-head_4x4_1x_coco.py' +model = dict(bbox_head=dict(center_sampling=True, center_sample_radius=1.5)) +work_dir = './work_dirs/fcos_center_r50_caffe_fpn_gn_1x_4gpu' diff --git a/configs/fcos/fcos_center_r50_caffe_fpn_gn_1x_4gpu.py.py b/configs/fcos/fcos_center_r50_caffe_fpn_gn_1x_4gpu.py.py deleted file mode 100644 index 678fb0a531da55b89bcc8ef1b6a323888ca64759..0000000000000000000000000000000000000000 --- 
a/configs/fcos/fcos_center_r50_caffe_fpn_gn_1x_4gpu.py.py +++ /dev/null @@ -1,137 +0,0 @@ -# model settings -model = dict( - type='FCOS', - pretrained='open-mmlab://resnet50_caffe', - backbone=dict( - type='ResNet', - depth=50, - num_stages=4, - out_indices=(0, 1, 2, 3), - frozen_stages=1, - norm_cfg=dict(type='BN', requires_grad=False), - style='caffe'), - neck=dict( - type='FPN', - in_channels=[256, 512, 1024, 2048], - out_channels=256, - start_level=1, - add_extra_convs=True, - extra_convs_on_inputs=False, # use P5 - num_outs=5, - relu_before_extra_convs=True), - bbox_head=dict( - type='FCOSHead', - num_classes=81, - in_channels=256, - stacked_convs=4, - feat_channels=256, - strides=[8, 16, 32, 64, 128], - loss_cls=dict( - type='FocalLoss', - use_sigmoid=True, - gamma=2.0, - alpha=0.25, - loss_weight=1.0), - loss_bbox=dict(type='IoULoss', loss_weight=1.0), - loss_centerness=dict( - type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0), - center_sampling=True, - center_sample_radius=1.5)) -# training and testing settings -train_cfg = dict( - assigner=dict( - type='MaxIoUAssigner', - pos_iou_thr=0.5, - neg_iou_thr=0.4, - min_pos_iou=0, - ignore_iof_thr=-1), - allowed_border=-1, - pos_weight=-1, - debug=False) -test_cfg = dict( - nms_pre=1000, - min_bbox_size=0, - score_thr=0.05, - nms=dict(type='nms', iou_thr=0.5), - max_per_img=100) -# dataset settings -dataset_type = 'CocoDataset' -data_root = 'data/coco/' -img_norm_cfg = dict( - mean=[102.9801, 115.9465, 122.7717], std=[1.0, 1.0, 1.0], to_rgb=False) -train_pipeline = [ - dict(type='LoadImageFromFile'), - dict(type='LoadAnnotations', with_bbox=True), - dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), - dict(type='RandomFlip', flip_ratio=0.5), - dict(type='Normalize', **img_norm_cfg), - dict(type='Pad', size_divisor=32), - dict(type='DefaultFormatBundle'), - dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']), -] -test_pipeline = [ - dict(type='LoadImageFromFile'), - dict( - 
type='MultiScaleFlipAug', - img_scale=(1333, 800), - flip=False, - transforms=[ - dict(type='Resize', keep_ratio=True), - dict(type='RandomFlip'), - dict(type='Normalize', **img_norm_cfg), - dict(type='Pad', size_divisor=32), - dict(type='ImageToTensor', keys=['img']), - dict(type='Collect', keys=['img']), - ]) -] -data = dict( - imgs_per_gpu=4, - workers_per_gpu=4, - train=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_train2017.json', - img_prefix=data_root + 'train2017/', - pipeline=train_pipeline), - val=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_val2017.json', - img_prefix=data_root + 'val2017/', - pipeline=test_pipeline), - test=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_val2017.json', - img_prefix=data_root + 'val2017/', - pipeline=test_pipeline)) -evaluation = dict(interval=1, metric='bbox') -# optimizer -optimizer = dict( - type='SGD', - lr=0.01, - momentum=0.9, - weight_decay=0.0001, - paramwise_options=dict(bias_lr_mult=2., bias_decay_mult=0.)) -optimizer_config = dict(grad_clip=None) -# learning policy -lr_config = dict( - policy='step', - warmup='constant', - warmup_iters=500, - warmup_ratio=1.0 / 3, - step=[8, 11]) -checkpoint_config = dict(interval=1) -# yapf:disable -log_config = dict( - interval=50, - hooks=[ - dict(type='TextLoggerHook'), - # dict(type='TensorboardLoggerHook') - ]) -# yapf:enable -# runtime settings -total_epochs = 12 -dist_params = dict(backend='nccl') -log_level = 'INFO' -work_dir = './work_dirs/fcos_center_r50_caffe_fpn_gn_1x_4gpu' -load_from = None -resume_from = None -workflow = [('train', 1)] diff --git a/configs/fcos/fcos_mstrain_640_800_r101_caffe_fpn_gn_2x_4gpu.py b/configs/fcos/fcos_mstrain_640_800_r101_caffe_fpn_gn_2x_4gpu.py deleted file mode 100644 index 40a358e9445280c0b8627e21cd4b0a8b5c43d8bc..0000000000000000000000000000000000000000 --- a/configs/fcos/fcos_mstrain_640_800_r101_caffe_fpn_gn_2x_4gpu.py +++ /dev/null @@ -1,139 +0,0 
@@ -# model settings -model = dict( - type='FCOS', - pretrained='open-mmlab://resnet101_caffe', - backbone=dict( - type='ResNet', - depth=101, - num_stages=4, - out_indices=(0, 1, 2, 3), - frozen_stages=1, - norm_cfg=dict(type='BN', requires_grad=False), - style='caffe'), - neck=dict( - type='FPN', - in_channels=[256, 512, 1024, 2048], - out_channels=256, - start_level=1, - add_extra_convs=True, - extra_convs_on_inputs=False, # use P5 - num_outs=5, - relu_before_extra_convs=True), - bbox_head=dict( - type='FCOSHead', - num_classes=81, - in_channels=256, - stacked_convs=4, - feat_channels=256, - strides=[8, 16, 32, 64, 128], - loss_cls=dict( - type='FocalLoss', - use_sigmoid=True, - gamma=2.0, - alpha=0.25, - loss_weight=1.0), - loss_bbox=dict(type='IoULoss', loss_weight=1.0), - loss_centerness=dict( - type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0))) -# training and testing settings -train_cfg = dict( - assigner=dict( - type='MaxIoUAssigner', - pos_iou_thr=0.5, - neg_iou_thr=0.4, - min_pos_iou=0, - ignore_iof_thr=-1), - allowed_border=-1, - pos_weight=-1, - debug=False) -test_cfg = dict( - nms_pre=1000, - min_bbox_size=0, - score_thr=0.05, - nms=dict(type='nms', iou_thr=0.5), - max_per_img=100) -# dataset settings -dataset_type = 'CocoDataset' -data_root = 'data/coco/' -img_norm_cfg = dict( - mean=[102.9801, 115.9465, 122.7717], std=[1.0, 1.0, 1.0], to_rgb=False) -train_pipeline = [ - dict(type='LoadImageFromFile'), - dict(type='LoadAnnotations', with_bbox=True), - dict( - type='Resize', - img_scale=[(1333, 640), (1333, 800)], - multiscale_mode='value', - keep_ratio=True), - dict(type='RandomFlip', flip_ratio=0.5), - dict(type='Normalize', **img_norm_cfg), - dict(type='Pad', size_divisor=32), - dict(type='DefaultFormatBundle'), - dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']), -] -test_pipeline = [ - dict(type='LoadImageFromFile'), - dict( - type='MultiScaleFlipAug', - img_scale=(1333, 800), - flip=False, - transforms=[ - 
dict(type='Resize', keep_ratio=True), - dict(type='RandomFlip'), - dict(type='Normalize', **img_norm_cfg), - dict(type='Pad', size_divisor=32), - dict(type='ImageToTensor', keys=['img']), - dict(type='Collect', keys=['img']), - ]) -] -data = dict( - imgs_per_gpu=4, - workers_per_gpu=4, - train=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_train2017.json', - img_prefix=data_root + 'train2017/', - pipeline=train_pipeline), - val=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_val2017.json', - img_prefix=data_root + 'val2017/', - pipeline=test_pipeline), - test=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_val2017.json', - img_prefix=data_root + 'val2017/', - pipeline=test_pipeline)) -evaluation = dict(interval=1, metric='bbox') -# optimizer -optimizer = dict( - type='SGD', - lr=0.01, - momentum=0.9, - weight_decay=0.0001, - paramwise_options=dict(bias_lr_mult=2., bias_decay_mult=0.)) -optimizer_config = dict(grad_clip=None) -# learning policy -lr_config = dict( - policy='step', - warmup='constant', - warmup_iters=500, - warmup_ratio=1.0 / 3, - step=[16, 22]) -checkpoint_config = dict(interval=1) -# yapf:disable -log_config = dict( - interval=50, - hooks=[ - dict(type='TextLoggerHook'), - # dict(type='TensorboardLoggerHook') - ]) -# yapf:enable -# runtime settings -total_epochs = 24 -dist_params = dict(backend='nccl') -log_level = 'INFO' -work_dir = './work_dirs/fcos_mstrain_640_800_r101_caffe_fpn_gn_2x_4gpu' -load_from = None -resume_from = None -workflow = [('train', 1)] diff --git a/configs/fcos/fcos_mstrain_640_800_x101_64x4d_fpn_gn_2x.py b/configs/fcos/fcos_mstrain_640_800_x101_64x4d_fpn_gn_2x.py deleted file mode 100644 index 3ba6505024b346514c6f8fe3913b81906b9bf073..0000000000000000000000000000000000000000 --- a/configs/fcos/fcos_mstrain_640_800_x101_64x4d_fpn_gn_2x.py +++ /dev/null @@ -1,141 +0,0 @@ -# model settings -model = dict( - type='FCOS', - 
pretrained='open-mmlab://resnext101_64x4d', - backbone=dict( - type='ResNeXt', - depth=101, - groups=64, - base_width=4, - num_stages=4, - out_indices=(0, 1, 2, 3), - frozen_stages=1, - norm_cfg=dict(type='BN', requires_grad=True), - style='pytorch'), - neck=dict( - type='FPN', - in_channels=[256, 512, 1024, 2048], - out_channels=256, - start_level=1, - add_extra_convs=True, - extra_convs_on_inputs=False, # use P5 - num_outs=5, - relu_before_extra_convs=True), - bbox_head=dict( - type='FCOSHead', - num_classes=81, - in_channels=256, - stacked_convs=4, - feat_channels=256, - strides=[8, 16, 32, 64, 128], - loss_cls=dict( - type='FocalLoss', - use_sigmoid=True, - gamma=2.0, - alpha=0.25, - loss_weight=1.0), - loss_bbox=dict(type='IoULoss', loss_weight=1.0), - loss_centerness=dict( - type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0))) -# training and testing settings -train_cfg = dict( - assigner=dict( - type='MaxIoUAssigner', - pos_iou_thr=0.5, - neg_iou_thr=0.4, - min_pos_iou=0, - ignore_iof_thr=-1), - allowed_border=-1, - pos_weight=-1, - debug=False) -test_cfg = dict( - nms_pre=1000, - min_bbox_size=0, - score_thr=0.05, - nms=dict(type='nms', iou_thr=0.5), - max_per_img=100) -# dataset settings -dataset_type = 'CocoDataset' -data_root = 'data/coco/' -img_norm_cfg = dict( - mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) -train_pipeline = [ - dict(type='LoadImageFromFile'), - dict(type='LoadAnnotations', with_bbox=True), - dict( - type='Resize', - img_scale=[(1333, 640), (1333, 800)], - multiscale_mode='value', - keep_ratio=True), - dict(type='RandomFlip', flip_ratio=0.5), - dict(type='Normalize', **img_norm_cfg), - dict(type='Pad', size_divisor=32), - dict(type='DefaultFormatBundle'), - dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']), -] -test_pipeline = [ - dict(type='LoadImageFromFile'), - dict( - type='MultiScaleFlipAug', - img_scale=(1333, 800), - flip=False, - transforms=[ - dict(type='Resize', 
keep_ratio=True), - dict(type='RandomFlip'), - dict(type='Normalize', **img_norm_cfg), - dict(type='Pad', size_divisor=32), - dict(type='ImageToTensor', keys=['img']), - dict(type='Collect', keys=['img']), - ]) -] -data = dict( - imgs_per_gpu=2, - workers_per_gpu=2, - train=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_train2017.json', - img_prefix=data_root + 'train2017/', - pipeline=train_pipeline), - val=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_val2017.json', - img_prefix=data_root + 'val2017/', - pipeline=test_pipeline), - test=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_val2017.json', - img_prefix=data_root + 'val2017/', - pipeline=test_pipeline)) -evaluation = dict(interval=1, metric='bbox') -# optimizer -optimizer = dict( - type='SGD', - lr=0.01, - momentum=0.9, - weight_decay=0.0001, - paramwise_options=dict(bias_lr_mult=2., bias_decay_mult=0.)) -optimizer_config = dict(grad_clip=None) -# learning policy -lr_config = dict( - policy='step', - warmup='constant', - warmup_iters=500, - warmup_ratio=1.0 / 3, - step=[16, 22]) -checkpoint_config = dict(interval=1) -# yapf:disable -log_config = dict( - interval=50, - hooks=[ - dict(type='TextLoggerHook'), - # dict(type='TensorboardLoggerHook') - ]) -# yapf:enable -# runtime settings -total_epochs = 24 -dist_params = dict(backend='nccl') -log_level = 'INFO' -work_dir = './work_dirs/fcos_mstrain_640_800_x101_64x4d_fpn_gn_2x' -load_from = None -resume_from = None -workflow = [('train', 1)] diff --git a/configs/fcos/fcos_r101_caffe_fpn_gn-head_mstrain_640-800_4x4_2x_coco.py b/configs/fcos/fcos_r101_caffe_fpn_gn-head_mstrain_640-800_4x4_2x_coco.py new file mode 100644 index 0000000000000000000000000000000000000000..e4795fde3694653dc2be38cc21f2d2477940ea7b --- /dev/null +++ b/configs/fcos/fcos_r101_caffe_fpn_gn-head_mstrain_640-800_4x4_2x_coco.py @@ -0,0 +1,44 @@ +_base_ = './fcos_r50_caffe_fpn_gn-head_4x4_1x_coco.py' +model = dict( + 
pretrained='open-mmlab://resnet101_caffe', backbone=dict(depth=101)) +img_norm_cfg = dict( + mean=[102.9801, 115.9465, 122.7717], std=[1.0, 1.0, 1.0], to_rgb=False) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations', with_bbox=True), + dict( + type='Resize', + img_scale=[(1333, 640), (1333, 800)], + multiscale_mode='value', + keep_ratio=True), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(1333, 800), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] +data = dict( + imgs_per_gpu=4, + workers_per_gpu=4, + train=dict(pipeline=train_pipeline), + val=dict(pipeline=test_pipeline), + test=dict(pipeline=test_pipeline)) +# learning policy +lr_config = dict(step=[16, 22]) +total_epochs = 24 +work_dir = './work_dirs/fcos_mstrain_640_800_r101_caffe_fpn_gn_2x_4gpu' diff --git a/configs/fcos/fcos_r50_caffe_fpn_gn_1x_4gpu.py b/configs/fcos/fcos_r50_caffe_fpn_gn-head_4x4_1x_coco.py similarity index 66% rename from configs/fcos/fcos_r50_caffe_fpn_gn_1x_4gpu.py rename to configs/fcos/fcos_r50_caffe_fpn_gn-head_4x4_1x_coco.py index fb37d3149c85b95cd9e218dfd092de6ce348e387..fa19da5278be2f4e321a25c688c7ea65e75dbf2e 100644 --- a/configs/fcos/fcos_r50_caffe_fpn_gn_1x_4gpu.py +++ b/configs/fcos/fcos_r50_caffe_fpn_gn-head_4x4_1x_coco.py @@ -1,3 +1,7 @@ +_base_ = [ + '../_base_/datasets/coco_detection.py', + '../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py' +] # model settings model = dict( type='FCOS', @@ -52,9 +56,6 @@ test_cfg = dict( score_thr=0.05, 
nms=dict(type='nms', iou_thr=0.5), max_per_img=100) -# dataset settings -dataset_type = 'CocoDataset' -data_root = 'data/coco/' img_norm_cfg = dict( mean=[102.9801, 115.9465, 122.7717], std=[1.0, 1.0, 1.0], to_rgb=False) train_pipeline = [ @@ -85,51 +86,14 @@ test_pipeline = [ data = dict( imgs_per_gpu=4, workers_per_gpu=4, - train=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_train2017.json', - img_prefix=data_root + 'train2017/', - pipeline=train_pipeline), - val=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_val2017.json', - img_prefix=data_root + 'val2017/', - pipeline=test_pipeline), - test=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_val2017.json', - img_prefix=data_root + 'val2017/', - pipeline=test_pipeline)) -evaluation = dict(interval=1, metric='bbox') + train=dict(pipeline=train_pipeline), + val=dict(pipeline=test_pipeline), + test=dict(pipeline=test_pipeline)) # optimizer optimizer = dict( - type='SGD', - lr=0.01, - momentum=0.9, - weight_decay=0.0001, - paramwise_options=dict(bias_lr_mult=2., bias_decay_mult=0.)) -optimizer_config = dict(grad_clip=None) + lr=0.01, paramwise_options=dict(bias_lr_mult=2., bias_decay_mult=0.)) +optimizer_config = dict(_delete_=True, grad_clip=None) # learning policy -lr_config = dict( - policy='step', - warmup='constant', - warmup_iters=500, - warmup_ratio=1.0 / 3, - step=[8, 11]) -checkpoint_config = dict(interval=1) -# yapf:disable -log_config = dict( - interval=50, - hooks=[ - dict(type='TextLoggerHook'), - # dict(type='TensorboardLoggerHook') - ]) -# yapf:enable -# runtime settings +lr_config = dict(warmup='constant') total_epochs = 12 -dist_params = dict(backend='nccl') -log_level = 'INFO' work_dir = './work_dirs/fcos_r50_caffe_fpn_gn_1x_4gpu' -load_from = None -resume_from = None -workflow = [('train', 1)] diff --git a/configs/fcos/fcos_x101_64x4d_fpn_gn-head_mstrain_640-800_4x2_2x_coco.py 
b/configs/fcos/fcos_x101_64x4d_fpn_gn-head_mstrain_640-800_4x2_2x_coco.py new file mode 100644 index 0000000000000000000000000000000000000000..a1fe0d9fd0d9a9988f067ca9eb123afbbf9e6f07 --- /dev/null +++ b/configs/fcos/fcos_x101_64x4d_fpn_gn-head_mstrain_640-800_4x2_2x_coco.py @@ -0,0 +1,58 @@ +_base_ = './fcos_r50_caffe_fpn_gn-head_4x4_1x_coco.py' +model = dict( + pretrained='open-mmlab://resnext101_64x4d', + backbone=dict( + type='ResNeXt', + depth=101, + groups=64, + base_width=4, + num_stages=4, + out_indices=(0, 1, 2, 3), + frozen_stages=1, + norm_cfg=dict(type='BN', requires_grad=True), + style='pytorch')) +img_norm_cfg = dict( + mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations', with_bbox=True), + dict( + type='Resize', + img_scale=[(1333, 640), (1333, 800)], + multiscale_mode='value', + keep_ratio=True), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(1333, 800), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] +data = dict( + imgs_per_gpu=2, + workers_per_gpu=2, + train=dict(pipeline=train_pipeline), + val=dict(pipeline=test_pipeline), + test=dict(pipeline=test_pipeline)) +# optimizer +optimizer = dict( + lr=0.01, paramwise_options=dict(bias_lr_mult=2., bias_decay_mult=0.)) +optimizer_config = dict(_delete_=True, grad_clip=None) +# learning policy +lr_config = dict(warmup='constant', step=[16, 22]) +total_epochs = 24 +work_dir = 
'./work_dirs/fcos_mstrain_640_800_x101_64x4d_fpn_gn_2x' diff --git a/configs/foveabox/fovea_align_gn_ms_r101_fpn_4gpu_2x.py b/configs/foveabox/fovea_align_gn_ms_r101_fpn_4gpu_2x.py deleted file mode 100644 index 502ff3ee76705ea23064706eee0df0971d095e11..0000000000000000000000000000000000000000 --- a/configs/foveabox/fovea_align_gn_ms_r101_fpn_4gpu_2x.py +++ /dev/null @@ -1,126 +0,0 @@ -# model settings -model = dict( - type='FOVEA', - pretrained='torchvision://resnet101', - backbone=dict( - type='ResNet', - depth=101, - num_stages=4, - out_indices=(0, 1, 2, 3), - frozen_stages=1, - norm_cfg=dict(type='BN', requires_grad=True), - style='pytorch'), - neck=dict( - type='FPN', - in_channels=[256, 512, 1024, 2048], - out_channels=256, - start_level=1, - num_outs=5, - add_extra_convs=True), - bbox_head=dict( - type='FoveaHead', - num_classes=81, - in_channels=256, - stacked_convs=4, - feat_channels=256, - strides=[8, 16, 32, 64, 128], - base_edge_list=[16, 32, 64, 128, 256], - scale_ranges=((1, 64), (32, 128), (64, 256), (128, 512), (256, 2048)), - sigma=0.4, - with_deform=True, - norm_cfg=dict(type='GN', num_groups=32, requires_grad=True), - loss_cls=dict( - type='FocalLoss', - use_sigmoid=True, - gamma=1.50, - alpha=0.4, - loss_weight=1.0), - loss_bbox=dict(type='SmoothL1Loss', beta=0.11, loss_weight=1.0))) -# training and testing settings -train_cfg = dict() -test_cfg = dict( - nms_pre=1000, - score_thr=0.05, - nms=dict(type='nms', iou_thr=0.5), - max_per_img=100) -# dataset settings -# dataset settings -dataset_type = 'CocoDataset' -data_root = 'data/coco/' -img_norm_cfg = dict( - mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) -train_pipeline = [ - dict(type='LoadImageFromFile'), - dict(type='LoadAnnotations', with_bbox=True), - dict( - type='Resize', - img_scale=[(1333, 640), (1333, 800)], - multiscale_mode='value', - keep_ratio=True), - dict(type='RandomFlip', flip_ratio=0.5), - dict(type='Normalize', **img_norm_cfg), - dict(type='Pad', 
size_divisor=32), - dict(type='DefaultFormatBundle'), - dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']), -] -test_pipeline = [ - dict(type='LoadImageFromFile'), - dict( - type='MultiScaleFlipAug', - img_scale=(1333, 800), - flip=False, - transforms=[ - dict(type='Resize', keep_ratio=True), - dict(type='RandomFlip'), - dict(type='Normalize', **img_norm_cfg), - dict(type='Pad', size_divisor=32), - dict(type='ImageToTensor', keys=['img']), - dict(type='Collect', keys=['img']), - ]) -] -data = dict( - imgs_per_gpu=4, - workers_per_gpu=4, - train=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_train2017.json', - img_prefix=data_root + 'train2017/', - pipeline=train_pipeline), - val=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_val2017.json', - img_prefix=data_root + 'val2017/', - pipeline=test_pipeline), - test=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_val2017.json', - img_prefix=data_root + 'val2017/', - pipeline=test_pipeline)) -evaluation = dict(interval=1, metric='bbox') -# optimizer -optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001) -optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) -# learning policy -lr_config = dict( - policy='step', - warmup='linear', - warmup_iters=500, - warmup_ratio=1.0 / 3, - step=[16, 22]) -checkpoint_config = dict(interval=1) -# yapf:disable -log_config = dict( - interval=50, - hooks=[ - dict(type='TextLoggerHook'), - # dict(type='TensorboardLoggerHook') - ]) -# yapf:enable -# runtime settings -total_epochs = 24 -dist_params = dict(backend='nccl') -log_level = 'INFO' -work_dir = './work_dirs/fovea_align_gn_ms_r101_fpn_4gpu_2x' -load_from = None -resume_from = None -workflow = [('train', 1)] diff --git a/configs/foveabox/fovea_align_gn_ms_r50_fpn_4gpu_2x.py b/configs/foveabox/fovea_align_gn_ms_r50_fpn_4gpu_2x.py deleted file mode 100644 index 
a4960660ccf39d35b89a660b95973c239f816880..0000000000000000000000000000000000000000 --- a/configs/foveabox/fovea_align_gn_ms_r50_fpn_4gpu_2x.py +++ /dev/null @@ -1,126 +0,0 @@ -# model settings -model = dict( - type='FOVEA', - pretrained='torchvision://resnet50', - backbone=dict( - type='ResNet', - depth=50, - num_stages=4, - out_indices=(0, 1, 2, 3), - frozen_stages=1, - norm_cfg=dict(type='BN', requires_grad=True), - style='pytorch'), - neck=dict( - type='FPN', - in_channels=[256, 512, 1024, 2048], - out_channels=256, - start_level=1, - num_outs=5, - add_extra_convs=True), - bbox_head=dict( - type='FoveaHead', - num_classes=81, - in_channels=256, - stacked_convs=4, - feat_channels=256, - strides=[8, 16, 32, 64, 128], - base_edge_list=[16, 32, 64, 128, 256], - scale_ranges=((1, 64), (32, 128), (64, 256), (128, 512), (256, 2048)), - sigma=0.4, - with_deform=True, - norm_cfg=dict(type='GN', num_groups=32, requires_grad=True), - loss_cls=dict( - type='FocalLoss', - use_sigmoid=True, - gamma=1.50, - alpha=0.4, - loss_weight=1.0), - loss_bbox=dict(type='SmoothL1Loss', beta=0.11, loss_weight=1.0))) -# training and testing settings -train_cfg = dict() -test_cfg = dict( - nms_pre=1000, - score_thr=0.05, - nms=dict(type='nms', iou_thr=0.5), - max_per_img=100) -# dataset settings -# dataset settings -dataset_type = 'CocoDataset' -data_root = 'data/coco/' -img_norm_cfg = dict( - mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) -train_pipeline = [ - dict(type='LoadImageFromFile'), - dict(type='LoadAnnotations', with_bbox=True), - dict( - type='Resize', - img_scale=[(1333, 640), (1333, 800)], - multiscale_mode='value', - keep_ratio=True), - dict(type='RandomFlip', flip_ratio=0.5), - dict(type='Normalize', **img_norm_cfg), - dict(type='Pad', size_divisor=32), - dict(type='DefaultFormatBundle'), - dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']), -] -test_pipeline = [ - dict(type='LoadImageFromFile'), - dict( - type='MultiScaleFlipAug', - 
img_scale=(1333, 800), - flip=False, - transforms=[ - dict(type='Resize', keep_ratio=True), - dict(type='RandomFlip'), - dict(type='Normalize', **img_norm_cfg), - dict(type='Pad', size_divisor=32), - dict(type='ImageToTensor', keys=['img']), - dict(type='Collect', keys=['img']), - ]) -] -data = dict( - imgs_per_gpu=4, - workers_per_gpu=4, - train=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_train2017.json', - img_prefix=data_root + 'train2017/', - pipeline=train_pipeline), - val=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_val2017.json', - img_prefix=data_root + 'val2017/', - pipeline=test_pipeline), - test=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_val2017.json', - img_prefix=data_root + 'val2017/', - pipeline=test_pipeline)) -evaluation = dict(interval=1, metric='bbox') -# optimizer -optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001) -optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) -# learning policy -lr_config = dict( - policy='step', - warmup='linear', - warmup_iters=500, - warmup_ratio=1.0 / 3, - step=[16, 22]) -checkpoint_config = dict(interval=1) -# yapf:disable -log_config = dict( - interval=50, - hooks=[ - dict(type='TextLoggerHook'), - # dict(type='TensorboardLoggerHook') - ]) -# yapf:enable -# runtime settings -total_epochs = 24 -dist_params = dict(backend='nccl') -log_level = 'INFO' -work_dir = './work_dirs/fovea_align_gn_ms_r50_fpn_4gpu_2x' -load_from = None -resume_from = None -workflow = [('train', 1)] diff --git a/configs/foveabox/fovea_align_gn_r101_fpn_4gpu_2x.py b/configs/foveabox/fovea_align_gn_r101_fpn_4gpu_2x.py deleted file mode 100644 index 8b7acaf61e7250e6855d0589b98cb39f1637704f..0000000000000000000000000000000000000000 --- a/configs/foveabox/fovea_align_gn_r101_fpn_4gpu_2x.py +++ /dev/null @@ -1,121 +0,0 @@ -# model settings -model = dict( - type='FOVEA', - pretrained='torchvision://resnet101', - backbone=dict( - 
type='ResNet', - depth=101, - num_stages=4, - out_indices=(0, 1, 2, 3), - frozen_stages=1, - norm_cfg=dict(type='BN', requires_grad=True), - style='pytorch'), - neck=dict( - type='FPN', - in_channels=[256, 512, 1024, 2048], - out_channels=256, - start_level=1, - num_outs=5, - add_extra_convs=True), - bbox_head=dict( - type='FoveaHead', - num_classes=81, - in_channels=256, - stacked_convs=4, - feat_channels=256, - strides=[8, 16, 32, 64, 128], - base_edge_list=[16, 32, 64, 128, 256], - scale_ranges=((1, 64), (32, 128), (64, 256), (128, 512), (256, 2048)), - sigma=0.4, - with_deform=True, - norm_cfg=dict(type='GN', num_groups=32, requires_grad=True), - loss_cls=dict( - type='FocalLoss', - use_sigmoid=True, - gamma=1.50, - alpha=0.4, - loss_weight=1.0), - loss_bbox=dict(type='SmoothL1Loss', beta=0.11, loss_weight=1.0))) -# training and testing settings -train_cfg = dict() -test_cfg = dict( - nms_pre=1000, - score_thr=0.05, - nms=dict(type='nms', iou_thr=0.5), - max_per_img=100) -# dataset settings -dataset_type = 'CocoDataset' -data_root = 'data/coco/' -img_norm_cfg = dict( - mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) -train_pipeline = [ - dict(type='LoadImageFromFile'), - dict(type='LoadAnnotations', with_bbox=True), - dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), - dict(type='RandomFlip', flip_ratio=0.5), - dict(type='Normalize', **img_norm_cfg), - dict(type='Pad', size_divisor=32), - dict(type='DefaultFormatBundle'), - dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']), -] -test_pipeline = [ - dict(type='LoadImageFromFile'), - dict( - type='MultiScaleFlipAug', - img_scale=(1333, 800), - flip=False, - transforms=[ - dict(type='Resize', keep_ratio=True), - dict(type='RandomFlip'), - dict(type='Normalize', **img_norm_cfg), - dict(type='Pad', size_divisor=32), - dict(type='ImageToTensor', keys=['img']), - dict(type='Collect', keys=['img']), - ]) -] -data = dict( - imgs_per_gpu=4, - workers_per_gpu=4, - train=dict( 
- type=dataset_type, - ann_file=data_root + 'annotations/instances_train2017.json', - img_prefix=data_root + 'train2017/', - pipeline=train_pipeline), - val=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_val2017.json', - img_prefix=data_root + 'val2017/', - pipeline=test_pipeline), - test=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_val2017.json', - img_prefix=data_root + 'val2017/', - pipeline=test_pipeline)) -evaluation = dict(interval=1, metric='bbox') -# optimizer -optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001) -optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) -# learning policy -lr_config = dict( - policy='step', - warmup='linear', - warmup_iters=500, - warmup_ratio=1.0 / 3, - step=[16, 22]) -checkpoint_config = dict(interval=1) -# yapf:disable -log_config = dict( - interval=50, - hooks=[ - dict(type='TextLoggerHook'), - # dict(type='TensorboardLoggerHook') - ]) -# yapf:enable -# runtime settings -total_epochs = 24 -dist_params = dict(backend='nccl') -log_level = 'INFO' -work_dir = './work_dirs/fovea_align_gn_r101_fpn_4gpu_2x' -load_from = None -resume_from = None -workflow = [('train', 1)] diff --git a/configs/foveabox/fovea_align_gn_r50_fpn_4gpu_2x.py b/configs/foveabox/fovea_align_gn_r50_fpn_4gpu_2x.py deleted file mode 100644 index 15efc4c5a8738ba8c519cadb624fb1086a7d3ba3..0000000000000000000000000000000000000000 --- a/configs/foveabox/fovea_align_gn_r50_fpn_4gpu_2x.py +++ /dev/null @@ -1,121 +0,0 @@ -# model settings -model = dict( - type='FOVEA', - pretrained='torchvision://resnet50', - backbone=dict( - type='ResNet', - depth=50, - num_stages=4, - out_indices=(0, 1, 2, 3), - frozen_stages=1, - norm_cfg=dict(type='BN', requires_grad=True), - style='pytorch'), - neck=dict( - type='FPN', - in_channels=[256, 512, 1024, 2048], - out_channels=256, - start_level=1, - num_outs=5, - add_extra_convs=True), - bbox_head=dict( - type='FoveaHead', - num_classes=81, - 
in_channels=256, - stacked_convs=4, - feat_channels=256, - strides=[8, 16, 32, 64, 128], - base_edge_list=[16, 32, 64, 128, 256], - scale_ranges=((1, 64), (32, 128), (64, 256), (128, 512), (256, 2048)), - sigma=0.4, - with_deform=True, - norm_cfg=dict(type='GN', num_groups=32, requires_grad=True), - loss_cls=dict( - type='FocalLoss', - use_sigmoid=True, - gamma=1.50, - alpha=0.4, - loss_weight=1.0), - loss_bbox=dict(type='SmoothL1Loss', beta=0.11, loss_weight=1.0))) -# training and testing settings -train_cfg = dict() -test_cfg = dict( - nms_pre=1000, - score_thr=0.05, - nms=dict(type='nms', iou_thr=0.5), - max_per_img=100) -# dataset settings -dataset_type = 'CocoDataset' -data_root = 'data/coco/' -img_norm_cfg = dict( - mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) -train_pipeline = [ - dict(type='LoadImageFromFile'), - dict(type='LoadAnnotations', with_bbox=True), - dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), - dict(type='RandomFlip', flip_ratio=0.5), - dict(type='Normalize', **img_norm_cfg), - dict(type='Pad', size_divisor=32), - dict(type='DefaultFormatBundle'), - dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']), -] -test_pipeline = [ - dict(type='LoadImageFromFile'), - dict( - type='MultiScaleFlipAug', - img_scale=(1333, 800), - flip=False, - transforms=[ - dict(type='Resize', keep_ratio=True), - dict(type='RandomFlip'), - dict(type='Normalize', **img_norm_cfg), - dict(type='Pad', size_divisor=32), - dict(type='ImageToTensor', keys=['img']), - dict(type='Collect', keys=['img']), - ]) -] -data = dict( - imgs_per_gpu=4, - workers_per_gpu=4, - train=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_train2017.json', - img_prefix=data_root + 'train2017/', - pipeline=train_pipeline), - val=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_val2017.json', - img_prefix=data_root + 'val2017/', - pipeline=test_pipeline), - test=dict( - type=dataset_type, - 
ann_file=data_root + 'annotations/instances_val2017.json', - img_prefix=data_root + 'val2017/', - pipeline=test_pipeline)) -evaluation = dict(interval=1, metric='bbox') -# optimizer -optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001) -optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) -# learning policy -lr_config = dict( - policy='step', - warmup='linear', - warmup_iters=500, - warmup_ratio=1.0 / 3, - step=[16, 22]) -checkpoint_config = dict(interval=1) -# yapf:disable -log_config = dict( - interval=50, - hooks=[ - dict(type='TextLoggerHook'), - # dict(type='TensorboardLoggerHook') - ]) -# yapf:enable -# runtime settings -total_epochs = 24 -dist_params = dict(backend='nccl') -log_level = 'INFO' -work_dir = './work_dirs/fovea_align_gn_r50_fpn_4gpu_2x' -load_from = None -resume_from = None -workflow = [('train', 1)] diff --git a/configs/foveabox/fovea_align_r101_fpn_gn-head_4x4_2x_coco.py b/configs/foveabox/fovea_align_r101_fpn_gn-head_4x4_2x_coco.py new file mode 100644 index 0000000000000000000000000000000000000000..e2ea2f2e20dae4482c641e9070747b8944874633 --- /dev/null +++ b/configs/foveabox/fovea_align_r101_fpn_gn-head_4x4_2x_coco.py @@ -0,0 +1,11 @@ +_base_ = './fovea_r50_fpn_4x4_1x_coco.py' +model = dict( + pretrained='torchvision://resnet101', + backbone=dict(depth=101), + bbox_head=dict( + with_deform=True, + norm_cfg=dict(type='GN', num_groups=32, requires_grad=True))) +# learning policy +lr_config = dict(step=[16, 22]) +total_epochs = 24 +work_dir = './work_dirs/fovea_align_gn_r101_fpn_4gpu_2x' diff --git a/configs/foveabox/fovea_align_r101_fpn_gn-head_mstrain_640-800_4x4_2x_coco.py b/configs/foveabox/fovea_align_r101_fpn_gn-head_mstrain_640-800_4x4_2x_coco.py new file mode 100644 index 0000000000000000000000000000000000000000..7717a0e24e697d61d44930c21ab9ab0d45c3b4e6 --- /dev/null +++ b/configs/foveabox/fovea_align_r101_fpn_gn-head_mstrain_640-800_4x4_2x_coco.py @@ -0,0 +1,28 @@ +_base_ = 
'./fovea_r50_fpn_4x4_1x_coco.py' +model = dict( + pretrained='torchvision://resnet101', + backbone=dict(depth=101), + bbox_head=dict( + with_deform=True, + norm_cfg=dict(type='GN', num_groups=32, requires_grad=True))) +img_norm_cfg = dict( + mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations', with_bbox=True), + dict( + type='Resize', + img_scale=[(1333, 640), (1333, 800)], + multiscale_mode='value', + keep_ratio=True), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']), +] +data = dict(train=dict(pipeline=train_pipeline)) +# learning policy +lr_config = dict(step=[16, 22]) +total_epochs = 24 +work_dir = './work_dirs/fovea_align_gn_ms_r101_fpn_4gpu_2x' diff --git a/configs/foveabox/fovea_align_r50_fpn_gn-head_4x4_2x_coco.py b/configs/foveabox/fovea_align_r50_fpn_gn-head_4x4_2x_coco.py new file mode 100644 index 0000000000000000000000000000000000000000..0e35fb5361ab23f0a34e282457417e5a9a97cf4c --- /dev/null +++ b/configs/foveabox/fovea_align_r50_fpn_gn-head_4x4_2x_coco.py @@ -0,0 +1,9 @@ +_base_ = './fovea_r50_fpn_4x4_1x_coco.py' +model = dict( + bbox_head=dict( + with_deform=True, + norm_cfg=dict(type='GN', num_groups=32, requires_grad=True))) +# learning policy +lr_config = dict(step=[16, 22]) +total_epochs = 24 +work_dir = './work_dirs/fovea_align_gn_r50_fpn_4gpu_2x' diff --git a/configs/foveabox/fovea_align_r50_fpn_gn-head_mstrain_640-800_4x4_2x_coco.py b/configs/foveabox/fovea_align_r50_fpn_gn-head_mstrain_640-800_4x4_2x_coco.py new file mode 100644 index 0000000000000000000000000000000000000000..15068d2b2289e8eadc182eb8bf1bc1ffa1230e00 --- /dev/null +++ b/configs/foveabox/fovea_align_r50_fpn_gn-head_mstrain_640-800_4x4_2x_coco.py @@ -0,0 +1,26 @@ +_base_ = './fovea_r50_fpn_4x4_1x_coco.py' 
+model = dict( + bbox_head=dict( + with_deform=True, + norm_cfg=dict(type='GN', num_groups=32, requires_grad=True))) +img_norm_cfg = dict( + mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations', with_bbox=True), + dict( + type='Resize', + img_scale=[(1333, 640), (1333, 800)], + multiscale_mode='value', + keep_ratio=True), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']), +] +data = dict(train=dict(pipeline=train_pipeline)) +# learning policy +lr_config = dict(step=[16, 22]) +total_epochs = 24 +work_dir = './work_dirs/fovea_align_gn_ms_r50_fpn_4gpu_2x' diff --git a/configs/foveabox/fovea_r50_fpn_4gpu_1x.py b/configs/foveabox/fovea_r50_fpn_4gpu_1x.py deleted file mode 100644 index 724a8aab327b7da75497ad0125919712c332b2d2..0000000000000000000000000000000000000000 --- a/configs/foveabox/fovea_r50_fpn_4gpu_1x.py +++ /dev/null @@ -1,121 +0,0 @@ -# model settings -model = dict( - type='FOVEA', - pretrained='torchvision://resnet50', - backbone=dict( - type='ResNet', - depth=50, - num_stages=4, - out_indices=(0, 1, 2, 3), - frozen_stages=1, - norm_cfg=dict(type='BN', requires_grad=True), - style='pytorch'), - neck=dict( - type='FPN', - in_channels=[256, 512, 1024, 2048], - out_channels=256, - start_level=1, - num_outs=5, - add_extra_convs=True), - bbox_head=dict( - type='FoveaHead', - num_classes=81, - in_channels=256, - stacked_convs=4, - feat_channels=256, - strides=[8, 16, 32, 64, 128], - base_edge_list=[16, 32, 64, 128, 256], - scale_ranges=((1, 64), (32, 128), (64, 256), (128, 512), (256, 2048)), - sigma=0.4, - with_deform=False, - loss_cls=dict( - type='FocalLoss', - use_sigmoid=True, - gamma=1.50, - alpha=0.4, - loss_weight=1.0), - loss_bbox=dict(type='SmoothL1Loss', beta=0.11, 
loss_weight=1.0))) -# training and testing settings -train_cfg = dict() -test_cfg = dict( - nms_pre=1000, - score_thr=0.05, - nms=dict(type='nms', iou_thr=0.5), - max_per_img=100) -# dataset settings -# dataset settings -dataset_type = 'CocoDataset' -data_root = 'data/coco/' -img_norm_cfg = dict( - mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) -train_pipeline = [ - dict(type='LoadImageFromFile'), - dict(type='LoadAnnotations', with_bbox=True), - dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), - dict(type='RandomFlip', flip_ratio=0.5), - dict(type='Normalize', **img_norm_cfg), - dict(type='Pad', size_divisor=32), - dict(type='DefaultFormatBundle'), - dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']), -] -test_pipeline = [ - dict(type='LoadImageFromFile'), - dict( - type='MultiScaleFlipAug', - img_scale=(1333, 800), - flip=False, - transforms=[ - dict(type='Resize', keep_ratio=True), - dict(type='RandomFlip'), - dict(type='Normalize', **img_norm_cfg), - dict(type='Pad', size_divisor=32), - dict(type='ImageToTensor', keys=['img']), - dict(type='Collect', keys=['img']), - ]) -] -data = dict( - imgs_per_gpu=4, - workers_per_gpu=4, - train=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_train2017.json', - img_prefix=data_root + 'train2017/', - pipeline=train_pipeline), - val=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_val2017.json', - img_prefix=data_root + 'val2017/', - pipeline=test_pipeline), - test=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_val2017.json', - img_prefix=data_root + 'val2017/', - pipeline=test_pipeline)) -evaluation = dict(interval=1, metric='bbox') -# optimizer -optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001) -optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) -# learning policy -lr_config = dict( - policy='step', - warmup='linear', - warmup_iters=500, - warmup_ratio=1.0 / 3, - 
step=[8, 11]) -checkpoint_config = dict(interval=1) -# yapf:disable -log_config = dict( - interval=50, - hooks=[ - dict(type='TextLoggerHook'), - # dict(type='TensorboardLoggerHook') - ]) -# yapf:enable -# runtime settings -total_epochs = 12 -dist_params = dict(backend='nccl') -log_level = 'INFO' -work_dir = './work_dirs/fovea_r50_fpn_4gpu_1x' -load_from = None -resume_from = None -workflow = [('train', 1)] diff --git a/configs/foveabox/fovea_r50_fpn_4x4_1x_coco.py b/configs/foveabox/fovea_r50_fpn_4x4_1x_coco.py new file mode 100644 index 0000000000000000000000000000000000000000..c201cfbfe3ab443340d47d0f46cf6f862c618df1 --- /dev/null +++ b/configs/foveabox/fovea_r50_fpn_4x4_1x_coco.py @@ -0,0 +1,52 @@ +_base_ = [ + '../_base_/datasets/coco_detection.py', + '../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py' +] +# model settings +model = dict( + type='FOVEA', + pretrained='torchvision://resnet50', + backbone=dict( + type='ResNet', + depth=50, + num_stages=4, + out_indices=(0, 1, 2, 3), + frozen_stages=1, + norm_cfg=dict(type='BN', requires_grad=True), + style='pytorch'), + neck=dict( + type='FPN', + in_channels=[256, 512, 1024, 2048], + out_channels=256, + start_level=1, + num_outs=5, + add_extra_convs=True), + bbox_head=dict( + type='FoveaHead', + num_classes=81, + in_channels=256, + stacked_convs=4, + feat_channels=256, + strides=[8, 16, 32, 64, 128], + base_edge_list=[16, 32, 64, 128, 256], + scale_ranges=((1, 64), (32, 128), (64, 256), (128, 512), (256, 2048)), + sigma=0.4, + with_deform=False, + loss_cls=dict( + type='FocalLoss', + use_sigmoid=True, + gamma=1.50, + alpha=0.4, + loss_weight=1.0), + loss_bbox=dict(type='SmoothL1Loss', beta=0.11, loss_weight=1.0))) +# training and testing settings +train_cfg = dict() +test_cfg = dict( + nms_pre=1000, + score_thr=0.05, + nms=dict(type='nms', iou_thr=0.5), + max_per_img=100) +data = dict(imgs_per_gpu=4, workers_per_gpu=4) +# optimizer +optimizer = dict(type='SGD', lr=0.01, momentum=0.9, 
weight_decay=0.0001) +work_dir = './work_dirs/fovea_r50_fpn_4gpu_1x' diff --git a/configs/fp16/faster_rcnn_r50_fpn_fp16_1x.py b/configs/fp16/faster_rcnn_r50_fpn_fp16_1x.py deleted file mode 100644 index ba3a4db66e2a4603d3bdc17c88b1802bc14ca2df..0000000000000000000000000000000000000000 --- a/configs/fp16/faster_rcnn_r50_fpn_fp16_1x.py +++ /dev/null @@ -1,179 +0,0 @@ -# fp16 settings -fp16 = dict(loss_scale=512.) - -# model settings -model = dict( - type='FasterRCNN', - pretrained='torchvision://resnet50', - backbone=dict( - type='ResNet', - depth=50, - num_stages=4, - out_indices=(0, 1, 2, 3), - frozen_stages=1, - norm_cfg=dict(type='BN', requires_grad=True), - style='pytorch'), - neck=dict( - type='FPN', - in_channels=[256, 512, 1024, 2048], - out_channels=256, - num_outs=5), - rpn_head=dict( - type='RPNHead', - in_channels=256, - feat_channels=256, - anchor_scales=[8], - anchor_ratios=[0.5, 1.0, 2.0], - anchor_strides=[4, 8, 16, 32, 64], - target_means=[.0, .0, .0, .0], - target_stds=[1.0, 1.0, 1.0, 1.0], - loss_cls=dict( - type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0), - loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0)), - bbox_roi_extractor=dict( - type='SingleRoIExtractor', - roi_layer=dict(type='RoIAlign', out_size=7, sample_num=2), - out_channels=256, - featmap_strides=[4, 8, 16, 32]), - bbox_head=dict( - type='SharedFCBBoxHead', - num_fcs=2, - in_channels=256, - fc_out_channels=1024, - roi_feat_size=7, - num_classes=81, - target_means=[0., 0., 0., 0.], - target_stds=[0.1, 0.1, 0.2, 0.2], - reg_class_agnostic=False, - loss_cls=dict( - type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), - loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0))) -# model training and testing settings -train_cfg = dict( - rpn=dict( - assigner=dict( - type='MaxIoUAssigner', - pos_iou_thr=0.7, - neg_iou_thr=0.3, - min_pos_iou=0.3, - ignore_iof_thr=-1), - sampler=dict( - type='RandomSampler', - num=256, - pos_fraction=0.5, - 
neg_pos_ub=-1, - add_gt_as_proposals=False), - allowed_border=0, - pos_weight=-1, - debug=False), - rpn_proposal=dict( - nms_across_levels=False, - nms_pre=2000, - nms_post=2000, - max_num=2000, - nms_thr=0.7, - min_bbox_size=0), - rcnn=dict( - assigner=dict( - type='MaxIoUAssigner', - pos_iou_thr=0.5, - neg_iou_thr=0.5, - min_pos_iou=0.5, - ignore_iof_thr=-1), - sampler=dict( - type='RandomSampler', - num=512, - pos_fraction=0.25, - neg_pos_ub=-1, - add_gt_as_proposals=True), - pos_weight=-1, - debug=False)) -test_cfg = dict( - rpn=dict( - nms_across_levels=False, - nms_pre=1000, - nms_post=1000, - max_num=1000, - nms_thr=0.7, - min_bbox_size=0), - rcnn=dict( - score_thr=0.05, nms=dict(type='nms', iou_thr=0.5), max_per_img=100) - # soft-nms is also supported for rcnn testing - # e.g., nms=dict(type='soft_nms', iou_thr=0.5, min_score=0.05) -) -# dataset settings -dataset_type = 'CocoDataset' -data_root = 'data/coco/' -img_norm_cfg = dict( - mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) -train_pipeline = [ - dict(type='LoadImageFromFile'), - dict(type='LoadAnnotations', with_bbox=True), - dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), - dict(type='RandomFlip', flip_ratio=0.5), - dict(type='Normalize', **img_norm_cfg), - dict(type='Pad', size_divisor=32), - dict(type='DefaultFormatBundle'), - dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']), -] -test_pipeline = [ - dict(type='LoadImageFromFile'), - dict( - type='MultiScaleFlipAug', - img_scale=(1333, 800), - flip=False, - transforms=[ - dict(type='Resize', keep_ratio=True), - dict(type='RandomFlip'), - dict(type='Normalize', **img_norm_cfg), - dict(type='Pad', size_divisor=32), - dict(type='ImageToTensor', keys=['img']), - dict(type='Collect', keys=['img']), - ]) -] -data = dict( - imgs_per_gpu=2, - workers_per_gpu=2, - train=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_train2017.json', - img_prefix=data_root + 'train2017/', - 
pipeline=train_pipeline), - val=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_val2017.json', - img_prefix=data_root + 'val2017/', - pipeline=test_pipeline), - test=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_val2017.json', - img_prefix=data_root + 'val2017/', - pipeline=test_pipeline)) -evaluation = dict(interval=1, metric='bbox') -# optimizer -optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001) -optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) -# learning policy -lr_config = dict( - policy='step', - warmup='linear', - warmup_iters=500, - warmup_ratio=1.0 / 3, - step=[8, 11]) -checkpoint_config = dict(interval=1) -# yapf:disable -log_config = dict( - interval=50, - hooks=[ - dict(type='TextLoggerHook'), - # dict(type='TensorboardLoggerHook') - ]) -# yapf:enable -# runtime settings -total_epochs = 12 -dist_params = dict(backend='nccl') -log_level = 'INFO' -work_dir = './work_dirs/faster_rcnn_r50_fpn_fp16_1x' -load_from = None -resume_from = None -workflow = [('train', 1)] diff --git a/configs/fp16/faster_rcnn_r50_fpn_fp16_1x_coco.py b/configs/fp16/faster_rcnn_r50_fpn_fp16_1x_coco.py new file mode 100644 index 0000000000000000000000000000000000000000..37196fecff959da3dca88b82df498d28fcff9a9a --- /dev/null +++ b/configs/fp16/faster_rcnn_r50_fpn_fp16_1x_coco.py @@ -0,0 +1,4 @@ +_base_ = '../faster_rcnn/faster_rcnn_r50_fpn_1x_coco.py' +# fp16 settings +fp16 = dict(loss_scale=512.) +work_dir = './work_dirs/faster_rcnn_r50_fpn_fp16_1x' diff --git a/configs/fp16/mask_rcnn_r50_fpn_fp16_1x.py b/configs/fp16/mask_rcnn_r50_fpn_fp16_1x.py deleted file mode 100644 index 34bb1489235d19bae21538d9969ecb6b72268793..0000000000000000000000000000000000000000 --- a/configs/fp16/mask_rcnn_r50_fpn_fp16_1x.py +++ /dev/null @@ -1,193 +0,0 @@ -# fp16 settings -fp16 = dict(loss_scale=512.) 
- -# model settings -model = dict( - type='MaskRCNN', - pretrained='torchvision://resnet50', - backbone=dict( - type='ResNet', - depth=50, - num_stages=4, - out_indices=(0, 1, 2, 3), - frozen_stages=1, - norm_cfg=dict(type='BN', requires_grad=True), - style='pytorch'), - neck=dict( - type='FPN', - in_channels=[256, 512, 1024, 2048], - out_channels=256, - num_outs=5), - rpn_head=dict( - type='RPNHead', - in_channels=256, - feat_channels=256, - anchor_scales=[8], - anchor_ratios=[0.5, 1.0, 2.0], - anchor_strides=[4, 8, 16, 32, 64], - target_means=[.0, .0, .0, .0], - target_stds=[1.0, 1.0, 1.0, 1.0], - loss_cls=dict( - type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0), - loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0)), - bbox_roi_extractor=dict( - type='SingleRoIExtractor', - roi_layer=dict(type='RoIAlign', out_size=7, sample_num=2), - out_channels=256, - featmap_strides=[4, 8, 16, 32]), - bbox_head=dict( - type='SharedFCBBoxHead', - num_fcs=2, - in_channels=256, - fc_out_channels=1024, - roi_feat_size=7, - num_classes=81, - target_means=[0., 0., 0., 0.], - target_stds=[0.1, 0.1, 0.2, 0.2], - reg_class_agnostic=False, - loss_cls=dict( - type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), - loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0)), - mask_roi_extractor=dict( - type='SingleRoIExtractor', - roi_layer=dict(type='RoIAlign', out_size=14, sample_num=2), - out_channels=256, - featmap_strides=[4, 8, 16, 32]), - mask_head=dict( - type='FCNMaskHead', - num_convs=4, - in_channels=256, - conv_out_channels=256, - num_classes=81, - loss_mask=dict( - type='CrossEntropyLoss', use_mask=True, loss_weight=1.0))) -# model training and testing settings -train_cfg = dict( - rpn=dict( - assigner=dict( - type='MaxIoUAssigner', - pos_iou_thr=0.7, - neg_iou_thr=0.3, - min_pos_iou=0.3, - ignore_iof_thr=-1), - sampler=dict( - type='RandomSampler', - num=256, - pos_fraction=0.5, - neg_pos_ub=-1, - add_gt_as_proposals=False), - 
allowed_border=0, - pos_weight=-1, - debug=False), - rpn_proposal=dict( - nms_across_levels=False, - nms_pre=2000, - nms_post=2000, - max_num=2000, - nms_thr=0.7, - min_bbox_size=0), - rcnn=dict( - assigner=dict( - type='MaxIoUAssigner', - pos_iou_thr=0.5, - neg_iou_thr=0.5, - min_pos_iou=0.5, - ignore_iof_thr=-1), - sampler=dict( - type='RandomSampler', - num=512, - pos_fraction=0.25, - neg_pos_ub=-1, - add_gt_as_proposals=True), - mask_size=28, - pos_weight=-1, - debug=False)) -test_cfg = dict( - rpn=dict( - nms_across_levels=False, - nms_pre=1000, - nms_post=1000, - max_num=1000, - nms_thr=0.7, - min_bbox_size=0), - rcnn=dict( - score_thr=0.05, - nms=dict(type='nms', iou_thr=0.5), - max_per_img=100, - mask_thr_binary=0.5)) -# dataset settings -dataset_type = 'CocoDataset' -data_root = 'data/coco/' -img_norm_cfg = dict( - mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) -train_pipeline = [ - dict(type='LoadImageFromFile'), - dict(type='LoadAnnotations', with_bbox=True, with_mask=True), - dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), - dict(type='RandomFlip', flip_ratio=0.5), - dict(type='Normalize', **img_norm_cfg), - dict(type='Pad', size_divisor=32), - dict(type='DefaultFormatBundle'), - dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']), -] -test_pipeline = [ - dict(type='LoadImageFromFile'), - dict( - type='MultiScaleFlipAug', - img_scale=(1333, 800), - flip=False, - transforms=[ - dict(type='Resize', keep_ratio=True), - dict(type='RandomFlip'), - dict(type='Normalize', **img_norm_cfg), - dict(type='Pad', size_divisor=32), - dict(type='ImageToTensor', keys=['img']), - dict(type='Collect', keys=['img']), - ]) -] -data = dict( - imgs_per_gpu=2, - workers_per_gpu=2, - train=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_train2017.json', - img_prefix=data_root + 'train2017/', - pipeline=train_pipeline), - val=dict( - type=dataset_type, - ann_file=data_root + 
'annotations/instances_val2017.json', - img_prefix=data_root + 'val2017/', - pipeline=test_pipeline), - test=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_val2017.json', - img_prefix=data_root + 'val2017/', - pipeline=test_pipeline)) -evaluation = dict(interval=1, metric=['bbox', 'segm']) -# optimizer -optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001) -optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) -# learning policy -lr_config = dict( - policy='step', - warmup='linear', - warmup_iters=500, - warmup_ratio=1.0 / 3, - step=[8, 11]) -checkpoint_config = dict(interval=1) -# yapf:disable -log_config = dict( - interval=50, - hooks=[ - dict(type='TextLoggerHook'), - # dict(type='TensorboardLoggerHook') - ]) -# yapf:enable -# runtime settings -total_epochs = 12 -dist_params = dict(backend='nccl') -log_level = 'INFO' -work_dir = './work_dirs/mask_rcnn_r50_fpn_fp16_1x' -load_from = None -resume_from = None -workflow = [('train', 1)] diff --git a/configs/fp16/mask_rcnn_r50_fpn_fp16_1x_coco.py b/configs/fp16/mask_rcnn_r50_fpn_fp16_1x_coco.py new file mode 100644 index 0000000000000000000000000000000000000000..ba14b08af7dc68c5ee2b735eb7c8bb174697bd9c --- /dev/null +++ b/configs/fp16/mask_rcnn_r50_fpn_fp16_1x_coco.py @@ -0,0 +1,4 @@ +_base_ = '../mask_rcnn/mask_rcnn_r50_fpn_1x_coco.py' +# fp16 settings +fp16 = dict(loss_scale=512.) +work_dir = './work_dirs/mask_rcnn_r50_fpn_fp16_1x' diff --git a/configs/fp16/retinanet_r50_fpn_fp16_1x.py b/configs/fp16/retinanet_r50_fpn_fp16_1x.py deleted file mode 100644 index 3eebebb64e953e3e12c380d8ed058be1de08353d..0000000000000000000000000000000000000000 --- a/configs/fp16/retinanet_r50_fpn_fp16_1x.py +++ /dev/null @@ -1,134 +0,0 @@ -# fp16 settings -fp16 = dict(loss_scale=512.) 
- -# model settings -model = dict( - type='RetinaNet', - pretrained='torchvision://resnet50', - backbone=dict( - type='ResNet', - depth=50, - num_stages=4, - out_indices=(0, 1, 2, 3), - frozen_stages=1, - norm_cfg=dict(type='BN', requires_grad=True), - style='pytorch'), - neck=dict( - type='FPN', - in_channels=[256, 512, 1024, 2048], - out_channels=256, - start_level=1, - add_extra_convs=True, - num_outs=5), - bbox_head=dict( - type='RetinaHead', - num_classes=81, - in_channels=256, - stacked_convs=4, - feat_channels=256, - octave_base_scale=4, - scales_per_octave=3, - anchor_ratios=[0.5, 1.0, 2.0], - anchor_strides=[8, 16, 32, 64, 128], - target_means=[.0, .0, .0, .0], - target_stds=[1.0, 1.0, 1.0, 1.0], - loss_cls=dict( - type='FocalLoss', - use_sigmoid=True, - gamma=2.0, - alpha=0.25, - loss_weight=1.0), - loss_bbox=dict(type='SmoothL1Loss', beta=0.11, loss_weight=1.0))) -# training and testing settings -train_cfg = dict( - assigner=dict( - type='MaxIoUAssigner', - pos_iou_thr=0.5, - neg_iou_thr=0.4, - min_pos_iou=0, - ignore_iof_thr=-1), - allowed_border=-1, - pos_weight=-1, - debug=False) -test_cfg = dict( - nms_pre=1000, - min_bbox_size=0, - score_thr=0.05, - nms=dict(type='nms', iou_thr=0.5), - max_per_img=100) -# dataset settings -dataset_type = 'CocoDataset' -data_root = 'data/coco/' -img_norm_cfg = dict( - mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) -train_pipeline = [ - dict(type='LoadImageFromFile'), - dict(type='LoadAnnotations', with_bbox=True), - dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), - dict(type='RandomFlip', flip_ratio=0.5), - dict(type='Normalize', **img_norm_cfg), - dict(type='Pad', size_divisor=32), - dict(type='DefaultFormatBundle'), - dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']), -] -test_pipeline = [ - dict(type='LoadImageFromFile'), - dict( - type='MultiScaleFlipAug', - img_scale=(1333, 800), - flip=False, - transforms=[ - dict(type='Resize', keep_ratio=True), - 
dict(type='RandomFlip'), - dict(type='Normalize', **img_norm_cfg), - dict(type='Pad', size_divisor=32), - dict(type='ImageToTensor', keys=['img']), - dict(type='Collect', keys=['img']), - ]) -] -data = dict( - imgs_per_gpu=2, - workers_per_gpu=2, - train=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_train2017.json', - img_prefix=data_root + 'train2017/', - pipeline=train_pipeline), - val=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_val2017.json', - img_prefix=data_root + 'val2017/', - pipeline=test_pipeline), - test=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_val2017.json', - img_prefix=data_root + 'val2017/', - pipeline=test_pipeline)) -evaluation = dict(interval=1, metric='bbox') -# optimizer -optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001) -optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) -# learning policy -lr_config = dict( - policy='step', - warmup='linear', - warmup_iters=500, - warmup_ratio=1.0 / 3, - step=[8, 11]) -checkpoint_config = dict(interval=1) -# yapf:disable -log_config = dict( - interval=50, - hooks=[ - dict(type='TextLoggerHook'), - # dict(type='TensorboardLoggerHook') - ]) -# yapf:enable -# runtime settings -total_epochs = 12 -dist_params = dict(backend='nccl') -log_level = 'INFO' -work_dir = './work_dirs/retinanet_r50_fpn_fp16_1x' -load_from = None -resume_from = None -workflow = [('train', 1)] diff --git a/configs/fp16/retinanet_r50_fpn_fp16_1x_coco.py b/configs/fp16/retinanet_r50_fpn_fp16_1x_coco.py new file mode 100644 index 0000000000000000000000000000000000000000..70cfb7d8baf3dcd634c102fe924e422d7d2610f2 --- /dev/null +++ b/configs/fp16/retinanet_r50_fpn_fp16_1x_coco.py @@ -0,0 +1,4 @@ +_base_ = '../retinanet/retinanet_r50_fpn_1x_coco.py' +# fp16 settings +fp16 = dict(loss_scale=512.) 
+work_dir = './work_dirs/retinanet_r50_fpn_fp16_1x' diff --git a/configs/free_anchor/retinanet_free_anchor_r101_fpn_1x.py b/configs/free_anchor/retinanet_free_anchor_r101_fpn_1x.py deleted file mode 100644 index 134de95153ebb99a84f832eb856be04585635c02..0000000000000000000000000000000000000000 --- a/configs/free_anchor/retinanet_free_anchor_r101_fpn_1x.py +++ /dev/null @@ -1,125 +0,0 @@ -# model settings -model = dict( - type='RetinaNet', - pretrained='torchvision://resnet101', - backbone=dict( - type='ResNet', - depth=101, - num_stages=4, - out_indices=(0, 1, 2, 3), - frozen_stages=1, - norm_cfg=dict(type='BN', requires_grad=True), - style='pytorch'), - neck=dict( - type='FPN', - in_channels=[256, 512, 1024, 2048], - out_channels=256, - start_level=1, - add_extra_convs=True, - num_outs=5), - bbox_head=dict( - type='FreeAnchorRetinaHead', - num_classes=81, - in_channels=256, - stacked_convs=4, - feat_channels=256, - octave_base_scale=4, - scales_per_octave=3, - anchor_ratios=[0.5, 1.0, 2.0], - anchor_strides=[8, 16, 32, 64, 128], - target_means=[.0, .0, .0, .0], - target_stds=[0.1, 0.1, 0.2, 0.2], - loss_bbox=dict(type='SmoothL1Loss', beta=0.11, loss_weight=0.75))) -# training and testing settings -train_cfg = dict( - assigner=dict( - type='MaxIoUAssigner', - pos_iou_thr=0.5, - neg_iou_thr=0.4, - min_pos_iou=0, - ignore_iof_thr=-1), - allowed_border=-1, - pos_weight=-1, - debug=False) -test_cfg = dict( - nms_pre=1000, - min_bbox_size=0, - score_thr=0.05, - nms=dict(type='nms', iou_thr=0.5), - max_per_img=100) -# dataset settings -dataset_type = 'CocoDataset' -data_root = 'data/coco/' -img_norm_cfg = dict( - mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) -train_pipeline = [ - dict(type='LoadImageFromFile'), - dict(type='LoadAnnotations', with_bbox=True), - dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), - dict(type='RandomFlip', flip_ratio=0.5), - dict(type='Normalize', **img_norm_cfg), - dict(type='Pad', size_divisor=32), - 
dict(type='DefaultFormatBundle'), - dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']), -] -test_pipeline = [ - dict(type='LoadImageFromFile'), - dict( - type='MultiScaleFlipAug', - img_scale=(1333, 800), - flip=False, - transforms=[ - dict(type='Resize', keep_ratio=True), - dict(type='RandomFlip'), - dict(type='Normalize', **img_norm_cfg), - dict(type='Pad', size_divisor=32), - dict(type='ImageToTensor', keys=['img']), - dict(type='Collect', keys=['img']), - ]) -] -data = dict( - imgs_per_gpu=2, - workers_per_gpu=2, - train=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_train2017.json', - img_prefix=data_root + 'train2017/', - pipeline=train_pipeline), - val=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_val2017.json', - img_prefix=data_root + 'val2017/', - pipeline=test_pipeline), - test=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_val2017.json', - img_prefix=data_root + 'val2017/', - pipeline=test_pipeline)) -evaluation = dict(interval=1, metric='bbox') -# optimizer -optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001) -optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) -# learning policy -lr_config = dict( - policy='step', - warmup='linear', - warmup_iters=500, - warmup_ratio=1.0 / 3, - step=[8, 11]) -checkpoint_config = dict(interval=1) -# yapf:disable -log_config = dict( - interval=50, - hooks=[ - dict(type='TextLoggerHook'), - # dict(type='TensorboardLoggerHook') - ]) -# yapf:enable -# runtime settings -total_epochs = 12 -dist_params = dict(backend='nccl') -log_level = 'INFO' -work_dir = './work_dirs/retinanet_free_anchor_r101_fpn_1x' -load_from = None -resume_from = None -workflow = [('train', 1)] diff --git a/configs/free_anchor/retinanet_free_anchor_r101_fpn_1x_coco.py b/configs/free_anchor/retinanet_free_anchor_r101_fpn_1x_coco.py new file mode 100644 index 
0000000000000000000000000000000000000000..44fcacfe3f0e6335af4e1c9d7bf974c8fce507f5 --- /dev/null +++ b/configs/free_anchor/retinanet_free_anchor_r101_fpn_1x_coco.py @@ -0,0 +1,3 @@ +_base_ = './retinanet_free_anchor_r50_fpn_1x_coco.py' +model = dict(pretrained='torchvision://resnet101', backbone=dict(depth=101)) +work_dir = './work_dirs/retinanet_free_anchor_r101_fpn_1x' diff --git a/configs/free_anchor/retinanet_free_anchor_r50_fpn_1x.py b/configs/free_anchor/retinanet_free_anchor_r50_fpn_1x.py deleted file mode 100644 index 93a211a6146fe1725b544af3e4a2b9fb33ede7b6..0000000000000000000000000000000000000000 --- a/configs/free_anchor/retinanet_free_anchor_r50_fpn_1x.py +++ /dev/null @@ -1,125 +0,0 @@ -# model settings -model = dict( - type='RetinaNet', - pretrained='torchvision://resnet50', - backbone=dict( - type='ResNet', - depth=50, - num_stages=4, - out_indices=(0, 1, 2, 3), - frozen_stages=1, - norm_cfg=dict(type='BN', requires_grad=True), - style='pytorch'), - neck=dict( - type='FPN', - in_channels=[256, 512, 1024, 2048], - out_channels=256, - start_level=1, - add_extra_convs=True, - num_outs=5), - bbox_head=dict( - type='FreeAnchorRetinaHead', - num_classes=81, - in_channels=256, - stacked_convs=4, - feat_channels=256, - octave_base_scale=4, - scales_per_octave=3, - anchor_ratios=[0.5, 1.0, 2.0], - anchor_strides=[8, 16, 32, 64, 128], - target_means=[.0, .0, .0, .0], - target_stds=[0.1, 0.1, 0.2, 0.2], - loss_bbox=dict(type='SmoothL1Loss', beta=0.11, loss_weight=0.75))) -# training and testing settings -train_cfg = dict( - assigner=dict( - type='MaxIoUAssigner', - pos_iou_thr=0.5, - neg_iou_thr=0.4, - min_pos_iou=0, - ignore_iof_thr=-1), - allowed_border=-1, - pos_weight=-1, - debug=False) -test_cfg = dict( - nms_pre=1000, - min_bbox_size=0, - score_thr=0.05, - nms=dict(type='nms', iou_thr=0.5), - max_per_img=100) -# dataset settings -dataset_type = 'CocoDataset' -data_root = 'data/coco/' -img_norm_cfg = dict( - mean=[123.675, 116.28, 103.53], std=[58.395, 
57.12, 57.375], to_rgb=True) -train_pipeline = [ - dict(type='LoadImageFromFile'), - dict(type='LoadAnnotations', with_bbox=True), - dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), - dict(type='RandomFlip', flip_ratio=0.5), - dict(type='Normalize', **img_norm_cfg), - dict(type='Pad', size_divisor=32), - dict(type='DefaultFormatBundle'), - dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']), -] -test_pipeline = [ - dict(type='LoadImageFromFile'), - dict( - type='MultiScaleFlipAug', - img_scale=(1333, 800), - flip=False, - transforms=[ - dict(type='Resize', keep_ratio=True), - dict(type='RandomFlip'), - dict(type='Normalize', **img_norm_cfg), - dict(type='Pad', size_divisor=32), - dict(type='ImageToTensor', keys=['img']), - dict(type='Collect', keys=['img']), - ]) -] -data = dict( - imgs_per_gpu=2, - workers_per_gpu=2, - train=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_train2017.json', - img_prefix=data_root + 'train2017/', - pipeline=train_pipeline), - val=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_val2017.json', - img_prefix=data_root + 'val2017/', - pipeline=test_pipeline), - test=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_val2017.json', - img_prefix=data_root + 'val2017/', - pipeline=test_pipeline)) -evaluation = dict(interval=1, metric='bbox') -# optimizer -optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001) -optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) -# learning policy -lr_config = dict( - policy='step', - warmup='linear', - warmup_iters=500, - warmup_ratio=1.0 / 3, - step=[8, 11]) -checkpoint_config = dict(interval=1) -# yapf:disable -log_config = dict( - interval=50, - hooks=[ - dict(type='TextLoggerHook'), - # dict(type='TensorboardLoggerHook') - ]) -# yapf:enable -# runtime settings -total_epochs = 12 -dist_params = dict(backend='nccl') -log_level = 'INFO' -work_dir = 
'./work_dirs/retinanet_free_anchor_r50_fpn_1x' -load_from = None -resume_from = None -workflow = [('train', 1)] diff --git a/configs/free_anchor/retinanet_free_anchor_r50_fpn_1x_coco.py b/configs/free_anchor/retinanet_free_anchor_r50_fpn_1x_coco.py new file mode 100644 index 0000000000000000000000000000000000000000..ad50440bf3cf277cb86a6a1756a59105dd707a40 --- /dev/null +++ b/configs/free_anchor/retinanet_free_anchor_r50_fpn_1x_coco.py @@ -0,0 +1,17 @@ +_base_ = '../retinanet/retinanet_r50_fpn_1x_coco.py' +model = dict( + bbox_head=dict( + _delete_=True, + type='FreeAnchorRetinaHead', + num_classes=81, + in_channels=256, + stacked_convs=4, + feat_channels=256, + octave_base_scale=4, + scales_per_octave=3, + anchor_ratios=[0.5, 1.0, 2.0], + anchor_strides=[8, 16, 32, 64, 128], + target_means=[.0, .0, .0, .0], + target_stds=[0.1, 0.1, 0.2, 0.2], + loss_bbox=dict(type='SmoothL1Loss', beta=0.11, loss_weight=0.75))) +work_dir = './work_dirs/retinanet_free_anchor_r50_fpn_1x' diff --git a/configs/free_anchor/retinanet_free_anchor_x101-32x4d_fpn_1x.py b/configs/free_anchor/retinanet_free_anchor_x101-32x4d_fpn_1x.py deleted file mode 100644 index 5edec0d29920c8b2b0cab8008961e7aeb77e6782..0000000000000000000000000000000000000000 --- a/configs/free_anchor/retinanet_free_anchor_x101-32x4d_fpn_1x.py +++ /dev/null @@ -1,127 +0,0 @@ -# model settings -model = dict( - type='RetinaNet', - pretrained='open-mmlab://resnext101_32x4d', - backbone=dict( - type='ResNeXt', - depth=101, - groups=32, - base_width=4, - num_stages=4, - out_indices=(0, 1, 2, 3), - frozen_stages=1, - norm_cfg=dict(type='BN', requires_grad=True), - style='pytorch'), - neck=dict( - type='FPN', - in_channels=[256, 512, 1024, 2048], - out_channels=256, - start_level=1, - add_extra_convs=True, - num_outs=5), - bbox_head=dict( - type='FreeAnchorRetinaHead', - num_classes=81, - in_channels=256, - stacked_convs=4, - feat_channels=256, - octave_base_scale=4, - scales_per_octave=3, - anchor_ratios=[0.5, 1.0, 2.0], - 
anchor_strides=[8, 16, 32, 64, 128], - target_means=[.0, .0, .0, .0], - target_stds=[0.1, 0.1, 0.2, 0.2], - loss_bbox=dict(type='SmoothL1Loss', beta=0.11, loss_weight=0.75))) -# training and testing settings -train_cfg = dict( - assigner=dict( - type='MaxIoUAssigner', - pos_iou_thr=0.5, - neg_iou_thr=0.4, - min_pos_iou=0, - ignore_iof_thr=-1), - allowed_border=-1, - pos_weight=-1, - debug=False) -test_cfg = dict( - nms_pre=1000, - min_bbox_size=0, - score_thr=0.05, - nms=dict(type='nms', iou_thr=0.5), - max_per_img=100) -# dataset settings -dataset_type = 'CocoDataset' -data_root = 'data/coco/' -img_norm_cfg = dict( - mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) -train_pipeline = [ - dict(type='LoadImageFromFile'), - dict(type='LoadAnnotations', with_bbox=True), - dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), - dict(type='RandomFlip', flip_ratio=0.5), - dict(type='Normalize', **img_norm_cfg), - dict(type='Pad', size_divisor=32), - dict(type='DefaultFormatBundle'), - dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']), -] -test_pipeline = [ - dict(type='LoadImageFromFile'), - dict( - type='MultiScaleFlipAug', - img_scale=(1333, 800), - flip=False, - transforms=[ - dict(type='Resize', keep_ratio=True), - dict(type='RandomFlip'), - dict(type='Normalize', **img_norm_cfg), - dict(type='Pad', size_divisor=32), - dict(type='ImageToTensor', keys=['img']), - dict(type='Collect', keys=['img']), - ]) -] -data = dict( - imgs_per_gpu=2, - workers_per_gpu=2, - train=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_train2017.json', - img_prefix=data_root + 'train2017/', - pipeline=train_pipeline), - val=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_val2017.json', - img_prefix=data_root + 'val2017/', - pipeline=test_pipeline), - test=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_val2017.json', - img_prefix=data_root + 'val2017/', - 
pipeline=test_pipeline)) -evaluation = dict(interval=1, metric='bbox') -# optimizer -optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001) -optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) -# learning policy -lr_config = dict( - policy='step', - warmup='linear', - warmup_iters=500, - warmup_ratio=1.0 / 3, - step=[8, 11]) -checkpoint_config = dict(interval=1) -# yapf:disable -log_config = dict( - interval=50, - hooks=[ - dict(type='TextLoggerHook'), - # dict(type='TensorboardLoggerHook') - ]) -# yapf:enable -# runtime settings -total_epochs = 12 -dist_params = dict(backend='nccl') -log_level = 'INFO' -work_dir = './work_dirs/retinanet_free_anchor_x101-32x4d_fpn_1x' -load_from = None -resume_from = None -workflow = [('train', 1)] diff --git a/configs/free_anchor/retinanet_free_anchor_x101-32x4d_fpn_1x_coco.py b/configs/free_anchor/retinanet_free_anchor_x101-32x4d_fpn_1x_coco.py new file mode 100644 index 0000000000000000000000000000000000000000..c3b607412781387d4c1c287ee93818ec7313abf0 --- /dev/null +++ b/configs/free_anchor/retinanet_free_anchor_x101-32x4d_fpn_1x_coco.py @@ -0,0 +1,13 @@ +_base_ = './retinanet_free_anchor_r50_fpn_1x_coco.py' +model = dict( + pretrained='open-mmlab://resnext101_32x4d', + backbone=dict( + type='ResNeXt', + depth=101, + groups=32, + base_width=4, + num_stages=4, + out_indices=(0, 1, 2, 3), + frozen_stages=1, + style='pytorch')) +work_dir = './work_dirs/retinanet_free_anchor_x101-32x4d_fpn_1x' diff --git a/configs/gcnet/mask_rcnn_r16_gcb_c3-c5_r50_fpn_1x.py b/configs/gcnet/mask_rcnn_r16_gcb_c3-c5_r50_fpn_1x.py deleted file mode 100644 index bce18b940a59b29f3d053f1c00e24db7f86e5bee..0000000000000000000000000000000000000000 --- a/configs/gcnet/mask_rcnn_r16_gcb_c3-c5_r50_fpn_1x.py +++ /dev/null @@ -1,192 +0,0 @@ -# model settings -model = dict( - type='MaskRCNN', - pretrained='torchvision://resnet50', - backbone=dict( - type='ResNet', - depth=50, - num_stages=4, - out_indices=(0, 1, 2, 3), - 
frozen_stages=1, - norm_cfg=dict(type='BN', requires_grad=True), - style='pytorch', - gcb=dict(ratio=1. / 16., ), - stage_with_gcb=(False, True, True, True)), - neck=dict( - type='FPN', - in_channels=[256, 512, 1024, 2048], - out_channels=256, - num_outs=5), - rpn_head=dict( - type='RPNHead', - in_channels=256, - feat_channels=256, - anchor_scales=[8], - anchor_ratios=[0.5, 1.0, 2.0], - anchor_strides=[4, 8, 16, 32, 64], - target_means=[.0, .0, .0, .0], - target_stds=[1.0, 1.0, 1.0, 1.0], - loss_cls=dict( - type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0), - loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0)), - bbox_roi_extractor=dict( - type='SingleRoIExtractor', - roi_layer=dict(type='RoIAlign', out_size=7, sample_num=2), - out_channels=256, - featmap_strides=[4, 8, 16, 32]), - bbox_head=dict( - type='SharedFCBBoxHead', - num_fcs=2, - in_channels=256, - fc_out_channels=1024, - roi_feat_size=7, - num_classes=81, - target_means=[0., 0., 0., 0.], - target_stds=[0.1, 0.1, 0.2, 0.2], - reg_class_agnostic=False, - loss_cls=dict( - type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), - loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0)), - mask_roi_extractor=dict( - type='SingleRoIExtractor', - roi_layer=dict(type='RoIAlign', out_size=14, sample_num=2), - out_channels=256, - featmap_strides=[4, 8, 16, 32]), - mask_head=dict( - type='FCNMaskHead', - num_convs=4, - in_channels=256, - conv_out_channels=256, - num_classes=81, - loss_mask=dict( - type='CrossEntropyLoss', use_mask=True, loss_weight=1.0))) -# model training and testing settings -train_cfg = dict( - rpn=dict( - assigner=dict( - type='MaxIoUAssigner', - pos_iou_thr=0.7, - neg_iou_thr=0.3, - min_pos_iou=0.3, - ignore_iof_thr=-1), - sampler=dict( - type='RandomSampler', - num=256, - pos_fraction=0.5, - neg_pos_ub=-1, - add_gt_as_proposals=False), - allowed_border=0, - pos_weight=-1, - debug=False), - rpn_proposal=dict( - nms_across_levels=False, - nms_pre=2000, - 
nms_post=2000, - max_num=2000, - nms_thr=0.7, - min_bbox_size=0), - rcnn=dict( - assigner=dict( - type='MaxIoUAssigner', - pos_iou_thr=0.5, - neg_iou_thr=0.5, - min_pos_iou=0.5, - ignore_iof_thr=-1), - sampler=dict( - type='RandomSampler', - num=512, - pos_fraction=0.25, - neg_pos_ub=-1, - add_gt_as_proposals=True), - mask_size=28, - pos_weight=-1, - debug=False)) -test_cfg = dict( - rpn=dict( - nms_across_levels=False, - nms_pre=1000, - nms_post=1000, - max_num=1000, - nms_thr=0.7, - min_bbox_size=0), - rcnn=dict( - score_thr=0.05, - nms=dict(type='nms', iou_thr=0.5), - max_per_img=100, - mask_thr_binary=0.5)) -# dataset settings -dataset_type = 'CocoDataset' -data_root = 'data/coco/' -img_norm_cfg = dict( - mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) -train_pipeline = [ - dict(type='LoadImageFromFile'), - dict(type='LoadAnnotations', with_bbox=True, with_mask=True), - dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), - dict(type='RandomFlip', flip_ratio=0.5), - dict(type='Normalize', **img_norm_cfg), - dict(type='Pad', size_divisor=32), - dict(type='DefaultFormatBundle'), - dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']), -] -test_pipeline = [ - dict(type='LoadImageFromFile'), - dict( - type='MultiScaleFlipAug', - img_scale=(1333, 800), - flip=False, - transforms=[ - dict(type='Resize', keep_ratio=True), - dict(type='RandomFlip'), - dict(type='Normalize', **img_norm_cfg), - dict(type='Pad', size_divisor=32), - dict(type='ImageToTensor', keys=['img']), - dict(type='Collect', keys=['img']), - ]) -] -data = dict( - imgs_per_gpu=2, - workers_per_gpu=2, - train=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_train2017.json', - img_prefix=data_root + 'train2017/', - pipeline=train_pipeline), - val=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_val2017.json', - img_prefix=data_root + 'val2017/', - pipeline=test_pipeline), - test=dict( - type=dataset_type, 
- ann_file=data_root + 'annotations/instances_val2017.json', - img_prefix=data_root + 'val2017/', - pipeline=test_pipeline)) -evaluation = dict(interval=1, metric=['bbox', 'segm']) -# optimizer -optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001) -optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) -# learning policy -lr_config = dict( - policy='step', - warmup='linear', - warmup_iters=500, - warmup_ratio=1.0 / 3, - step=[8, 11]) -checkpoint_config = dict(interval=1) -# yapf:disable -log_config = dict( - interval=50, - hooks=[ - dict(type='TextLoggerHook'), - # dict(type='TensorboardLoggerHook') - ]) -# yapf:enable -# runtime settings -total_epochs = 12 -dist_params = dict(backend='nccl') -log_level = 'INFO' -work_dir = './work_dirs/mask_rcnn_r16_gcb_c3-c5_r50_fpn_1x' -load_from = None -resume_from = None -workflow = [('train', 1)] diff --git a/configs/gcnet/mask_rcnn_r16_gcb_c3-c5_r50_fpn_syncbn_1x.py b/configs/gcnet/mask_rcnn_r16_gcb_c3-c5_r50_fpn_syncbn_1x.py deleted file mode 100644 index acb15eb33a8c21b9d6eac549505f435ef69fe9e0..0000000000000000000000000000000000000000 --- a/configs/gcnet/mask_rcnn_r16_gcb_c3-c5_r50_fpn_syncbn_1x.py +++ /dev/null @@ -1,193 +0,0 @@ -# model settings -model = dict( - type='MaskRCNN', - pretrained='torchvision://resnet50', - backbone=dict( - type='ResNet', - depth=50, - num_stages=4, - out_indices=(0, 1, 2, 3), - frozen_stages=1, - norm_cfg=dict(type='SyncBN', requires_grad=True), - norm_eval=False, - style='pytorch', - gcb=dict(ratio=1. 
/ 16., ), - stage_with_gcb=(False, True, True, True)), - neck=dict( - type='FPN', - in_channels=[256, 512, 1024, 2048], - out_channels=256, - num_outs=5), - rpn_head=dict( - type='RPNHead', - in_channels=256, - feat_channels=256, - anchor_scales=[8], - anchor_ratios=[0.5, 1.0, 2.0], - anchor_strides=[4, 8, 16, 32, 64], - target_means=[.0, .0, .0, .0], - target_stds=[1.0, 1.0, 1.0, 1.0], - loss_cls=dict( - type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0), - loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0)), - bbox_roi_extractor=dict( - type='SingleRoIExtractor', - roi_layer=dict(type='RoIAlign', out_size=7, sample_num=2), - out_channels=256, - featmap_strides=[4, 8, 16, 32]), - bbox_head=dict( - type='SharedFCBBoxHead', - num_fcs=2, - in_channels=256, - fc_out_channels=1024, - roi_feat_size=7, - num_classes=81, - target_means=[0., 0., 0., 0.], - target_stds=[0.1, 0.1, 0.2, 0.2], - reg_class_agnostic=False, - loss_cls=dict( - type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), - loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0)), - mask_roi_extractor=dict( - type='SingleRoIExtractor', - roi_layer=dict(type='RoIAlign', out_size=14, sample_num=2), - out_channels=256, - featmap_strides=[4, 8, 16, 32]), - mask_head=dict( - type='FCNMaskHead', - num_convs=4, - in_channels=256, - conv_out_channels=256, - num_classes=81, - loss_mask=dict( - type='CrossEntropyLoss', use_mask=True, loss_weight=1.0))) -# model training and testing settings -train_cfg = dict( - rpn=dict( - assigner=dict( - type='MaxIoUAssigner', - pos_iou_thr=0.7, - neg_iou_thr=0.3, - min_pos_iou=0.3, - ignore_iof_thr=-1), - sampler=dict( - type='RandomSampler', - num=256, - pos_fraction=0.5, - neg_pos_ub=-1, - add_gt_as_proposals=False), - allowed_border=0, - pos_weight=-1, - debug=False), - rpn_proposal=dict( - nms_across_levels=False, - nms_pre=2000, - nms_post=2000, - max_num=2000, - nms_thr=0.7, - min_bbox_size=0), - rcnn=dict( - assigner=dict( - 
type='MaxIoUAssigner', - pos_iou_thr=0.5, - neg_iou_thr=0.5, - min_pos_iou=0.5, - ignore_iof_thr=-1), - sampler=dict( - type='RandomSampler', - num=512, - pos_fraction=0.25, - neg_pos_ub=-1, - add_gt_as_proposals=True), - mask_size=28, - pos_weight=-1, - debug=False)) -test_cfg = dict( - rpn=dict( - nms_across_levels=False, - nms_pre=1000, - nms_post=1000, - max_num=1000, - nms_thr=0.7, - min_bbox_size=0), - rcnn=dict( - score_thr=0.05, - nms=dict(type='nms', iou_thr=0.5), - max_per_img=100, - mask_thr_binary=0.5)) -# dataset settings -dataset_type = 'CocoDataset' -data_root = 'data/coco/' -img_norm_cfg = dict( - mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) -train_pipeline = [ - dict(type='LoadImageFromFile'), - dict(type='LoadAnnotations', with_bbox=True, with_mask=True), - dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), - dict(type='RandomFlip', flip_ratio=0.5), - dict(type='Normalize', **img_norm_cfg), - dict(type='Pad', size_divisor=32), - dict(type='DefaultFormatBundle'), - dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']), -] -test_pipeline = [ - dict(type='LoadImageFromFile'), - dict( - type='MultiScaleFlipAug', - img_scale=(1333, 800), - flip=False, - transforms=[ - dict(type='Resize', keep_ratio=True), - dict(type='RandomFlip'), - dict(type='Normalize', **img_norm_cfg), - dict(type='Pad', size_divisor=32), - dict(type='ImageToTensor', keys=['img']), - dict(type='Collect', keys=['img']), - ]) -] -data = dict( - imgs_per_gpu=2, - workers_per_gpu=2, - train=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_train2017.json', - img_prefix=data_root + 'train2017/', - pipeline=train_pipeline), - val=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_val2017.json', - img_prefix=data_root + 'val2017/', - pipeline=test_pipeline), - test=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_val2017.json', - img_prefix=data_root + 'val2017/', 
- pipeline=test_pipeline)) -evaluation = dict(interval=1, metric=['bbox', 'segm']) -# optimizer -optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001) -optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) -# learning policy -lr_config = dict( - policy='step', - warmup='linear', - warmup_iters=500, - warmup_ratio=1.0 / 3, - step=[8, 11]) -checkpoint_config = dict(interval=1) -# yapf:disable -log_config = dict( - interval=50, - hooks=[ - dict(type='TextLoggerHook'), - # dict(type='TensorboardLoggerHook') - ]) -# yapf:enable -# runtime settings -total_epochs = 12 -dist_params = dict(backend='nccl') -log_level = 'INFO' -work_dir = './work_dirs/mask_rcnn_r16_gcb_c3-c5_r50_fpn_syncbn_1x' -load_from = None -resume_from = None -workflow = [('train', 1)] diff --git a/configs/gcnet/mask_rcnn_r4_gcb_c3-c5_r50_fpn_1x.py b/configs/gcnet/mask_rcnn_r4_gcb_c3-c5_r50_fpn_1x.py deleted file mode 100644 index 41058e74a71eb510a4acc78773789f5f06d04427..0000000000000000000000000000000000000000 --- a/configs/gcnet/mask_rcnn_r4_gcb_c3-c5_r50_fpn_1x.py +++ /dev/null @@ -1,192 +0,0 @@ -# model settings -model = dict( - type='MaskRCNN', - pretrained='torchvision://resnet50', - backbone=dict( - type='ResNet', - depth=50, - num_stages=4, - out_indices=(0, 1, 2, 3), - frozen_stages=1, - norm_cfg=dict(type='BN', requires_grad=True), - style='pytorch', - gcb=dict(ratio=1. 
/ 4., ), - stage_with_gcb=(False, True, True, True)), - neck=dict( - type='FPN', - in_channels=[256, 512, 1024, 2048], - out_channels=256, - num_outs=5), - rpn_head=dict( - type='RPNHead', - in_channels=256, - feat_channels=256, - anchor_scales=[8], - anchor_ratios=[0.5, 1.0, 2.0], - anchor_strides=[4, 8, 16, 32, 64], - target_means=[.0, .0, .0, .0], - target_stds=[1.0, 1.0, 1.0, 1.0], - loss_cls=dict( - type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0), - loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0)), - bbox_roi_extractor=dict( - type='SingleRoIExtractor', - roi_layer=dict(type='RoIAlign', out_size=7, sample_num=2), - out_channels=256, - featmap_strides=[4, 8, 16, 32]), - bbox_head=dict( - type='SharedFCBBoxHead', - num_fcs=2, - in_channels=256, - fc_out_channels=1024, - roi_feat_size=7, - num_classes=81, - target_means=[0., 0., 0., 0.], - target_stds=[0.1, 0.1, 0.2, 0.2], - reg_class_agnostic=False, - loss_cls=dict( - type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), - loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0)), - mask_roi_extractor=dict( - type='SingleRoIExtractor', - roi_layer=dict(type='RoIAlign', out_size=14, sample_num=2), - out_channels=256, - featmap_strides=[4, 8, 16, 32]), - mask_head=dict( - type='FCNMaskHead', - num_convs=4, - in_channels=256, - conv_out_channels=256, - num_classes=81, - loss_mask=dict( - type='CrossEntropyLoss', use_mask=True, loss_weight=1.0))) -# model training and testing settings -train_cfg = dict( - rpn=dict( - assigner=dict( - type='MaxIoUAssigner', - pos_iou_thr=0.7, - neg_iou_thr=0.3, - min_pos_iou=0.3, - ignore_iof_thr=-1), - sampler=dict( - type='RandomSampler', - num=256, - pos_fraction=0.5, - neg_pos_ub=-1, - add_gt_as_proposals=False), - allowed_border=0, - pos_weight=-1, - debug=False), - rpn_proposal=dict( - nms_across_levels=False, - nms_pre=2000, - nms_post=2000, - max_num=2000, - nms_thr=0.7, - min_bbox_size=0), - rcnn=dict( - assigner=dict( - 
type='MaxIoUAssigner', - pos_iou_thr=0.5, - neg_iou_thr=0.5, - min_pos_iou=0.5, - ignore_iof_thr=-1), - sampler=dict( - type='RandomSampler', - num=512, - pos_fraction=0.25, - neg_pos_ub=-1, - add_gt_as_proposals=True), - mask_size=28, - pos_weight=-1, - debug=False)) -test_cfg = dict( - rpn=dict( - nms_across_levels=False, - nms_pre=1000, - nms_post=1000, - max_num=1000, - nms_thr=0.7, - min_bbox_size=0), - rcnn=dict( - score_thr=0.05, - nms=dict(type='nms', iou_thr=0.5), - max_per_img=100, - mask_thr_binary=0.5)) -# dataset settings -dataset_type = 'CocoDataset' -data_root = 'data/coco/' -img_norm_cfg = dict( - mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) -train_pipeline = [ - dict(type='LoadImageFromFile'), - dict(type='LoadAnnotations', with_bbox=True, with_mask=True), - dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), - dict(type='RandomFlip', flip_ratio=0.5), - dict(type='Normalize', **img_norm_cfg), - dict(type='Pad', size_divisor=32), - dict(type='DefaultFormatBundle'), - dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']), -] -test_pipeline = [ - dict(type='LoadImageFromFile'), - dict( - type='MultiScaleFlipAug', - img_scale=(1333, 800), - flip=False, - transforms=[ - dict(type='Resize', keep_ratio=True), - dict(type='RandomFlip'), - dict(type='Normalize', **img_norm_cfg), - dict(type='Pad', size_divisor=32), - dict(type='ImageToTensor', keys=['img']), - dict(type='Collect', keys=['img']), - ]) -] -data = dict( - imgs_per_gpu=2, - workers_per_gpu=2, - train=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_train2017.json', - img_prefix=data_root + 'train2017/', - pipeline=train_pipeline), - val=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_val2017.json', - img_prefix=data_root + 'val2017/', - pipeline=test_pipeline), - test=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_val2017.json', - img_prefix=data_root + 'val2017/', 
- pipeline=test_pipeline)) -evaluation = dict(interval=1, metric=['bbox', 'segm']) -# optimizer -optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001) -optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) -# learning policy -lr_config = dict( - policy='step', - warmup='linear', - warmup_iters=500, - warmup_ratio=1.0 / 3, - step=[8, 11]) -checkpoint_config = dict(interval=1) -# yapf:disable -log_config = dict( - interval=50, - hooks=[ - dict(type='TextLoggerHook'), - # dict(type='TensorboardLoggerHook') - ]) -# yapf:enable -# runtime settings -total_epochs = 12 -dist_params = dict(backend='nccl') -log_level = 'INFO' -work_dir = './work_dirs/mask_rcnn_r4_gcb_c3-c5_r50_fpn_1x' -load_from = None -resume_from = None -workflow = [('train', 1)] diff --git a/configs/gcnet/mask_rcnn_r4_gcb_c3-c5_r50_fpn_syncbn_1x.py b/configs/gcnet/mask_rcnn_r4_gcb_c3-c5_r50_fpn_syncbn_1x.py deleted file mode 100644 index 9e047758cf37dfeaba98713077e33e2714379c65..0000000000000000000000000000000000000000 --- a/configs/gcnet/mask_rcnn_r4_gcb_c3-c5_r50_fpn_syncbn_1x.py +++ /dev/null @@ -1,193 +0,0 @@ -# model settings -model = dict( - type='MaskRCNN', - pretrained='torchvision://resnet50', - backbone=dict( - type='ResNet', - depth=50, - num_stages=4, - out_indices=(0, 1, 2, 3), - frozen_stages=1, - norm_cfg=dict(type='SyncBN', requires_grad=True), - norm_eval=False, - style='pytorch', - gcb=dict(ratio=1. 
/ 4., ), - stage_with_gcb=(False, True, True, True)), - neck=dict( - type='FPN', - in_channels=[256, 512, 1024, 2048], - out_channels=256, - num_outs=5), - rpn_head=dict( - type='RPNHead', - in_channels=256, - feat_channels=256, - anchor_scales=[8], - anchor_ratios=[0.5, 1.0, 2.0], - anchor_strides=[4, 8, 16, 32, 64], - target_means=[.0, .0, .0, .0], - target_stds=[1.0, 1.0, 1.0, 1.0], - loss_cls=dict( - type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0), - loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0)), - bbox_roi_extractor=dict( - type='SingleRoIExtractor', - roi_layer=dict(type='RoIAlign', out_size=7, sample_num=2), - out_channels=256, - featmap_strides=[4, 8, 16, 32]), - bbox_head=dict( - type='SharedFCBBoxHead', - num_fcs=2, - in_channels=256, - fc_out_channels=1024, - roi_feat_size=7, - num_classes=81, - target_means=[0., 0., 0., 0.], - target_stds=[0.1, 0.1, 0.2, 0.2], - reg_class_agnostic=False, - loss_cls=dict( - type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), - loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0)), - mask_roi_extractor=dict( - type='SingleRoIExtractor', - roi_layer=dict(type='RoIAlign', out_size=14, sample_num=2), - out_channels=256, - featmap_strides=[4, 8, 16, 32]), - mask_head=dict( - type='FCNMaskHead', - num_convs=4, - in_channels=256, - conv_out_channels=256, - num_classes=81, - loss_mask=dict( - type='CrossEntropyLoss', use_mask=True, loss_weight=1.0))) -# model training and testing settings -train_cfg = dict( - rpn=dict( - assigner=dict( - type='MaxIoUAssigner', - pos_iou_thr=0.7, - neg_iou_thr=0.3, - min_pos_iou=0.3, - ignore_iof_thr=-1), - sampler=dict( - type='RandomSampler', - num=256, - pos_fraction=0.5, - neg_pos_ub=-1, - add_gt_as_proposals=False), - allowed_border=0, - pos_weight=-1, - debug=False), - rpn_proposal=dict( - nms_across_levels=False, - nms_pre=2000, - nms_post=2000, - max_num=2000, - nms_thr=0.7, - min_bbox_size=0), - rcnn=dict( - assigner=dict( - 
type='MaxIoUAssigner', - pos_iou_thr=0.5, - neg_iou_thr=0.5, - min_pos_iou=0.5, - ignore_iof_thr=-1), - sampler=dict( - type='RandomSampler', - num=512, - pos_fraction=0.25, - neg_pos_ub=-1, - add_gt_as_proposals=True), - mask_size=28, - pos_weight=-1, - debug=False)) -test_cfg = dict( - rpn=dict( - nms_across_levels=False, - nms_pre=1000, - nms_post=1000, - max_num=1000, - nms_thr=0.7, - min_bbox_size=0), - rcnn=dict( - score_thr=0.05, - nms=dict(type='nms', iou_thr=0.5), - max_per_img=100, - mask_thr_binary=0.5)) -# dataset settings -dataset_type = 'CocoDataset' -data_root = 'data/coco/' -img_norm_cfg = dict( - mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) -train_pipeline = [ - dict(type='LoadImageFromFile'), - dict(type='LoadAnnotations', with_bbox=True, with_mask=True), - dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), - dict(type='RandomFlip', flip_ratio=0.5), - dict(type='Normalize', **img_norm_cfg), - dict(type='Pad', size_divisor=32), - dict(type='DefaultFormatBundle'), - dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']), -] -test_pipeline = [ - dict(type='LoadImageFromFile'), - dict( - type='MultiScaleFlipAug', - img_scale=(1333, 800), - flip=False, - transforms=[ - dict(type='Resize', keep_ratio=True), - dict(type='RandomFlip'), - dict(type='Normalize', **img_norm_cfg), - dict(type='Pad', size_divisor=32), - dict(type='ImageToTensor', keys=['img']), - dict(type='Collect', keys=['img']), - ]) -] -data = dict( - imgs_per_gpu=2, - workers_per_gpu=2, - train=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_train2017.json', - img_prefix=data_root + 'train2017/', - pipeline=train_pipeline), - val=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_val2017.json', - img_prefix=data_root + 'val2017/', - pipeline=test_pipeline), - test=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_val2017.json', - img_prefix=data_root + 'val2017/', 
- pipeline=test_pipeline)) -evaluation = dict(interval=1, metric=['bbox', 'segm']) -# optimizer -optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001) -optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) -# learning policy -lr_config = dict( - policy='step', - warmup='linear', - warmup_iters=500, - warmup_ratio=1.0 / 3, - step=[8, 11]) -checkpoint_config = dict(interval=1) -# yapf:disable -log_config = dict( - interval=50, - hooks=[ - dict(type='TextLoggerHook'), - # dict(type='TensorboardLoggerHook') - ]) -# yapf:enable -# runtime settings -total_epochs = 12 -dist_params = dict(backend='nccl') -log_level = 'INFO' -work_dir = './work_dirs/mask_rcnn_r4_gcb_c3-c5_r50_fpn_syncbn_1x' -load_from = None -resume_from = None -workflow = [('train', 1)] diff --git a/configs/gcnet/mask_rcnn_r50_fpn_r16_gcb_c3-c5_1x_coco.py b/configs/gcnet/mask_rcnn_r50_fpn_r16_gcb_c3-c5_1x_coco.py new file mode 100644 index 0000000000000000000000000000000000000000..f84d93e939cc4ce742a3bdeffcd08dbb3c8bedcf --- /dev/null +++ b/configs/gcnet/mask_rcnn_r50_fpn_r16_gcb_c3-c5_1x_coco.py @@ -0,0 +1,5 @@ +_base_ = '../mask_rcnn/mask_rcnn_r50_fpn_1x_coco.py' +model = dict( + backbone=dict( + gcb=dict(ratio=1. / 16., ), stage_with_gcb=(False, True, True, True))) +work_dir = './work_dirs/mask_rcnn_r16_gcb_c3-c5_r50_fpn_1x' diff --git a/configs/gcnet/mask_rcnn_r50_fpn_r4_gcb_c3-c5_1x_coco.py b/configs/gcnet/mask_rcnn_r50_fpn_r4_gcb_c3-c5_1x_coco.py new file mode 100644 index 0000000000000000000000000000000000000000..d15ae123da5d545d80bdd4bbb5b7210516bc4011 --- /dev/null +++ b/configs/gcnet/mask_rcnn_r50_fpn_r4_gcb_c3-c5_1x_coco.py @@ -0,0 +1,5 @@ +_base_ = '../mask_rcnn/mask_rcnn_r50_fpn_1x_coco.py' +model = dict( + backbone=dict( + gcb=dict(ratio=1. 
/ 4., ), stage_with_gcb=(False, True, True, True))) +work_dir = './work_dirs/mask_rcnn_r4_gcb_c3-c5_r50_fpn_1x' diff --git a/configs/gcnet/mask_rcnn_r50_fpn_sbn_1x.py b/configs/gcnet/mask_rcnn_r50_fpn_sbn_1x.py deleted file mode 100644 index f273a3a62c9f44f8b23a38ace06b278ac7f009fe..0000000000000000000000000000000000000000 --- a/configs/gcnet/mask_rcnn_r50_fpn_sbn_1x.py +++ /dev/null @@ -1,191 +0,0 @@ -# model settings -model = dict( - type='MaskRCNN', - pretrained='torchvision://resnet50', - backbone=dict( - type='ResNet', - depth=50, - num_stages=4, - out_indices=(0, 1, 2, 3), - frozen_stages=1, - norm_cfg=dict(type='SyncBN', requires_grad=True), - norm_eval=False, - style='pytorch'), - neck=dict( - type='FPN', - in_channels=[256, 512, 1024, 2048], - out_channels=256, - num_outs=5), - rpn_head=dict( - type='RPNHead', - in_channels=256, - feat_channels=256, - anchor_scales=[8], - anchor_ratios=[0.5, 1.0, 2.0], - anchor_strides=[4, 8, 16, 32, 64], - target_means=[.0, .0, .0, .0], - target_stds=[1.0, 1.0, 1.0, 1.0], - loss_cls=dict( - type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0), - loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0)), - bbox_roi_extractor=dict( - type='SingleRoIExtractor', - roi_layer=dict(type='RoIAlign', out_size=7, sample_num=2), - out_channels=256, - featmap_strides=[4, 8, 16, 32]), - bbox_head=dict( - type='SharedFCBBoxHead', - num_fcs=2, - in_channels=256, - fc_out_channels=1024, - roi_feat_size=7, - num_classes=81, - target_means=[0., 0., 0., 0.], - target_stds=[0.1, 0.1, 0.2, 0.2], - reg_class_agnostic=False, - loss_cls=dict( - type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), - loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0)), - mask_roi_extractor=dict( - type='SingleRoIExtractor', - roi_layer=dict(type='RoIAlign', out_size=14, sample_num=2), - out_channels=256, - featmap_strides=[4, 8, 16, 32]), - mask_head=dict( - type='FCNMaskHead', - num_convs=4, - in_channels=256, - 
conv_out_channels=256, - num_classes=81, - loss_mask=dict( - type='CrossEntropyLoss', use_mask=True, loss_weight=1.0))) -# model training and testing settings -train_cfg = dict( - rpn=dict( - assigner=dict( - type='MaxIoUAssigner', - pos_iou_thr=0.7, - neg_iou_thr=0.3, - min_pos_iou=0.3, - ignore_iof_thr=-1), - sampler=dict( - type='RandomSampler', - num=256, - pos_fraction=0.5, - neg_pos_ub=-1, - add_gt_as_proposals=False), - allowed_border=0, - pos_weight=-1, - debug=False), - rpn_proposal=dict( - nms_across_levels=False, - nms_pre=2000, - nms_post=2000, - max_num=2000, - nms_thr=0.7, - min_bbox_size=0), - rcnn=dict( - assigner=dict( - type='MaxIoUAssigner', - pos_iou_thr=0.5, - neg_iou_thr=0.5, - min_pos_iou=0.5, - ignore_iof_thr=-1), - sampler=dict( - type='RandomSampler', - num=512, - pos_fraction=0.25, - neg_pos_ub=-1, - add_gt_as_proposals=True), - mask_size=28, - pos_weight=-1, - debug=False)) -test_cfg = dict( - rpn=dict( - nms_across_levels=False, - nms_pre=1000, - nms_post=1000, - max_num=1000, - nms_thr=0.7, - min_bbox_size=0), - rcnn=dict( - score_thr=0.05, - nms=dict(type='nms', iou_thr=0.5), - max_per_img=100, - mask_thr_binary=0.5)) -# dataset settings -dataset_type = 'CocoDataset' -data_root = 'data/coco/' -img_norm_cfg = dict( - mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) -train_pipeline = [ - dict(type='LoadImageFromFile'), - dict(type='LoadAnnotations', with_bbox=True, with_mask=True), - dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), - dict(type='RandomFlip', flip_ratio=0.5), - dict(type='Normalize', **img_norm_cfg), - dict(type='Pad', size_divisor=32), - dict(type='DefaultFormatBundle'), - dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']), -] -test_pipeline = [ - dict(type='LoadImageFromFile'), - dict( - type='MultiScaleFlipAug', - img_scale=(1333, 800), - flip=False, - transforms=[ - dict(type='Resize', keep_ratio=True), - dict(type='RandomFlip'), - dict(type='Normalize', 
**img_norm_cfg), - dict(type='Pad', size_divisor=32), - dict(type='ImageToTensor', keys=['img']), - dict(type='Collect', keys=['img']), - ]) -] -data = dict( - imgs_per_gpu=2, - workers_per_gpu=2, - train=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_train2017.json', - img_prefix=data_root + 'train2017/', - pipeline=train_pipeline), - val=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_val2017.json', - img_prefix=data_root + 'val2017/', - pipeline=test_pipeline), - test=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_val2017.json', - img_prefix=data_root + 'val2017/', - pipeline=test_pipeline)) -evaluation = dict(interval=1, metric=['bbox', 'segm']) -# optimizer -optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001) -optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) -# learning policy -lr_config = dict( - policy='step', - warmup='linear', - warmup_iters=500, - warmup_ratio=1.0 / 3, - step=[8, 11]) -checkpoint_config = dict(interval=1) -# yapf:disable -log_config = dict( - interval=50, - hooks=[ - dict(type='TextLoggerHook'), - # dict(type='TensorboardLoggerHook') - ]) -# yapf:enable -# runtime settings -total_epochs = 12 -dist_params = dict(backend='nccl') -log_level = 'INFO' -work_dir = './work_dirs/mask_rcnn_r50_fpn_sbn_1x' -load_from = None -resume_from = None -workflow = [('train', 1)] diff --git a/configs/gcnet/mask_rcnn_r50_fpn_syncbn-backbone_1x_coco.py b/configs/gcnet/mask_rcnn_r50_fpn_syncbn-backbone_1x_coco.py new file mode 100644 index 0000000000000000000000000000000000000000..a3f33c5254fd76445c728032830fdd40df9fedf8 --- /dev/null +++ b/configs/gcnet/mask_rcnn_r50_fpn_syncbn-backbone_1x_coco.py @@ -0,0 +1,5 @@ +_base_ = '../mask_rcnn/mask_rcnn_r50_fpn_1x_coco.py' +model = dict( + backbone=dict( + norm_cfg=dict(type='SyncBN', requires_grad=True), norm_eval=False)) +work_dir = './work_dirs/mask_rcnn_r50_fpn_sbn_1x' diff --git 
a/configs/gcnet/mask_rcnn_r50_fpn_syncbn-backbone_r16_gcb_c3-c5_1x_coco.py b/configs/gcnet/mask_rcnn_r50_fpn_syncbn-backbone_r16_gcb_c3-c5_1x_coco.py new file mode 100644 index 0000000000000000000000000000000000000000..26560197170f9ed934cb16b1ab7c6314425e65e4 --- /dev/null +++ b/configs/gcnet/mask_rcnn_r50_fpn_syncbn-backbone_r16_gcb_c3-c5_1x_coco.py @@ -0,0 +1,8 @@ +_base_ = '../mask_rcnn/mask_rcnn_r50_fpn_1x_coco.py' +model = dict( + backbone=dict( + norm_cfg=dict(type='SyncBN', requires_grad=True), + norm_eval=False, + stage_with_gcb=(False, True, True, True), + gcb=dict(ratio=1. / 16., ))) +work_dir = './work_dirs/mask_rcnn_r16_gcb_c3-c5_r50_fpn_syncbn_1x' diff --git a/configs/gcnet/mask_rcnn_r50_fpn_syncbn-backbone_r4_gcb_c3-c5_1x_coco.py b/configs/gcnet/mask_rcnn_r50_fpn_syncbn-backbone_r4_gcb_c3-c5_1x_coco.py new file mode 100644 index 0000000000000000000000000000000000000000..eb0a9bb49c59c83a5fb136004b62b7ffd5efc7e1 --- /dev/null +++ b/configs/gcnet/mask_rcnn_r50_fpn_syncbn-backbone_r4_gcb_c3-c5_1x_coco.py @@ -0,0 +1,8 @@ +_base_ = '../mask_rcnn/mask_rcnn_r50_fpn_1x_coco.py' +model = dict( + backbone=dict( + norm_cfg=dict(type='SyncBN', requires_grad=True), + norm_eval=False, + gcb=dict(ratio=1. 
/ 4., ), + stage_with_gcb=(False, True, True, True))) +work_dir = './work_dirs/mask_rcnn_r4_gcb_c3-c5_r50_fpn_syncbn_1x' diff --git a/configs/ghm/retinanet_ghm_r50_fpn_1x.py b/configs/ghm/retinanet_ghm_r50_fpn_1x.py deleted file mode 100644 index 3126a430c7fdff1a818abe12b402b978acf02c16..0000000000000000000000000000000000000000 --- a/configs/ghm/retinanet_ghm_r50_fpn_1x.py +++ /dev/null @@ -1,132 +0,0 @@ -# model settings -model = dict( - type='RetinaNet', - pretrained='torchvision://resnet50', - backbone=dict( - type='ResNet', - depth=50, - num_stages=4, - out_indices=(0, 1, 2, 3), - frozen_stages=1, - norm_cfg=dict(type='BN', requires_grad=True), - style='pytorch'), - neck=dict( - type='FPN', - in_channels=[256, 512, 1024, 2048], - out_channels=256, - start_level=1, - add_extra_convs=True, - num_outs=5), - bbox_head=dict( - type='RetinaHead', - num_classes=81, - in_channels=256, - stacked_convs=4, - feat_channels=256, - octave_base_scale=4, - scales_per_octave=3, - anchor_ratios=[0.5, 1.0, 2.0], - anchor_strides=[8, 16, 32, 64, 128], - target_means=[.0, .0, .0, .0], - target_stds=[1.0, 1.0, 1.0, 1.0], - loss_cls=dict( - type='GHMC', - bins=30, - momentum=0.75, - use_sigmoid=True, - loss_weight=1.0), - loss_bbox=dict( - type='GHMR', mu=0.02, bins=10, momentum=0.7, loss_weight=10.0))) -# training and testing settings -train_cfg = dict( - assigner=dict( - type='MaxIoUAssigner', - pos_iou_thr=0.5, - neg_iou_thr=0.4, - min_pos_iou=0, - ignore_iof_thr=-1), - allowed_border=-1, - pos_weight=-1, - debug=False) -test_cfg = dict( - nms_pre=1000, - min_bbox_size=0, - score_thr=0.05, - nms=dict(type='nms', iou_thr=0.5), - max_per_img=100) -# dataset settings -dataset_type = 'CocoDataset' -data_root = 'data/coco/' -img_norm_cfg = dict( - mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) -train_pipeline = [ - dict(type='LoadImageFromFile'), - dict(type='LoadAnnotations', with_bbox=True), - dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), - 
dict(type='RandomFlip', flip_ratio=0.5), - dict(type='Normalize', **img_norm_cfg), - dict(type='Pad', size_divisor=32), - dict(type='DefaultFormatBundle'), - dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']), -] -test_pipeline = [ - dict(type='LoadImageFromFile'), - dict( - type='MultiScaleFlipAug', - img_scale=(1333, 800), - flip=False, - transforms=[ - dict(type='Resize', keep_ratio=True), - dict(type='RandomFlip'), - dict(type='Normalize', **img_norm_cfg), - dict(type='Pad', size_divisor=32), - dict(type='ImageToTensor', keys=['img']), - dict(type='Collect', keys=['img']), - ]) -] -data = dict( - imgs_per_gpu=2, - workers_per_gpu=2, - train=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_train2017.json', - img_prefix=data_root + 'train2017/', - pipeline=train_pipeline), - val=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_val2017.json', - img_prefix=data_root + 'val2017/', - pipeline=test_pipeline), - test=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_val2017.json', - img_prefix=data_root + 'val2017/', - pipeline=test_pipeline)) -evaluation = dict(interval=1, metric='bbox') -# optimizer -optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001) -optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) -# learning policy -lr_config = dict( - policy='step', - warmup='linear', - warmup_iters=500, - warmup_ratio=1.0 / 3, - step=[8, 11]) -checkpoint_config = dict(interval=1) -# yapf:disable -log_config = dict( - interval=50, - hooks=[ - dict(type='TextLoggerHook'), - # dict(type='TensorboardLoggerHook') - ]) -# yapf:enable -# runtime settings -total_epochs = 12 -dist_params = dict(backend='nccl') -log_level = 'INFO' -work_dir = './work_dirs/ghm' -load_from = None -resume_from = None -workflow = [('train', 1)] diff --git a/configs/ghm/retinanet_ghm_r50_fpn_1x_coco.py b/configs/ghm/retinanet_ghm_r50_fpn_1x_coco.py new file mode 100644 index 
0000000000000000000000000000000000000000..42a921fdde064ccde81faa9bc8309e94acbf087a --- /dev/null +++ b/configs/ghm/retinanet_ghm_r50_fpn_1x_coco.py @@ -0,0 +1,18 @@ +_base_ = '../retinanet/retinanet_r50_fpn_1x_coco.py' +model = dict( + bbox_head=dict( + loss_cls=dict( + _delete_=True, + type='GHMC', + bins=30, + momentum=0.75, + use_sigmoid=True, + loss_weight=1.0), + loss_bbox=dict( + _delete_=True, + type='GHMR', + mu=0.02, + bins=10, + momentum=0.7, + loss_weight=10.0))) +work_dir = './work_dirs/ghm' diff --git a/configs/gn+ws/faster_rcnn_r50_fpn_gn_ws-all_1x_coco.py b/configs/gn+ws/faster_rcnn_r50_fpn_gn_ws-all_1x_coco.py new file mode 100644 index 0000000000000000000000000000000000000000..39d2480860429e2a7b7734fc41c2d2f8d750d361 --- /dev/null +++ b/configs/gn+ws/faster_rcnn_r50_fpn_gn_ws-all_1x_coco.py @@ -0,0 +1,13 @@ +_base_ = '../faster_rcnn/faster_rcnn_r50_fpn_1x_coco.py' +conv_cfg = dict(type='ConvWS') +norm_cfg = dict(type='GN', num_groups=32, requires_grad=True) +model = dict( + pretrained='open-mmlab://jhu/resnet50_gn_ws', + backbone=dict(conv_cfg=conv_cfg, norm_cfg=norm_cfg), + neck=dict(conv_cfg=conv_cfg, norm_cfg=norm_cfg), + bbox_head=dict( + type='Shared4Conv1FCBBoxHead', + conv_out_channels=256, + conv_cfg=conv_cfg, + norm_cfg=norm_cfg)) +work_dir = './work_dirs/faster_rcnn_r50_fpn_gn_ws_1x' diff --git a/configs/gn+ws/faster_rcnn_r50_fpn_gn_ws_1x.py b/configs/gn+ws/faster_rcnn_r50_fpn_gn_ws_1x.py deleted file mode 100644 index 2cd1969c56478e89e3268ecf7e6b2092f715354d..0000000000000000000000000000000000000000 --- a/configs/gn+ws/faster_rcnn_r50_fpn_gn_ws_1x.py +++ /dev/null @@ -1,182 +0,0 @@ -# model settings -conv_cfg = dict(type='ConvWS') -norm_cfg = dict(type='GN', num_groups=32, requires_grad=True) -model = dict( - type='FasterRCNN', - pretrained='open-mmlab://jhu/resnet50_gn_ws', - backbone=dict( - type='ResNet', - depth=50, - num_stages=4, - out_indices=(0, 1, 2, 3), - frozen_stages=1, - style='pytorch', - conv_cfg=conv_cfg, - 
norm_cfg=norm_cfg), - neck=dict( - type='FPN', - in_channels=[256, 512, 1024, 2048], - out_channels=256, - num_outs=5, - conv_cfg=conv_cfg, - norm_cfg=norm_cfg), - rpn_head=dict( - type='RPNHead', - in_channels=256, - feat_channels=256, - anchor_scales=[8], - anchor_ratios=[0.5, 1.0, 2.0], - anchor_strides=[4, 8, 16, 32, 64], - target_means=[.0, .0, .0, .0], - target_stds=[1.0, 1.0, 1.0, 1.0], - loss_cls=dict( - type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0), - loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0)), - bbox_roi_extractor=dict( - type='SingleRoIExtractor', - roi_layer=dict(type='RoIAlign', out_size=7, sample_num=2), - out_channels=256, - featmap_strides=[4, 8, 16, 32]), - bbox_head=dict( - type='ConvFCBBoxHead', - num_shared_convs=4, - num_shared_fcs=1, - in_channels=256, - conv_out_channels=256, - fc_out_channels=1024, - roi_feat_size=7, - num_classes=81, - target_means=[0., 0., 0., 0.], - target_stds=[0.1, 0.1, 0.2, 0.2], - reg_class_agnostic=False, - conv_cfg=conv_cfg, - norm_cfg=norm_cfg, - loss_cls=dict( - type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), - loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0))) -# model training and testing settings -train_cfg = dict( - rpn=dict( - assigner=dict( - type='MaxIoUAssigner', - pos_iou_thr=0.7, - neg_iou_thr=0.3, - min_pos_iou=0.3, - ignore_iof_thr=-1), - sampler=dict( - type='RandomSampler', - num=256, - pos_fraction=0.5, - neg_pos_ub=-1, - add_gt_as_proposals=False), - allowed_border=0, - pos_weight=-1, - debug=False), - rpn_proposal=dict( - nms_across_levels=False, - nms_pre=2000, - nms_post=2000, - max_num=2000, - nms_thr=0.7, - min_bbox_size=0), - rcnn=dict( - assigner=dict( - type='MaxIoUAssigner', - pos_iou_thr=0.5, - neg_iou_thr=0.5, - min_pos_iou=0.5, - ignore_iof_thr=-1), - sampler=dict( - type='RandomSampler', - num=512, - pos_fraction=0.25, - neg_pos_ub=-1, - add_gt_as_proposals=True), - pos_weight=-1, - debug=False)) -test_cfg = dict( - 
rpn=dict( - nms_across_levels=False, - nms_pre=1000, - nms_post=1000, - max_num=1000, - nms_thr=0.7, - min_bbox_size=0), - rcnn=dict( - score_thr=0.05, nms=dict(type='nms', iou_thr=0.5), max_per_img=100)) -# dataset settings -dataset_type = 'CocoDataset' -data_root = 'data/coco/' -img_norm_cfg = dict( - mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) -train_pipeline = [ - dict(type='LoadImageFromFile'), - dict(type='LoadAnnotations', with_bbox=True), - dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), - dict(type='RandomFlip', flip_ratio=0.5), - dict(type='Normalize', **img_norm_cfg), - dict(type='Pad', size_divisor=32), - dict(type='DefaultFormatBundle'), - dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']), -] -test_pipeline = [ - dict(type='LoadImageFromFile'), - dict( - type='MultiScaleFlipAug', - img_scale=(1333, 800), - flip=False, - transforms=[ - dict(type='Resize', keep_ratio=True), - dict(type='RandomFlip'), - dict(type='Normalize', **img_norm_cfg), - dict(type='Pad', size_divisor=32), - dict(type='ImageToTensor', keys=['img']), - dict(type='Collect', keys=['img']), - ]) -] -data = dict( - imgs_per_gpu=2, - workers_per_gpu=2, - train=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_train2017.json', - img_prefix=data_root + 'train2017/', - pipeline=train_pipeline), - val=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_val2017.json', - img_prefix=data_root + 'val2017/', - pipeline=test_pipeline), - test=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_val2017.json', - img_prefix=data_root + 'val2017/', - pipeline=test_pipeline)) -evaluation = dict(interval=1, metric='bbox') -# optimizer -optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001) -optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) -# learning policy -lr_config = dict( - policy='step', - warmup='linear', - warmup_iters=500, - warmup_ratio=1.0 / 
3, - step=[8, 11]) -checkpoint_config = dict(interval=1) -# yapf:disable -log_config = dict( - interval=50, - hooks=[ - dict(type='TextLoggerHook'), - # dict(type='TensorboardLoggerHook') - ]) -# yapf:enable -# runtime settings -total_epochs = 12 -dist_params = dict(backend='nccl') -log_level = 'INFO' -work_dir = './work_dirs/faster_rcnn_r50_fpn_gn_ws_1x' -load_from = None -resume_from = None -workflow = [('train', 1)] diff --git a/configs/gn+ws/mask_rcnn_r50_fpn_gn_ws-all_20_23_24e_coco.py b/configs/gn+ws/mask_rcnn_r50_fpn_gn_ws-all_20_23_24e_coco.py new file mode 100644 index 0000000000000000000000000000000000000000..bb241407840152fdda64907d39bad790249c980a --- /dev/null +++ b/configs/gn+ws/mask_rcnn_r50_fpn_gn_ws-all_20_23_24e_coco.py @@ -0,0 +1,5 @@ +_base_ = './mask_rcnn_r50_fpn_gn_ws-all_2x_coco.py' +# learning policy +lr_config = dict(step=[20, 23]) +total_epochs = 24 +work_dir = './work_dirs/mask_rcnn_r50_fpn_gn_ws_20_23_24e' diff --git a/configs/gn+ws/mask_rcnn_r50_fpn_gn_ws-all_2x_coco.py b/configs/gn+ws/mask_rcnn_r50_fpn_gn_ws-all_2x_coco.py new file mode 100644 index 0000000000000000000000000000000000000000..da3f400941d3a298c12b0f2c5a4fb90225cc3461 --- /dev/null +++ b/configs/gn+ws/mask_rcnn_r50_fpn_gn_ws-all_2x_coco.py @@ -0,0 +1,17 @@ +_base_ = '../mask_rcnn/mask_rcnn_r50_fpn_1x_coco.py' +conv_cfg = dict(type='ConvWS') +norm_cfg = dict(type='GN', num_groups=32, requires_grad=True) +model = dict( + pretrained='open-mmlab://jhu/resnet50_gn_ws', + backbone=dict(conv_cfg=conv_cfg, norm_cfg=norm_cfg), + neck=dict(conv_cfg=conv_cfg, norm_cfg=norm_cfg), + bbox_head=dict( + type='Shared4Conv1FCBBoxHead', + conv_out_channels=256, + conv_cfg=conv_cfg, + norm_cfg=norm_cfg), + mask_head=dict(conv_cfg=conv_cfg, norm_cfg=norm_cfg)) +# learning policy +lr_config = dict(step=[16, 22]) +total_epochs = 24 +work_dir = './work_dirs/mask_rcnn_r50_fpn_gn_ws_2x' diff --git a/configs/gn+ws/mask_rcnn_r50_fpn_gn_ws_20_23_24e.py 
b/configs/gn+ws/mask_rcnn_r50_fpn_gn_ws_20_23_24e.py deleted file mode 100644 index 5a698f269d767b47b49cfb023357b933d52f3bb1..0000000000000000000000000000000000000000 --- a/configs/gn+ws/mask_rcnn_r50_fpn_gn_ws_20_23_24e.py +++ /dev/null @@ -1,201 +0,0 @@ -# model settings -conv_cfg = dict(type='ConvWS') -norm_cfg = dict(type='GN', num_groups=32, requires_grad=True) -model = dict( - type='MaskRCNN', - pretrained='open-mmlab://jhu/resnet50_gn_ws', - backbone=dict( - type='ResNet', - depth=50, - num_stages=4, - out_indices=(0, 1, 2, 3), - frozen_stages=1, - style='pytorch', - conv_cfg=conv_cfg, - norm_cfg=norm_cfg), - neck=dict( - type='FPN', - in_channels=[256, 512, 1024, 2048], - out_channels=256, - num_outs=5, - conv_cfg=conv_cfg, - norm_cfg=norm_cfg), - rpn_head=dict( - type='RPNHead', - in_channels=256, - feat_channels=256, - anchor_scales=[8], - anchor_ratios=[0.5, 1.0, 2.0], - anchor_strides=[4, 8, 16, 32, 64], - target_means=[.0, .0, .0, .0], - target_stds=[1.0, 1.0, 1.0, 1.0], - loss_cls=dict( - type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0), - loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0)), - bbox_roi_extractor=dict( - type='SingleRoIExtractor', - roi_layer=dict(type='RoIAlign', out_size=7, sample_num=2), - out_channels=256, - featmap_strides=[4, 8, 16, 32]), - bbox_head=dict( - type='ConvFCBBoxHead', - num_shared_convs=4, - num_shared_fcs=1, - in_channels=256, - conv_out_channels=256, - fc_out_channels=1024, - roi_feat_size=7, - num_classes=81, - target_means=[0., 0., 0., 0.], - target_stds=[0.1, 0.1, 0.2, 0.2], - reg_class_agnostic=False, - conv_cfg=conv_cfg, - norm_cfg=norm_cfg, - loss_cls=dict( - type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), - loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0)), - mask_roi_extractor=dict( - type='SingleRoIExtractor', - roi_layer=dict(type='RoIAlign', out_size=14, sample_num=2), - out_channels=256, - featmap_strides=[4, 8, 16, 32]), - mask_head=dict( - 
type='FCNMaskHead', - num_convs=4, - in_channels=256, - conv_out_channels=256, - num_classes=81, - conv_cfg=conv_cfg, - norm_cfg=norm_cfg, - loss_mask=dict( - type='CrossEntropyLoss', use_mask=True, loss_weight=1.0))) -# model training and testing settings -train_cfg = dict( - rpn=dict( - assigner=dict( - type='MaxIoUAssigner', - pos_iou_thr=0.7, - neg_iou_thr=0.3, - min_pos_iou=0.3, - ignore_iof_thr=-1), - sampler=dict( - type='RandomSampler', - num=256, - pos_fraction=0.5, - neg_pos_ub=-1, - add_gt_as_proposals=False), - allowed_border=0, - pos_weight=-1, - debug=False), - rpn_proposal=dict( - nms_across_levels=False, - nms_pre=2000, - nms_post=2000, - max_num=2000, - nms_thr=0.7, - min_bbox_size=0), - rcnn=dict( - assigner=dict( - type='MaxIoUAssigner', - pos_iou_thr=0.5, - neg_iou_thr=0.5, - min_pos_iou=0.5, - ignore_iof_thr=-1), - sampler=dict( - type='RandomSampler', - num=512, - pos_fraction=0.25, - neg_pos_ub=-1, - add_gt_as_proposals=True), - mask_size=28, - pos_weight=-1, - debug=False)) -test_cfg = dict( - rpn=dict( - nms_across_levels=False, - nms_pre=1000, - nms_post=1000, - max_num=1000, - nms_thr=0.7, - min_bbox_size=0), - rcnn=dict( - score_thr=0.05, - nms=dict(type='nms', iou_thr=0.5), - max_per_img=100, - mask_thr_binary=0.5)) -# dataset settings -dataset_type = 'CocoDataset' -data_root = 'data/coco/' -img_norm_cfg = dict( - mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) -train_pipeline = [ - dict(type='LoadImageFromFile'), - dict(type='LoadAnnotations', with_bbox=True, with_mask=True), - dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), - dict(type='RandomFlip', flip_ratio=0.5), - dict(type='Normalize', **img_norm_cfg), - dict(type='Pad', size_divisor=32), - dict(type='DefaultFormatBundle'), - dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']), -] -test_pipeline = [ - dict(type='LoadImageFromFile'), - dict( - type='MultiScaleFlipAug', - img_scale=(1333, 800), - flip=False, - transforms=[ 
- dict(type='Resize', keep_ratio=True), - dict(type='RandomFlip'), - dict(type='Normalize', **img_norm_cfg), - dict(type='Pad', size_divisor=32), - dict(type='ImageToTensor', keys=['img']), - dict(type='Collect', keys=['img']), - ]) -] -data = dict( - imgs_per_gpu=2, - workers_per_gpu=2, - train=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_train2017.json', - img_prefix=data_root + 'train2017/', - pipeline=train_pipeline), - val=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_val2017.json', - img_prefix=data_root + 'val2017/', - pipeline=test_pipeline), - test=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_val2017.json', - img_prefix=data_root + 'val2017/', - pipeline=test_pipeline)) -evaluation = dict(interval=1, metric=['bbox', 'segm']) -# optimizer -optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001) -optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) -# learning policy -lr_config = dict( - policy='step', - warmup='linear', - warmup_iters=500, - warmup_ratio=1.0 / 3, - step=[20, 23]) -checkpoint_config = dict(interval=1) -# yapf:disable -log_config = dict( - interval=50, - hooks=[ - dict(type='TextLoggerHook'), - # dict(type='TensorboardLoggerHook') - ]) -# yapf:enable -# runtime settings -total_epochs = 24 -dist_params = dict(backend='nccl') -log_level = 'INFO' -work_dir = './work_dirs/mask_rcnn_r50_fpn_gn_ws_20_23_24e' -load_from = None -resume_from = None -workflow = [('train', 1)] diff --git a/configs/gn+ws/mask_rcnn_r50_fpn_gn_ws_2x.py b/configs/gn+ws/mask_rcnn_r50_fpn_gn_ws_2x.py deleted file mode 100644 index 00dc587a33b829808d241415731511649002779e..0000000000000000000000000000000000000000 --- a/configs/gn+ws/mask_rcnn_r50_fpn_gn_ws_2x.py +++ /dev/null @@ -1,201 +0,0 @@ -# model settings -conv_cfg = dict(type='ConvWS') -norm_cfg = dict(type='GN', num_groups=32, requires_grad=True) -model = dict( - type='MaskRCNN', - 
pretrained='open-mmlab://jhu/resnet50_gn_ws', - backbone=dict( - type='ResNet', - depth=50, - num_stages=4, - out_indices=(0, 1, 2, 3), - frozen_stages=1, - style='pytorch', - conv_cfg=conv_cfg, - norm_cfg=norm_cfg), - neck=dict( - type='FPN', - in_channels=[256, 512, 1024, 2048], - out_channels=256, - num_outs=5, - conv_cfg=conv_cfg, - norm_cfg=norm_cfg), - rpn_head=dict( - type='RPNHead', - in_channels=256, - feat_channels=256, - anchor_scales=[8], - anchor_ratios=[0.5, 1.0, 2.0], - anchor_strides=[4, 8, 16, 32, 64], - target_means=[.0, .0, .0, .0], - target_stds=[1.0, 1.0, 1.0, 1.0], - loss_cls=dict( - type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0), - loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0)), - bbox_roi_extractor=dict( - type='SingleRoIExtractor', - roi_layer=dict(type='RoIAlign', out_size=7, sample_num=2), - out_channels=256, - featmap_strides=[4, 8, 16, 32]), - bbox_head=dict( - type='ConvFCBBoxHead', - num_shared_convs=4, - num_shared_fcs=1, - in_channels=256, - conv_out_channels=256, - fc_out_channels=1024, - roi_feat_size=7, - num_classes=81, - target_means=[0., 0., 0., 0.], - target_stds=[0.1, 0.1, 0.2, 0.2], - reg_class_agnostic=False, - conv_cfg=conv_cfg, - norm_cfg=norm_cfg, - loss_cls=dict( - type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), - loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0)), - mask_roi_extractor=dict( - type='SingleRoIExtractor', - roi_layer=dict(type='RoIAlign', out_size=14, sample_num=2), - out_channels=256, - featmap_strides=[4, 8, 16, 32]), - mask_head=dict( - type='FCNMaskHead', - num_convs=4, - in_channels=256, - conv_out_channels=256, - num_classes=81, - conv_cfg=conv_cfg, - norm_cfg=norm_cfg, - loss_mask=dict( - type='CrossEntropyLoss', use_mask=True, loss_weight=1.0))) -# model training and testing settings -train_cfg = dict( - rpn=dict( - assigner=dict( - type='MaxIoUAssigner', - pos_iou_thr=0.7, - neg_iou_thr=0.3, - min_pos_iou=0.3, - ignore_iof_thr=-1), - 
sampler=dict( - type='RandomSampler', - num=256, - pos_fraction=0.5, - neg_pos_ub=-1, - add_gt_as_proposals=False), - allowed_border=0, - pos_weight=-1, - debug=False), - rpn_proposal=dict( - nms_across_levels=False, - nms_pre=2000, - nms_post=2000, - max_num=2000, - nms_thr=0.7, - min_bbox_size=0), - rcnn=dict( - assigner=dict( - type='MaxIoUAssigner', - pos_iou_thr=0.5, - neg_iou_thr=0.5, - min_pos_iou=0.5, - ignore_iof_thr=-1), - sampler=dict( - type='RandomSampler', - num=512, - pos_fraction=0.25, - neg_pos_ub=-1, - add_gt_as_proposals=True), - mask_size=28, - pos_weight=-1, - debug=False)) -test_cfg = dict( - rpn=dict( - nms_across_levels=False, - nms_pre=1000, - nms_post=1000, - max_num=1000, - nms_thr=0.7, - min_bbox_size=0), - rcnn=dict( - score_thr=0.05, - nms=dict(type='nms', iou_thr=0.5), - max_per_img=100, - mask_thr_binary=0.5)) -# dataset settings -dataset_type = 'CocoDataset' -data_root = 'data/coco/' -img_norm_cfg = dict( - mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) -train_pipeline = [ - dict(type='LoadImageFromFile'), - dict(type='LoadAnnotations', with_bbox=True, with_mask=True), - dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), - dict(type='RandomFlip', flip_ratio=0.5), - dict(type='Normalize', **img_norm_cfg), - dict(type='Pad', size_divisor=32), - dict(type='DefaultFormatBundle'), - dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']), -] -test_pipeline = [ - dict(type='LoadImageFromFile'), - dict( - type='MultiScaleFlipAug', - img_scale=(1333, 800), - flip=False, - transforms=[ - dict(type='Resize', keep_ratio=True), - dict(type='RandomFlip'), - dict(type='Normalize', **img_norm_cfg), - dict(type='Pad', size_divisor=32), - dict(type='ImageToTensor', keys=['img']), - dict(type='Collect', keys=['img']), - ]) -] -data = dict( - imgs_per_gpu=2, - workers_per_gpu=2, - train=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_train2017.json', - img_prefix=data_root 
+ 'train2017/', - pipeline=train_pipeline), - val=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_val2017.json', - img_prefix=data_root + 'val2017/', - pipeline=test_pipeline), - test=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_val2017.json', - img_prefix=data_root + 'val2017/', - pipeline=test_pipeline)) -evaluation = dict(interval=1, metric=['bbox', 'segm']) -# optimizer -optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001) -optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) -# learning policy -lr_config = dict( - policy='step', - warmup='linear', - warmup_iters=500, - warmup_ratio=1.0 / 3, - step=[16, 22]) -checkpoint_config = dict(interval=1) -# yapf:disable -log_config = dict( - interval=50, - hooks=[ - dict(type='TextLoggerHook'), - # dict(type='TensorboardLoggerHook') - ]) -# yapf:enable -# runtime settings -total_epochs = 24 -dist_params = dict(backend='nccl') -log_level = 'INFO' -work_dir = './work_dirs/mask_rcnn_r50_fpn_gn_ws_2x' -load_from = None -resume_from = None -workflow = [('train', 1)] diff --git a/configs/gn+ws/mask_rcnn_x101_32x4d_fpn_gn_ws-all_2x_coco.py b/configs/gn+ws/mask_rcnn_x101_32x4d_fpn_gn_ws-all_2x_coco.py new file mode 100644 index 0000000000000000000000000000000000000000..40c067f2db725ecadfb550b614a4f311b8ac34e9 --- /dev/null +++ b/configs/gn+ws/mask_rcnn_x101_32x4d_fpn_gn_ws-all_2x_coco.py @@ -0,0 +1,18 @@ +_base_ = './mask_rcnn_r50_fpn_gn_ws-all_2x_coco.py' +# model settings +conv_cfg = dict(type='ConvWS') +norm_cfg = dict(type='GN', num_groups=32, requires_grad=True) +model = dict( + pretrained='open-mmlab://jhu/resnext101_32x4d_gn_ws', + backbone=dict( + type='ResNeXt', + depth=101, + groups=32, + base_width=4, + num_stages=4, + out_indices=(0, 1, 2, 3), + frozen_stages=1, + style='pytorch', + conv_cfg=conv_cfg, + norm_cfg=norm_cfg)) +work_dir = './work_dirs/mask_rcnn_x101_32x4d_fpn_gn_ws_2x' diff --git 
a/configs/gn+ws/mask_rcnn_x101_32x4d_fpn_gn_ws_2x.py b/configs/gn+ws/mask_rcnn_x101_32x4d_fpn_gn_ws_2x.py deleted file mode 100644 index 11077506df6909bf914eb8eb84c3d512cc6a83ec..0000000000000000000000000000000000000000 --- a/configs/gn+ws/mask_rcnn_x101_32x4d_fpn_gn_ws_2x.py +++ /dev/null @@ -1,203 +0,0 @@ -# model settings -conv_cfg = dict(type='ConvWS') -norm_cfg = dict(type='GN', num_groups=32, requires_grad=True) -model = dict( - type='MaskRCNN', - pretrained='open-mmlab://jhu/resnext101_32x4d_gn_ws', - backbone=dict( - type='ResNeXt', - depth=101, - groups=32, - base_width=4, - num_stages=4, - out_indices=(0, 1, 2, 3), - frozen_stages=1, - style='pytorch', - conv_cfg=conv_cfg, - norm_cfg=norm_cfg), - neck=dict( - type='FPN', - in_channels=[256, 512, 1024, 2048], - out_channels=256, - num_outs=5, - conv_cfg=conv_cfg, - norm_cfg=norm_cfg), - rpn_head=dict( - type='RPNHead', - in_channels=256, - feat_channels=256, - anchor_scales=[8], - anchor_ratios=[0.5, 1.0, 2.0], - anchor_strides=[4, 8, 16, 32, 64], - target_means=[.0, .0, .0, .0], - target_stds=[1.0, 1.0, 1.0, 1.0], - loss_cls=dict( - type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0), - loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0)), - bbox_roi_extractor=dict( - type='SingleRoIExtractor', - roi_layer=dict(type='RoIAlign', out_size=7, sample_num=2), - out_channels=256, - featmap_strides=[4, 8, 16, 32]), - bbox_head=dict( - type='ConvFCBBoxHead', - num_shared_convs=4, - num_shared_fcs=1, - in_channels=256, - conv_out_channels=256, - fc_out_channels=1024, - roi_feat_size=7, - num_classes=81, - target_means=[0., 0., 0., 0.], - target_stds=[0.1, 0.1, 0.2, 0.2], - reg_class_agnostic=False, - conv_cfg=conv_cfg, - norm_cfg=norm_cfg, - loss_cls=dict( - type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), - loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0)), - mask_roi_extractor=dict( - type='SingleRoIExtractor', - roi_layer=dict(type='RoIAlign', out_size=14, 
sample_num=2), - out_channels=256, - featmap_strides=[4, 8, 16, 32]), - mask_head=dict( - type='FCNMaskHead', - num_convs=4, - in_channels=256, - conv_out_channels=256, - num_classes=81, - conv_cfg=conv_cfg, - norm_cfg=norm_cfg, - loss_mask=dict( - type='CrossEntropyLoss', use_mask=True, loss_weight=1.0))) -# model training and testing settings -train_cfg = dict( - rpn=dict( - assigner=dict( - type='MaxIoUAssigner', - pos_iou_thr=0.7, - neg_iou_thr=0.3, - min_pos_iou=0.3, - ignore_iof_thr=-1), - sampler=dict( - type='RandomSampler', - num=256, - pos_fraction=0.5, - neg_pos_ub=-1, - add_gt_as_proposals=False), - allowed_border=0, - pos_weight=-1, - debug=False), - rpn_proposal=dict( - nms_across_levels=False, - nms_pre=2000, - nms_post=2000, - max_num=2000, - nms_thr=0.7, - min_bbox_size=0), - rcnn=dict( - assigner=dict( - type='MaxIoUAssigner', - pos_iou_thr=0.5, - neg_iou_thr=0.5, - min_pos_iou=0.5, - ignore_iof_thr=-1), - sampler=dict( - type='RandomSampler', - num=512, - pos_fraction=0.25, - neg_pos_ub=-1, - add_gt_as_proposals=True), - mask_size=28, - pos_weight=-1, - debug=False)) -test_cfg = dict( - rpn=dict( - nms_across_levels=False, - nms_pre=1000, - nms_post=1000, - max_num=1000, - nms_thr=0.7, - min_bbox_size=0), - rcnn=dict( - score_thr=0.05, - nms=dict(type='nms', iou_thr=0.5), - max_per_img=100, - mask_thr_binary=0.5)) -# dataset settings -dataset_type = 'CocoDataset' -data_root = 'data/coco/' -img_norm_cfg = dict( - mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) -train_pipeline = [ - dict(type='LoadImageFromFile'), - dict(type='LoadAnnotations', with_bbox=True, with_mask=True), - dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), - dict(type='RandomFlip', flip_ratio=0.5), - dict(type='Normalize', **img_norm_cfg), - dict(type='Pad', size_divisor=32), - dict(type='DefaultFormatBundle'), - dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']), -] -test_pipeline = [ - dict(type='LoadImageFromFile'), 
- dict( - type='MultiScaleFlipAug', - img_scale=(1333, 800), - flip=False, - transforms=[ - dict(type='Resize', keep_ratio=True), - dict(type='RandomFlip'), - dict(type='Normalize', **img_norm_cfg), - dict(type='Pad', size_divisor=32), - dict(type='ImageToTensor', keys=['img']), - dict(type='Collect', keys=['img']), - ]) -] -data = dict( - imgs_per_gpu=2, - workers_per_gpu=2, - train=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_train2017.json', - img_prefix=data_root + 'train2017/', - pipeline=train_pipeline), - val=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_val2017.json', - img_prefix=data_root + 'val2017/', - pipeline=test_pipeline), - test=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_val2017.json', - img_prefix=data_root + 'val2017/', - pipeline=test_pipeline)) -evaluation = dict(interval=1, metric=['bbox', 'segm']) -# optimizer -optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001) -optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) -# learning policy -lr_config = dict( - policy='step', - warmup='linear', - warmup_iters=500, - warmup_ratio=1.0 / 3, - step=[16, 22]) -checkpoint_config = dict(interval=1) -# yapf:disable -log_config = dict( - interval=50, - hooks=[ - dict(type='TextLoggerHook'), - # dict(type='TensorboardLoggerHook') - ]) -# yapf:enable -# runtime settings -total_epochs = 24 -dist_params = dict(backend='nccl') -log_level = 'INFO' -work_dir = './work_dirs/mask_rcnn_x101_32x4d_fpn_gn_ws_2x' -load_from = None -resume_from = None -workflow = [('train', 1)] diff --git a/configs/gn/mask_rcnn_r101_fpn_gn-all_2x_coco.py b/configs/gn/mask_rcnn_r101_fpn_gn-all_2x_coco.py new file mode 100644 index 0000000000000000000000000000000000000000..0ea44d2eb09675b4dc47ffa6fde370382cbc8e88 --- /dev/null +++ b/configs/gn/mask_rcnn_r101_fpn_gn-all_2x_coco.py @@ -0,0 +1,4 @@ +_base_ = './mask_rcnn_r50_fpn_gn-all_2x_coco.py' +model = dict( + 
pretrained='open-mmlab://detectron/resnet101_gn', backbone=dict(depth=101)) +work_dir = './work_dirs/mask_rcnn_r101_fpn_gn_2x' diff --git a/configs/gn/mask_rcnn_r101_fpn_gn_2x.py b/configs/gn/mask_rcnn_r101_fpn_gn_2x.py deleted file mode 100644 index d13670960ee48dd44eaa45102aa5bfd77e355fb8..0000000000000000000000000000000000000000 --- a/configs/gn/mask_rcnn_r101_fpn_gn_2x.py +++ /dev/null @@ -1,197 +0,0 @@ -# model settings -norm_cfg = dict(type='GN', num_groups=32, requires_grad=True) - -model = dict( - type='MaskRCNN', - pretrained='open-mmlab://detectron/resnet101_gn', - backbone=dict( - type='ResNet', - depth=101, - num_stages=4, - out_indices=(0, 1, 2, 3), - frozen_stages=1, - style='pytorch', - norm_cfg=norm_cfg), - neck=dict( - type='FPN', - in_channels=[256, 512, 1024, 2048], - out_channels=256, - num_outs=5, - norm_cfg=norm_cfg), - rpn_head=dict( - type='RPNHead', - in_channels=256, - feat_channels=256, - anchor_scales=[8], - anchor_ratios=[0.5, 1.0, 2.0], - anchor_strides=[4, 8, 16, 32, 64], - target_means=[.0, .0, .0, .0], - target_stds=[1.0, 1.0, 1.0, 1.0], - loss_cls=dict( - type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0), - loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0)), - bbox_roi_extractor=dict( - type='SingleRoIExtractor', - roi_layer=dict(type='RoIAlign', out_size=7, sample_num=2), - out_channels=256, - featmap_strides=[4, 8, 16, 32]), - bbox_head=dict( - type='ConvFCBBoxHead', - num_shared_convs=4, - num_shared_fcs=1, - in_channels=256, - conv_out_channels=256, - fc_out_channels=1024, - roi_feat_size=7, - num_classes=81, - target_means=[0., 0., 0., 0.], - target_stds=[0.1, 0.1, 0.2, 0.2], - reg_class_agnostic=False, - norm_cfg=norm_cfg, - loss_cls=dict( - type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), - loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0)), - mask_roi_extractor=dict( - type='SingleRoIExtractor', - roi_layer=dict(type='RoIAlign', out_size=14, sample_num=2), - 
out_channels=256, - featmap_strides=[4, 8, 16, 32]), - mask_head=dict( - type='FCNMaskHead', - num_convs=4, - in_channels=256, - conv_out_channels=256, - num_classes=81, - norm_cfg=norm_cfg, - loss_mask=dict( - type='CrossEntropyLoss', use_mask=True, loss_weight=1.0))) -# model training and testing settings -train_cfg = dict( - rpn=dict( - assigner=dict( - type='MaxIoUAssigner', - pos_iou_thr=0.7, - neg_iou_thr=0.3, - min_pos_iou=0.3, - ignore_iof_thr=-1), - sampler=dict( - type='RandomSampler', - num=256, - pos_fraction=0.5, - neg_pos_ub=-1, - add_gt_as_proposals=False), - allowed_border=0, - pos_weight=-1, - debug=False), - rpn_proposal=dict( - nms_across_levels=False, - nms_pre=2000, - nms_post=2000, - max_num=2000, - nms_thr=0.7, - min_bbox_size=0), - rcnn=dict( - assigner=dict( - type='MaxIoUAssigner', - pos_iou_thr=0.5, - neg_iou_thr=0.5, - min_pos_iou=0.5, - ignore_iof_thr=-1), - sampler=dict( - type='RandomSampler', - num=512, - pos_fraction=0.25, - neg_pos_ub=-1, - add_gt_as_proposals=True), - mask_size=28, - pos_weight=-1, - debug=False)) -test_cfg = dict( - rpn=dict( - nms_across_levels=False, - nms_pre=1000, - nms_post=1000, - max_num=1000, - nms_thr=0.7, - min_bbox_size=0), - rcnn=dict( - score_thr=0.05, - nms=dict(type='nms', iou_thr=0.5), - max_per_img=100, - mask_thr_binary=0.5)) -# dataset settings -dataset_type = 'CocoDataset' -data_root = 'data/coco/' -img_norm_cfg = dict( - mean=[102.9801, 115.9465, 122.7717], std=[1.0, 1.0, 1.0], to_rgb=False) -train_pipeline = [ - dict(type='LoadImageFromFile'), - dict(type='LoadAnnotations', with_bbox=True, with_mask=True), - dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), - dict(type='RandomFlip', flip_ratio=0.5), - dict(type='Normalize', **img_norm_cfg), - dict(type='Pad', size_divisor=32), - dict(type='DefaultFormatBundle'), - dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']), -] -test_pipeline = [ - dict(type='LoadImageFromFile'), - dict( - type='MultiScaleFlipAug', - 
img_scale=(1333, 800), - flip=False, - transforms=[ - dict(type='Resize', keep_ratio=True), - dict(type='RandomFlip'), - dict(type='Normalize', **img_norm_cfg), - dict(type='Pad', size_divisor=32), - dict(type='ImageToTensor', keys=['img']), - dict(type='Collect', keys=['img']), - ]) -] -data = dict( - imgs_per_gpu=2, - workers_per_gpu=2, - train=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_train2017.json', - img_prefix=data_root + 'train2017/', - pipeline=train_pipeline), - val=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_val2017.json', - img_prefix=data_root + 'val2017/', - pipeline=test_pipeline), - test=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_val2017.json', - img_prefix=data_root + 'val2017/', - pipeline=test_pipeline)) -evaluation = dict(interval=1, metric=['bbox', 'segm']) -# optimizer -optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001) -optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) -# learning policy -lr_config = dict( - policy='step', - warmup='linear', - warmup_iters=500, - warmup_ratio=1.0 / 3, - step=[16, 22]) -checkpoint_config = dict(interval=1) -# yapf:disable -log_config = dict( - interval=50, - hooks=[ - dict(type='TextLoggerHook'), - # dict(type='TensorboardLoggerHook') - ]) -# yapf:enable -# runtime settings -total_epochs = 24 -dist_params = dict(backend='nccl') -log_level = 'INFO' -work_dir = './work_dirs/mask_rcnn_r101_fpn_gn_2x' -load_from = None -resume_from = None -workflow = [('train', 1)] diff --git a/configs/gn/mask_rcnn_r50_fpn_gn-all_2x_coco.py b/configs/gn/mask_rcnn_r50_fpn_gn-all_2x_coco.py new file mode 100644 index 0000000000000000000000000000000000000000..6fd8b4c90066addb2d7bd3d5565bf953ca68a091 --- /dev/null +++ b/configs/gn/mask_rcnn_r50_fpn_gn-all_2x_coco.py @@ -0,0 +1,46 @@ +_base_ = '../mask_rcnn/mask_rcnn_r50_fpn_1x_coco.py' +norm_cfg = dict(type='GN', num_groups=32, requires_grad=True) +model = 
dict( + pretrained='open-mmlab://detectron/resnet50_gn', + backbone=dict(norm_cfg=norm_cfg), + neck=dict(norm_cfg=norm_cfg), + bbox_head=dict( + type='Shared4Conv1FCBBoxHead', + conv_out_channels=256, + norm_cfg=norm_cfg), + mask_head=dict(norm_cfg=norm_cfg)) +img_norm_cfg = dict( + mean=[102.9801, 115.9465, 122.7717], std=[1.0, 1.0, 1.0], to_rgb=False) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations', with_bbox=True, with_mask=True), + dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(1333, 800), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] +data = dict( + train=dict(pipeline=train_pipeline), + val=dict(pipeline=test_pipeline), + test=dict(pipeline=test_pipeline)) +# learning policy +lr_config = dict(step=[16, 22]) +total_epochs = 24 +work_dir = './work_dirs/mask_rcnn_r50_fpn_gn_2x' diff --git a/configs/gn/mask_rcnn_r50_fpn_gn-all_contrib_2x_coco.py b/configs/gn/mask_rcnn_r50_fpn_gn-all_contrib_2x_coco.py new file mode 100644 index 0000000000000000000000000000000000000000..e01585675f003716ae0df59cff958532bdc20926 --- /dev/null +++ b/configs/gn/mask_rcnn_r50_fpn_gn-all_contrib_2x_coco.py @@ -0,0 +1,15 @@ +_base_ = '../mask_rcnn/mask_rcnn_r50_fpn_1x_coco.py' +norm_cfg = dict(type='GN', num_groups=32, requires_grad=True) +model = dict( + pretrained='open-mmlab://contrib/resnet50_gn', + backbone=dict(norm_cfg=norm_cfg), + neck=dict(norm_cfg=norm_cfg), + bbox_head=dict( + 
type='Shared4Conv1FCBBoxHead', + conv_out_channels=256, + norm_cfg=norm_cfg), + mask_head=dict(norm_cfg=norm_cfg)) +# learning policy +lr_config = dict(step=[16, 22]) +total_epochs = 24 +work_dir = './work_dirs/mask_rcnn_r50_fpn_gn_contrib_2x' diff --git a/configs/gn/mask_rcnn_r50_fpn_gn_2x.py b/configs/gn/mask_rcnn_r50_fpn_gn_2x.py deleted file mode 100644 index e0e09b5687b884200f1f9315c737c9b124505e81..0000000000000000000000000000000000000000 --- a/configs/gn/mask_rcnn_r50_fpn_gn_2x.py +++ /dev/null @@ -1,197 +0,0 @@ -# model settings -norm_cfg = dict(type='GN', num_groups=32, requires_grad=True) - -model = dict( - type='MaskRCNN', - pretrained='open-mmlab://detectron/resnet50_gn', - backbone=dict( - type='ResNet', - depth=50, - num_stages=4, - out_indices=(0, 1, 2, 3), - frozen_stages=1, - style='pytorch', - norm_cfg=norm_cfg), - neck=dict( - type='FPN', - in_channels=[256, 512, 1024, 2048], - out_channels=256, - num_outs=5, - norm_cfg=norm_cfg), - rpn_head=dict( - type='RPNHead', - in_channels=256, - feat_channels=256, - anchor_scales=[8], - anchor_ratios=[0.5, 1.0, 2.0], - anchor_strides=[4, 8, 16, 32, 64], - target_means=[.0, .0, .0, .0], - target_stds=[1.0, 1.0, 1.0, 1.0], - loss_cls=dict( - type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0), - loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0)), - bbox_roi_extractor=dict( - type='SingleRoIExtractor', - roi_layer=dict(type='RoIAlign', out_size=7, sample_num=2), - out_channels=256, - featmap_strides=[4, 8, 16, 32]), - bbox_head=dict( - type='ConvFCBBoxHead', - num_shared_convs=4, - num_shared_fcs=1, - in_channels=256, - conv_out_channels=256, - fc_out_channels=1024, - roi_feat_size=7, - num_classes=81, - target_means=[0., 0., 0., 0.], - target_stds=[0.1, 0.1, 0.2, 0.2], - reg_class_agnostic=False, - norm_cfg=norm_cfg, - loss_cls=dict( - type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), - loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0)), - 
mask_roi_extractor=dict( - type='SingleRoIExtractor', - roi_layer=dict(type='RoIAlign', out_size=14, sample_num=2), - out_channels=256, - featmap_strides=[4, 8, 16, 32]), - mask_head=dict( - type='FCNMaskHead', - num_convs=4, - in_channels=256, - conv_out_channels=256, - num_classes=81, - norm_cfg=norm_cfg, - loss_mask=dict( - type='CrossEntropyLoss', use_mask=True, loss_weight=1.0))) -# model training and testing settings -train_cfg = dict( - rpn=dict( - assigner=dict( - type='MaxIoUAssigner', - pos_iou_thr=0.7, - neg_iou_thr=0.3, - min_pos_iou=0.3, - ignore_iof_thr=-1), - sampler=dict( - type='RandomSampler', - num=256, - pos_fraction=0.5, - neg_pos_ub=-1, - add_gt_as_proposals=False), - allowed_border=0, - pos_weight=-1, - debug=False), - rpn_proposal=dict( - nms_across_levels=False, - nms_pre=2000, - nms_post=2000, - max_num=2000, - nms_thr=0.7, - min_bbox_size=0), - rcnn=dict( - assigner=dict( - type='MaxIoUAssigner', - pos_iou_thr=0.5, - neg_iou_thr=0.5, - min_pos_iou=0.5, - ignore_iof_thr=-1), - sampler=dict( - type='RandomSampler', - num=512, - pos_fraction=0.25, - neg_pos_ub=-1, - add_gt_as_proposals=True), - mask_size=28, - pos_weight=-1, - debug=False)) -test_cfg = dict( - rpn=dict( - nms_across_levels=False, - nms_pre=1000, - nms_post=1000, - max_num=1000, - nms_thr=0.7, - min_bbox_size=0), - rcnn=dict( - score_thr=0.05, - nms=dict(type='nms', iou_thr=0.5), - max_per_img=100, - mask_thr_binary=0.5)) -# dataset settings -dataset_type = 'CocoDataset' -data_root = 'data/coco/' -img_norm_cfg = dict( - mean=[102.9801, 115.9465, 122.7717], std=[1.0, 1.0, 1.0], to_rgb=False) -train_pipeline = [ - dict(type='LoadImageFromFile'), - dict(type='LoadAnnotations', with_bbox=True, with_mask=True), - dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), - dict(type='RandomFlip', flip_ratio=0.5), - dict(type='Normalize', **img_norm_cfg), - dict(type='Pad', size_divisor=32), - dict(type='DefaultFormatBundle'), - dict(type='Collect', keys=['img', 'gt_bboxes', 
'gt_labels', 'gt_masks']), -] -test_pipeline = [ - dict(type='LoadImageFromFile'), - dict( - type='MultiScaleFlipAug', - img_scale=(1333, 800), - flip=False, - transforms=[ - dict(type='Resize', keep_ratio=True), - dict(type='RandomFlip'), - dict(type='Normalize', **img_norm_cfg), - dict(type='Pad', size_divisor=32), - dict(type='ImageToTensor', keys=['img']), - dict(type='Collect', keys=['img']), - ]) -] -data = dict( - imgs_per_gpu=2, - workers_per_gpu=2, - train=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_train2017.json', - img_prefix=data_root + 'train2017/', - pipeline=train_pipeline), - val=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_val2017.json', - img_prefix=data_root + 'val2017/', - pipeline=test_pipeline), - test=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_val2017.json', - img_prefix=data_root + 'val2017/', - pipeline=test_pipeline)) -evaluation = dict(interval=1, metric=['bbox', 'segm']) -# optimizer -optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001) -optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) -# learning policy -lr_config = dict( - policy='step', - warmup='linear', - warmup_iters=500, - warmup_ratio=1.0 / 3, - step=[16, 22]) -checkpoint_config = dict(interval=1) -# yapf:disable -log_config = dict( - interval=50, - hooks=[ - dict(type='TextLoggerHook'), - # dict(type='TensorboardLoggerHook') - ]) -# yapf:enable -# runtime settings -total_epochs = 24 -dist_params = dict(backend='nccl') -log_level = 'INFO' -work_dir = './work_dirs/mask_rcnn_r50_fpn_gn_2x' -load_from = None -resume_from = None -workflow = [('train', 1)] diff --git a/configs/gn/mask_rcnn_r50_fpn_gn_contrib_2x.py b/configs/gn/mask_rcnn_r50_fpn_gn_contrib_2x.py deleted file mode 100644 index 271f6d25d12c43329df74dc02cd5b38dee560058..0000000000000000000000000000000000000000 --- a/configs/gn/mask_rcnn_r50_fpn_gn_contrib_2x.py +++ /dev/null @@ -1,197 +0,0 @@ -# 
model settings -norm_cfg = dict(type='GN', num_groups=32, requires_grad=True) - -model = dict( - type='MaskRCNN', - pretrained='open-mmlab://contrib/resnet50_gn', - backbone=dict( - type='ResNet', - depth=50, - num_stages=4, - out_indices=(0, 1, 2, 3), - frozen_stages=1, - style='pytorch', - norm_cfg=norm_cfg), - neck=dict( - type='FPN', - in_channels=[256, 512, 1024, 2048], - out_channels=256, - num_outs=5, - norm_cfg=norm_cfg), - rpn_head=dict( - type='RPNHead', - in_channels=256, - feat_channels=256, - anchor_scales=[8], - anchor_ratios=[0.5, 1.0, 2.0], - anchor_strides=[4, 8, 16, 32, 64], - target_means=[.0, .0, .0, .0], - target_stds=[1.0, 1.0, 1.0, 1.0], - loss_cls=dict( - type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0), - loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0)), - bbox_roi_extractor=dict( - type='SingleRoIExtractor', - roi_layer=dict(type='RoIAlign', out_size=7, sample_num=2), - out_channels=256, - featmap_strides=[4, 8, 16, 32]), - bbox_head=dict( - type='ConvFCBBoxHead', - num_shared_convs=4, - num_shared_fcs=1, - in_channels=256, - conv_out_channels=256, - fc_out_channels=1024, - roi_feat_size=7, - num_classes=81, - target_means=[0., 0., 0., 0.], - target_stds=[0.1, 0.1, 0.2, 0.2], - reg_class_agnostic=False, - norm_cfg=norm_cfg, - loss_cls=dict( - type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), - loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0)), - mask_roi_extractor=dict( - type='SingleRoIExtractor', - roi_layer=dict(type='RoIAlign', out_size=14, sample_num=2), - out_channels=256, - featmap_strides=[4, 8, 16, 32]), - mask_head=dict( - type='FCNMaskHead', - num_convs=4, - in_channels=256, - conv_out_channels=256, - num_classes=81, - norm_cfg=norm_cfg, - loss_mask=dict( - type='CrossEntropyLoss', use_mask=True, loss_weight=1.0))) -# model training and testing settings -train_cfg = dict( - rpn=dict( - assigner=dict( - type='MaxIoUAssigner', - pos_iou_thr=0.7, - neg_iou_thr=0.3, - 
min_pos_iou=0.3, - ignore_iof_thr=-1), - sampler=dict( - type='RandomSampler', - num=256, - pos_fraction=0.5, - neg_pos_ub=-1, - add_gt_as_proposals=False), - allowed_border=0, - pos_weight=-1, - debug=False), - rpn_proposal=dict( - nms_across_levels=False, - nms_pre=2000, - nms_post=2000, - max_num=2000, - nms_thr=0.7, - min_bbox_size=0), - rcnn=dict( - assigner=dict( - type='MaxIoUAssigner', - pos_iou_thr=0.5, - neg_iou_thr=0.5, - min_pos_iou=0.5, - ignore_iof_thr=-1), - sampler=dict( - type='RandomSampler', - num=512, - pos_fraction=0.25, - neg_pos_ub=-1, - add_gt_as_proposals=True), - mask_size=28, - pos_weight=-1, - debug=False)) -test_cfg = dict( - rpn=dict( - nms_across_levels=False, - nms_pre=1000, - nms_post=1000, - max_num=1000, - nms_thr=0.7, - min_bbox_size=0), - rcnn=dict( - score_thr=0.05, - nms=dict(type='nms', iou_thr=0.5), - max_per_img=100, - mask_thr_binary=0.5)) -# dataset settings -dataset_type = 'CocoDataset' -data_root = 'data/coco/' -img_norm_cfg = dict( - mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) -train_pipeline = [ - dict(type='LoadImageFromFile'), - dict(type='LoadAnnotations', with_bbox=True, with_mask=True), - dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), - dict(type='RandomFlip', flip_ratio=0.5), - dict(type='Normalize', **img_norm_cfg), - dict(type='Pad', size_divisor=32), - dict(type='DefaultFormatBundle'), - dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']), -] -test_pipeline = [ - dict(type='LoadImageFromFile'), - dict( - type='MultiScaleFlipAug', - img_scale=(1333, 800), - flip=False, - transforms=[ - dict(type='Resize', keep_ratio=True), - dict(type='RandomFlip'), - dict(type='Normalize', **img_norm_cfg), - dict(type='Pad', size_divisor=32), - dict(type='ImageToTensor', keys=['img']), - dict(type='Collect', keys=['img']), - ]) -] -data = dict( - imgs_per_gpu=2, - workers_per_gpu=2, - train=dict( - type=dataset_type, - ann_file=data_root + 
'annotations/instances_train2017.json', - img_prefix=data_root + 'train2017/', - pipeline=train_pipeline), - val=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_val2017.json', - img_prefix=data_root + 'val2017/', - pipeline=test_pipeline), - test=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_val2017.json', - img_prefix=data_root + 'val2017/', - pipeline=test_pipeline)) -evaluation = dict(interval=1, metric=['bbox', 'segm']) -# optimizer -optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001) -optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) -# learning policy -lr_config = dict( - policy='step', - warmup='linear', - warmup_iters=500, - warmup_ratio=1.0 / 3, - step=[16, 22]) -checkpoint_config = dict(interval=1) -# yapf:disable -log_config = dict( - interval=50, - hooks=[ - dict(type='TextLoggerHook'), - # dict(type='TensorboardLoggerHook') - ]) -# yapf:enable -# runtime settings -total_epochs = 24 -dist_params = dict(backend='nccl') -log_level = 'INFO' -work_dir = './work_dirs/mask_rcnn_r50_fpn_gn_contrib_2x' -load_from = None -resume_from = None -workflow = [('train', 1)] diff --git a/configs/grid_rcnn/grid_rcnn_gn_head_x101_32x4d_fpn_2x.py b/configs/grid_rcnn/grid_rcnn_gn_head_x101_32x4d_fpn_2x.py deleted file mode 100644 index fd459b0cb3832036e700fd93479e7b136058a389..0000000000000000000000000000000000000000 --- a/configs/grid_rcnn/grid_rcnn_gn_head_x101_32x4d_fpn_2x.py +++ /dev/null @@ -1,189 +0,0 @@ -# model settings -model = dict( - type='GridRCNN', - pretrained='open-mmlab://resnext101_32x4d', - backbone=dict( - type='ResNeXt', - depth=101, - groups=32, - base_width=4, - num_stages=4, - out_indices=(0, 1, 2, 3), - frozen_stages=1, - norm_cfg=dict(type='BN', requires_grad=True), - style='pytorch'), - neck=dict( - type='FPN', - in_channels=[256, 512, 1024, 2048], - out_channels=256, - num_outs=5), - rpn_head=dict( - type='RPNHead', - in_channels=256, - feat_channels=256, - 
anchor_scales=[8], - anchor_ratios=[0.5, 1.0, 2.0], - anchor_strides=[4, 8, 16, 32, 64], - target_means=[.0, .0, .0, .0], - target_stds=[1.0, 1.0, 1.0, 1.0], - loss_cls=dict( - type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0), - loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0)), - bbox_roi_extractor=dict( - type='SingleRoIExtractor', - roi_layer=dict(type='RoIAlign', out_size=7, sample_num=2), - out_channels=256, - featmap_strides=[4, 8, 16, 32]), - bbox_head=dict( - type='SharedFCBBoxHead', - with_reg=False, - num_fcs=2, - in_channels=256, - fc_out_channels=1024, - roi_feat_size=7, - num_classes=81, - target_means=[0., 0., 0., 0.], - target_stds=[0.1, 0.1, 0.2, 0.2], - reg_class_agnostic=False), - grid_roi_extractor=dict( - type='SingleRoIExtractor', - roi_layer=dict(type='RoIAlign', out_size=14, sample_num=2), - out_channels=256, - featmap_strides=[4, 8, 16, 32]), - grid_head=dict( - type='GridHead', - grid_points=9, - num_convs=8, - in_channels=256, - point_feat_channels=64, - norm_cfg=dict(type='GN', num_groups=36), - loss_grid=dict( - type='CrossEntropyLoss', use_sigmoid=True, loss_weight=15))) -# model training and testing settings -train_cfg = dict( - rpn=dict( - assigner=dict( - type='MaxIoUAssigner', - pos_iou_thr=0.7, - neg_iou_thr=0.3, - min_pos_iou=0.3, - ignore_iof_thr=-1), - sampler=dict( - type='RandomSampler', - num=256, - pos_fraction=0.5, - neg_pos_ub=-1, - add_gt_as_proposals=False), - allowed_border=0, - pos_weight=-1, - debug=False), - rpn_proposal=dict( - nms_across_levels=False, - nms_pre=2000, - nms_post=2000, - max_num=2000, - nms_thr=0.7, - min_bbox_size=0), - rcnn=dict( - assigner=dict( - type='MaxIoUAssigner', - pos_iou_thr=0.5, - neg_iou_thr=0.5, - min_pos_iou=0.5, - ignore_iof_thr=-1), - sampler=dict( - type='RandomSampler', - num=512, - pos_fraction=0.25, - neg_pos_ub=-1, - add_gt_as_proposals=True), - pos_radius=1, - pos_weight=-1, - max_num_grid=192, - debug=False)) -test_cfg = dict( - rpn=dict( - 
nms_across_levels=False, - nms_pre=1000, - nms_post=1000, - max_num=1000, - nms_thr=0.7, - min_bbox_size=0), - rcnn=dict( - score_thr=0.03, nms=dict(type='nms', iou_thr=0.3), max_per_img=100)) -# dataset settings -dataset_type = 'CocoDataset' -data_root = 'data/coco/' -img_norm_cfg = dict( - mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) -train_pipeline = [ - dict(type='LoadImageFromFile'), - dict(type='LoadAnnotations', with_bbox=True), - dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), - dict(type='RandomFlip', flip_ratio=0.5), - dict(type='Normalize', **img_norm_cfg), - dict(type='Pad', size_divisor=32), - dict(type='DefaultFormatBundle'), - dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']), -] -test_pipeline = [ - dict(type='LoadImageFromFile'), - dict( - type='MultiScaleFlipAug', - img_scale=(1333, 800), - flip=False, - transforms=[ - dict(type='Resize', keep_ratio=True), - dict(type='RandomFlip'), - dict(type='Normalize', **img_norm_cfg), - dict(type='Pad', size_divisor=32), - dict(type='ImageToTensor', keys=['img']), - dict(type='Collect', keys=['img']), - ]) -] -data = dict( - imgs_per_gpu=2, - workers_per_gpu=2, - train=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_train2017.json', - img_prefix=data_root + 'train2017/', - pipeline=train_pipeline), - val=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_val2017.json', - img_prefix=data_root + 'val2017/', - pipeline=test_pipeline), - test=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_val2017.json', - img_prefix=data_root + 'val2017/', - pipeline=test_pipeline)) -evaluation = dict(interval=1, metric='bbox') -# optimizer -optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001) -optimizer_config = dict(grad_clip=None) -# learning policy -lr_config = dict( - policy='step', - warmup='linear', - warmup_iters=3665, - warmup_ratio=1.0 / 80, - step=[17, 23]) 
-checkpoint_config = dict(interval=1) -# yapf:disable -log_config = dict( - interval=50, - hooks=[ - dict(type='TextLoggerHook'), - # dict(type='TensorboardLoggerHook') - ]) -# yapf:enable -# runtime settings -total_epochs = 25 -dist_params = dict(backend='nccl') -log_level = 'INFO' -work_dir = './work_dirs/grid_rcnn_gn_head_x101_32x4d_fpn_2x' -load_from = None -resume_from = None -workflow = [('train', 1)] diff --git a/configs/grid_rcnn/grid_rcnn_gn_head_r50_fpn_2x.py b/configs/grid_rcnn/grid_rcnn_r50_fpn_gn-head_2x_coco.py similarity index 63% rename from configs/grid_rcnn/grid_rcnn_gn_head_r50_fpn_2x.py rename to configs/grid_rcnn/grid_rcnn_r50_fpn_gn-head_2x_coco.py index 810e6ddecd6ede880b4ee2cc92284799d921d3d3..e835ab7e805b7981441b079d5a4616c96467af42 100644 --- a/configs/grid_rcnn/grid_rcnn_gn_head_r50_fpn_2x.py +++ b/configs/grid_rcnn/grid_rcnn_r50_fpn_gn-head_2x_coco.py @@ -1,3 +1,6 @@ +_base_ = [ + '../_base_/datasets/coco_detection.py', '../_base_/default_runtime.py' +] # model settings model = dict( type='GridRCNN', @@ -33,9 +36,8 @@ model = dict( out_channels=256, featmap_strides=[4, 8, 16, 32]), bbox_head=dict( - type='SharedFCBBoxHead', + type='Shared2FCBBoxHead', with_reg=False, - num_fcs=2, in_channels=256, fc_out_channels=1024, roi_feat_size=7, @@ -109,55 +111,6 @@ test_cfg = dict( min_bbox_size=0), rcnn=dict( score_thr=0.03, nms=dict(type='nms', iou_thr=0.3), max_per_img=100)) -# dataset settings -dataset_type = 'CocoDataset' -data_root = 'data/coco/' -img_norm_cfg = dict( - mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) -train_pipeline = [ - dict(type='LoadImageFromFile'), - dict(type='LoadAnnotations', with_bbox=True), - dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), - dict(type='RandomFlip', flip_ratio=0.5), - dict(type='Normalize', **img_norm_cfg), - dict(type='Pad', size_divisor=32), - dict(type='DefaultFormatBundle'), - dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']), -] -test_pipeline = 
[ - dict(type='LoadImageFromFile'), - dict( - type='MultiScaleFlipAug', - img_scale=(1333, 800), - flip=False, - transforms=[ - dict(type='Resize', keep_ratio=True), - dict(type='RandomFlip'), - dict(type='Normalize', **img_norm_cfg), - dict(type='Pad', size_divisor=32), - dict(type='ImageToTensor', keys=['img']), - dict(type='Collect', keys=['img']), - ]) -] -data = dict( - imgs_per_gpu=2, - workers_per_gpu=2, - train=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_train2017.json', - img_prefix=data_root + 'train2017/', - pipeline=train_pipeline), - val=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_val2017.json', - img_prefix=data_root + 'val2017/', - pipeline=test_pipeline), - test=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_val2017.json', - img_prefix=data_root + 'val2017/', - pipeline=test_pipeline)) -evaluation = dict(interval=1, metric='bbox') # optimizer optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001) optimizer_config = dict(grad_clip=None) @@ -168,20 +121,5 @@ lr_config = dict( warmup_iters=3665, warmup_ratio=1.0 / 80, step=[17, 23]) -checkpoint_config = dict(interval=1) -# yapf:disable -log_config = dict( - interval=50, - hooks=[ - dict(type='TextLoggerHook'), - # dict(type='TensorboardLoggerHook') - ]) -# yapf:enable -# runtime settings total_epochs = 25 -dist_params = dict(backend='nccl') -log_level = 'INFO' work_dir = './work_dirs/grid_rcnn_gn_head_r50_fpn_2x' -load_from = None -resume_from = None -workflow = [('train', 1)] diff --git a/configs/grid_rcnn/grid_rcnn_x101_32x4d_fpn_gn-head_2x_coco.py b/configs/grid_rcnn/grid_rcnn_x101_32x4d_fpn_gn-head_2x_coco.py new file mode 100644 index 0000000000000000000000000000000000000000..c42aed798605f9b8a1db8e06cfbf320df96d8622 --- /dev/null +++ b/configs/grid_rcnn/grid_rcnn_x101_32x4d_fpn_gn-head_2x_coco.py @@ -0,0 +1,24 @@ +_base_ = './grid_rcnn_r50_fpn_gn-head_2x_coco.py' +model = dict( + 
pretrained='open-mmlab://resnext101_32x4d', + backbone=dict( + type='ResNeXt', + depth=101, + groups=32, + base_width=4, + num_stages=4, + out_indices=(0, 1, 2, 3), + frozen_stages=1, + style='pytorch')) +# optimizer +optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001) +optimizer_config = dict(grad_clip=None) +# learning policy +lr_config = dict( + policy='step', + warmup='linear', + warmup_iters=3665, + warmup_ratio=1.0 / 80, + step=[17, 23]) +total_epochs = 25 +work_dir = './work_dirs/grid_rcnn_gn_head_x101_32x4d_fpn_2x' diff --git a/configs/guided_anchoring/ga_fast_r50_caffe_fpn_1x.py b/configs/guided_anchoring/ga_fast_r50_caffe_fpn_1x.py deleted file mode 100644 index d5a4753ebec622e116eac0c5ba44dd081348b9da..0000000000000000000000000000000000000000 --- a/configs/guided_anchoring/ga_fast_r50_caffe_fpn_1x.py +++ /dev/null @@ -1,137 +0,0 @@ -# model settings -model = dict( - type='FastRCNN', - pretrained='open-mmlab://resnet50_caffe', - backbone=dict( - type='ResNet', - depth=50, - num_stages=4, - out_indices=(0, 1, 2, 3), - frozen_stages=1, - norm_cfg=dict(type='BN', requires_grad=False), - norm_eval=True, - style='caffe'), - neck=dict( - type='FPN', - in_channels=[256, 512, 1024, 2048], - out_channels=256, - num_outs=5), - bbox_roi_extractor=dict( - type='SingleRoIExtractor', - roi_layer=dict(type='RoIAlign', out_size=7, sample_num=2), - out_channels=256, - featmap_strides=[4, 8, 16, 32]), - bbox_head=dict( - type='SharedFCBBoxHead', - num_fcs=2, - in_channels=256, - fc_out_channels=1024, - roi_feat_size=7, - num_classes=81, - target_means=[0., 0., 0., 0.], - target_stds=[0.05, 0.05, 0.1, 0.1], - reg_class_agnostic=False, - loss_cls=dict( - type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), - loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0))) -# model training and testing settings -train_cfg = dict( - rcnn=dict( - assigner=dict( - type='MaxIoUAssigner', - pos_iou_thr=0.6, - neg_iou_thr=0.6, - min_pos_iou=0.6, - 
ignore_iof_thr=-1), - sampler=dict( - type='RandomSampler', - num=256, - pos_fraction=0.25, - neg_pos_ub=-1, - add_gt_as_proposals=True), - pos_weight=-1, - debug=False)) -test_cfg = dict( - rcnn=dict( - score_thr=1e-3, nms=dict(type='nms', iou_thr=0.5), max_per_img=100)) -# dataset settings -dataset_type = 'CocoDataset' -data_root = 'data/coco/' -img_norm_cfg = dict( - mean=[102.9801, 115.9465, 122.7717], std=[1.0, 1.0, 1.0], to_rgb=False) -train_pipeline = [ - dict(type='LoadImageFromFile'), - dict(type='LoadProposals', num_max_proposals=300), - dict(type='LoadAnnotations', with_bbox=True), - dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), - dict(type='RandomFlip', flip_ratio=0.5), - dict(type='Normalize', **img_norm_cfg), - dict(type='Pad', size_divisor=32), - dict(type='DefaultFormatBundle'), - dict(type='Collect', keys=['img', 'proposals', 'gt_bboxes', 'gt_labels']), -] -test_pipeline = [ - dict(type='LoadImageFromFile'), - dict(type='LoadProposals', num_max_proposals=None), - dict( - type='MultiScaleFlipAug', - img_scale=(1333, 800), - flip=False, - transforms=[ - dict(type='Resize', keep_ratio=True), - dict(type='RandomFlip'), - dict(type='Normalize', **img_norm_cfg), - dict(type='Pad', size_divisor=32), - dict(type='ImageToTensor', keys=['img']), - dict(type='Collect', keys=['img', 'proposals']), - ]) -] -data = dict( - imgs_per_gpu=2, - workers_per_gpu=2, - train=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_train2017.json', - proposal_file=data_root + 'proposals/ga_rpn_r50_fpn_1x_train2017.pkl', - img_prefix=data_root + 'train2017/', - pipeline=train_pipeline), - val=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_val2017.json', - proposal_file=data_root + 'proposals/ga_rpn_r50_fpn_1x_val2017.pkl', - img_prefix=data_root + 'val2017/', - pipeline=test_pipeline), - test=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_val2017.json', - proposal_file=data_root + 
'proposals/ga_rpn_r50_fpn_1x_val2017.pkl', - img_prefix=data_root + 'val2017/', - pipeline=test_pipeline)) -evaluation = dict(interval=1, metric='bbox') -# optimizer -optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001) -optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) -# learning policy -lr_config = dict( - policy='step', - warmup='linear', - warmup_iters=500, - warmup_ratio=1.0 / 3, - step=[8, 11]) -checkpoint_config = dict(interval=1) -# yapf:disable -log_config = dict( - interval=50, - hooks=[ - dict(type='TextLoggerHook'), - # dict(type='TensorboardLoggerHook') - ]) -# yapf:enable -# runtime settings -total_epochs = 12 -dist_params = dict(backend='nccl') -log_level = 'INFO' -work_dir = './work_dirs/ga_fast_rcnn_r50_caffe_fpn_1x' -load_from = None -resume_from = None -workflow = [('train', 1)] diff --git a/configs/guided_anchoring/ga_fast_r50_caffe_fpn_1x_coco.py b/configs/guided_anchoring/ga_fast_r50_caffe_fpn_1x_coco.py new file mode 100644 index 0000000000000000000000000000000000000000..3056f2247ad7c313065c1769a907cdc09f7f4c12 --- /dev/null +++ b/configs/guided_anchoring/ga_fast_r50_caffe_fpn_1x_coco.py @@ -0,0 +1,60 @@ +_base_ = '../fast_rcnn/fast_rcnn_r50_fpn_1x_coco.py' +model = dict( + pretrained='open-mmlab://resnet50_caffe', + backbone=dict( + type='ResNet', + depth=50, + num_stages=4, + out_indices=(0, 1, 2, 3), + frozen_stages=1, + norm_cfg=dict(type='BN', requires_grad=False), + style='caffe'), + bbox_head=dict(target_stds=[0.05, 0.05, 0.1, 0.1])) +# model training and testing settings +train_cfg = dict( + rcnn=dict( + assigner=dict(pos_iou_thr=0.6, neg_iou_thr=0.6, min_pos_iou=0.6), + sampler=dict(num=256))) +test_cfg = dict(rcnn=dict(score_thr=1e-3)) +dataset_type = 'CocoDataset' +data_root = 'data/coco/' +img_norm_cfg = dict( + mean=[102.9801, 115.9465, 122.7717], std=[1.0, 1.0, 1.0], to_rgb=False) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadProposals', num_max_proposals=300), + 
dict(type='LoadAnnotations', with_bbox=True), + dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'proposals', 'gt_bboxes', 'gt_labels']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadProposals', num_max_proposals=None), + dict( + type='MultiScaleFlipAug', + img_scale=(1333, 800), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img', 'proposals']), + ]) +] +data = dict( + train=dict( + proposal_file=data_root + 'proposals/ga_rpn_r50_fpn_1x_train2017.pkl', + pipeline=train_pipeline), + val=dict( + proposal_file=data_root + 'proposals/ga_rpn_r50_fpn_1x_val2017.pkl', + pipeline=test_pipeline), + test=dict( + proposal_file=data_root + 'proposals/ga_rpn_r50_fpn_1x_val2017.pkl', + pipeline=test_pipeline)) +work_dir = './work_dirs/ga_fast_rcnn_r50_caffe_fpn_1x' diff --git a/configs/guided_anchoring/ga_faster_r50_caffe_fpn_1x.py b/configs/guided_anchoring/ga_faster_r50_caffe_fpn_1x.py deleted file mode 100644 index 57da8d486b43f4e588587bbacf3b1652c0e843e0..0000000000000000000000000000000000000000 --- a/configs/guided_anchoring/ga_faster_r50_caffe_fpn_1x.py +++ /dev/null @@ -1,200 +0,0 @@ -# model settings -model = dict( - type='FasterRCNN', - pretrained='open-mmlab://resnet50_caffe', - backbone=dict( - type='ResNet', - depth=50, - num_stages=4, - out_indices=(0, 1, 2, 3), - frozen_stages=1, - norm_cfg=dict(type='BN', requires_grad=False), - norm_eval=True, - style='caffe'), - neck=dict( - type='FPN', - in_channels=[256, 512, 1024, 2048], - out_channels=256, - num_outs=5), - rpn_head=dict( - type='GARPNHead', - in_channels=256, - feat_channels=256, 
- octave_base_scale=8, - scales_per_octave=3, - octave_ratios=[0.5, 1.0, 2.0], - anchor_strides=[4, 8, 16, 32, 64], - anchor_base_sizes=None, - anchoring_means=[.0, .0, .0, .0], - anchoring_stds=[0.07, 0.07, 0.14, 0.14], - target_means=(.0, .0, .0, .0), - target_stds=[0.07, 0.07, 0.11, 0.11], - loc_filter_thr=0.01, - loss_loc=dict( - type='FocalLoss', - use_sigmoid=True, - gamma=2.0, - alpha=0.25, - loss_weight=1.0), - loss_shape=dict(type='BoundedIoULoss', beta=0.2, loss_weight=1.0), - loss_cls=dict( - type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0), - loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0)), - bbox_roi_extractor=dict( - type='SingleRoIExtractor', - roi_layer=dict(type='RoIAlign', out_size=7, sample_num=2), - out_channels=256, - featmap_strides=[4, 8, 16, 32]), - bbox_head=dict( - type='SharedFCBBoxHead', - num_fcs=2, - in_channels=256, - fc_out_channels=1024, - roi_feat_size=7, - num_classes=81, - target_means=[0., 0., 0., 0.], - target_stds=[0.05, 0.05, 0.1, 0.1], - reg_class_agnostic=False, - loss_cls=dict( - type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), - loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0))) -# model training and testing settings -train_cfg = dict( - rpn=dict( - ga_assigner=dict( - type='ApproxMaxIoUAssigner', - pos_iou_thr=0.7, - neg_iou_thr=0.3, - min_pos_iou=0.3, - ignore_iof_thr=-1), - ga_sampler=dict( - type='RandomSampler', - num=256, - pos_fraction=0.5, - neg_pos_ub=-1, - add_gt_as_proposals=False), - assigner=dict( - type='MaxIoUAssigner', - pos_iou_thr=0.7, - neg_iou_thr=0.3, - min_pos_iou=0.3, - ignore_iof_thr=-1), - sampler=dict( - type='RandomSampler', - num=256, - pos_fraction=0.5, - neg_pos_ub=-1, - add_gt_as_proposals=False), - allowed_border=-1, - pos_weight=-1, - center_ratio=0.2, - ignore_ratio=0.5, - debug=False), - rpn_proposal=dict( - nms_across_levels=False, - nms_pre=2000, - nms_post=2000, - max_num=300, - nms_thr=0.7, - min_bbox_size=0), - rcnn=dict( - 
assigner=dict( - type='MaxIoUAssigner', - pos_iou_thr=0.6, - neg_iou_thr=0.6, - min_pos_iou=0.6, - ignore_iof_thr=-1), - sampler=dict( - type='RandomSampler', - num=256, - pos_fraction=0.25, - neg_pos_ub=-1, - add_gt_as_proposals=True), - pos_weight=-1, - debug=False)) -test_cfg = dict( - rpn=dict( - nms_across_levels=False, - nms_pre=1000, - nms_post=1000, - max_num=300, - nms_thr=0.7, - min_bbox_size=0), - rcnn=dict( - score_thr=1e-3, nms=dict(type='nms', iou_thr=0.5), max_per_img=100)) -# dataset settings -dataset_type = 'CocoDataset' -data_root = 'data/coco/' -img_norm_cfg = dict( - mean=[102.9801, 115.9465, 122.7717], std=[1.0, 1.0, 1.0], to_rgb=False) -train_pipeline = [ - dict(type='LoadImageFromFile'), - dict(type='LoadAnnotations', with_bbox=True), - dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), - dict(type='RandomFlip', flip_ratio=0.5), - dict(type='Normalize', **img_norm_cfg), - dict(type='Pad', size_divisor=32), - dict(type='DefaultFormatBundle'), - dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']), -] -test_pipeline = [ - dict(type='LoadImageFromFile'), - dict( - type='MultiScaleFlipAug', - img_scale=(1333, 800), - flip=False, - transforms=[ - dict(type='Resize', keep_ratio=True), - dict(type='RandomFlip'), - dict(type='Normalize', **img_norm_cfg), - dict(type='Pad', size_divisor=32), - dict(type='ImageToTensor', keys=['img']), - dict(type='Collect', keys=['img']), - ]) -] -data = dict( - imgs_per_gpu=2, - workers_per_gpu=2, - train=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_train2017.json', - img_prefix=data_root + 'train2017/', - pipeline=train_pipeline), - val=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_val2017.json', - img_prefix=data_root + 'val2017/', - pipeline=test_pipeline), - test=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_val2017.json', - img_prefix=data_root + 'val2017/', - pipeline=test_pipeline)) -evaluation = 
dict(interval=1, metric='bbox') -# optimizer -optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001) -optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) -# learning policy -lr_config = dict( - policy='step', - warmup='linear', - warmup_iters=500, - warmup_ratio=1.0 / 3, - step=[8, 11]) -checkpoint_config = dict(interval=1) -# yapf:disable -log_config = dict( - interval=50, - hooks=[ - dict(type='TextLoggerHook'), - # dict(type='TensorboardLoggerHook') - ]) -# yapf:enable -# runtime settings -total_epochs = 12 -dist_params = dict(backend='nccl') -log_level = 'INFO' -work_dir = './work_dirs/ga_faster_rcnn_r50_caffe_fpn_1x' -load_from = None -resume_from = None -workflow = [('train', 1)] diff --git a/configs/guided_anchoring/ga_faster_r50_caffe_fpn_1x_coco.py b/configs/guided_anchoring/ga_faster_r50_caffe_fpn_1x_coco.py new file mode 100644 index 0000000000000000000000000000000000000000..0fd06b184c2e66d17793b9b0ff287f0a45f08949 --- /dev/null +++ b/configs/guided_anchoring/ga_faster_r50_caffe_fpn_1x_coco.py @@ -0,0 +1,52 @@ +_base_ = '../faster_rcnn/faster_rcnn_r50_caffe_fpn_1x_coco.py' +model = dict( + rpn_head=dict( + _delete_=True, + type='GARPNHead', + in_channels=256, + feat_channels=256, + octave_base_scale=8, + scales_per_octave=3, + octave_ratios=[0.5, 1.0, 2.0], + anchor_strides=[4, 8, 16, 32, 64], + anchor_base_sizes=None, + anchoring_means=[.0, .0, .0, .0], + anchoring_stds=[0.07, 0.07, 0.14, 0.14], + target_means=(.0, .0, .0, .0), + target_stds=[0.07, 0.07, 0.11, 0.11], + loc_filter_thr=0.01, + loss_loc=dict( + type='FocalLoss', + use_sigmoid=True, + gamma=2.0, + alpha=0.25, + loss_weight=1.0), + loss_shape=dict(type='BoundedIoULoss', beta=0.2, loss_weight=1.0), + loss_cls=dict( + type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0), + loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0)), + bbox_head=dict(target_stds=[0.05, 0.05, 0.1, 0.1])) +# model training and testing settings +train_cfg = dict( + 
rpn=dict( + ga_assigner=dict( + type='ApproxMaxIoUAssigner', + pos_iou_thr=0.7, + neg_iou_thr=0.3, + min_pos_iou=0.3, + ignore_iof_thr=-1), + ga_sampler=dict( + type='RandomSampler', + num=256, + pos_fraction=0.5, + neg_pos_ub=-1, + add_gt_as_proposals=False), + allowed_border=-1, + center_ratio=0.2, + ignore_ratio=0.5), + rpn_proposal=dict(max_num=300), + rcnn=dict( + assigner=dict(pos_iou_thr=0.6, neg_iou_thr=0.6, min_pos_iou=0.6), + sampler=dict(type='RandomSampler', num=256))) +test_cfg = dict(rpn=dict(max_num=300), rcnn=dict(score_thr=1e-3)) +work_dir = './work_dirs/ga_faster_rcnn_r50_caffe_fpn_1x' diff --git a/configs/guided_anchoring/ga_faster_r50_fpn_1x_coco.py b/configs/guided_anchoring/ga_faster_r50_fpn_1x_coco.py new file mode 100644 index 0000000000000000000000000000000000000000..482845e8e57ff325bb447797f1ddb0e381934299 --- /dev/null +++ b/configs/guided_anchoring/ga_faster_r50_fpn_1x_coco.py @@ -0,0 +1,52 @@ +_base_ = '../faster_rcnn/faster_rcnn_r50_fpn_1x_coco.py' +model = dict( + rpn_head=dict( + _delete_=True, + type='GARPNHead', + in_channels=256, + feat_channels=256, + octave_base_scale=8, + scales_per_octave=3, + octave_ratios=[0.5, 1.0, 2.0], + anchor_strides=[4, 8, 16, 32, 64], + anchor_base_sizes=None, + anchoring_means=[.0, .0, .0, .0], + anchoring_stds=[0.07, 0.07, 0.14, 0.14], + target_means=(.0, .0, .0, .0), + target_stds=[0.07, 0.07, 0.11, 0.11], + loc_filter_thr=0.01, + loss_loc=dict( + type='FocalLoss', + use_sigmoid=True, + gamma=2.0, + alpha=0.25, + loss_weight=1.0), + loss_shape=dict(type='BoundedIoULoss', beta=0.2, loss_weight=1.0), + loss_cls=dict( + type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0), + loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0)), + bbox_head=dict(target_stds=[0.05, 0.05, 0.1, 0.1])) +# model training and testing settings +train_cfg = dict( + rpn=dict( + ga_assigner=dict( + type='ApproxMaxIoUAssigner', + pos_iou_thr=0.7, + neg_iou_thr=0.3, + min_pos_iou=0.3, + ignore_iof_thr=-1), + 
ga_sampler=dict( + type='RandomSampler', + num=256, + pos_fraction=0.5, + neg_pos_ub=-1, + add_gt_as_proposals=False), + allowed_border=-1, + center_ratio=0.2, + ignore_ratio=0.5), + rpn_proposal=dict(max_num=300), + rcnn=dict( + assigner=dict(pos_iou_thr=0.6, neg_iou_thr=0.6, min_pos_iou=0.6), + sampler=dict(type='RandomSampler', num=256))) +test_cfg = dict(rpn=dict(max_num=300), rcnn=dict(score_thr=1e-3)) +work_dir = './work_dirs/ga_faster_rcnn_r50_fpn_1x' diff --git a/configs/guided_anchoring/ga_faster_x101_32x4d_fpn_1x.py b/configs/guided_anchoring/ga_faster_x101_32x4d_fpn_1x.py deleted file mode 100644 index 57a39228b5ce84c91367e102826875eb8fec4f44..0000000000000000000000000000000000000000 --- a/configs/guided_anchoring/ga_faster_x101_32x4d_fpn_1x.py +++ /dev/null @@ -1,201 +0,0 @@ -# model settings -model = dict( - type='FasterRCNN', - pretrained='open-mmlab://resnext101_32x4d', - backbone=dict( - type='ResNeXt', - depth=101, - groups=32, - base_width=4, - num_stages=4, - out_indices=(0, 1, 2, 3), - frozen_stages=1, - norm_cfg=dict(type='BN', requires_grad=True), - style='pytorch'), - neck=dict( - type='FPN', - in_channels=[256, 512, 1024, 2048], - out_channels=256, - num_outs=5), - rpn_head=dict( - type='GARPNHead', - in_channels=256, - feat_channels=256, - octave_base_scale=8, - scales_per_octave=3, - octave_ratios=[0.5, 1.0, 2.0], - anchor_strides=[4, 8, 16, 32, 64], - anchor_base_sizes=None, - anchoring_means=[.0, .0, .0, .0], - anchoring_stds=[0.07, 0.07, 0.14, 0.14], - target_means=(.0, .0, .0, .0), - target_stds=[0.07, 0.07, 0.11, 0.11], - loc_filter_thr=0.01, - loss_loc=dict( - type='FocalLoss', - use_sigmoid=True, - gamma=2.0, - alpha=0.25, - loss_weight=1.0), - loss_shape=dict(type='BoundedIoULoss', beta=0.2, loss_weight=1.0), - loss_cls=dict( - type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0), - loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0)), - bbox_roi_extractor=dict( - type='SingleRoIExtractor', - 
roi_layer=dict(type='RoIAlign', out_size=7, sample_num=2), - out_channels=256, - featmap_strides=[4, 8, 16, 32]), - bbox_head=dict( - type='SharedFCBBoxHead', - num_fcs=2, - in_channels=256, - fc_out_channels=1024, - roi_feat_size=7, - num_classes=81, - target_means=[0., 0., 0., 0.], - target_stds=[0.05, 0.05, 0.1, 0.1], - reg_class_agnostic=False, - loss_cls=dict( - type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), - loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0))) -# model training and testing settings -train_cfg = dict( - rpn=dict( - ga_assigner=dict( - type='ApproxMaxIoUAssigner', - pos_iou_thr=0.7, - neg_iou_thr=0.3, - min_pos_iou=0.3, - ignore_iof_thr=-1), - ga_sampler=dict( - type='RandomSampler', - num=256, - pos_fraction=0.5, - neg_pos_ub=-1, - add_gt_as_proposals=False), - assigner=dict( - type='MaxIoUAssigner', - pos_iou_thr=0.7, - neg_iou_thr=0.3, - min_pos_iou=0.3, - ignore_iof_thr=-1), - sampler=dict( - type='RandomSampler', - num=256, - pos_fraction=0.5, - neg_pos_ub=-1, - add_gt_as_proposals=False), - allowed_border=-1, - pos_weight=-1, - center_ratio=0.2, - ignore_ratio=0.5, - debug=False), - rpn_proposal=dict( - nms_across_levels=False, - nms_pre=2000, - nms_post=2000, - max_num=300, - nms_thr=0.7, - min_bbox_size=0), - rcnn=dict( - assigner=dict( - type='MaxIoUAssigner', - pos_iou_thr=0.6, - neg_iou_thr=0.6, - min_pos_iou=0.6, - ignore_iof_thr=-1), - sampler=dict( - type='RandomSampler', - num=256, - pos_fraction=0.25, - neg_pos_ub=-1, - add_gt_as_proposals=True), - pos_weight=-1, - debug=False)) -test_cfg = dict( - rpn=dict( - nms_across_levels=False, - nms_pre=1000, - nms_post=1000, - max_num=300, - nms_thr=0.7, - min_bbox_size=0), - rcnn=dict( - score_thr=1e-3, nms=dict(type='nms', iou_thr=0.5), max_per_img=100)) -# dataset settings -dataset_type = 'CocoDataset' -data_root = 'data/coco/' -img_norm_cfg = dict( - mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) -train_pipeline = [ - 
dict(type='LoadImageFromFile'), - dict(type='LoadAnnotations', with_bbox=True), - dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), - dict(type='RandomFlip', flip_ratio=0.5), - dict(type='Normalize', **img_norm_cfg), - dict(type='Pad', size_divisor=32), - dict(type='DefaultFormatBundle'), - dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']), -] -test_pipeline = [ - dict(type='LoadImageFromFile'), - dict( - type='MultiScaleFlipAug', - img_scale=(1333, 800), - flip=False, - transforms=[ - dict(type='Resize', keep_ratio=True), - dict(type='RandomFlip'), - dict(type='Normalize', **img_norm_cfg), - dict(type='Pad', size_divisor=32), - dict(type='ImageToTensor', keys=['img']), - dict(type='Collect', keys=['img']), - ]) -] -data = dict( - imgs_per_gpu=2, - workers_per_gpu=2, - train=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_train2017.json', - img_prefix=data_root + 'train2017/', - pipeline=train_pipeline), - val=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_val2017.json', - img_prefix=data_root + 'val2017/', - pipeline=test_pipeline), - test=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_val2017.json', - img_prefix=data_root + 'val2017/', - pipeline=test_pipeline)) -evaluation = dict(interval=1, metric='bbox') -# optimizer -optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001) -optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) -# learning policy -lr_config = dict( - policy='step', - warmup='linear', - warmup_iters=500, - warmup_ratio=1.0 / 3, - step=[8, 11]) -checkpoint_config = dict(interval=1) -# yapf:disable -log_config = dict( - interval=50, - hooks=[ - dict(type='TextLoggerHook'), - # dict(type='TensorboardLoggerHook') - ]) -# yapf:enable -# runtime settings -total_epochs = 12 -dist_params = dict(backend='nccl') -log_level = 'INFO' -work_dir = './work_dirs/ga_faster_rcnn_x101_32x4d_fpn_1x' -load_from = None -resume_from = None 
-workflow = [('train', 1)] diff --git a/configs/guided_anchoring/ga_faster_x101_32x4d_fpn_1x_coco.py b/configs/guided_anchoring/ga_faster_x101_32x4d_fpn_1x_coco.py new file mode 100644 index 0000000000000000000000000000000000000000..f0a55e39702c510ea6a654726340108ff9335f67 --- /dev/null +++ b/configs/guided_anchoring/ga_faster_x101_32x4d_fpn_1x_coco.py @@ -0,0 +1,14 @@ +_base_ = './ga_faster_r50_fpn_1x_coco.py' +model = dict( + pretrained='open-mmlab://resnext101_32x4d', + backbone=dict( + type='ResNeXt', + depth=101, + groups=32, + base_width=4, + num_stages=4, + out_indices=(0, 1, 2, 3), + frozen_stages=1, + norm_cfg=dict(type='BN', requires_grad=True), + style='pytorch')) +work_dir = './work_dirs/ga_faster_rcnn_x101_32x4d_fpn_1x' diff --git a/configs/guided_anchoring/ga_retinanet_r50_caffe_fpn_1x.py b/configs/guided_anchoring/ga_retinanet_r50_caffe_fpn_1x.py deleted file mode 100644 index 084b851a87303be5aef934d603f6e287dbccbff7..0000000000000000000000000000000000000000 --- a/configs/guided_anchoring/ga_retinanet_r50_caffe_fpn_1x.py +++ /dev/null @@ -1,157 +0,0 @@ -# model settings -model = dict( - type='RetinaNet', - pretrained='open-mmlab://resnet50_caffe', - backbone=dict( - type='ResNet', - depth=50, - num_stages=4, - out_indices=(0, 1, 2, 3), - frozen_stages=1, - norm_cfg=dict(type='BN', requires_grad=False), - norm_eval=True, - style='caffe'), - neck=dict( - type='FPN', - in_channels=[256, 512, 1024, 2048], - out_channels=256, - start_level=1, - add_extra_convs=True, - num_outs=5), - bbox_head=dict( - type='GARetinaHead', - num_classes=81, - in_channels=256, - stacked_convs=4, - feat_channels=256, - octave_base_scale=4, - scales_per_octave=3, - octave_ratios=[0.5, 1.0, 2.0], - anchor_strides=[8, 16, 32, 64, 128], - anchor_base_sizes=None, - anchoring_means=[.0, .0, .0, .0], - anchoring_stds=[1.0, 1.0, 1.0, 1.0], - target_means=(.0, .0, .0, .0), - target_stds=[1.0, 1.0, 1.0, 1.0], - loc_filter_thr=0.01, - loss_loc=dict( - type='FocalLoss', - 
use_sigmoid=True, - gamma=2.0, - alpha=0.25, - loss_weight=1.0), - loss_shape=dict(type='BoundedIoULoss', beta=0.2, loss_weight=1.0), - loss_cls=dict( - type='FocalLoss', - use_sigmoid=True, - gamma=2.0, - alpha=0.25, - loss_weight=1.0), - loss_bbox=dict(type='SmoothL1Loss', beta=0.04, loss_weight=1.0))) -# training and testing settings -train_cfg = dict( - ga_assigner=dict( - type='ApproxMaxIoUAssigner', - pos_iou_thr=0.5, - neg_iou_thr=0.4, - min_pos_iou=0.4, - ignore_iof_thr=-1), - ga_sampler=dict( - type='RandomSampler', - num=256, - pos_fraction=0.5, - neg_pos_ub=-1, - add_gt_as_proposals=False), - assigner=dict( - type='MaxIoUAssigner', - pos_iou_thr=0.5, - neg_iou_thr=0.5, - min_pos_iou=0.0, - ignore_iof_thr=-1), - allowed_border=-1, - pos_weight=-1, - center_ratio=0.2, - ignore_ratio=0.5, - debug=False) -test_cfg = dict( - nms_pre=1000, - min_bbox_size=0, - score_thr=0.05, - nms=dict(type='nms', iou_thr=0.5), - max_per_img=100) -# dataset settings -dataset_type = 'CocoDataset' -data_root = 'data/coco/' -img_norm_cfg = dict( - mean=[102.9801, 115.9465, 122.7717], std=[1.0, 1.0, 1.0], to_rgb=False) -train_pipeline = [ - dict(type='LoadImageFromFile'), - dict(type='LoadAnnotations', with_bbox=True), - dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), - dict(type='RandomFlip', flip_ratio=0.5), - dict(type='Normalize', **img_norm_cfg), - dict(type='Pad', size_divisor=32), - dict(type='DefaultFormatBundle'), - dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']), -] -test_pipeline = [ - dict(type='LoadImageFromFile'), - dict( - type='MultiScaleFlipAug', - img_scale=(1333, 800), - flip=False, - transforms=[ - dict(type='Resize', keep_ratio=True), - dict(type='RandomFlip'), - dict(type='Normalize', **img_norm_cfg), - dict(type='Pad', size_divisor=32), - dict(type='ImageToTensor', keys=['img']), - dict(type='Collect', keys=['img']), - ]) -] -data = dict( - imgs_per_gpu=2, - workers_per_gpu=2, - train=dict( - type=dataset_type, - ann_file=data_root 
+ 'annotations/instances_train2017.json', - img_prefix=data_root + 'train2017/', - pipeline=train_pipeline), - val=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_val2017.json', - img_prefix=data_root + 'val2017/', - pipeline=test_pipeline), - test=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_val2017.json', - img_prefix=data_root + 'val2017/', - pipeline=test_pipeline)) -evaluation = dict(interval=1, metric='bbox') -# optimizer -optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001) -optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) -# learning policy -lr_config = dict( - policy='step', - warmup='linear', - warmup_iters=500, - warmup_ratio=1.0 / 3, - step=[8, 11]) -checkpoint_config = dict(interval=1) -# yapf:disable -log_config = dict( - interval=50, - hooks=[ - dict(type='TextLoggerHook'), - # dict(type='TensorboardLoggerHook') - ]) -# yapf:enable -# runtime settings -total_epochs = 12 -dist_params = dict(backend='nccl') -log_level = 'INFO' -work_dir = './work_dirs/ga_retinanet_r50_caffe_fpn_1x' -load_from = None -resume_from = None -workflow = [('train', 1)] diff --git a/configs/guided_anchoring/ga_retinanet_r50_caffe_fpn_1x_coco.py b/configs/guided_anchoring/ga_retinanet_r50_caffe_fpn_1x_coco.py new file mode 100644 index 0000000000000000000000000000000000000000..63da4ec1de981fcc452c7f3c78aa09a9b17833e4 --- /dev/null +++ b/configs/guided_anchoring/ga_retinanet_r50_caffe_fpn_1x_coco.py @@ -0,0 +1,51 @@ +_base_ = '../retinanet/retinanet_r50_caffe_fpn_1x_coco.py' +model = dict( + bbox_head=dict( + _delete_=True, + type='GARetinaHead', + num_classes=81, + in_channels=256, + stacked_convs=4, + feat_channels=256, + octave_base_scale=4, + scales_per_octave=3, + octave_ratios=[0.5, 1.0, 2.0], + anchor_strides=[8, 16, 32, 64, 128], + anchor_base_sizes=None, + anchoring_means=[.0, .0, .0, .0], + anchoring_stds=[1.0, 1.0, 1.0, 1.0], + target_means=(.0, .0, .0, .0), + target_stds=[1.0, 
1.0, 1.0, 1.0], + loc_filter_thr=0.01, + loss_loc=dict( + type='FocalLoss', + use_sigmoid=True, + gamma=2.0, + alpha=0.25, + loss_weight=1.0), + loss_shape=dict(type='BoundedIoULoss', beta=0.2, loss_weight=1.0), + loss_cls=dict( + type='FocalLoss', + use_sigmoid=True, + gamma=2.0, + alpha=0.25, + loss_weight=1.0), + loss_bbox=dict(type='SmoothL1Loss', beta=0.04, loss_weight=1.0))) +# training and testing settings +train_cfg = dict( + ga_assigner=dict( + type='ApproxMaxIoUAssigner', + pos_iou_thr=0.5, + neg_iou_thr=0.4, + min_pos_iou=0.4, + ignore_iof_thr=-1), + ga_sampler=dict( + type='RandomSampler', + num=256, + pos_fraction=0.5, + neg_pos_ub=-1, + add_gt_as_proposals=False), + assigner=dict(neg_iou_thr=0.5, min_pos_iou=0.0), + center_ratio=0.2, + ignore_ratio=0.5) +work_dir = './work_dirs/ga_retinanet_r50_caffe_fpn_1x' diff --git a/configs/guided_anchoring/ga_retinanet_r50_fpn_1x_coco.py b/configs/guided_anchoring/ga_retinanet_r50_fpn_1x_coco.py new file mode 100644 index 0000000000000000000000000000000000000000..35f1d33e299b1c4faa1aa7612163ecf5a614c005 --- /dev/null +++ b/configs/guided_anchoring/ga_retinanet_r50_fpn_1x_coco.py @@ -0,0 +1,51 @@ +_base_ = '../retinanet/retinanet_r50_fpn_1x_coco.py' +model = dict( + bbox_head=dict( + _delete_=True, + type='GARetinaHead', + num_classes=81, + in_channels=256, + stacked_convs=4, + feat_channels=256, + octave_base_scale=4, + scales_per_octave=3, + octave_ratios=[0.5, 1.0, 2.0], + anchor_strides=[8, 16, 32, 64, 128], + anchor_base_sizes=None, + anchoring_means=[.0, .0, .0, .0], + anchoring_stds=[1.0, 1.0, 1.0, 1.0], + target_means=(.0, .0, .0, .0), + target_stds=[1.0, 1.0, 1.0, 1.0], + loc_filter_thr=0.01, + loss_loc=dict( + type='FocalLoss', + use_sigmoid=True, + gamma=2.0, + alpha=0.25, + loss_weight=1.0), + loss_shape=dict(type='BoundedIoULoss', beta=0.2, loss_weight=1.0), + loss_cls=dict( + type='FocalLoss', + use_sigmoid=True, + gamma=2.0, + alpha=0.25, + loss_weight=1.0), + loss_bbox=dict(type='SmoothL1Loss', 
beta=0.04, loss_weight=1.0))) +# training and testing settings +train_cfg = dict( + ga_assigner=dict( + type='ApproxMaxIoUAssigner', + pos_iou_thr=0.5, + neg_iou_thr=0.4, + min_pos_iou=0.4, + ignore_iof_thr=-1), + ga_sampler=dict( + type='RandomSampler', + num=256, + pos_fraction=0.5, + neg_pos_ub=-1, + add_gt_as_proposals=False), + assigner=dict(neg_iou_thr=0.5, min_pos_iou=0.0), + center_ratio=0.2, + ignore_ratio=0.5) +work_dir = './work_dirs/ga_retinanet_r50_fpn_1x' diff --git a/configs/guided_anchoring/ga_retinanet_x101_32x4d_fpn_1x.py b/configs/guided_anchoring/ga_retinanet_x101_32x4d_fpn_1x.py deleted file mode 100644 index a47fad03aa128d24ad07522cd9a88f5698df3893..0000000000000000000000000000000000000000 --- a/configs/guided_anchoring/ga_retinanet_x101_32x4d_fpn_1x.py +++ /dev/null @@ -1,158 +0,0 @@ -# model settings -model = dict( - type='RetinaNet', - pretrained='open-mmlab://resnext101_32x4d', - backbone=dict( - type='ResNeXt', - depth=101, - groups=32, - base_width=4, - num_stages=4, - out_indices=(0, 1, 2, 3), - frozen_stages=1, - norm_cfg=dict(type='BN', requires_grad=True), - style='pytorch'), - neck=dict( - type='FPN', - in_channels=[256, 512, 1024, 2048], - out_channels=256, - start_level=1, - add_extra_convs=True, - num_outs=5), - bbox_head=dict( - type='GARetinaHead', - num_classes=81, - in_channels=256, - stacked_convs=4, - feat_channels=256, - octave_base_scale=4, - scales_per_octave=3, - octave_ratios=[0.5, 1.0, 2.0], - anchor_strides=[8, 16, 32, 64, 128], - anchor_base_sizes=None, - anchoring_means=[.0, .0, .0, .0], - anchoring_stds=[1.0, 1.0, 1.0, 1.0], - target_means=(.0, .0, .0, .0), - target_stds=[1.0, 1.0, 1.0, 1.0], - loc_filter_thr=0.01, - loss_loc=dict( - type='FocalLoss', - use_sigmoid=True, - gamma=2.0, - alpha=0.25, - loss_weight=1.0), - loss_shape=dict(type='BoundedIoULoss', beta=0.2, loss_weight=1.0), - loss_cls=dict( - type='FocalLoss', - use_sigmoid=True, - gamma=2.0, - alpha=0.25, - loss_weight=1.0), - 
loss_bbox=dict(type='SmoothL1Loss', beta=0.04, loss_weight=1.0))) -# training and testing settings -train_cfg = dict( - ga_assigner=dict( - type='ApproxMaxIoUAssigner', - pos_iou_thr=0.5, - neg_iou_thr=0.4, - min_pos_iou=0.4, - ignore_iof_thr=-1), - ga_sampler=dict( - type='RandomSampler', - num=256, - pos_fraction=0.5, - neg_pos_ub=-1, - add_gt_as_proposals=False), - assigner=dict( - type='MaxIoUAssigner', - pos_iou_thr=0.5, - neg_iou_thr=0.5, - min_pos_iou=0.0, - ignore_iof_thr=-1), - allowed_border=-1, - pos_weight=-1, - center_ratio=0.2, - ignore_ratio=0.5, - debug=False) -test_cfg = dict( - nms_pre=1000, - min_bbox_size=0, - score_thr=0.05, - nms=dict(type='nms', iou_thr=0.5), - max_per_img=100) -# dataset settings -dataset_type = 'CocoDataset' -data_root = 'data/coco/' -img_norm_cfg = dict( - mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) -train_pipeline = [ - dict(type='LoadImageFromFile'), - dict(type='LoadAnnotations', with_bbox=True), - dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), - dict(type='RandomFlip', flip_ratio=0.5), - dict(type='Normalize', **img_norm_cfg), - dict(type='Pad', size_divisor=32), - dict(type='DefaultFormatBundle'), - dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']), -] -test_pipeline = [ - dict(type='LoadImageFromFile'), - dict( - type='MultiScaleFlipAug', - img_scale=(1333, 800), - flip=False, - transforms=[ - dict(type='Resize', keep_ratio=True), - dict(type='RandomFlip'), - dict(type='Normalize', **img_norm_cfg), - dict(type='Pad', size_divisor=32), - dict(type='ImageToTensor', keys=['img']), - dict(type='Collect', keys=['img']), - ]) -] -data = dict( - imgs_per_gpu=2, - workers_per_gpu=2, - train=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_train2017.json', - img_prefix=data_root + 'train2017/', - pipeline=train_pipeline), - val=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_val2017.json', - img_prefix=data_root + 
'val2017/', - pipeline=test_pipeline), - test=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_val2017.json', - img_prefix=data_root + 'val2017/', - pipeline=test_pipeline)) -evaluation = dict(interval=1, metric='bbox') -# optimizer -optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001) -optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) -# learning policy -lr_config = dict( - policy='step', - warmup='linear', - warmup_iters=500, - warmup_ratio=1.0 / 3, - step=[8, 11]) -checkpoint_config = dict(interval=1) -# yapf:disable -log_config = dict( - interval=50, - hooks=[ - dict(type='TextLoggerHook'), - # dict(type='TensorboardLoggerHook') - ]) -# yapf:enable -# runtime settings -total_epochs = 12 -dist_params = dict(backend='nccl') -log_level = 'INFO' -work_dir = './work_dirs/ga_retinanet_x101_32x4d_fpn_1x' -load_from = None -resume_from = None -workflow = [('train', 1)] diff --git a/configs/guided_anchoring/ga_retinanet_x101_32x4d_fpn_1x_coco.py b/configs/guided_anchoring/ga_retinanet_x101_32x4d_fpn_1x_coco.py new file mode 100644 index 0000000000000000000000000000000000000000..8889358e6bd77432529252ea9d1c8ccc3f0584cc --- /dev/null +++ b/configs/guided_anchoring/ga_retinanet_x101_32x4d_fpn_1x_coco.py @@ -0,0 +1,14 @@ +_base_ = './ga_retinanet_r50_fpn_1x_coco.py' +model = dict( + pretrained='open-mmlab://resnext101_32x4d', + backbone=dict( + type='ResNeXt', + depth=101, + groups=32, + base_width=4, + num_stages=4, + out_indices=(0, 1, 2, 3), + frozen_stages=1, + norm_cfg=dict(type='BN', requires_grad=True), + style='pytorch')) +work_dir = './work_dirs/ga_retinanet_x101_32x4d_fpn_1x' diff --git a/configs/guided_anchoring/ga_rpn_r101_caffe_rpn_1x.py b/configs/guided_anchoring/ga_rpn_r101_caffe_rpn_1x.py deleted file mode 100644 index 6629f12bb8d24916e89be2923aef6d5abf3a6497..0000000000000000000000000000000000000000 --- a/configs/guided_anchoring/ga_rpn_r101_caffe_rpn_1x.py +++ /dev/null @@ -1,158 +0,0 @@ -# model 
settings -model = dict( - type='RPN', - pretrained='open-mmlab://resnet101_caffe', - backbone=dict( - type='ResNet', - depth=101, - num_stages=4, - out_indices=(0, 1, 2, 3), - frozen_stages=1, - norm_cfg=dict(type='BN', requires_grad=False), - norm_eval=True, - style='caffe'), - neck=dict( - type='FPN', - in_channels=[256, 512, 1024, 2048], - out_channels=256, - num_outs=5), - rpn_head=dict( - type='GARPNHead', - in_channels=256, - feat_channels=256, - octave_base_scale=8, - scales_per_octave=3, - octave_ratios=[0.5, 1.0, 2.0], - anchor_strides=[4, 8, 16, 32, 64], - anchor_base_sizes=None, - anchoring_means=[.0, .0, .0, .0], - anchoring_stds=[0.07, 0.07, 0.14, 0.14], - target_means=(.0, .0, .0, .0), - target_stds=[0.07, 0.07, 0.11, 0.11], - loc_filter_thr=0.01, - loss_loc=dict( - type='FocalLoss', - use_sigmoid=True, - gamma=2.0, - alpha=0.25, - loss_weight=1.0), - loss_shape=dict(type='BoundedIoULoss', beta=0.2, loss_weight=1.0), - loss_cls=dict( - type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0), - loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0))) -# model training and testing settings -train_cfg = dict( - rpn=dict( - ga_assigner=dict( - type='ApproxMaxIoUAssigner', - pos_iou_thr=0.7, - neg_iou_thr=0.3, - min_pos_iou=0.3, - ignore_iof_thr=-1), - ga_sampler=dict( - type='RandomSampler', - num=256, - pos_fraction=0.5, - neg_pos_ub=-1, - add_gt_as_proposals=False), - assigner=dict( - type='MaxIoUAssigner', - pos_iou_thr=0.7, - neg_iou_thr=0.3, - min_pos_iou=0.3, - ignore_iof_thr=-1), - sampler=dict( - type='RandomSampler', - num=256, - pos_fraction=0.5, - neg_pos_ub=-1, - add_gt_as_proposals=False), - allowed_border=-1, - pos_weight=-1, - center_ratio=0.2, - ignore_ratio=0.5, - debug=False)) -test_cfg = dict( - rpn=dict( - nms_across_levels=False, - nms_pre=2000, - nms_post=2000, - max_num=2000, - nms_thr=0.7, - min_bbox_size=0)) -# dataset settings -dataset_type = 'CocoDataset' -data_root = 'data/coco/' -img_norm_cfg = dict( - 
mean=[102.9801, 115.9465, 122.7717], std=[1.0, 1.0, 1.0], to_rgb=False) -train_pipeline = [ - dict(type='LoadImageFromFile'), - dict(type='LoadAnnotations', with_bbox=True, with_label=False), - dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), - dict(type='RandomFlip', flip_ratio=0.5), - dict(type='Normalize', **img_norm_cfg), - dict(type='Pad', size_divisor=32), - dict(type='DefaultFormatBundle'), - dict(type='Collect', keys=['img', 'gt_bboxes']), -] -test_pipeline = [ - dict(type='LoadImageFromFile'), - dict( - type='MultiScaleFlipAug', - img_scale=(1333, 800), - flip=False, - transforms=[ - dict(type='Resize', keep_ratio=True), - dict(type='RandomFlip'), - dict(type='Normalize', **img_norm_cfg), - dict(type='Pad', size_divisor=32), - dict(type='ImageToTensor', keys=['img']), - dict(type='Collect', keys=['img']), - ]) -] -data = dict( - imgs_per_gpu=2, - workers_per_gpu=2, - train=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_train2017.json', - img_prefix=data_root + 'train2017/', - pipeline=train_pipeline), - val=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_val2017.json', - img_prefix=data_root + 'val2017/', - pipeline=test_pipeline), - test=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_val2017.json', - img_prefix=data_root + 'val2017/', - pipeline=test_pipeline)) -evaluation = dict(interval=1, metric='proposal_fast') -# optimizer -optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001) -# runner configs -optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) -lr_config = dict( - policy='step', - warmup='linear', - warmup_iters=500, - warmup_ratio=1.0 / 3, - step=[8, 11]) -checkpoint_config = dict(interval=1) -# yapf:disable -log_config = dict( - interval=50, - hooks=[ - dict(type='TextLoggerHook'), - # dict(type='TensorboardLoggerHook') - ]) -# yapf:enable -# runtime settings -total_epochs = 12 -dist_params = dict(backend='nccl') -log_level = 
'INFO' -work_dir = './work_dirs/ga_rpn_r101_caffe_fpn_1x' -load_from = None -resume_from = None -workflow = [('train', 1)] diff --git a/configs/guided_anchoring/ga_rpn_r101_caffe_rpn_1x_coco.py b/configs/guided_anchoring/ga_rpn_r101_caffe_rpn_1x_coco.py new file mode 100644 index 0000000000000000000000000000000000000000..69b91b53b941d69cc575027ed11296fa74cf8950 --- /dev/null +++ b/configs/guided_anchoring/ga_rpn_r101_caffe_rpn_1x_coco.py @@ -0,0 +1,5 @@ +_base_ = './ga_rpn_r50_caffe_fpn_1x_coco.py' +# model settings +model = dict( + pretrained='open-mmlab://resnet101_caffe', backbone=dict(depth=101)) +work_dir = './work_dirs/ga_rpn_r101_caffe_fpn_1x' diff --git a/configs/guided_anchoring/ga_rpn_r50_caffe_fpn_1x.py b/configs/guided_anchoring/ga_rpn_r50_caffe_fpn_1x.py deleted file mode 100644 index 507849e3c59a47cf089f5af957d1093902da8a7c..0000000000000000000000000000000000000000 --- a/configs/guided_anchoring/ga_rpn_r50_caffe_fpn_1x.py +++ /dev/null @@ -1,158 +0,0 @@ -# model settings -model = dict( - type='RPN', - pretrained='open-mmlab://resnet50_caffe', - backbone=dict( - type='ResNet', - depth=50, - num_stages=4, - out_indices=(0, 1, 2, 3), - frozen_stages=1, - norm_cfg=dict(type='BN', requires_grad=False), - norm_eval=True, - style='caffe'), - neck=dict( - type='FPN', - in_channels=[256, 512, 1024, 2048], - out_channels=256, - num_outs=5), - rpn_head=dict( - type='GARPNHead', - in_channels=256, - feat_channels=256, - octave_base_scale=8, - scales_per_octave=3, - octave_ratios=[0.5, 1.0, 2.0], - anchor_strides=[4, 8, 16, 32, 64], - anchor_base_sizes=None, - anchoring_means=[.0, .0, .0, .0], - anchoring_stds=[0.07, 0.07, 0.14, 0.14], - target_means=(.0, .0, .0, .0), - target_stds=[0.07, 0.07, 0.11, 0.11], - loc_filter_thr=0.01, - loss_loc=dict( - type='FocalLoss', - use_sigmoid=True, - gamma=2.0, - alpha=0.25, - loss_weight=1.0), - loss_shape=dict(type='BoundedIoULoss', beta=0.2, loss_weight=1.0), - loss_cls=dict( - type='CrossEntropyLoss', use_sigmoid=True, 
loss_weight=1.0), - loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0))) -# model training and testing settings -train_cfg = dict( - rpn=dict( - ga_assigner=dict( - type='ApproxMaxIoUAssigner', - pos_iou_thr=0.7, - neg_iou_thr=0.3, - min_pos_iou=0.3, - ignore_iof_thr=-1), - ga_sampler=dict( - type='RandomSampler', - num=256, - pos_fraction=0.5, - neg_pos_ub=-1, - add_gt_as_proposals=False), - assigner=dict( - type='MaxIoUAssigner', - pos_iou_thr=0.7, - neg_iou_thr=0.3, - min_pos_iou=0.3, - ignore_iof_thr=-1), - sampler=dict( - type='RandomSampler', - num=256, - pos_fraction=0.5, - neg_pos_ub=-1, - add_gt_as_proposals=False), - allowed_border=-1, - pos_weight=-1, - center_ratio=0.2, - ignore_ratio=0.5, - debug=False)) -test_cfg = dict( - rpn=dict( - nms_across_levels=False, - nms_pre=2000, - nms_post=2000, - max_num=2000, - nms_thr=0.7, - min_bbox_size=0)) -# dataset settings -dataset_type = 'CocoDataset' -data_root = 'data/coco/' -img_norm_cfg = dict( - mean=[102.9801, 115.9465, 122.7717], std=[1.0, 1.0, 1.0], to_rgb=False) -train_pipeline = [ - dict(type='LoadImageFromFile'), - dict(type='LoadAnnotations', with_bbox=True, with_label=False), - dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), - dict(type='RandomFlip', flip_ratio=0.5), - dict(type='Normalize', **img_norm_cfg), - dict(type='Pad', size_divisor=32), - dict(type='DefaultFormatBundle'), - dict(type='Collect', keys=['img', 'gt_bboxes']), -] -test_pipeline = [ - dict(type='LoadImageFromFile'), - dict( - type='MultiScaleFlipAug', - img_scale=(1333, 800), - flip=False, - transforms=[ - dict(type='Resize', keep_ratio=True), - dict(type='RandomFlip'), - dict(type='Normalize', **img_norm_cfg), - dict(type='Pad', size_divisor=32), - dict(type='ImageToTensor', keys=['img']), - dict(type='Collect', keys=['img']), - ]) -] -data = dict( - imgs_per_gpu=2, - workers_per_gpu=2, - train=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_train2017.json', - img_prefix=data_root 
+ 'train2017/', - pipeline=train_pipeline), - val=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_val2017.json', - img_prefix=data_root + 'val2017/', - pipeline=test_pipeline), - test=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_val2017.json', - img_prefix=data_root + 'val2017/', - pipeline=test_pipeline)) -evaluation = dict(interval=1, metric='proposal_fast') -# optimizer -optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001) -# runner configs -optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) -lr_config = dict( - policy='step', - warmup='linear', - warmup_iters=500, - warmup_ratio=1.0 / 3, - step=[8, 11]) -checkpoint_config = dict(interval=1) -# yapf:disable -log_config = dict( - interval=50, - hooks=[ - dict(type='TextLoggerHook'), - # dict(type='TensorboardLoggerHook') - ]) -# yapf:enable -# runtime settings -total_epochs = 12 -dist_params = dict(backend='nccl') -log_level = 'INFO' -work_dir = './work_dirs/ga_rpn_r50_caffe_fpn_1x' -load_from = None -resume_from = None -workflow = [('train', 1)] diff --git a/configs/guided_anchoring/ga_rpn_r50_caffe_fpn_1x_coco.py b/configs/guided_anchoring/ga_rpn_r50_caffe_fpn_1x_coco.py new file mode 100644 index 0000000000000000000000000000000000000000..56a2e343ad89ab1c402e95a4ad3fd3020db7f081 --- /dev/null +++ b/configs/guided_anchoring/ga_rpn_r50_caffe_fpn_1x_coco.py @@ -0,0 +1,46 @@ +_base_ = '../rpn/rpn_r50_caffe_fpn_1x_coco.py' +model = dict( + rpn_head=dict( + _delete_=True, + type='GARPNHead', + in_channels=256, + feat_channels=256, + octave_base_scale=8, + scales_per_octave=3, + octave_ratios=[0.5, 1.0, 2.0], + anchor_strides=[4, 8, 16, 32, 64], + anchor_base_sizes=None, + anchoring_means=[.0, .0, .0, .0], + anchoring_stds=[0.07, 0.07, 0.14, 0.14], + target_means=(.0, .0, .0, .0), + target_stds=[0.07, 0.07, 0.11, 0.11], + loc_filter_thr=0.01, + loss_loc=dict( + type='FocalLoss', + use_sigmoid=True, + gamma=2.0, + alpha=0.25, + 
loss_weight=1.0), + loss_shape=dict(type='BoundedIoULoss', beta=0.2, loss_weight=1.0), + loss_cls=dict( + type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0), + loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0))) +# model training and testing settings +train_cfg = dict( + rpn=dict( + ga_assigner=dict( + type='ApproxMaxIoUAssigner', + pos_iou_thr=0.7, + neg_iou_thr=0.3, + min_pos_iou=0.3, + ignore_iof_thr=-1), + ga_sampler=dict( + type='RandomSampler', + num=256, + pos_fraction=0.5, + neg_pos_ub=-1, + add_gt_as_proposals=False), + allowed_border=-1, + center_ratio=0.2, + ignore_ratio=0.5)) +work_dir = './work_dirs/ga_rpn_r50_caffe_fpn_1x' diff --git a/configs/guided_anchoring/ga_rpn_r50_fpn_1x_coco.py b/configs/guided_anchoring/ga_rpn_r50_fpn_1x_coco.py new file mode 100644 index 0000000000000000000000000000000000000000..682a7a7b73a1c19a8ea0cb37816047478868bdc1 --- /dev/null +++ b/configs/guided_anchoring/ga_rpn_r50_fpn_1x_coco.py @@ -0,0 +1,46 @@ +_base_ = '../rpn/rpn_r50_fpn_1x_coco.py' +model = dict( + rpn_head=dict( + _delete_=True, + type='GARPNHead', + in_channels=256, + feat_channels=256, + octave_base_scale=8, + scales_per_octave=3, + octave_ratios=[0.5, 1.0, 2.0], + anchor_strides=[4, 8, 16, 32, 64], + anchor_base_sizes=None, + anchoring_means=[.0, .0, .0, .0], + anchoring_stds=[0.07, 0.07, 0.14, 0.14], + target_means=(.0, .0, .0, .0), + target_stds=[0.07, 0.07, 0.11, 0.11], + loc_filter_thr=0.01, + loss_loc=dict( + type='FocalLoss', + use_sigmoid=True, + gamma=2.0, + alpha=0.25, + loss_weight=1.0), + loss_shape=dict(type='BoundedIoULoss', beta=0.2, loss_weight=1.0), + loss_cls=dict( + type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0), + loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0))) +# model training and testing settings +train_cfg = dict( + rpn=dict( + ga_assigner=dict( + type='ApproxMaxIoUAssigner', + pos_iou_thr=0.7, + neg_iou_thr=0.3, + min_pos_iou=0.3, + ignore_iof_thr=-1), + ga_sampler=dict( + 
type='RandomSampler', + num=256, + pos_fraction=0.5, + neg_pos_ub=-1, + add_gt_as_proposals=False), + allowed_border=-1, + center_ratio=0.2, + ignore_ratio=0.5)) +work_dir = './work_dirs/ga_rpn_r50_fpn_1x' diff --git a/configs/guided_anchoring/ga_rpn_x101_32x4d_fpn_1x.py b/configs/guided_anchoring/ga_rpn_x101_32x4d_fpn_1x.py deleted file mode 100644 index a8b00113abcef48c7c54b7f9101febacdf7a2dd0..0000000000000000000000000000000000000000 --- a/configs/guided_anchoring/ga_rpn_x101_32x4d_fpn_1x.py +++ /dev/null @@ -1,159 +0,0 @@ -# model settings -model = dict( - type='RPN', - pretrained='open-mmlab://resnext101_32x4d', - backbone=dict( - type='ResNeXt', - depth=101, - groups=32, - base_width=4, - num_stages=4, - out_indices=(0, 1, 2, 3), - frozen_stages=1, - norm_cfg=dict(type='BN', requires_grad=True), - style='pytorch'), - neck=dict( - type='FPN', - in_channels=[256, 512, 1024, 2048], - out_channels=256, - num_outs=5), - rpn_head=dict( - type='GARPNHead', - in_channels=256, - feat_channels=256, - octave_base_scale=8, - scales_per_octave=3, - octave_ratios=[0.5, 1.0, 2.0], - anchor_strides=[4, 8, 16, 32, 64], - anchor_base_sizes=None, - anchoring_means=[.0, .0, .0, .0], - anchoring_stds=[0.07, 0.07, 0.14, 0.14], - target_means=(.0, .0, .0, .0), - target_stds=[0.07, 0.07, 0.11, 0.11], - loc_filter_thr=0.01, - loss_loc=dict( - type='FocalLoss', - use_sigmoid=True, - gamma=2.0, - alpha=0.25, - loss_weight=1.0), - loss_shape=dict(type='BoundedIoULoss', beta=0.2, loss_weight=1.0), - loss_cls=dict( - type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0), - loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0))) -# model training and testing settings -train_cfg = dict( - rpn=dict( - ga_assigner=dict( - type='ApproxMaxIoUAssigner', - pos_iou_thr=0.7, - neg_iou_thr=0.3, - min_pos_iou=0.3, - ignore_iof_thr=-1), - ga_sampler=dict( - type='RandomSampler', - num=256, - pos_fraction=0.5, - neg_pos_ub=-1, - add_gt_as_proposals=False), - assigner=dict( - 
type='MaxIoUAssigner', - pos_iou_thr=0.7, - neg_iou_thr=0.3, - min_pos_iou=0.3, - ignore_iof_thr=-1), - sampler=dict( - type='RandomSampler', - num=256, - pos_fraction=0.5, - neg_pos_ub=-1, - add_gt_as_proposals=False), - allowed_border=-1, - pos_weight=-1, - center_ratio=0.2, - ignore_ratio=0.5, - debug=False)) -test_cfg = dict( - rpn=dict( - nms_across_levels=False, - nms_pre=2000, - nms_post=2000, - max_num=2000, - nms_thr=0.7, - min_bbox_size=0)) -# dataset settings -dataset_type = 'CocoDataset' -data_root = 'data/coco/' -img_norm_cfg = dict( - mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) -train_pipeline = [ - dict(type='LoadImageFromFile'), - dict(type='LoadAnnotations', with_bbox=True, with_label=False), - dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), - dict(type='RandomFlip', flip_ratio=0.5), - dict(type='Normalize', **img_norm_cfg), - dict(type='Pad', size_divisor=32), - dict(type='DefaultFormatBundle'), - dict(type='Collect', keys=['img', 'gt_bboxes']), -] -test_pipeline = [ - dict(type='LoadImageFromFile'), - dict( - type='MultiScaleFlipAug', - img_scale=(1333, 800), - flip=False, - transforms=[ - dict(type='Resize', keep_ratio=True), - dict(type='RandomFlip'), - dict(type='Normalize', **img_norm_cfg), - dict(type='Pad', size_divisor=32), - dict(type='ImageToTensor', keys=['img']), - dict(type='Collect', keys=['img']), - ]) -] -data = dict( - imgs_per_gpu=2, - workers_per_gpu=2, - train=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_train2017.json', - img_prefix=data_root + 'train2017/', - pipeline=train_pipeline), - val=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_val2017.json', - img_prefix=data_root + 'val2017/', - pipeline=test_pipeline), - test=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_val2017.json', - img_prefix=data_root + 'val2017/', - pipeline=test_pipeline)) -evaluation = dict(interval=1, metric='proposal_fast') -# 
optimizer -optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001) -# runner configs -optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) -lr_config = dict( - policy='step', - warmup='linear', - warmup_iters=500, - warmup_ratio=1.0 / 3, - step=[8, 11]) -checkpoint_config = dict(interval=1) -# yapf:disable -log_config = dict( - interval=50, - hooks=[ - dict(type='TextLoggerHook'), - # dict(type='TensorboardLoggerHook') - ]) -# yapf:enable -# runtime settings -total_epochs = 12 -dist_params = dict(backend='nccl') -log_level = 'INFO' -work_dir = './work_dirs/ga_rpn_x101_32x4d_fpn_1x' -load_from = None -resume_from = None -workflow = [('train', 1)] diff --git a/configs/guided_anchoring/ga_rpn_x101_32x4d_fpn_1x_coco.py b/configs/guided_anchoring/ga_rpn_x101_32x4d_fpn_1x_coco.py new file mode 100644 index 0000000000000000000000000000000000000000..a03dc61261e1a84a7d3df2bcec38f62074d6f7e4 --- /dev/null +++ b/configs/guided_anchoring/ga_rpn_x101_32x4d_fpn_1x_coco.py @@ -0,0 +1,14 @@ +_base_ = './ga_rpn_r50_fpn_1x_coco.py' +model = dict( + pretrained='open-mmlab://resnext101_32x4d', + backbone=dict( + type='ResNeXt', + depth=101, + groups=32, + base_width=4, + num_stages=4, + out_indices=(0, 1, 2, 3), + frozen_stages=1, + norm_cfg=dict(type='BN', requires_grad=True), + style='pytorch')) +work_dir = './work_dirs/ga_rpn_x101_32x4d_fpn_1x' diff --git a/configs/hrnet/cascade_mask_rcnn_hrnetv2p_w32_20e.py b/configs/hrnet/cascade_mask_rcnn_hrnetv2p_w32_20e.py deleted file mode 100644 index feb96f579b6cba90a2c7a0e2b1325e6cd559b72b..0000000000000000000000000000000000000000 --- a/configs/hrnet/cascade_mask_rcnn_hrnetv2p_w32_20e.py +++ /dev/null @@ -1,269 +0,0 @@ -# model settings -model = dict( - type='CascadeRCNN', - num_stages=3, - pretrained='open-mmlab://msra/hrnetv2_w32', - backbone=dict( - type='HRNet', - extra=dict( - stage1=dict( - num_modules=1, - num_branches=1, - block='BOTTLENECK', - num_blocks=(4, ), - num_channels=(64, )), - stage2=dict( - 
num_modules=1, - num_branches=2, - block='BASIC', - num_blocks=(4, 4), - num_channels=(32, 64)), - stage3=dict( - num_modules=4, - num_branches=3, - block='BASIC', - num_blocks=(4, 4, 4), - num_channels=(32, 64, 128)), - stage4=dict( - num_modules=3, - num_branches=4, - block='BASIC', - num_blocks=(4, 4, 4, 4), - num_channels=(32, 64, 128, 256)))), - neck=dict(type='HRFPN', in_channels=[32, 64, 128, 256], out_channels=256), - rpn_head=dict( - type='RPNHead', - in_channels=256, - feat_channels=256, - anchor_scales=[8], - anchor_ratios=[0.5, 1.0, 2.0], - anchor_strides=[4, 8, 16, 32, 64], - target_means=[.0, .0, .0, .0], - target_stds=[1.0, 1.0, 1.0, 1.0], - loss_cls=dict( - type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0), - loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0)), - bbox_roi_extractor=dict( - type='SingleRoIExtractor', - roi_layer=dict(type='RoIAlign', out_size=7, sample_num=2), - out_channels=256, - featmap_strides=[4, 8, 16, 32]), - bbox_head=[ - dict( - type='SharedFCBBoxHead', - num_fcs=2, - in_channels=256, - fc_out_channels=1024, - roi_feat_size=7, - num_classes=81, - target_means=[0., 0., 0., 0.], - target_stds=[0.1, 0.1, 0.2, 0.2], - reg_class_agnostic=True, - loss_cls=dict( - type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), - loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0)), - dict( - type='SharedFCBBoxHead', - num_fcs=2, - in_channels=256, - fc_out_channels=1024, - roi_feat_size=7, - num_classes=81, - target_means=[0., 0., 0., 0.], - target_stds=[0.05, 0.05, 0.1, 0.1], - reg_class_agnostic=True, - loss_cls=dict( - type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), - loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0)), - dict( - type='SharedFCBBoxHead', - num_fcs=2, - in_channels=256, - fc_out_channels=1024, - roi_feat_size=7, - num_classes=81, - target_means=[0., 0., 0., 0.], - target_stds=[0.033, 0.033, 0.067, 0.067], - reg_class_agnostic=True, - loss_cls=dict( - 
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), - loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0)) - ], - mask_roi_extractor=dict( - type='SingleRoIExtractor', - roi_layer=dict(type='RoIAlign', out_size=14, sample_num=2), - out_channels=256, - featmap_strides=[4, 8, 16, 32]), - mask_head=dict( - type='FCNMaskHead', - num_convs=4, - in_channels=256, - conv_out_channels=256, - num_classes=81, - loss_mask=dict( - type='CrossEntropyLoss', use_mask=True, loss_weight=1.0))) -# model training and testing settings -train_cfg = dict( - rpn=dict( - assigner=dict( - type='MaxIoUAssigner', - pos_iou_thr=0.7, - neg_iou_thr=0.3, - min_pos_iou=0.3, - ignore_iof_thr=-1), - sampler=dict( - type='RandomSampler', - num=256, - pos_fraction=0.5, - neg_pos_ub=-1, - add_gt_as_proposals=False), - allowed_border=0, - pos_weight=-1, - debug=False), - rpn_proposal=dict( - nms_across_levels=False, - nms_pre=2000, - nms_post=2000, - max_num=2000, - nms_thr=0.7, - min_bbox_size=0), - rcnn=[ - dict( - assigner=dict( - type='MaxIoUAssigner', - pos_iou_thr=0.5, - neg_iou_thr=0.5, - min_pos_iou=0.5, - ignore_iof_thr=-1), - sampler=dict( - type='RandomSampler', - num=512, - pos_fraction=0.25, - neg_pos_ub=-1, - add_gt_as_proposals=True), - mask_size=28, - pos_weight=-1, - debug=False), - dict( - assigner=dict( - type='MaxIoUAssigner', - pos_iou_thr=0.6, - neg_iou_thr=0.6, - min_pos_iou=0.6, - ignore_iof_thr=-1), - sampler=dict( - type='RandomSampler', - num=512, - pos_fraction=0.25, - neg_pos_ub=-1, - add_gt_as_proposals=True), - mask_size=28, - pos_weight=-1, - debug=False), - dict( - assigner=dict( - type='MaxIoUAssigner', - pos_iou_thr=0.7, - neg_iou_thr=0.7, - min_pos_iou=0.7, - ignore_iof_thr=-1), - sampler=dict( - type='RandomSampler', - num=512, - pos_fraction=0.25, - neg_pos_ub=-1, - add_gt_as_proposals=True), - mask_size=28, - pos_weight=-1, - debug=False) - ], - stage_loss_weights=[1, 0.5, 0.25]) -test_cfg = dict( - rpn=dict( - nms_across_levels=False, - 
nms_pre=1000, - nms_post=1000, - max_num=1000, - nms_thr=0.7, - min_bbox_size=0), - rcnn=dict( - score_thr=0.05, - nms=dict(type='nms', iou_thr=0.5), - max_per_img=100, - mask_thr_binary=0.5)) -# dataset settings -dataset_type = 'CocoDataset' -data_root = 'data/coco/' -img_norm_cfg = dict( - mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) -train_pipeline = [ - dict(type='LoadImageFromFile'), - dict(type='LoadAnnotations', with_bbox=True, with_mask=True), - dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), - dict(type='RandomFlip', flip_ratio=0.5), - dict(type='Normalize', **img_norm_cfg), - dict(type='Pad', size_divisor=32), - dict(type='DefaultFormatBundle'), - dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']), -] -test_pipeline = [ - dict(type='LoadImageFromFile'), - dict( - type='MultiScaleFlipAug', - img_scale=(1333, 800), - flip=False, - transforms=[ - dict(type='Resize', keep_ratio=True), - dict(type='RandomFlip'), - dict(type='Normalize', **img_norm_cfg), - dict(type='Pad', size_divisor=32), - dict(type='ImageToTensor', keys=['img']), - dict(type='Collect', keys=['img']), - ]) -] -data = dict( - imgs_per_gpu=2, - workers_per_gpu=2, - train=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_train2017.json', - img_prefix=data_root + 'train2017/', - pipeline=train_pipeline), - val=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_val2017.json', - img_prefix=data_root + 'val2017/', - pipeline=test_pipeline), - test=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_val2017.json', - img_prefix=data_root + 'val2017/', - pipeline=test_pipeline)) -evaluation = dict(interval=1, metric=['bbox', 'segm']) -# optimizer -optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001) -optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) -# learning policy -lr_config = dict( - policy='step', - warmup='linear', - 
warmup_iters=500, - warmup_ratio=1.0 / 3, - step=[16, 19]) -checkpoint_config = dict(interval=1) -# yapf:disable -log_config = dict( - interval=50, - hooks=[ - dict(type='TextLoggerHook'), - # dict(type='TensorboardLoggerHook') - ]) -# yapf:enable -# runtime settings -total_epochs = 20 -dist_params = dict(backend='nccl') -log_level = 'INFO' -work_dir = './work_dirs/cascade_mask_rcnn_hrnetv2p_w32_20e' -load_from = None -resume_from = None -workflow = [('train', 1)] diff --git a/configs/hrnet/cascade_mask_rcnn_hrnetv2p_w32_20e_coco.py b/configs/hrnet/cascade_mask_rcnn_hrnetv2p_w32_20e_coco.py new file mode 100644 index 0000000000000000000000000000000000000000..6886e844f6ee02f4a8bfa7327bd66b4b55106028 --- /dev/null +++ b/configs/hrnet/cascade_mask_rcnn_hrnetv2p_w32_20e_coco.py @@ -0,0 +1,40 @@ +_base_ = '../cascade_rcnn/cascade_mask_rcnn_r50_fpn_1x_coco.py' +model = dict( + pretrained='open-mmlab://msra/hrnetv2_w32', + backbone=dict( + _delete_=True, + type='HRNet', + extra=dict( + stage1=dict( + num_modules=1, + num_branches=1, + block='BOTTLENECK', + num_blocks=(4, ), + num_channels=(64, )), + stage2=dict( + num_modules=1, + num_branches=2, + block='BASIC', + num_blocks=(4, 4), + num_channels=(32, 64)), + stage3=dict( + num_modules=4, + num_branches=3, + block='BASIC', + num_blocks=(4, 4, 4), + num_channels=(32, 64, 128)), + stage4=dict( + num_modules=3, + num_branches=4, + block='BASIC', + num_blocks=(4, 4, 4, 4), + num_channels=(32, 64, 128, 256)))), + neck=dict( + _delete_=True, + type='HRFPN', + in_channels=[32, 64, 128, 256], + out_channels=256)) +# learning policy +lr_config = dict(step=[16, 19]) +total_epochs = 20 +work_dir = './work_dirs/cascade_mask_rcnn_hrnetv2p_w32_20e' diff --git a/configs/hrnet/cascade_rcnn_hrnetv2p_w32_20e.py b/configs/hrnet/cascade_rcnn_hrnetv2p_w32_20e.py deleted file mode 100644 index 7fc85b80e9262519cedab83f540fba615149464b..0000000000000000000000000000000000000000 --- a/configs/hrnet/cascade_rcnn_hrnetv2p_w32_20e.py +++ /dev/null 
@@ -1,250 +0,0 @@ -# model settings -model = dict( - type='CascadeRCNN', - num_stages=3, - pretrained='open-mmlab://msra/hrnetv2_w32', - backbone=dict( - type='HRNet', - extra=dict( - stage1=dict( - num_modules=1, - num_branches=1, - block='BOTTLENECK', - num_blocks=(4, ), - num_channels=(64, )), - stage2=dict( - num_modules=1, - num_branches=2, - block='BASIC', - num_blocks=(4, 4), - num_channels=(32, 64)), - stage3=dict( - num_modules=4, - num_branches=3, - block='BASIC', - num_blocks=(4, 4, 4), - num_channels=(32, 64, 128)), - stage4=dict( - num_modules=3, - num_branches=4, - block='BASIC', - num_blocks=(4, 4, 4, 4), - num_channels=(32, 64, 128, 256)))), - neck=dict(type='HRFPN', in_channels=[32, 64, 128, 256], out_channels=256), - rpn_head=dict( - type='RPNHead', - in_channels=256, - feat_channels=256, - anchor_scales=[8], - anchor_ratios=[0.5, 1.0, 2.0], - anchor_strides=[4, 8, 16, 32, 64], - target_means=[.0, .0, .0, .0], - target_stds=[1.0, 1.0, 1.0, 1.0], - loss_cls=dict( - type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0), - loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0)), - bbox_roi_extractor=dict( - type='SingleRoIExtractor', - roi_layer=dict(type='RoIAlign', out_size=7, sample_num=2), - out_channels=256, - featmap_strides=[4, 8, 16, 32]), - bbox_head=[ - dict( - type='SharedFCBBoxHead', - num_fcs=2, - in_channels=256, - fc_out_channels=1024, - roi_feat_size=7, - num_classes=81, - target_means=[0., 0., 0., 0.], - target_stds=[0.1, 0.1, 0.2, 0.2], - reg_class_agnostic=True, - loss_cls=dict( - type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), - loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0)), - dict( - type='SharedFCBBoxHead', - num_fcs=2, - in_channels=256, - fc_out_channels=1024, - roi_feat_size=7, - num_classes=81, - target_means=[0., 0., 0., 0.], - target_stds=[0.05, 0.05, 0.1, 0.1], - reg_class_agnostic=True, - loss_cls=dict( - type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), - 
loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0)), - dict( - type='SharedFCBBoxHead', - num_fcs=2, - in_channels=256, - fc_out_channels=1024, - roi_feat_size=7, - num_classes=81, - target_means=[0., 0., 0., 0.], - target_stds=[0.033, 0.033, 0.067, 0.067], - reg_class_agnostic=True, - loss_cls=dict( - type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), - loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0)), - ]) -# model training and testing settings -train_cfg = dict( - rpn=dict( - assigner=dict( - type='MaxIoUAssigner', - pos_iou_thr=0.7, - neg_iou_thr=0.3, - min_pos_iou=0.3, - ignore_iof_thr=-1), - sampler=dict( - type='RandomSampler', - num=256, - pos_fraction=0.5, - neg_pos_ub=-1, - add_gt_as_proposals=False), - allowed_border=0, - pos_weight=-1, - debug=False), - rpn_proposal=dict( - nms_across_levels=False, - nms_pre=2000, - nms_post=2000, - max_num=2000, - nms_thr=0.7, - min_bbox_size=0), - rcnn=[ - dict( - assigner=dict( - type='MaxIoUAssigner', - pos_iou_thr=0.5, - neg_iou_thr=0.5, - min_pos_iou=0.5, - ignore_iof_thr=-1), - sampler=dict( - type='RandomSampler', - num=512, - pos_fraction=0.25, - neg_pos_ub=-1, - add_gt_as_proposals=True), - pos_weight=-1, - debug=False), - dict( - assigner=dict( - type='MaxIoUAssigner', - pos_iou_thr=0.6, - neg_iou_thr=0.6, - min_pos_iou=0.6, - ignore_iof_thr=-1), - sampler=dict( - type='RandomSampler', - num=512, - pos_fraction=0.25, - neg_pos_ub=-1, - add_gt_as_proposals=True), - pos_weight=-1, - debug=False), - dict( - assigner=dict( - type='MaxIoUAssigner', - pos_iou_thr=0.7, - neg_iou_thr=0.7, - min_pos_iou=0.7, - ignore_iof_thr=-1), - sampler=dict( - type='RandomSampler', - num=512, - pos_fraction=0.25, - neg_pos_ub=-1, - add_gt_as_proposals=True), - pos_weight=-1, - debug=False) - ], - stage_loss_weights=[1, 0.5, 0.25]) -test_cfg = dict( - rpn=dict( - nms_across_levels=False, - nms_pre=1000, - nms_post=1000, - max_num=1000, - nms_thr=0.7, - min_bbox_size=0), - rcnn=dict( - 
score_thr=0.05, nms=dict(type='nms', iou_thr=0.5), max_per_img=100)) -# dataset settings -dataset_type = 'CocoDataset' -data_root = 'data/coco/' -img_norm_cfg = dict( - mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) -train_pipeline = [ - dict(type='LoadImageFromFile'), - dict(type='LoadAnnotations', with_bbox=True), - dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), - dict(type='RandomFlip', flip_ratio=0.5), - dict(type='Normalize', **img_norm_cfg), - dict(type='Pad', size_divisor=32), - dict(type='DefaultFormatBundle'), - dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']), -] -test_pipeline = [ - dict(type='LoadImageFromFile'), - dict( - type='MultiScaleFlipAug', - img_scale=(1333, 800), - flip=False, - transforms=[ - dict(type='Resize', keep_ratio=True), - dict(type='RandomFlip'), - dict(type='Normalize', **img_norm_cfg), - dict(type='Pad', size_divisor=32), - dict(type='ImageToTensor', keys=['img']), - dict(type='Collect', keys=['img']), - ]) -] -data = dict( - imgs_per_gpu=2, - workers_per_gpu=2, - train=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_train2017.json', - img_prefix=data_root + 'train2017/', - pipeline=train_pipeline), - val=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_val2017.json', - img_prefix=data_root + 'val2017/', - pipeline=test_pipeline), - test=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_val2017.json', - img_prefix=data_root + 'val2017/', - pipeline=test_pipeline)) -evaluation = dict(interval=1, metric='bbox') -# optimizer -optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001) -optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) -# learning policy -lr_config = dict( - policy='step', - warmup='linear', - warmup_iters=500, - warmup_ratio=1.0 / 3, - step=[16, 19]) -checkpoint_config = dict(interval=1) -# yapf:disable -log_config = dict( - interval=50, - hooks=[ - 
dict(type='TextLoggerHook'), - # dict(type='TensorboardLoggerHook') - ]) -# yapf:enable -# runtime settings -total_epochs = 20 -dist_params = dict(backend='nccl') -log_level = 'INFO' -work_dir = './work_dirs/cascade_rcnn_hrnetv2p_w32' -load_from = None -resume_from = None -workflow = [('train', 1)] diff --git a/configs/hrnet/cascade_rcnn_hrnetv2p_w32_20e_coco.py b/configs/hrnet/cascade_rcnn_hrnetv2p_w32_20e_coco.py new file mode 100644 index 0000000000000000000000000000000000000000..f0b4791393abfdcf117e11d095a49ff23a5093f5 --- /dev/null +++ b/configs/hrnet/cascade_rcnn_hrnetv2p_w32_20e_coco.py @@ -0,0 +1,40 @@ +_base_ = '../cascade_rcnn/cascade_rcnn_r50_fpn_1x_coco.py' +model = dict( + pretrained='open-mmlab://msra/hrnetv2_w32', + backbone=dict( + _delete_=True, + type='HRNet', + extra=dict( + stage1=dict( + num_modules=1, + num_branches=1, + block='BOTTLENECK', + num_blocks=(4, ), + num_channels=(64, )), + stage2=dict( + num_modules=1, + num_branches=2, + block='BASIC', + num_blocks=(4, 4), + num_channels=(32, 64)), + stage3=dict( + num_modules=4, + num_branches=3, + block='BASIC', + num_blocks=(4, 4, 4), + num_channels=(32, 64, 128)), + stage4=dict( + num_modules=3, + num_branches=4, + block='BASIC', + num_blocks=(4, 4, 4, 4), + num_channels=(32, 64, 128, 256)))), + neck=dict( + _delete_=True, + type='HRFPN', + in_channels=[32, 64, 128, 256], + out_channels=256)) +# learning policy +lr_config = dict(step=[16, 19]) +total_epochs = 20 +work_dir = './work_dirs/cascade_rcnn_hrnetv2p_w32' diff --git a/configs/hrnet/faster_rcnn_hrnetv2p_w18_1x.py b/configs/hrnet/faster_rcnn_hrnetv2p_w18_1x.py deleted file mode 100644 index 44a0c46e62f8acb231b1ec35da47c7d5e5570b8c..0000000000000000000000000000000000000000 --- a/configs/hrnet/faster_rcnn_hrnetv2p_w18_1x.py +++ /dev/null @@ -1,191 +0,0 @@ -# model settings -model = dict( - type='FasterRCNN', - pretrained='open-mmlab://msra/hrnetv2_w18', - backbone=dict( - type='HRNet', - extra=dict( - stage1=dict( - num_modules=1, - 
num_branches=1, - block='BOTTLENECK', - num_blocks=(4, ), - num_channels=(64, )), - stage2=dict( - num_modules=1, - num_branches=2, - block='BASIC', - num_blocks=(4, 4), - num_channels=(18, 36)), - stage3=dict( - num_modules=4, - num_branches=3, - block='BASIC', - num_blocks=(4, 4, 4), - num_channels=(18, 36, 72)), - stage4=dict( - num_modules=3, - num_branches=4, - block='BASIC', - num_blocks=(4, 4, 4, 4), - num_channels=(18, 36, 72, 144)))), - neck=dict(type='HRFPN', in_channels=[18, 36, 72, 144], out_channels=256), - rpn_head=dict( - type='RPNHead', - in_channels=256, - feat_channels=256, - anchor_scales=[8], - anchor_ratios=[0.5, 1.0, 2.0], - anchor_strides=[4, 8, 16, 32, 64], - target_means=[.0, .0, .0, .0], - target_stds=[1.0, 1.0, 1.0, 1.0], - loss_cls=dict( - type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0), - loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0)), - bbox_roi_extractor=dict( - type='SingleRoIExtractor', - roi_layer=dict(type='RoIAlign', out_size=7, sample_num=2), - out_channels=256, - featmap_strides=[4, 8, 16, 32]), - bbox_head=dict( - type='SharedFCBBoxHead', - num_fcs=2, - in_channels=256, - fc_out_channels=1024, - roi_feat_size=7, - num_classes=81, - target_means=[0., 0., 0., 0.], - target_stds=[0.1, 0.1, 0.2, 0.2], - reg_class_agnostic=False, - loss_cls=dict( - type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), - loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0))) -# model training and testing settings -train_cfg = dict( - rpn=dict( - assigner=dict( - type='MaxIoUAssigner', - pos_iou_thr=0.7, - neg_iou_thr=0.3, - min_pos_iou=0.3, - ignore_iof_thr=-1), - sampler=dict( - type='RandomSampler', - num=256, - pos_fraction=0.5, - neg_pos_ub=-1, - add_gt_as_proposals=False), - allowed_border=0, - pos_weight=-1, - debug=False), - rpn_proposal=dict( - nms_across_levels=False, - nms_pre=2000, - nms_post=2000, - max_num=2000, - nms_thr=0.7, - min_bbox_size=0), - rcnn=dict( - assigner=dict( - 
type='MaxIoUAssigner', - pos_iou_thr=0.5, - neg_iou_thr=0.5, - min_pos_iou=0.5, - ignore_iof_thr=-1), - sampler=dict( - type='RandomSampler', - num=512, - pos_fraction=0.25, - neg_pos_ub=-1, - add_gt_as_proposals=True), - pos_weight=-1, - debug=False)) -test_cfg = dict( - rpn=dict( - nms_across_levels=False, - nms_pre=1000, - nms_post=1000, - max_num=1000, - nms_thr=0.7, - min_bbox_size=0), - rcnn=dict( - score_thr=0.05, nms=dict(type='nms', iou_thr=0.5), max_per_img=100) - # soft-nms is also supported for rcnn testing - # e.g., nms=dict(type='soft_nms', iou_thr=0.5, min_score=0.05) -) -# dataset settings -dataset_type = 'CocoDataset' -data_root = 'data/coco/' -img_norm_cfg = dict( - mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) -train_pipeline = [ - dict(type='LoadImageFromFile'), - dict(type='LoadAnnotations', with_bbox=True), - dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), - dict(type='RandomFlip', flip_ratio=0.5), - dict(type='Normalize', **img_norm_cfg), - dict(type='Pad', size_divisor=32), - dict(type='DefaultFormatBundle'), - dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']), -] -test_pipeline = [ - dict(type='LoadImageFromFile'), - dict( - type='MultiScaleFlipAug', - img_scale=(1333, 800), - flip=False, - transforms=[ - dict(type='Resize', keep_ratio=True), - dict(type='RandomFlip'), - dict(type='Normalize', **img_norm_cfg), - dict(type='Pad', size_divisor=32), - dict(type='ImageToTensor', keys=['img']), - dict(type='Collect', keys=['img']), - ]) -] -data = dict( - imgs_per_gpu=2, - workers_per_gpu=2, - train=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_train2017.json', - img_prefix=data_root + 'train2017/', - pipeline=train_pipeline), - val=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_val2017.json', - img_prefix=data_root + 'val2017/', - pipeline=test_pipeline), - test=dict( - type=dataset_type, - ann_file=data_root + 
'annotations/instances_val2017.json', - img_prefix=data_root + 'val2017/', - pipeline=test_pipeline)) -evaluation = dict(interval=1, metric='bbox') -# optimizer -optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001) -optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) -# learning policy -lr_config = dict( - policy='step', - warmup='linear', - warmup_iters=500, - warmup_ratio=1.0 / 3, - step=[8, 11]) -checkpoint_config = dict(interval=1) -# yapf:disable -log_config = dict( - interval=50, - hooks=[ - dict(type='TextLoggerHook'), - # dict(type='TensorboardLoggerHook') - ]) -# yapf:enable -# runtime settings -total_epochs = 12 -dist_params = dict(backend='nccl') -log_level = 'INFO' -work_dir = './work_dirs/faster_rcnn_hrnetv2p_w18_1x' -load_from = None -resume_from = None -workflow = [('train', 1)] diff --git a/configs/hrnet/faster_rcnn_hrnetv2p_w18_1x_coco.py b/configs/hrnet/faster_rcnn_hrnetv2p_w18_1x_coco.py new file mode 100644 index 0000000000000000000000000000000000000000..58de31a63d218e99ec38f4653aedba978c12ea4c --- /dev/null +++ b/configs/hrnet/faster_rcnn_hrnetv2p_w18_1x_coco.py @@ -0,0 +1,11 @@ +_base_ = './faster_rcnn_hrnetv2p_w32_1x_coco.py' +# model settings +model = dict( + pretrained='open-mmlab://msra/hrnetv2_w18', + backbone=dict( + extra=dict( + stage2=dict(num_channels=(18, 36)), + stage3=dict(num_channels=(18, 36, 72)), + stage4=dict(num_channels=(18, 36, 72, 144)))), + neck=dict(type='HRFPN', in_channels=[18, 36, 72, 144], out_channels=256)) +work_dir = './work_dirs/faster_rcnn_hrnetv2p_w18_1x' diff --git a/configs/hrnet/faster_rcnn_hrnetv2p_w32_1x.py b/configs/hrnet/faster_rcnn_hrnetv2p_w32_1x.py deleted file mode 100644 index 820dda100fcaa108418444ff16025e2b55447597..0000000000000000000000000000000000000000 --- a/configs/hrnet/faster_rcnn_hrnetv2p_w32_1x.py +++ /dev/null @@ -1,191 +0,0 @@ -# model settings -model = dict( - type='FasterRCNN', - pretrained='open-mmlab://msra/hrnetv2_w32', - backbone=dict( - 
type='HRNet', - extra=dict( - stage1=dict( - num_modules=1, - num_branches=1, - block='BOTTLENECK', - num_blocks=(4, ), - num_channels=(64, )), - stage2=dict( - num_modules=1, - num_branches=2, - block='BASIC', - num_blocks=(4, 4), - num_channels=(32, 64)), - stage3=dict( - num_modules=4, - num_branches=3, - block='BASIC', - num_blocks=(4, 4, 4), - num_channels=(32, 64, 128)), - stage4=dict( - num_modules=3, - num_branches=4, - block='BASIC', - num_blocks=(4, 4, 4, 4), - num_channels=(32, 64, 128, 256)))), - neck=dict(type='HRFPN', in_channels=[32, 64, 128, 256], out_channels=256), - rpn_head=dict( - type='RPNHead', - in_channels=256, - feat_channels=256, - anchor_scales=[8], - anchor_ratios=[0.5, 1.0, 2.0], - anchor_strides=[4, 8, 16, 32, 64], - target_means=[.0, .0, .0, .0], - target_stds=[1.0, 1.0, 1.0, 1.0], - loss_cls=dict( - type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0), - loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0)), - bbox_roi_extractor=dict( - type='SingleRoIExtractor', - roi_layer=dict(type='RoIAlign', out_size=7, sample_num=2), - out_channels=256, - featmap_strides=[4, 8, 16, 32]), - bbox_head=dict( - type='SharedFCBBoxHead', - num_fcs=2, - in_channels=256, - fc_out_channels=1024, - roi_feat_size=7, - num_classes=81, - target_means=[0., 0., 0., 0.], - target_stds=[0.1, 0.1, 0.2, 0.2], - reg_class_agnostic=False, - loss_cls=dict( - type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), - loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0))) -# model training and testing settings -train_cfg = dict( - rpn=dict( - assigner=dict( - type='MaxIoUAssigner', - pos_iou_thr=0.7, - neg_iou_thr=0.3, - min_pos_iou=0.3, - ignore_iof_thr=-1), - sampler=dict( - type='RandomSampler', - num=256, - pos_fraction=0.5, - neg_pos_ub=-1, - add_gt_as_proposals=False), - allowed_border=0, - pos_weight=-1, - debug=False), - rpn_proposal=dict( - nms_across_levels=False, - nms_pre=2000, - nms_post=2000, - max_num=2000, - 
nms_thr=0.7, - min_bbox_size=0), - rcnn=dict( - assigner=dict( - type='MaxIoUAssigner', - pos_iou_thr=0.5, - neg_iou_thr=0.5, - min_pos_iou=0.5, - ignore_iof_thr=-1), - sampler=dict( - type='RandomSampler', - num=512, - pos_fraction=0.25, - neg_pos_ub=-1, - add_gt_as_proposals=True), - pos_weight=-1, - debug=False)) -test_cfg = dict( - rpn=dict( - nms_across_levels=False, - nms_pre=1000, - nms_post=1000, - max_num=1000, - nms_thr=0.7, - min_bbox_size=0), - rcnn=dict( - score_thr=0.05, nms=dict(type='nms', iou_thr=0.5), max_per_img=100) - # soft-nms is also supported for rcnn testing - # e.g., nms=dict(type='soft_nms', iou_thr=0.5, min_score=0.05) -) -# dataset settings -dataset_type = 'CocoDataset' -data_root = 'data/coco/' -img_norm_cfg = dict( - mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) -train_pipeline = [ - dict(type='LoadImageFromFile'), - dict(type='LoadAnnotations', with_bbox=True), - dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), - dict(type='RandomFlip', flip_ratio=0.5), - dict(type='Normalize', **img_norm_cfg), - dict(type='Pad', size_divisor=32), - dict(type='DefaultFormatBundle'), - dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']), -] -test_pipeline = [ - dict(type='LoadImageFromFile'), - dict( - type='MultiScaleFlipAug', - img_scale=(1333, 800), - flip=False, - transforms=[ - dict(type='Resize', keep_ratio=True), - dict(type='RandomFlip'), - dict(type='Normalize', **img_norm_cfg), - dict(type='Pad', size_divisor=32), - dict(type='ImageToTensor', keys=['img']), - dict(type='Collect', keys=['img']), - ]) -] -data = dict( - imgs_per_gpu=2, - workers_per_gpu=2, - train=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_train2017.json', - img_prefix=data_root + 'train2017/', - pipeline=train_pipeline), - val=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_val2017.json', - img_prefix=data_root + 'val2017/', - pipeline=test_pipeline), - test=dict( - 
type=dataset_type, - ann_file=data_root + 'annotations/instances_val2017.json', - img_prefix=data_root + 'val2017/', - pipeline=test_pipeline)) -evaluation = dict(interval=1, metric='bbox') -# optimizer -optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001) -optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) -# learning policy -lr_config = dict( - policy='step', - warmup='linear', - warmup_iters=500, - warmup_ratio=1.0 / 3, - step=[8, 11]) -checkpoint_config = dict(interval=1) -# yapf:disable -log_config = dict( - interval=50, - hooks=[ - dict(type='TextLoggerHook'), - # dict(type='TensorboardLoggerHook') - ]) -# yapf:enable -# runtime settings -total_epochs = 12 -dist_params = dict(backend='nccl') -log_level = 'INFO' -work_dir = './work_dirs/faster_rcnn_hrnetv2p_w32_1x' -load_from = None -resume_from = None -workflow = [('train', 1)] diff --git a/configs/hrnet/faster_rcnn_hrnetv2p_w32_1x_coco.py b/configs/hrnet/faster_rcnn_hrnetv2p_w32_1x_coco.py new file mode 100644 index 0000000000000000000000000000000000000000..cb56ca63bdc8b1621390f45e3ac776733a51f4d8 --- /dev/null +++ b/configs/hrnet/faster_rcnn_hrnetv2p_w32_1x_coco.py @@ -0,0 +1,37 @@ +_base_ = '../faster_rcnn/faster_rcnn_r50_fpn_1x_coco.py' +model = dict( + pretrained='open-mmlab://msra/hrnetv2_w32', + backbone=dict( + _delete_=True, + type='HRNet', + extra=dict( + stage1=dict( + num_modules=1, + num_branches=1, + block='BOTTLENECK', + num_blocks=(4, ), + num_channels=(64, )), + stage2=dict( + num_modules=1, + num_branches=2, + block='BASIC', + num_blocks=(4, 4), + num_channels=(32, 64)), + stage3=dict( + num_modules=4, + num_branches=3, + block='BASIC', + num_blocks=(4, 4, 4), + num_channels=(32, 64, 128)), + stage4=dict( + num_modules=3, + num_branches=4, + block='BASIC', + num_blocks=(4, 4, 4, 4), + num_channels=(32, 64, 128, 256)))), + neck=dict( + _delete_=True, + type='HRFPN', + in_channels=[32, 64, 128, 256], + out_channels=256)) +work_dir = 
'./work_dirs/faster_rcnn_hrnetv2p_w32_1x' diff --git a/configs/hrnet/faster_rcnn_hrnetv2p_w40_1x.py b/configs/hrnet/faster_rcnn_hrnetv2p_w40_1x.py deleted file mode 100644 index 6a9bcf03b8b497e7ce39b49e46a29fcb91bfd3f7..0000000000000000000000000000000000000000 --- a/configs/hrnet/faster_rcnn_hrnetv2p_w40_1x.py +++ /dev/null @@ -1,191 +0,0 @@ -# model settings -model = dict( - type='FasterRCNN', - pretrained='open-mmlab://msra/hrnetv2_w40', - backbone=dict( - type='HRNet', - extra=dict( - stage1=dict( - num_modules=1, - num_branches=1, - block='BOTTLENECK', - num_blocks=(4, ), - num_channels=(64, )), - stage2=dict( - num_modules=1, - num_branches=2, - block='BASIC', - num_blocks=(4, 4), - num_channels=(40, 80)), - stage3=dict( - num_modules=4, - num_branches=3, - block='BASIC', - num_blocks=(4, 4, 4), - num_channels=(40, 80, 160)), - stage4=dict( - num_modules=3, - num_branches=4, - block='BASIC', - num_blocks=(4, 4, 4, 4), - num_channels=(40, 80, 160, 320)))), - neck=dict(type='HRFPN', in_channels=[40, 80, 160, 320], out_channels=256), - rpn_head=dict( - type='RPNHead', - in_channels=256, - feat_channels=256, - anchor_scales=[8], - anchor_ratios=[0.5, 1.0, 2.0], - anchor_strides=[4, 8, 16, 32, 64], - target_means=[.0, .0, .0, .0], - target_stds=[1.0, 1.0, 1.0, 1.0], - loss_cls=dict( - type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0), - loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0)), - bbox_roi_extractor=dict( - type='SingleRoIExtractor', - roi_layer=dict(type='RoIAlign', out_size=7, sample_num=2), - out_channels=256, - featmap_strides=[4, 8, 16, 32]), - bbox_head=dict( - type='SharedFCBBoxHead', - num_fcs=2, - in_channels=256, - fc_out_channels=1024, - roi_feat_size=7, - num_classes=81, - target_means=[0., 0., 0., 0.], - target_stds=[0.1, 0.1, 0.2, 0.2], - reg_class_agnostic=False, - loss_cls=dict( - type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), - loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0))) -# 
model training and testing settings -train_cfg = dict( - rpn=dict( - assigner=dict( - type='MaxIoUAssigner', - pos_iou_thr=0.7, - neg_iou_thr=0.3, - min_pos_iou=0.3, - ignore_iof_thr=-1), - sampler=dict( - type='RandomSampler', - num=256, - pos_fraction=0.5, - neg_pos_ub=-1, - add_gt_as_proposals=False), - allowed_border=0, - pos_weight=-1, - debug=False), - rpn_proposal=dict( - nms_across_levels=False, - nms_pre=2000, - nms_post=2000, - max_num=2000, - nms_thr=0.7, - min_bbox_size=0), - rcnn=dict( - assigner=dict( - type='MaxIoUAssigner', - pos_iou_thr=0.5, - neg_iou_thr=0.5, - min_pos_iou=0.5, - ignore_iof_thr=-1), - sampler=dict( - type='RandomSampler', - num=512, - pos_fraction=0.25, - neg_pos_ub=-1, - add_gt_as_proposals=True), - pos_weight=-1, - debug=False)) -test_cfg = dict( - rpn=dict( - nms_across_levels=False, - nms_pre=1000, - nms_post=1000, - max_num=1000, - nms_thr=0.7, - min_bbox_size=0), - rcnn=dict( - score_thr=0.05, nms=dict(type='nms', iou_thr=0.5), max_per_img=100) - # soft-nms is also supported for rcnn testing - # e.g., nms=dict(type='soft_nms', iou_thr=0.5, min_score=0.05) -) -# dataset settings -dataset_type = 'CocoDataset' -data_root = 'data/coco/' -img_norm_cfg = dict( - mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) -train_pipeline = [ - dict(type='LoadImageFromFile'), - dict(type='LoadAnnotations', with_bbox=True), - dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), - dict(type='RandomFlip', flip_ratio=0.5), - dict(type='Normalize', **img_norm_cfg), - dict(type='Pad', size_divisor=32), - dict(type='DefaultFormatBundle'), - dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']), -] -test_pipeline = [ - dict(type='LoadImageFromFile'), - dict( - type='MultiScaleFlipAug', - img_scale=(1333, 800), - flip=False, - transforms=[ - dict(type='Resize', keep_ratio=True), - dict(type='RandomFlip'), - dict(type='Normalize', **img_norm_cfg), - dict(type='Pad', size_divisor=32), - dict(type='ImageToTensor', 
keys=['img']), - dict(type='Collect', keys=['img']), - ]) -] -data = dict( - imgs_per_gpu=2, - workers_per_gpu=2, - train=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_train2017.json', - img_prefix=data_root + 'train2017/', - pipeline=train_pipeline), - val=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_val2017.json', - img_prefix=data_root + 'val2017/', - pipeline=test_pipeline), - test=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_val2017.json', - img_prefix=data_root + 'val2017/', - pipeline=test_pipeline)) -evaluation = dict(interval=1, metric='bbox') -# optimizer -optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001) -optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) -# learning policy -lr_config = dict( - policy='step', - warmup='linear', - warmup_iters=500, - warmup_ratio=1.0 / 3, - step=[8, 11]) -checkpoint_config = dict(interval=1) -# yapf:disable -log_config = dict( - interval=50, - hooks=[ - dict(type='TextLoggerHook'), - # dict(type='TensorboardLoggerHook') - ]) -# yapf:enable -# runtime settings -total_epochs = 12 -dist_params = dict(backend='nccl') -log_level = 'INFO' -work_dir = './work_dirs/faster_rcnn_hrnetv2p_w40_1x' -load_from = None -resume_from = None -workflow = [('train', 1)] diff --git a/configs/hrnet/faster_rcnn_hrnetv2p_w40_1x_coco.py b/configs/hrnet/faster_rcnn_hrnetv2p_w40_1x_coco.py new file mode 100644 index 0000000000000000000000000000000000000000..3445cc6bd2f034648a07f2fc6afb7e226c8f4e07 --- /dev/null +++ b/configs/hrnet/faster_rcnn_hrnetv2p_w40_1x_coco.py @@ -0,0 +1,11 @@ +_base_ = './faster_rcnn_hrnetv2p_w32_1x_coco.py' +model = dict( + pretrained='open-mmlab://msra/hrnetv2_w40', + backbone=dict( + type='HRNet', + extra=dict( + stage2=dict(num_channels=(40, 80)), + stage3=dict(num_channels=(40, 80, 160)), + stage4=dict(num_channels=(40, 80, 160, 320)))), + neck=dict(type='HRFPN', in_channels=[40, 80, 160, 320], 
out_channels=256)) +work_dir = './work_dirs/faster_rcnn_hrnetv2p_w40_1x' diff --git a/configs/hrnet/fcos_hrnetv2p_w32_gn-head_4x4_1x_coco.py b/configs/hrnet/fcos_hrnetv2p_w32_gn-head_4x4_1x_coco.py new file mode 100644 index 0000000000000000000000000000000000000000..33e1ce33538fe4f24ffbfff251844f7af8e57388 --- /dev/null +++ b/configs/hrnet/fcos_hrnetv2p_w32_gn-head_4x4_1x_coco.py @@ -0,0 +1,39 @@ +_base_ = '../fcos/fcos_r50_caffe_fpn_gn-head_4x4_1x_coco.py' +model = dict( + pretrained='open-mmlab://msra/hrnetv2_w32', + backbone=dict( + _delete_=True, + type='HRNet', + extra=dict( + stage1=dict( + num_modules=1, + num_branches=1, + block='BOTTLENECK', + num_blocks=(4, ), + num_channels=(64, )), + stage2=dict( + num_modules=1, + num_branches=2, + block='BASIC', + num_blocks=(4, 4), + num_channels=(32, 64)), + stage3=dict( + num_modules=4, + num_branches=3, + block='BASIC', + num_blocks=(4, 4, 4), + num_channels=(32, 64, 128)), + stage4=dict( + num_modules=3, + num_branches=4, + block='BASIC', + num_blocks=(4, 4, 4, 4), + num_channels=(32, 64, 128, 256)))), + neck=dict( + _delete_=True, + type='HRFPN', + in_channels=[32, 64, 128, 256], + out_channels=256, + stride=2, + num_outs=5)) +work_dir = './work_dirs/fcos_hrnetv2p_w32_gn_1x_4gpu' diff --git a/configs/hrnet/fcos_hrnetv2p_w32_gn_1x_4gpu.py b/configs/hrnet/fcos_hrnetv2p_w32_gn_1x_4gpu.py deleted file mode 100644 index e2c52250c96095d328380112f56234b3df5ec7dd..0000000000000000000000000000000000000000 --- a/configs/hrnet/fcos_hrnetv2p_w32_gn_1x_4gpu.py +++ /dev/null @@ -1,151 +0,0 @@ -# model settings -model = dict( - type='FCOS', - pretrained='open-mmlab://msra/hrnetv2_w32', - backbone=dict( - type='HRNet', - extra=dict( - stage1=dict( - num_modules=1, - num_branches=1, - block='BOTTLENECK', - num_blocks=(4, ), - num_channels=(64, )), - stage2=dict( - num_modules=1, - num_branches=2, - block='BASIC', - num_blocks=(4, 4), - num_channels=(32, 64)), - stage3=dict( - num_modules=4, - num_branches=3, - block='BASIC', - 
num_blocks=(4, 4, 4), - num_channels=(32, 64, 128)), - stage4=dict( - num_modules=3, - num_branches=4, - block='BASIC', - num_blocks=(4, 4, 4, 4), - num_channels=(32, 64, 128, 256)))), - neck=dict( - type='HRFPN', - in_channels=[32, 64, 128, 256], - out_channels=256, - stride=2, - num_outs=5), - bbox_head=dict( - type='FCOSHead', - num_classes=81, - in_channels=256, - stacked_convs=4, - feat_channels=256, - strides=[8, 16, 32, 64, 128], - loss_cls=dict( - type='FocalLoss', - use_sigmoid=True, - gamma=2.0, - alpha=0.25, - loss_weight=1.0), - loss_bbox=dict(type='IoULoss', loss_weight=1.0), - loss_centerness=dict( - type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0))) -# training and testing settings -train_cfg = dict( - assigner=dict( - type='MaxIoUAssigner', - pos_iou_thr=0.5, - neg_iou_thr=0.4, - min_pos_iou=0, - ignore_iof_thr=-1), - allowed_border=-1, - pos_weight=-1, - debug=False) -test_cfg = dict( - nms_pre=1000, - min_bbox_size=0, - score_thr=0.05, - nms=dict(type='nms', iou_thr=0.5), - max_per_img=100) -# dataset settings -dataset_type = 'CocoDataset' -data_root = 'data/coco/' -img_norm_cfg = dict( - mean=[102.9801, 115.9465, 122.7717], std=[1.0, 1.0, 1.0], to_rgb=False) -train_pipeline = [ - dict(type='LoadImageFromFile'), - dict(type='LoadAnnotations', with_bbox=True), - dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), - dict(type='RandomFlip', flip_ratio=0.5), - dict(type='Normalize', **img_norm_cfg), - dict(type='Pad', size_divisor=32), - dict(type='DefaultFormatBundle'), - dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']), -] -test_pipeline = [ - dict(type='LoadImageFromFile'), - dict( - type='MultiScaleFlipAug', - img_scale=(1333, 800), - flip=False, - transforms=[ - dict(type='Resize', keep_ratio=True), - dict(type='RandomFlip'), - dict(type='Normalize', **img_norm_cfg), - dict(type='Pad', size_divisor=32), - dict(type='ImageToTensor', keys=['img']), - dict(type='Collect', keys=['img']), - ]) -] -data = dict( - 
imgs_per_gpu=4, - workers_per_gpu=4, - train=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_train2017.json', - img_prefix=data_root + 'train2017/', - pipeline=train_pipeline), - val=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_val2017.json', - img_prefix=data_root + 'val2017/', - pipeline=test_pipeline), - test=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_val2017.json', - img_prefix=data_root + 'val2017/', - pipeline=test_pipeline)) -evaluation = dict(interval=1, metric='bbox') -# optimizer -optimizer = dict( - type='SGD', - lr=0.01, - momentum=0.9, - weight_decay=0.0001, - paramwise_options=dict(bias_lr_mult=2., bias_decay_mult=0.)) -optimizer_config = dict(grad_clip=None) -# learning policy -lr_config = dict( - policy='step', - warmup='constant', - warmup_iters=500, - warmup_ratio=1.0 / 3, - step=[8, 11]) -checkpoint_config = dict(interval=1) -# yapf:disable -log_config = dict( - interval=50, - hooks=[ - dict(type='TextLoggerHook'), - # dict(type='TensorboardLoggerHook') - ]) -# yapf:enable -# runtime settings -total_epochs = 12 -dist_params = dict(backend='nccl') -log_level = 'INFO' -work_dir = './work_dirs/fcos_hrnetv2p_w32_gn_1x_4gpu' -load_from = None -resume_from = None -workflow = [('train', 1)] diff --git a/configs/hrnet/htc_hrnetv2p_w32_20e.py b/configs/hrnet/htc_hrnetv2p_w32_20e.py deleted file mode 100644 index 40836c592bfcf2aa80f018703fad0842d0bfd088..0000000000000000000000000000000000000000 --- a/configs/hrnet/htc_hrnetv2p_w32_20e.py +++ /dev/null @@ -1,310 +0,0 @@ -# model settings -model = dict( - type='HybridTaskCascade', - num_stages=3, - pretrained='open-mmlab://msra/hrnetv2_w32', - interleaved=True, - mask_info_flow=True, - backbone=dict( - type='HRNet', - extra=dict( - stage1=dict( - num_modules=1, - num_branches=1, - block='BOTTLENECK', - num_blocks=(4, ), - num_channels=(64, )), - stage2=dict( - num_modules=1, - num_branches=2, - block='BASIC', - num_blocks=(4, 
4), - num_channels=(32, 64)), - stage3=dict( - num_modules=4, - num_branches=3, - block='BASIC', - num_blocks=(4, 4, 4), - num_channels=(32, 64, 128)), - stage4=dict( - num_modules=3, - num_branches=4, - block='BASIC', - num_blocks=(4, 4, 4, 4), - num_channels=(32, 64, 128, 256)))), - neck=dict(type='HRFPN', in_channels=[32, 64, 128, 256], out_channels=256), - rpn_head=dict( - type='RPNHead', - in_channels=256, - feat_channels=256, - anchor_scales=[8], - anchor_ratios=[0.5, 1.0, 2.0], - anchor_strides=[4, 8, 16, 32, 64], - target_means=[.0, .0, .0, .0], - target_stds=[1.0, 1.0, 1.0, 1.0], - loss_cls=dict( - type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0), - loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0)), - bbox_roi_extractor=dict( - type='SingleRoIExtractor', - roi_layer=dict(type='RoIAlign', out_size=7, sample_num=2), - out_channels=256, - featmap_strides=[4, 8, 16, 32]), - bbox_head=[ - dict( - type='SharedFCBBoxHead', - num_fcs=2, - in_channels=256, - fc_out_channels=1024, - roi_feat_size=7, - num_classes=81, - target_means=[0., 0., 0., 0.], - target_stds=[0.1, 0.1, 0.2, 0.2], - reg_class_agnostic=True, - loss_cls=dict( - type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), - loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0)), - dict( - type='SharedFCBBoxHead', - num_fcs=2, - in_channels=256, - fc_out_channels=1024, - roi_feat_size=7, - num_classes=81, - target_means=[0., 0., 0., 0.], - target_stds=[0.05, 0.05, 0.1, 0.1], - reg_class_agnostic=True, - loss_cls=dict( - type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), - loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0)), - dict( - type='SharedFCBBoxHead', - num_fcs=2, - in_channels=256, - fc_out_channels=1024, - roi_feat_size=7, - num_classes=81, - target_means=[0., 0., 0., 0.], - target_stds=[0.033, 0.033, 0.067, 0.067], - reg_class_agnostic=True, - loss_cls=dict( - type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), - 
loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0)) - ], - mask_roi_extractor=dict( - type='SingleRoIExtractor', - roi_layer=dict(type='RoIAlign', out_size=14, sample_num=2), - out_channels=256, - featmap_strides=[4, 8, 16, 32]), - mask_head=[ - dict( - type='HTCMaskHead', - with_conv_res=False, - num_convs=4, - in_channels=256, - conv_out_channels=256, - num_classes=81, - loss_mask=dict( - type='CrossEntropyLoss', use_mask=True, loss_weight=1.0)), - dict( - type='HTCMaskHead', - num_convs=4, - in_channels=256, - conv_out_channels=256, - num_classes=81, - loss_mask=dict( - type='CrossEntropyLoss', use_mask=True, loss_weight=1.0)), - dict( - type='HTCMaskHead', - num_convs=4, - in_channels=256, - conv_out_channels=256, - num_classes=81, - loss_mask=dict( - type='CrossEntropyLoss', use_mask=True, loss_weight=1.0)) - ], - semantic_roi_extractor=dict( - type='SingleRoIExtractor', - roi_layer=dict(type='RoIAlign', out_size=14, sample_num=2), - out_channels=256, - featmap_strides=[8]), - semantic_head=dict( - type='FusedSemanticHead', - num_ins=5, - fusion_level=1, - num_convs=4, - in_channels=256, - conv_out_channels=256, - num_classes=183, - ignore_label=255, - loss_weight=0.2)) -# model training and testing settings -train_cfg = dict( - rpn=dict( - assigner=dict( - type='MaxIoUAssigner', - pos_iou_thr=0.7, - neg_iou_thr=0.3, - min_pos_iou=0.3, - ignore_iof_thr=-1), - sampler=dict( - type='RandomSampler', - num=256, - pos_fraction=0.5, - neg_pos_ub=-1, - add_gt_as_proposals=False), - allowed_border=0, - pos_weight=-1, - debug=False), - rpn_proposal=dict( - nms_across_levels=False, - nms_pre=2000, - nms_post=2000, - max_num=2000, - nms_thr=0.7, - min_bbox_size=0), - rcnn=[ - dict( - assigner=dict( - type='MaxIoUAssigner', - pos_iou_thr=0.5, - neg_iou_thr=0.5, - min_pos_iou=0.5, - ignore_iof_thr=-1), - sampler=dict( - type='RandomSampler', - num=512, - pos_fraction=0.25, - neg_pos_ub=-1, - add_gt_as_proposals=True), - mask_size=28, - pos_weight=-1, - 
debug=False), - dict( - assigner=dict( - type='MaxIoUAssigner', - pos_iou_thr=0.6, - neg_iou_thr=0.6, - min_pos_iou=0.6, - ignore_iof_thr=-1), - sampler=dict( - type='RandomSampler', - num=512, - pos_fraction=0.25, - neg_pos_ub=-1, - add_gt_as_proposals=True), - mask_size=28, - pos_weight=-1, - debug=False), - dict( - assigner=dict( - type='MaxIoUAssigner', - pos_iou_thr=0.7, - neg_iou_thr=0.7, - min_pos_iou=0.7, - ignore_iof_thr=-1), - sampler=dict( - type='RandomSampler', - num=512, - pos_fraction=0.25, - neg_pos_ub=-1, - add_gt_as_proposals=True), - mask_size=28, - pos_weight=-1, - debug=False) - ], - stage_loss_weights=[1, 0.5, 0.25]) -test_cfg = dict( - rpn=dict( - nms_across_levels=False, - nms_pre=1000, - nms_post=1000, - max_num=1000, - nms_thr=0.7, - min_bbox_size=0), - rcnn=dict( - score_thr=0.001, - nms=dict(type='nms', iou_thr=0.5), - max_per_img=100, - mask_thr_binary=0.5)) -# dataset settings -dataset_type = 'CocoDataset' -data_root = 'data/coco/' -img_norm_cfg = dict( - mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) -train_pipeline = [ - dict(type='LoadImageFromFile'), - dict( - type='LoadAnnotations', with_bbox=True, with_mask=True, with_seg=True), - dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), - dict(type='RandomFlip', flip_ratio=0.5), - dict(type='Normalize', **img_norm_cfg), - dict(type='Pad', size_divisor=32), - dict(type='SegRescale', scale_factor=1 / 8), - dict(type='DefaultFormatBundle'), - dict( - type='Collect', - keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks', 'gt_semantic_seg']), -] -test_pipeline = [ - dict(type='LoadImageFromFile'), - dict( - type='MultiScaleFlipAug', - img_scale=(1333, 800), - flip=False, - transforms=[ - dict(type='Resize', keep_ratio=True), - dict(type='RandomFlip', flip_ratio=0.5), - dict(type='Normalize', **img_norm_cfg), - dict(type='Pad', size_divisor=32), - dict(type='ImageToTensor', keys=['img']), - dict(type='Collect', keys=['img']), - ]) -] -data = dict( - 
imgs_per_gpu=2, - workers_per_gpu=2, - train=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_train2017.json', - img_prefix=data_root + 'train2017/', - seg_prefix=data_root + 'stuffthingmaps/train2017/', - pipeline=train_pipeline), - val=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_val2017.json', - img_prefix=data_root + 'val2017/', - pipeline=test_pipeline), - test=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_val2017.json', - img_prefix=data_root + 'val2017/', - pipeline=test_pipeline)) -evaluation = dict(interval=1, metric=['bbox', 'segm']) -# optimizer -optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001) -optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) -# learning policy -lr_config = dict( - policy='step', - warmup='linear', - warmup_iters=500, - warmup_ratio=1.0 / 3, - step=[16, 19]) -checkpoint_config = dict(interval=1) -# yapf:disable -log_config = dict( - interval=50, - hooks=[ - dict(type='TextLoggerHook'), - # dict(type='TensorboardLoggerHook') - ]) -# yapf:enable -# runtime settings -total_epochs = 20 -dist_params = dict(backend='nccl') -log_level = 'INFO' -work_dir = './work_dirs/htc_hrnetv2p_w32_20e' -load_from = None -resume_from = None -workflow = [('train', 1)] diff --git a/configs/hrnet/htc_hrnetv2p_w32_20e_coco.py b/configs/hrnet/htc_hrnetv2p_w32_20e_coco.py new file mode 100644 index 0000000000000000000000000000000000000000..eba65a8e63ffc552de144fb774b4021c9a6c5751 --- /dev/null +++ b/configs/hrnet/htc_hrnetv2p_w32_20e_coco.py @@ -0,0 +1,37 @@ +_base_ = '../htc/htc_r50_fpn_20e_coco.py' +model = dict( + pretrained='open-mmlab://msra/hrnetv2_w32', + backbone=dict( + _delete_=True, + type='HRNet', + extra=dict( + stage1=dict( + num_modules=1, + num_branches=1, + block='BOTTLENECK', + num_blocks=(4, ), + num_channels=(64, )), + stage2=dict( + num_modules=1, + num_branches=2, + block='BASIC', + num_blocks=(4, 4), + num_channels=(32, 
64)), + stage3=dict( + num_modules=4, + num_branches=3, + block='BASIC', + num_blocks=(4, 4, 4), + num_channels=(32, 64, 128)), + stage4=dict( + num_modules=3, + num_branches=4, + block='BASIC', + num_blocks=(4, 4, 4, 4), + num_channels=(32, 64, 128, 256)))), + neck=dict( + _delete_=True, + type='HRFPN', + in_channels=[32, 64, 128, 256], + out_channels=256)) +work_dir = './work_dirs/htc_hrnetv2p_w32_20e' diff --git a/configs/hrnet/mask_rcnn_hrnetv2p_w18_1x.py b/configs/hrnet/mask_rcnn_hrnetv2p_w18_1x.py deleted file mode 100644 index 0e562ffb4af7f640be06a9036e15cd457884ac3d..0000000000000000000000000000000000000000 --- a/configs/hrnet/mask_rcnn_hrnetv2p_w18_1x.py +++ /dev/null @@ -1,206 +0,0 @@ -# model settings -model = dict( - type='MaskRCNN', - pretrained='open-mmlab://msra/hrnetv2_w18', - backbone=dict( - type='HRNet', - extra=dict( - stage1=dict( - num_modules=1, - num_branches=1, - block='BOTTLENECK', - num_blocks=(4, ), - num_channels=(64, )), - stage2=dict( - num_modules=1, - num_branches=2, - block='BASIC', - num_blocks=(4, 4), - num_channels=(18, 36)), - stage3=dict( - num_modules=4, - num_branches=3, - block='BASIC', - num_blocks=(4, 4, 4), - num_channels=(18, 36, 72)), - stage4=dict( - num_modules=3, - num_branches=4, - block='BASIC', - num_blocks=(4, 4, 4, 4), - num_channels=(18, 36, 72, 144)))), - neck=dict(type='HRFPN', in_channels=[18, 36, 72, 144], out_channels=256), - rpn_head=dict( - type='RPNHead', - in_channels=256, - feat_channels=256, - anchor_scales=[8], - anchor_ratios=[0.5, 1.0, 2.0], - anchor_strides=[4, 8, 16, 32, 64], - target_means=[.0, .0, .0, .0], - target_stds=[1.0, 1.0, 1.0, 1.0], - loss_cls=dict( - type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0), - loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0)), - bbox_roi_extractor=dict( - type='SingleRoIExtractor', - roi_layer=dict(type='RoIAlign', out_size=7, sample_num=2), - out_channels=256, - featmap_strides=[4, 8, 16, 32]), - bbox_head=dict( - 
type='SharedFCBBoxHead', - num_fcs=2, - in_channels=256, - fc_out_channels=1024, - roi_feat_size=7, - num_classes=81, - target_means=[0., 0., 0., 0.], - target_stds=[0.1, 0.1, 0.2, 0.2], - reg_class_agnostic=False, - loss_cls=dict( - type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), - loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0)), - mask_roi_extractor=dict( - type='SingleRoIExtractor', - roi_layer=dict(type='RoIAlign', out_size=14, sample_num=2), - out_channels=256, - featmap_strides=[4, 8, 16, 32]), - mask_head=dict( - type='FCNMaskHead', - num_convs=4, - in_channels=256, - conv_out_channels=256, - num_classes=81, - loss_mask=dict( - type='CrossEntropyLoss', use_mask=True, loss_weight=1.0))) -# model training and testing settings -train_cfg = dict( - rpn=dict( - assigner=dict( - type='MaxIoUAssigner', - pos_iou_thr=0.7, - neg_iou_thr=0.3, - min_pos_iou=0.3, - ignore_iof_thr=-1), - sampler=dict( - type='RandomSampler', - num=256, - pos_fraction=0.5, - neg_pos_ub=-1, - add_gt_as_proposals=False), - allowed_border=0, - pos_weight=-1, - debug=False), - rpn_proposal=dict( - nms_across_levels=False, - nms_pre=2000, - nms_post=2000, - max_num=2000, - nms_thr=0.7, - min_bbox_size=0), - rcnn=dict( - assigner=dict( - type='MaxIoUAssigner', - pos_iou_thr=0.5, - neg_iou_thr=0.5, - min_pos_iou=0.5, - ignore_iof_thr=-1), - sampler=dict( - type='RandomSampler', - num=512, - pos_fraction=0.25, - neg_pos_ub=-1, - add_gt_as_proposals=True), - mask_size=28, - pos_weight=-1, - debug=False)) -test_cfg = dict( - rpn=dict( - nms_across_levels=False, - nms_pre=1000, - nms_post=1000, - max_num=1000, - nms_thr=0.7, - min_bbox_size=0), - rcnn=dict( - score_thr=0.05, - nms=dict(type='nms', iou_thr=0.5), - max_per_img=100, - mask_thr_binary=0.5)) -# dataset settings -dataset_type = 'CocoDataset' -data_root = 'data/coco/' -img_norm_cfg = dict( - mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) -train_pipeline = [ - 
dict(type='LoadImageFromFile'), - dict(type='LoadAnnotations', with_bbox=True, with_mask=True), - dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), - dict(type='RandomFlip', flip_ratio=0.5), - dict(type='Normalize', **img_norm_cfg), - dict(type='Pad', size_divisor=32), - dict(type='DefaultFormatBundle'), - dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']), -] -test_pipeline = [ - dict(type='LoadImageFromFile'), - dict( - type='MultiScaleFlipAug', - img_scale=(1333, 800), - flip=False, - transforms=[ - dict(type='Resize', keep_ratio=True), - dict(type='RandomFlip'), - dict(type='Normalize', **img_norm_cfg), - dict(type='Pad', size_divisor=32), - dict(type='ImageToTensor', keys=['img']), - dict(type='Collect', keys=['img']), - ]) -] -data = dict( - imgs_per_gpu=2, - workers_per_gpu=2, - train=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_train2017.json', - img_prefix=data_root + 'train2017/', - pipeline=train_pipeline), - val=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_val2017.json', - img_prefix=data_root + 'val2017/', - pipeline=test_pipeline), - test=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_val2017.json', - img_prefix=data_root + 'val2017/', - pipeline=test_pipeline)) -evaluation = dict(interval=1, metric=['bbox', 'segm']) -# optimizer -# if you use 8 GPUs for training, please change lr to 0.02 -optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001) -optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) -# learning policy -lr_config = dict( - policy='step', - warmup='linear', - warmup_iters=500, - warmup_ratio=1.0 / 3, - step=[8, 11]) -checkpoint_config = dict(interval=1) -# yapf:disable -log_config = dict( - interval=50, - hooks=[ - dict(type='TextLoggerHook'), - # dict(type='TensorboardLoggerHook') - ]) -# yapf:enable -# runtime settings -total_epochs = 12 -dist_params = dict(backend='nccl') -log_level = 'INFO' 
-work_dir = './work_dirs/mask_rcnn_hrnetv2p_w18_1x' -load_from = None -resume_from = None -workflow = [('train', 1)] diff --git a/configs/hrnet/mask_rcnn_hrnetv2p_w18_1x_coco.py b/configs/hrnet/mask_rcnn_hrnetv2p_w18_1x_coco.py new file mode 100644 index 0000000000000000000000000000000000000000..f52f1efbe0f73639c86f81ec36a31ba6053d1e5f --- /dev/null +++ b/configs/hrnet/mask_rcnn_hrnetv2p_w18_1x_coco.py @@ -0,0 +1,10 @@ +_base_ = './mask_rcnn_hrnetv2p_w32_1x_coco.py' +model = dict( + pretrained='open-mmlab://msra/hrnetv2_w18', + backbone=dict( + extra=dict( + stage2=dict(num_channels=(18, 36)), + stage3=dict(num_channels=(18, 36, 72)), + stage4=dict(num_channels=(18, 36, 72, 144)))), + neck=dict(type='HRFPN', in_channels=[18, 36, 72, 144], out_channels=256)) +work_dir = './work_dirs/mask_rcnn_hrnetv2p_w18_1x' diff --git a/configs/hrnet/mask_rcnn_hrnetv2p_w32_1x.py b/configs/hrnet/mask_rcnn_hrnetv2p_w32_1x.py deleted file mode 100644 index 4511426f437866a0d92c4513c8da5dc334196e1d..0000000000000000000000000000000000000000 --- a/configs/hrnet/mask_rcnn_hrnetv2p_w32_1x.py +++ /dev/null @@ -1,205 +0,0 @@ -# model settings -model = dict( - type='MaskRCNN', - pretrained='open-mmlab://msra/hrnetv2_w32', - backbone=dict( - type='HRNet', - extra=dict( - stage1=dict( - num_modules=1, - num_branches=1, - block='BOTTLENECK', - num_blocks=(4, ), - num_channels=(64, )), - stage2=dict( - num_modules=1, - num_branches=2, - block='BASIC', - num_blocks=(4, 4), - num_channels=(32, 64)), - stage3=dict( - num_modules=4, - num_branches=3, - block='BASIC', - num_blocks=(4, 4, 4), - num_channels=(32, 64, 128)), - stage4=dict( - num_modules=3, - num_branches=4, - block='BASIC', - num_blocks=(4, 4, 4, 4), - num_channels=(32, 64, 128, 256)))), - neck=dict(type='HRFPN', in_channels=[32, 64, 128, 256], out_channels=256), - rpn_head=dict( - type='RPNHead', - in_channels=256, - feat_channels=256, - anchor_scales=[8], - anchor_ratios=[0.5, 1.0, 2.0], - anchor_strides=[4, 8, 16, 32, 64], - 
target_means=[.0, .0, .0, .0], - target_stds=[1.0, 1.0, 1.0, 1.0], - loss_cls=dict( - type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0), - loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0)), - bbox_roi_extractor=dict( - type='SingleRoIExtractor', - roi_layer=dict(type='RoIAlign', out_size=7, sample_num=2), - out_channels=256, - featmap_strides=[4, 8, 16, 32]), - bbox_head=dict( - type='SharedFCBBoxHead', - num_fcs=2, - in_channels=256, - fc_out_channels=1024, - roi_feat_size=7, - num_classes=81, - target_means=[0., 0., 0., 0.], - target_stds=[0.1, 0.1, 0.2, 0.2], - reg_class_agnostic=False, - loss_cls=dict( - type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), - loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0)), - mask_roi_extractor=dict( - type='SingleRoIExtractor', - roi_layer=dict(type='RoIAlign', out_size=14, sample_num=2), - out_channels=256, - featmap_strides=[4, 8, 16, 32]), - mask_head=dict( - type='FCNMaskHead', - num_convs=4, - in_channels=256, - conv_out_channels=256, - num_classes=81, - loss_mask=dict( - type='CrossEntropyLoss', use_mask=True, loss_weight=1.0))) -# model training and testing settings -train_cfg = dict( - rpn=dict( - assigner=dict( - type='MaxIoUAssigner', - pos_iou_thr=0.7, - neg_iou_thr=0.3, - min_pos_iou=0.3, - ignore_iof_thr=-1), - sampler=dict( - type='RandomSampler', - num=256, - pos_fraction=0.5, - neg_pos_ub=-1, - add_gt_as_proposals=False), - allowed_border=0, - pos_weight=-1, - debug=False), - rpn_proposal=dict( - nms_across_levels=False, - nms_pre=2000, - nms_post=2000, - max_num=2000, - nms_thr=0.7, - min_bbox_size=0), - rcnn=dict( - assigner=dict( - type='MaxIoUAssigner', - pos_iou_thr=0.5, - neg_iou_thr=0.5, - min_pos_iou=0.5, - ignore_iof_thr=-1), - sampler=dict( - type='RandomSampler', - num=512, - pos_fraction=0.25, - neg_pos_ub=-1, - add_gt_as_proposals=True), - mask_size=28, - pos_weight=-1, - debug=False)) -test_cfg = dict( - rpn=dict( - nms_across_levels=False, - 
nms_pre=1000, - nms_post=1000, - max_num=1000, - nms_thr=0.7, - min_bbox_size=0), - rcnn=dict( - score_thr=0.05, - nms=dict(type='nms', iou_thr=0.5), - max_per_img=100, - mask_thr_binary=0.5)) -# dataset settings -dataset_type = 'CocoDataset' -data_root = 'data/coco/' -img_norm_cfg = dict( - mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) -train_pipeline = [ - dict(type='LoadImageFromFile'), - dict(type='LoadAnnotations', with_bbox=True, with_mask=True), - dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), - dict(type='RandomFlip', flip_ratio=0.5), - dict(type='Normalize', **img_norm_cfg), - dict(type='Pad', size_divisor=32), - dict(type='DefaultFormatBundle'), - dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']), -] -test_pipeline = [ - dict(type='LoadImageFromFile'), - dict( - type='MultiScaleFlipAug', - img_scale=(1333, 800), - flip=False, - transforms=[ - dict(type='Resize', keep_ratio=True), - dict(type='RandomFlip'), - dict(type='Normalize', **img_norm_cfg), - dict(type='Pad', size_divisor=32), - dict(type='ImageToTensor', keys=['img']), - dict(type='Collect', keys=['img']), - ]) -] -data = dict( - imgs_per_gpu=2, - workers_per_gpu=2, - train=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_train2017.json', - img_prefix=data_root + 'train2017/', - pipeline=train_pipeline), - val=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_val2017.json', - img_prefix=data_root + 'val2017/', - pipeline=test_pipeline), - test=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_val2017.json', - img_prefix=data_root + 'val2017/', - pipeline=test_pipeline)) -evaluation = dict(interval=1, metric=['bbox', 'segm']) -# optimizer -optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001) -optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) -# learning policy -lr_config = dict( - policy='step', - warmup='linear', - 
warmup_iters=500, - warmup_ratio=1.0 / 3, - step=[8, 11]) -checkpoint_config = dict(interval=1) -# yapf:disable -log_config = dict( - interval=50, - hooks=[ - dict(type='TextLoggerHook'), - # dict(type='TensorboardLoggerHook') - ]) -# yapf:enable -# runtime settings -total_epochs = 12 -dist_params = dict(backend='nccl') -log_level = 'INFO' -work_dir = './work_dirs/mask_rcnn_hrnetv2p_w32_1x' -load_from = None -resume_from = None -workflow = [('train', 1)] diff --git a/configs/hrnet/mask_rcnn_hrnetv2p_w32_1x_coco.py b/configs/hrnet/mask_rcnn_hrnetv2p_w32_1x_coco.py new file mode 100644 index 0000000000000000000000000000000000000000..7abeeda7a179851103324507fef9dfb7b7765880 --- /dev/null +++ b/configs/hrnet/mask_rcnn_hrnetv2p_w32_1x_coco.py @@ -0,0 +1,37 @@ +_base_ = '../mask_rcnn/mask_rcnn_r50_fpn_1x_coco.py' +model = dict( + pretrained='open-mmlab://msra/hrnetv2_w32', + backbone=dict( + _delete_=True, + type='HRNet', + extra=dict( + stage1=dict( + num_modules=1, + num_branches=1, + block='BOTTLENECK', + num_blocks=(4, ), + num_channels=(64, )), + stage2=dict( + num_modules=1, + num_branches=2, + block='BASIC', + num_blocks=(4, 4), + num_channels=(32, 64)), + stage3=dict( + num_modules=4, + num_branches=3, + block='BASIC', + num_blocks=(4, 4, 4), + num_channels=(32, 64, 128)), + stage4=dict( + num_modules=3, + num_branches=4, + block='BASIC', + num_blocks=(4, 4, 4, 4), + num_channels=(32, 64, 128, 256)))), + neck=dict( + _delete_=True, + type='HRFPN', + in_channels=[32, 64, 128, 256], + out_channels=256)) +work_dir = './work_dirs/mask_rcnn_hrnetv2p_w32_1x' diff --git a/configs/htc/htc_dconv_c3-c5_mstrain_400_1400_x101_64x4d_fpn_20e.py b/configs/htc/htc_dconv_c3-c5_mstrain_400_1400_x101_64x4d_fpn_20e.py deleted file mode 100644 index ab1afd859c2bcddbb31d80f33e96222086f867c3..0000000000000000000000000000000000000000 --- a/configs/htc/htc_dconv_c3-c5_mstrain_400_1400_x101_64x4d_fpn_20e.py +++ /dev/null @@ -1,303 +0,0 @@ -# model settings -model = dict( - 
type='HybridTaskCascade', - num_stages=3, - pretrained='open-mmlab://resnext101_64x4d', - interleaved=True, - mask_info_flow=True, - backbone=dict( - type='ResNeXt', - depth=101, - groups=64, - base_width=4, - num_stages=4, - out_indices=(0, 1, 2, 3), - frozen_stages=1, - norm_cfg=dict(type='BN', requires_grad=True), - style='pytorch', - dcn=dict(type='DCN', deformable_groups=1, fallback_on_stride=False), - stage_with_dcn=(False, True, True, True)), - neck=dict( - type='FPN', - in_channels=[256, 512, 1024, 2048], - out_channels=256, - num_outs=5), - rpn_head=dict( - type='RPNHead', - in_channels=256, - feat_channels=256, - anchor_scales=[8], - anchor_ratios=[0.5, 1.0, 2.0], - anchor_strides=[4, 8, 16, 32, 64], - target_means=[.0, .0, .0, .0], - target_stds=[1.0, 1.0, 1.0, 1.0], - loss_cls=dict( - type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0), - loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0)), - bbox_roi_extractor=dict( - type='SingleRoIExtractor', - roi_layer=dict(type='RoIAlign', out_size=7, sample_num=2), - out_channels=256, - featmap_strides=[4, 8, 16, 32]), - bbox_head=[ - dict( - type='SharedFCBBoxHead', - num_fcs=2, - in_channels=256, - fc_out_channels=1024, - roi_feat_size=7, - num_classes=81, - target_means=[0., 0., 0., 0.], - target_stds=[0.1, 0.1, 0.2, 0.2], - reg_class_agnostic=True, - loss_cls=dict( - type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), - loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0)), - dict( - type='SharedFCBBoxHead', - num_fcs=2, - in_channels=256, - fc_out_channels=1024, - roi_feat_size=7, - num_classes=81, - target_means=[0., 0., 0., 0.], - target_stds=[0.05, 0.05, 0.1, 0.1], - reg_class_agnostic=True, - loss_cls=dict( - type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), - loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0)), - dict( - type='SharedFCBBoxHead', - num_fcs=2, - in_channels=256, - fc_out_channels=1024, - roi_feat_size=7, - 
num_classes=81, - target_means=[0., 0., 0., 0.], - target_stds=[0.033, 0.033, 0.067, 0.067], - reg_class_agnostic=True, - loss_cls=dict( - type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), - loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0)) - ], - mask_roi_extractor=dict( - type='SingleRoIExtractor', - roi_layer=dict(type='RoIAlign', out_size=14, sample_num=2), - out_channels=256, - featmap_strides=[4, 8, 16, 32]), - mask_head=[ - dict( - type='HTCMaskHead', - with_conv_res=False, - num_convs=4, - in_channels=256, - conv_out_channels=256, - num_classes=81, - loss_mask=dict( - type='CrossEntropyLoss', use_mask=True, loss_weight=1.0)), - dict( - type='HTCMaskHead', - num_convs=4, - in_channels=256, - conv_out_channels=256, - num_classes=81, - loss_mask=dict( - type='CrossEntropyLoss', use_mask=True, loss_weight=1.0)), - dict( - type='HTCMaskHead', - num_convs=4, - in_channels=256, - conv_out_channels=256, - num_classes=81, - loss_mask=dict( - type='CrossEntropyLoss', use_mask=True, loss_weight=1.0)) - ], - semantic_roi_extractor=dict( - type='SingleRoIExtractor', - roi_layer=dict(type='RoIAlign', out_size=14, sample_num=2), - out_channels=256, - featmap_strides=[8]), - semantic_head=dict( - type='FusedSemanticHead', - num_ins=5, - fusion_level=1, - num_convs=4, - in_channels=256, - conv_out_channels=256, - num_classes=183, - ignore_label=255, - loss_weight=0.2)) -# model training and testing settings -train_cfg = dict( - rpn=dict( - assigner=dict( - type='MaxIoUAssigner', - pos_iou_thr=0.7, - neg_iou_thr=0.3, - min_pos_iou=0.3, - ignore_iof_thr=-1), - sampler=dict( - type='RandomSampler', - num=256, - pos_fraction=0.5, - neg_pos_ub=-1, - add_gt_as_proposals=False), - allowed_border=0, - pos_weight=-1, - debug=False), - rpn_proposal=dict( - nms_across_levels=False, - nms_pre=2000, - nms_post=2000, - max_num=2000, - nms_thr=0.7, - min_bbox_size=0), - rcnn=[ - dict( - assigner=dict( - type='MaxIoUAssigner', - pos_iou_thr=0.5, - neg_iou_thr=0.5, 
- min_pos_iou=0.5, - ignore_iof_thr=-1), - sampler=dict( - type='RandomSampler', - num=512, - pos_fraction=0.25, - neg_pos_ub=-1, - add_gt_as_proposals=True), - mask_size=28, - pos_weight=-1, - debug=False), - dict( - assigner=dict( - type='MaxIoUAssigner', - pos_iou_thr=0.6, - neg_iou_thr=0.6, - min_pos_iou=0.6, - ignore_iof_thr=-1), - sampler=dict( - type='RandomSampler', - num=512, - pos_fraction=0.25, - neg_pos_ub=-1, - add_gt_as_proposals=True), - mask_size=28, - pos_weight=-1, - debug=False), - dict( - assigner=dict( - type='MaxIoUAssigner', - pos_iou_thr=0.7, - neg_iou_thr=0.7, - min_pos_iou=0.7, - ignore_iof_thr=-1), - sampler=dict( - type='RandomSampler', - num=512, - pos_fraction=0.25, - neg_pos_ub=-1, - add_gt_as_proposals=True), - mask_size=28, - pos_weight=-1, - debug=False) - ], - stage_loss_weights=[1, 0.5, 0.25]) -test_cfg = dict( - rpn=dict( - nms_across_levels=False, - nms_pre=1000, - nms_post=1000, - max_num=1000, - nms_thr=0.7, - min_bbox_size=0), - rcnn=dict( - score_thr=0.001, - nms=dict(type='nms', iou_thr=0.5), - max_per_img=100, - mask_thr_binary=0.5)) -# dataset settings -dataset_type = 'CocoDataset' -data_root = 'data/coco/' -img_norm_cfg = dict( - mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) -train_pipeline = [ - dict(type='LoadImageFromFile'), - dict( - type='LoadAnnotations', with_bbox=True, with_mask=True, with_seg=True), - dict( - type='Resize', - img_scale=[(1600, 400), (1600, 1400)], - multiscale_mode='range', - keep_ratio=True), - dict(type='RandomFlip', flip_ratio=0.5), - dict(type='Normalize', **img_norm_cfg), - dict(type='Pad', size_divisor=32), - dict(type='SegRescale', scale_factor=1 / 8), - dict(type='DefaultFormatBundle'), - dict( - type='Collect', - keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks', 'gt_semantic_seg']), -] -test_pipeline = [ - dict(type='LoadImageFromFile'), - dict( - type='MultiScaleFlipAug', - img_scale=(1333, 800), - flip=False, - transforms=[ - dict(type='Resize', 
keep_ratio=True), - dict(type='RandomFlip', flip_ratio=0.5), - dict(type='Normalize', **img_norm_cfg), - dict(type='Pad', size_divisor=32), - dict(type='ImageToTensor', keys=['img']), - dict(type='Collect', keys=['img']), - ]) -] -data = dict( - imgs_per_gpu=1, - workers_per_gpu=1, - train=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_train2017.json', - img_prefix=data_root + 'train2017/', - seg_prefix=data_root + 'stuffthingmaps/train2017/', - pipeline=train_pipeline), - val=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_val2017.json', - img_prefix=data_root + 'val2017/', - pipeline=test_pipeline), - test=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_val2017.json', - img_prefix=data_root + 'val2017/', - pipeline=test_pipeline)) -evaluation = dict(interval=1, metric=['bbox', 'segm']) -# optimizer -optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001) -optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) -# learning policy -lr_config = dict( - policy='step', - warmup='linear', - warmup_iters=500, - warmup_ratio=1.0 / 3, - step=[16, 19]) -checkpoint_config = dict(interval=1) -# yapf:disable -log_config = dict( - interval=50, - hooks=[ - dict(type='TextLoggerHook'), - # dict(type='TensorboardLoggerHook') - ]) -# yapf:enable -# runtime settings -total_epochs = 20 -dist_params = dict(backend='nccl') -log_level = 'INFO' -work_dir = './work_dirs/htc_dconv_c3-c5_mstrain_400_1400_x101_64x4d_fpn_20e' -load_from = None -resume_from = None -workflow = [('train', 1)] diff --git a/configs/htc/htc_r101_fpn_20e.py b/configs/htc/htc_r101_fpn_20e.py deleted file mode 100644 index 5b46ab53b1ffc8a1bcf039f0b3527ee514f59c9a..0000000000000000000000000000000000000000 --- a/configs/htc/htc_r101_fpn_20e.py +++ /dev/null @@ -1,295 +0,0 @@ -# model settings -model = dict( - type='HybridTaskCascade', - num_stages=3, - pretrained='torchvision://resnet101', - interleaved=True, - 
mask_info_flow=True, - backbone=dict( - type='ResNet', - depth=101, - num_stages=4, - out_indices=(0, 1, 2, 3), - frozen_stages=1, - norm_cfg=dict(type='BN', requires_grad=True), - style='pytorch'), - neck=dict( - type='FPN', - in_channels=[256, 512, 1024, 2048], - out_channels=256, - num_outs=5), - rpn_head=dict( - type='RPNHead', - in_channels=256, - feat_channels=256, - anchor_scales=[8], - anchor_ratios=[0.5, 1.0, 2.0], - anchor_strides=[4, 8, 16, 32, 64], - target_means=[.0, .0, .0, .0], - target_stds=[1.0, 1.0, 1.0, 1.0], - loss_cls=dict( - type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0), - loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0)), - bbox_roi_extractor=dict( - type='SingleRoIExtractor', - roi_layer=dict(type='RoIAlign', out_size=7, sample_num=2), - out_channels=256, - featmap_strides=[4, 8, 16, 32]), - bbox_head=[ - dict( - type='SharedFCBBoxHead', - num_fcs=2, - in_channels=256, - fc_out_channels=1024, - roi_feat_size=7, - num_classes=81, - target_means=[0., 0., 0., 0.], - target_stds=[0.1, 0.1, 0.2, 0.2], - reg_class_agnostic=True, - loss_cls=dict( - type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), - loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0)), - dict( - type='SharedFCBBoxHead', - num_fcs=2, - in_channels=256, - fc_out_channels=1024, - roi_feat_size=7, - num_classes=81, - target_means=[0., 0., 0., 0.], - target_stds=[0.05, 0.05, 0.1, 0.1], - reg_class_agnostic=True, - loss_cls=dict( - type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), - loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0)), - dict( - type='SharedFCBBoxHead', - num_fcs=2, - in_channels=256, - fc_out_channels=1024, - roi_feat_size=7, - num_classes=81, - target_means=[0., 0., 0., 0.], - target_stds=[0.033, 0.033, 0.067, 0.067], - reg_class_agnostic=True, - loss_cls=dict( - type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), - loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0)) - 
], - mask_roi_extractor=dict( - type='SingleRoIExtractor', - roi_layer=dict(type='RoIAlign', out_size=14, sample_num=2), - out_channels=256, - featmap_strides=[4, 8, 16, 32]), - mask_head=[ - dict( - type='HTCMaskHead', - with_conv_res=False, - num_convs=4, - in_channels=256, - conv_out_channels=256, - num_classes=81, - loss_mask=dict( - type='CrossEntropyLoss', use_mask=True, loss_weight=1.0)), - dict( - type='HTCMaskHead', - num_convs=4, - in_channels=256, - conv_out_channels=256, - num_classes=81, - loss_mask=dict( - type='CrossEntropyLoss', use_mask=True, loss_weight=1.0)), - dict( - type='HTCMaskHead', - num_convs=4, - in_channels=256, - conv_out_channels=256, - num_classes=81, - loss_mask=dict( - type='CrossEntropyLoss', use_mask=True, loss_weight=1.0)) - ], - semantic_roi_extractor=dict( - type='SingleRoIExtractor', - roi_layer=dict(type='RoIAlign', out_size=14, sample_num=2), - out_channels=256, - featmap_strides=[8]), - semantic_head=dict( - type='FusedSemanticHead', - num_ins=5, - fusion_level=1, - num_convs=4, - in_channels=256, - conv_out_channels=256, - num_classes=183, - ignore_label=255, - loss_weight=0.2)) -# model training and testing settings -train_cfg = dict( - rpn=dict( - assigner=dict( - type='MaxIoUAssigner', - pos_iou_thr=0.7, - neg_iou_thr=0.3, - min_pos_iou=0.3, - ignore_iof_thr=-1), - sampler=dict( - type='RandomSampler', - num=256, - pos_fraction=0.5, - neg_pos_ub=-1, - add_gt_as_proposals=False), - allowed_border=0, - pos_weight=-1, - debug=False), - rpn_proposal=dict( - nms_across_levels=False, - nms_pre=2000, - nms_post=2000, - max_num=2000, - nms_thr=0.7, - min_bbox_size=0), - rcnn=[ - dict( - assigner=dict( - type='MaxIoUAssigner', - pos_iou_thr=0.5, - neg_iou_thr=0.5, - min_pos_iou=0.5, - ignore_iof_thr=-1), - sampler=dict( - type='RandomSampler', - num=512, - pos_fraction=0.25, - neg_pos_ub=-1, - add_gt_as_proposals=True), - mask_size=28, - pos_weight=-1, - debug=False), - dict( - assigner=dict( - type='MaxIoUAssigner', - 
pos_iou_thr=0.6, - neg_iou_thr=0.6, - min_pos_iou=0.6, - ignore_iof_thr=-1), - sampler=dict( - type='RandomSampler', - num=512, - pos_fraction=0.25, - neg_pos_ub=-1, - add_gt_as_proposals=True), - mask_size=28, - pos_weight=-1, - debug=False), - dict( - assigner=dict( - type='MaxIoUAssigner', - pos_iou_thr=0.7, - neg_iou_thr=0.7, - min_pos_iou=0.7, - ignore_iof_thr=-1), - sampler=dict( - type='RandomSampler', - num=512, - pos_fraction=0.25, - neg_pos_ub=-1, - add_gt_as_proposals=True), - mask_size=28, - pos_weight=-1, - debug=False) - ], - stage_loss_weights=[1, 0.5, 0.25]) -test_cfg = dict( - rpn=dict( - nms_across_levels=False, - nms_pre=1000, - nms_post=1000, - max_num=1000, - nms_thr=0.7, - min_bbox_size=0), - rcnn=dict( - score_thr=0.001, - nms=dict(type='nms', iou_thr=0.5), - max_per_img=100, - mask_thr_binary=0.5)) -# dataset settings -dataset_type = 'CocoDataset' -data_root = 'data/coco/' -img_norm_cfg = dict( - mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) -train_pipeline = [ - dict(type='LoadImageFromFile'), - dict( - type='LoadAnnotations', with_bbox=True, with_mask=True, with_seg=True), - dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), - dict(type='RandomFlip', flip_ratio=0.5), - dict(type='Normalize', **img_norm_cfg), - dict(type='Pad', size_divisor=32), - dict(type='SegRescale', scale_factor=1 / 8), - dict(type='DefaultFormatBundle'), - dict( - type='Collect', - keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks', 'gt_semantic_seg']), -] -test_pipeline = [ - dict(type='LoadImageFromFile'), - dict( - type='MultiScaleFlipAug', - img_scale=(1333, 800), - flip=False, - transforms=[ - dict(type='Resize', keep_ratio=True), - dict(type='RandomFlip', flip_ratio=0.5), - dict(type='Normalize', **img_norm_cfg), - dict(type='Pad', size_divisor=32), - dict(type='ImageToTensor', keys=['img']), - dict(type='Collect', keys=['img']), - ]) -] -data = dict( - imgs_per_gpu=2, - workers_per_gpu=2, - train=dict( - type=dataset_type, - 
ann_file=data_root + 'annotations/instances_train2017.json', - img_prefix=data_root + 'train2017/', - seg_prefix=data_root + 'stuffthingmaps/train2017/', - pipeline=train_pipeline), - val=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_val2017.json', - img_prefix=data_root + 'val2017/', - pipeline=test_pipeline), - test=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_val2017.json', - img_prefix=data_root + 'val2017/', - pipeline=test_pipeline)) -evaluation = dict(interval=1, metric=['bbox', 'segm']) -# optimizer -optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001) -optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) -# learning policy -lr_config = dict( - policy='step', - warmup='linear', - warmup_iters=500, - warmup_ratio=1.0 / 3, - step=[16, 19]) -checkpoint_config = dict(interval=1) -# yapf:disable -log_config = dict( - interval=50, - hooks=[ - dict(type='TextLoggerHook'), - # dict(type='TensorboardLoggerHook') - ]) -# yapf:enable -# runtime settings -total_epochs = 20 -dist_params = dict(backend='nccl') -log_level = 'INFO' -work_dir = './work_dirs/htc_r101_fpn_20e' -load_from = None -resume_from = None -workflow = [('train', 1)] diff --git a/configs/htc/htc_r101_fpn_20e_coco.py b/configs/htc/htc_r101_fpn_20e_coco.py new file mode 100644 index 0000000000000000000000000000000000000000..fa34f13317456ac2934e1757241bf7871b1364b4 --- /dev/null +++ b/configs/htc/htc_r101_fpn_20e_coco.py @@ -0,0 +1,6 @@ +_base_ = './htc_r50_fpn_1x_coco.py' +model = dict(pretrained='torchvision://resnet101', backbone=dict(depth=101)) +# learning policy +lr_config = dict(step=[16, 19]) +total_epochs = 20 +work_dir = './work_dirs/htc_r101_fpn_20e' diff --git a/configs/htc/htc_r50_fpn_1x.py b/configs/htc/htc_r50_fpn_1x.py deleted file mode 100644 index f28d7df24f8d009a1c5b56e2988a545cff917ce1..0000000000000000000000000000000000000000 --- a/configs/htc/htc_r50_fpn_1x.py +++ /dev/null @@ -1,295 +0,0 @@ -# 
model settings -model = dict( - type='HybridTaskCascade', - num_stages=3, - pretrained='torchvision://resnet50', - interleaved=True, - mask_info_flow=True, - backbone=dict( - type='ResNet', - depth=50, - num_stages=4, - out_indices=(0, 1, 2, 3), - frozen_stages=1, - norm_cfg=dict(type='BN', requires_grad=True), - style='pytorch'), - neck=dict( - type='FPN', - in_channels=[256, 512, 1024, 2048], - out_channels=256, - num_outs=5), - rpn_head=dict( - type='RPNHead', - in_channels=256, - feat_channels=256, - anchor_scales=[8], - anchor_ratios=[0.5, 1.0, 2.0], - anchor_strides=[4, 8, 16, 32, 64], - target_means=[.0, .0, .0, .0], - target_stds=[1.0, 1.0, 1.0, 1.0], - loss_cls=dict( - type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0), - loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0)), - bbox_roi_extractor=dict( - type='SingleRoIExtractor', - roi_layer=dict(type='RoIAlign', out_size=7, sample_num=2), - out_channels=256, - featmap_strides=[4, 8, 16, 32]), - bbox_head=[ - dict( - type='SharedFCBBoxHead', - num_fcs=2, - in_channels=256, - fc_out_channels=1024, - roi_feat_size=7, - num_classes=81, - target_means=[0., 0., 0., 0.], - target_stds=[0.1, 0.1, 0.2, 0.2], - reg_class_agnostic=True, - loss_cls=dict( - type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), - loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0)), - dict( - type='SharedFCBBoxHead', - num_fcs=2, - in_channels=256, - fc_out_channels=1024, - roi_feat_size=7, - num_classes=81, - target_means=[0., 0., 0., 0.], - target_stds=[0.05, 0.05, 0.1, 0.1], - reg_class_agnostic=True, - loss_cls=dict( - type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), - loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0)), - dict( - type='SharedFCBBoxHead', - num_fcs=2, - in_channels=256, - fc_out_channels=1024, - roi_feat_size=7, - num_classes=81, - target_means=[0., 0., 0., 0.], - target_stds=[0.033, 0.033, 0.067, 0.067], - reg_class_agnostic=True, - 
loss_cls=dict( - type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), - loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0)) - ], - mask_roi_extractor=dict( - type='SingleRoIExtractor', - roi_layer=dict(type='RoIAlign', out_size=14, sample_num=2), - out_channels=256, - featmap_strides=[4, 8, 16, 32]), - mask_head=[ - dict( - type='HTCMaskHead', - with_conv_res=False, - num_convs=4, - in_channels=256, - conv_out_channels=256, - num_classes=81, - loss_mask=dict( - type='CrossEntropyLoss', use_mask=True, loss_weight=1.0)), - dict( - type='HTCMaskHead', - num_convs=4, - in_channels=256, - conv_out_channels=256, - num_classes=81, - loss_mask=dict( - type='CrossEntropyLoss', use_mask=True, loss_weight=1.0)), - dict( - type='HTCMaskHead', - num_convs=4, - in_channels=256, - conv_out_channels=256, - num_classes=81, - loss_mask=dict( - type='CrossEntropyLoss', use_mask=True, loss_weight=1.0)) - ], - semantic_roi_extractor=dict( - type='SingleRoIExtractor', - roi_layer=dict(type='RoIAlign', out_size=14, sample_num=2), - out_channels=256, - featmap_strides=[8]), - semantic_head=dict( - type='FusedSemanticHead', - num_ins=5, - fusion_level=1, - num_convs=4, - in_channels=256, - conv_out_channels=256, - num_classes=183, - ignore_label=255, - loss_weight=0.2)) -# model training and testing settings -train_cfg = dict( - rpn=dict( - assigner=dict( - type='MaxIoUAssigner', - pos_iou_thr=0.7, - neg_iou_thr=0.3, - min_pos_iou=0.3, - ignore_iof_thr=-1), - sampler=dict( - type='RandomSampler', - num=256, - pos_fraction=0.5, - neg_pos_ub=-1, - add_gt_as_proposals=False), - allowed_border=0, - pos_weight=-1, - debug=False), - rpn_proposal=dict( - nms_across_levels=False, - nms_pre=2000, - nms_post=2000, - max_num=2000, - nms_thr=0.7, - min_bbox_size=0), - rcnn=[ - dict( - assigner=dict( - type='MaxIoUAssigner', - pos_iou_thr=0.5, - neg_iou_thr=0.5, - min_pos_iou=0.5, - ignore_iof_thr=-1), - sampler=dict( - type='RandomSampler', - num=512, - pos_fraction=0.25, - 
neg_pos_ub=-1, - add_gt_as_proposals=True), - mask_size=28, - pos_weight=-1, - debug=False), - dict( - assigner=dict( - type='MaxIoUAssigner', - pos_iou_thr=0.6, - neg_iou_thr=0.6, - min_pos_iou=0.6, - ignore_iof_thr=-1), - sampler=dict( - type='RandomSampler', - num=512, - pos_fraction=0.25, - neg_pos_ub=-1, - add_gt_as_proposals=True), - mask_size=28, - pos_weight=-1, - debug=False), - dict( - assigner=dict( - type='MaxIoUAssigner', - pos_iou_thr=0.7, - neg_iou_thr=0.7, - min_pos_iou=0.7, - ignore_iof_thr=-1), - sampler=dict( - type='RandomSampler', - num=512, - pos_fraction=0.25, - neg_pos_ub=-1, - add_gt_as_proposals=True), - mask_size=28, - pos_weight=-1, - debug=False) - ], - stage_loss_weights=[1, 0.5, 0.25]) -test_cfg = dict( - rpn=dict( - nms_across_levels=False, - nms_pre=1000, - nms_post=1000, - max_num=1000, - nms_thr=0.7, - min_bbox_size=0), - rcnn=dict( - score_thr=0.001, - nms=dict(type='nms', iou_thr=0.5), - max_per_img=100, - mask_thr_binary=0.5)) -# dataset settings -dataset_type = 'CocoDataset' -data_root = 'data/coco/' -img_norm_cfg = dict( - mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) -train_pipeline = [ - dict(type='LoadImageFromFile'), - dict( - type='LoadAnnotations', with_bbox=True, with_mask=True, with_seg=True), - dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), - dict(type='RandomFlip', flip_ratio=0.5), - dict(type='Normalize', **img_norm_cfg), - dict(type='Pad', size_divisor=32), - dict(type='SegRescale', scale_factor=1 / 8), - dict(type='DefaultFormatBundle'), - dict( - type='Collect', - keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks', 'gt_semantic_seg']), -] -test_pipeline = [ - dict(type='LoadImageFromFile'), - dict( - type='MultiScaleFlipAug', - img_scale=(1333, 800), - flip=False, - transforms=[ - dict(type='Resize', keep_ratio=True), - dict(type='RandomFlip', flip_ratio=0.5), - dict(type='Normalize', **img_norm_cfg), - dict(type='Pad', size_divisor=32), - dict(type='ImageToTensor', 
keys=['img']), - dict(type='Collect', keys=['img']), - ]) -] -data = dict( - imgs_per_gpu=2, - workers_per_gpu=2, - train=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_train2017.json', - img_prefix=data_root + 'train2017/', - seg_prefix=data_root + 'stuffthingmaps/train2017/', - pipeline=train_pipeline), - val=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_val2017.json', - img_prefix=data_root + 'val2017/', - pipeline=test_pipeline), - test=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_val2017.json', - img_prefix=data_root + 'val2017/', - pipeline=test_pipeline)) -evaluation = dict(interval=1, metric=['bbox', 'segm']) -# optimizer -optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001) -optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) -# learning policy -lr_config = dict( - policy='step', - warmup='linear', - warmup_iters=500, - warmup_ratio=1.0 / 3, - step=[8, 11]) -checkpoint_config = dict(interval=1) -# yapf:disable -log_config = dict( - interval=50, - hooks=[ - dict(type='TextLoggerHook'), - # dict(type='TensorboardLoggerHook') - ]) -# yapf:enable -# runtime settings -total_epochs = 12 -dist_params = dict(backend='nccl') -log_level = 'INFO' -work_dir = './work_dirs/htc_r50_fpn_1x' -load_from = None -resume_from = None -workflow = [('train', 1)] diff --git a/configs/htc/htc_r50_fpn_1x_coco.py b/configs/htc/htc_r50_fpn_1x_coco.py new file mode 100644 index 0000000000000000000000000000000000000000..6da8e5fc206e693e1b88c4baaef7096470751ccf --- /dev/null +++ b/configs/htc/htc_r50_fpn_1x_coco.py @@ -0,0 +1,56 @@ +_base_ = './htc_without_semantic_r50_fpn_1x_coco.py' +model = dict( + semantic_roi_extractor=dict( + type='SingleRoIExtractor', + roi_layer=dict(type='RoIAlign', out_size=14, sample_num=2), + out_channels=256, + featmap_strides=[8]), + semantic_head=dict( + type='FusedSemanticHead', + num_ins=5, + fusion_level=1, + num_convs=4, + 
in_channels=256, + conv_out_channels=256, + num_classes=183, + ignore_label=255, + loss_weight=0.2)) +data_root = 'data/coco/' +img_norm_cfg = dict( + mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='LoadAnnotations', with_bbox=True, with_mask=True, with_seg=True), + dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='SegRescale', scale_factor=1 / 8), + dict(type='DefaultFormatBundle'), + dict( + type='Collect', + keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks', 'gt_semantic_seg']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(1333, 800), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] +data = dict( + train=dict( + seg_prefix=data_root + 'stuffthingmaps/train2017/', + pipeline=train_pipeline), + val=dict(pipeline=test_pipeline), + test=dict(pipeline=test_pipeline)) +work_dir = './work_dirs/htc_r50_fpn_1x' diff --git a/configs/htc/htc_r50_fpn_20e.py b/configs/htc/htc_r50_fpn_20e.py deleted file mode 100644 index ec596b9ac089002ed5c85614102bf0dd1e6de24c..0000000000000000000000000000000000000000 --- a/configs/htc/htc_r50_fpn_20e.py +++ /dev/null @@ -1,295 +0,0 @@ -# model settings -model = dict( - type='HybridTaskCascade', - num_stages=3, - pretrained='torchvision://resnet50', - interleaved=True, - mask_info_flow=True, - backbone=dict( - type='ResNet', - depth=50, - num_stages=4, - out_indices=(0, 1, 2, 3), - frozen_stages=1, - norm_cfg=dict(type='BN', requires_grad=True), - style='pytorch'), - neck=dict( - type='FPN', - in_channels=[256, 512, 
1024, 2048], - out_channels=256, - num_outs=5), - rpn_head=dict( - type='RPNHead', - in_channels=256, - feat_channels=256, - anchor_scales=[8], - anchor_ratios=[0.5, 1.0, 2.0], - anchor_strides=[4, 8, 16, 32, 64], - target_means=[.0, .0, .0, .0], - target_stds=[1.0, 1.0, 1.0, 1.0], - loss_cls=dict( - type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0), - loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0)), - bbox_roi_extractor=dict( - type='SingleRoIExtractor', - roi_layer=dict(type='RoIAlign', out_size=7, sample_num=2), - out_channels=256, - featmap_strides=[4, 8, 16, 32]), - bbox_head=[ - dict( - type='SharedFCBBoxHead', - num_fcs=2, - in_channels=256, - fc_out_channels=1024, - roi_feat_size=7, - num_classes=81, - target_means=[0., 0., 0., 0.], - target_stds=[0.1, 0.1, 0.2, 0.2], - reg_class_agnostic=True, - loss_cls=dict( - type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), - loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0)), - dict( - type='SharedFCBBoxHead', - num_fcs=2, - in_channels=256, - fc_out_channels=1024, - roi_feat_size=7, - num_classes=81, - target_means=[0., 0., 0., 0.], - target_stds=[0.05, 0.05, 0.1, 0.1], - reg_class_agnostic=True, - loss_cls=dict( - type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), - loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0)), - dict( - type='SharedFCBBoxHead', - num_fcs=2, - in_channels=256, - fc_out_channels=1024, - roi_feat_size=7, - num_classes=81, - target_means=[0., 0., 0., 0.], - target_stds=[0.033, 0.033, 0.067, 0.067], - reg_class_agnostic=True, - loss_cls=dict( - type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), - loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0)) - ], - mask_roi_extractor=dict( - type='SingleRoIExtractor', - roi_layer=dict(type='RoIAlign', out_size=14, sample_num=2), - out_channels=256, - featmap_strides=[4, 8, 16, 32]), - mask_head=[ - dict( - type='HTCMaskHead', - with_conv_res=False, - 
num_convs=4, - in_channels=256, - conv_out_channels=256, - num_classes=81, - loss_mask=dict( - type='CrossEntropyLoss', use_mask=True, loss_weight=1.0)), - dict( - type='HTCMaskHead', - num_convs=4, - in_channels=256, - conv_out_channels=256, - num_classes=81, - loss_mask=dict( - type='CrossEntropyLoss', use_mask=True, loss_weight=1.0)), - dict( - type='HTCMaskHead', - num_convs=4, - in_channels=256, - conv_out_channels=256, - num_classes=81, - loss_mask=dict( - type='CrossEntropyLoss', use_mask=True, loss_weight=1.0)) - ], - semantic_roi_extractor=dict( - type='SingleRoIExtractor', - roi_layer=dict(type='RoIAlign', out_size=14, sample_num=2), - out_channels=256, - featmap_strides=[8]), - semantic_head=dict( - type='FusedSemanticHead', - num_ins=5, - fusion_level=1, - num_convs=4, - in_channels=256, - conv_out_channels=256, - num_classes=183, - ignore_label=255, - loss_weight=0.2)) -# model training and testing settings -train_cfg = dict( - rpn=dict( - assigner=dict( - type='MaxIoUAssigner', - pos_iou_thr=0.7, - neg_iou_thr=0.3, - min_pos_iou=0.3, - ignore_iof_thr=-1), - sampler=dict( - type='RandomSampler', - num=256, - pos_fraction=0.5, - neg_pos_ub=-1, - add_gt_as_proposals=False), - allowed_border=0, - pos_weight=-1, - debug=False), - rpn_proposal=dict( - nms_across_levels=False, - nms_pre=2000, - nms_post=2000, - max_num=2000, - nms_thr=0.7, - min_bbox_size=0), - rcnn=[ - dict( - assigner=dict( - type='MaxIoUAssigner', - pos_iou_thr=0.5, - neg_iou_thr=0.5, - min_pos_iou=0.5, - ignore_iof_thr=-1), - sampler=dict( - type='RandomSampler', - num=512, - pos_fraction=0.25, - neg_pos_ub=-1, - add_gt_as_proposals=True), - mask_size=28, - pos_weight=-1, - debug=False), - dict( - assigner=dict( - type='MaxIoUAssigner', - pos_iou_thr=0.6, - neg_iou_thr=0.6, - min_pos_iou=0.6, - ignore_iof_thr=-1), - sampler=dict( - type='RandomSampler', - num=512, - pos_fraction=0.25, - neg_pos_ub=-1, - add_gt_as_proposals=True), - mask_size=28, - pos_weight=-1, - debug=False), - dict( - 
assigner=dict( - type='MaxIoUAssigner', - pos_iou_thr=0.7, - neg_iou_thr=0.7, - min_pos_iou=0.7, - ignore_iof_thr=-1), - sampler=dict( - type='RandomSampler', - num=512, - pos_fraction=0.25, - neg_pos_ub=-1, - add_gt_as_proposals=True), - mask_size=28, - pos_weight=-1, - debug=False) - ], - stage_loss_weights=[1, 0.5, 0.25]) -test_cfg = dict( - rpn=dict( - nms_across_levels=False, - nms_pre=1000, - nms_post=1000, - max_num=1000, - nms_thr=0.7, - min_bbox_size=0), - rcnn=dict( - score_thr=0.001, - nms=dict(type='nms', iou_thr=0.5), - max_per_img=100, - mask_thr_binary=0.5)) -# dataset settings -dataset_type = 'CocoDataset' -data_root = 'data/coco/' -img_norm_cfg = dict( - mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) -train_pipeline = [ - dict(type='LoadImageFromFile'), - dict( - type='LoadAnnotations', with_bbox=True, with_mask=True, with_seg=True), - dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), - dict(type='RandomFlip', flip_ratio=0.5), - dict(type='Normalize', **img_norm_cfg), - dict(type='Pad', size_divisor=32), - dict(type='SegRescale', scale_factor=1 / 8), - dict(type='DefaultFormatBundle'), - dict( - type='Collect', - keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks', 'gt_semantic_seg']), -] -test_pipeline = [ - dict(type='LoadImageFromFile'), - dict( - type='MultiScaleFlipAug', - img_scale=(1333, 800), - flip=False, - transforms=[ - dict(type='Resize', keep_ratio=True), - dict(type='RandomFlip', flip_ratio=0.5), - dict(type='Normalize', **img_norm_cfg), - dict(type='Pad', size_divisor=32), - dict(type='ImageToTensor', keys=['img']), - dict(type='Collect', keys=['img']), - ]) -] -data = dict( - imgs_per_gpu=2, - workers_per_gpu=2, - train=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_train2017.json', - img_prefix=data_root + 'train2017/', - seg_prefix=data_root + 'stuffthingmaps/train2017/', - pipeline=train_pipeline), - val=dict( - type=dataset_type, - ann_file=data_root + 
'annotations/instances_val2017.json', - img_prefix=data_root + 'val2017/', - pipeline=test_pipeline), - test=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_val2017.json', - img_prefix=data_root + 'val2017/', - pipeline=test_pipeline)) -evaluation = dict(interval=1, metric=['bbox', 'segm']) -# optimizer -optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001) -optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) -# learning policy -lr_config = dict( - policy='step', - warmup='linear', - warmup_iters=500, - warmup_ratio=1.0 / 3, - step=[16, 19]) -checkpoint_config = dict(interval=1) -# yapf:disable -log_config = dict( - interval=50, - hooks=[ - dict(type='TextLoggerHook'), - # dict(type='TensorboardLoggerHook') - ]) -# yapf:enable -# runtime settings -total_epochs = 20 -dist_params = dict(backend='nccl') -log_level = 'INFO' -work_dir = './work_dirs/htc_r50_fpn_20e' -load_from = None -resume_from = None -workflow = [('train', 1)] diff --git a/configs/htc/htc_r50_fpn_20e_coco.py b/configs/htc/htc_r50_fpn_20e_coco.py new file mode 100644 index 0000000000000000000000000000000000000000..00947bf35d9d4f1e31382404c7cbf6cb3b252b96 --- /dev/null +++ b/configs/htc/htc_r50_fpn_20e_coco.py @@ -0,0 +1,5 @@ +_base_ = './htc_r50_fpn_1x_coco.py' +# learning policy +lr_config = dict(step=[16, 19]) +total_epochs = 20 +work_dir = './work_dirs/htc_r50_fpn_20e' diff --git a/configs/htc/htc_without_semantic_r50_fpn_1x.py b/configs/htc/htc_without_semantic_r50_fpn_1x_coco.py similarity index 77% rename from configs/htc/htc_without_semantic_r50_fpn_1x.py rename to configs/htc/htc_without_semantic_r50_fpn_1x_coco.py index 6616e8e202f07c9135eab0c780f8642fe700ccea..ec0bbce44c1ed3683a48c176f580493daefa0a8a 100644 --- a/configs/htc/htc_without_semantic_r50_fpn_1x.py +++ b/configs/htc/htc_without_semantic_r50_fpn_1x_coco.py @@ -1,3 +1,7 @@ +_base_ = [ + '../_base_/datasets/coco_instance.py', + '../_base_/schedules/schedule_1x.py', 
'../_base_/default_runtime.py' +] # model settings model = dict( type='HybridTaskCascade', @@ -37,8 +41,7 @@ model = dict( featmap_strides=[4, 8, 16, 32]), bbox_head=[ dict( - type='SharedFCBBoxHead', - num_fcs=2, + type='Shared2FCBBoxHead', in_channels=256, fc_out_channels=1024, roi_feat_size=7, @@ -50,8 +53,7 @@ model = dict( type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0)), dict( - type='SharedFCBBoxHead', - num_fcs=2, + type='Shared2FCBBoxHead', in_channels=256, fc_out_channels=1024, roi_feat_size=7, @@ -63,8 +65,7 @@ model = dict( type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0)), dict( - type='SharedFCBBoxHead', - num_fcs=2, + type='Shared2FCBBoxHead', in_channels=256, fc_out_channels=1024, roi_feat_size=7, @@ -197,21 +198,8 @@ test_cfg = dict( nms=dict(type='nms', iou_thr=0.5), max_per_img=100, mask_thr_binary=0.5)) -# dataset settings -dataset_type = 'CocoDataset' -data_root = 'data/coco/' img_norm_cfg = dict( mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) -train_pipeline = [ - dict(type='LoadImageFromFile'), - dict(type='LoadAnnotations', with_bbox=True, with_mask=True), - dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), - dict(type='RandomFlip', flip_ratio=0.5), - dict(type='Normalize', **img_norm_cfg), - dict(type='Pad', size_divisor=32), - dict(type='DefaultFormatBundle'), - dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']), -] test_pipeline = [ dict(type='LoadImageFromFile'), dict( @@ -228,48 +216,5 @@ test_pipeline = [ ]) ] data = dict( - imgs_per_gpu=2, - workers_per_gpu=2, - train=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_train2017.json', - img_prefix=data_root + 'train2017/', - pipeline=train_pipeline), - val=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_val2017.json', - 
img_prefix=data_root + 'val2017/', - pipeline=test_pipeline), - test=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_val2017.json', - img_prefix=data_root + 'val2017/', - pipeline=test_pipeline)) -evaluation = dict(interval=1, metric=['bbox', 'segm']) -# optimizer -optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001) -optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) -# learning policy -lr_config = dict( - policy='step', - warmup='linear', - warmup_iters=500, - warmup_ratio=1.0 / 3, - step=[8, 11]) -checkpoint_config = dict(interval=1) -# yapf:disable -log_config = dict( - interval=50, - hooks=[ - dict(type='TextLoggerHook'), - # dict(type='TensorboardLoggerHook') - ]) -# yapf:enable -# runtime settings -total_epochs = 12 -dist_params = dict(backend='nccl') -log_level = 'INFO' + val=dict(pipeline=test_pipeline), test=dict(pipeline=test_pipeline)) work_dir = './work_dirs/htc_without_semantic_r50_fpn_1x' -load_from = None -resume_from = None -workflow = [('train', 1)] diff --git a/configs/htc/htc_x101_32x4d_fpn_16x1_20e_coco.py b/configs/htc/htc_x101_32x4d_fpn_16x1_20e_coco.py new file mode 100644 index 0000000000000000000000000000000000000000..cbb32558dea4230b23cdb14c47fa687a21dd66fc --- /dev/null +++ b/configs/htc/htc_x101_32x4d_fpn_16x1_20e_coco.py @@ -0,0 +1,18 @@ +_base_ = './htc_r50_fpn_1x_coco.py' +model = dict( + pretrained='open-mmlab://resnext101_32x4d', + backbone=dict( + type='ResNeXt', + depth=101, + groups=32, + base_width=4, + num_stages=4, + out_indices=(0, 1, 2, 3), + frozen_stages=1, + norm_cfg=dict(type='BN', requires_grad=True), + style='pytorch')) +data = dict(imgs_per_gpu=1, workers_per_gpu=1) +# learning policy +lr_config = dict(step=[16, 19]) +total_epochs = 20 +work_dir = './work_dirs/htc_x101_32x4d_fpn_20e' diff --git a/configs/htc/htc_x101_32x4d_fpn_20e_16gpu.py b/configs/htc/htc_x101_32x4d_fpn_20e_16gpu.py deleted file mode 100644 index 
65c94a661bb2d676a3494150b26a4d7d2dfdeadc..0000000000000000000000000000000000000000 --- a/configs/htc/htc_x101_32x4d_fpn_20e_16gpu.py +++ /dev/null @@ -1,297 +0,0 @@ -# model settings -model = dict( - type='HybridTaskCascade', - num_stages=3, - pretrained='open-mmlab://resnext101_32x4d', - interleaved=True, - mask_info_flow=True, - backbone=dict( - type='ResNeXt', - depth=101, - groups=32, - base_width=4, - num_stages=4, - out_indices=(0, 1, 2, 3), - frozen_stages=1, - norm_cfg=dict(type='BN', requires_grad=True), - style='pytorch'), - neck=dict( - type='FPN', - in_channels=[256, 512, 1024, 2048], - out_channels=256, - num_outs=5), - rpn_head=dict( - type='RPNHead', - in_channels=256, - feat_channels=256, - anchor_scales=[8], - anchor_ratios=[0.5, 1.0, 2.0], - anchor_strides=[4, 8, 16, 32, 64], - target_means=[.0, .0, .0, .0], - target_stds=[1.0, 1.0, 1.0, 1.0], - loss_cls=dict( - type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0), - loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0)), - bbox_roi_extractor=dict( - type='SingleRoIExtractor', - roi_layer=dict(type='RoIAlign', out_size=7, sample_num=2), - out_channels=256, - featmap_strides=[4, 8, 16, 32]), - bbox_head=[ - dict( - type='SharedFCBBoxHead', - num_fcs=2, - in_channels=256, - fc_out_channels=1024, - roi_feat_size=7, - num_classes=81, - target_means=[0., 0., 0., 0.], - target_stds=[0.1, 0.1, 0.2, 0.2], - reg_class_agnostic=True, - loss_cls=dict( - type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), - loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0)), - dict( - type='SharedFCBBoxHead', - num_fcs=2, - in_channels=256, - fc_out_channels=1024, - roi_feat_size=7, - num_classes=81, - target_means=[0., 0., 0., 0.], - target_stds=[0.05, 0.05, 0.1, 0.1], - reg_class_agnostic=True, - loss_cls=dict( - type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), - loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0)), - dict( - type='SharedFCBBoxHead', - 
num_fcs=2, - in_channels=256, - fc_out_channels=1024, - roi_feat_size=7, - num_classes=81, - target_means=[0., 0., 0., 0.], - target_stds=[0.033, 0.033, 0.067, 0.067], - reg_class_agnostic=True, - loss_cls=dict( - type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), - loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0)) - ], - mask_roi_extractor=dict( - type='SingleRoIExtractor', - roi_layer=dict(type='RoIAlign', out_size=14, sample_num=2), - out_channels=256, - featmap_strides=[4, 8, 16, 32]), - mask_head=[ - dict( - type='HTCMaskHead', - with_conv_res=False, - num_convs=4, - in_channels=256, - conv_out_channels=256, - num_classes=81, - loss_mask=dict( - type='CrossEntropyLoss', use_mask=True, loss_weight=1.0)), - dict( - type='HTCMaskHead', - num_convs=4, - in_channels=256, - conv_out_channels=256, - num_classes=81, - loss_mask=dict( - type='CrossEntropyLoss', use_mask=True, loss_weight=1.0)), - dict( - type='HTCMaskHead', - num_convs=4, - in_channels=256, - conv_out_channels=256, - num_classes=81, - loss_mask=dict( - type='CrossEntropyLoss', use_mask=True, loss_weight=1.0)) - ], - semantic_roi_extractor=dict( - type='SingleRoIExtractor', - roi_layer=dict(type='RoIAlign', out_size=14, sample_num=2), - out_channels=256, - featmap_strides=[8]), - semantic_head=dict( - type='FusedSemanticHead', - num_ins=5, - fusion_level=1, - num_convs=4, - in_channels=256, - conv_out_channels=256, - num_classes=183, - ignore_label=255, - loss_weight=0.2)) -# model training and testing settings -train_cfg = dict( - rpn=dict( - assigner=dict( - type='MaxIoUAssigner', - pos_iou_thr=0.7, - neg_iou_thr=0.3, - min_pos_iou=0.3, - ignore_iof_thr=-1), - sampler=dict( - type='RandomSampler', - num=256, - pos_fraction=0.5, - neg_pos_ub=-1, - add_gt_as_proposals=False), - allowed_border=0, - pos_weight=-1, - debug=False), - rpn_proposal=dict( - nms_across_levels=False, - nms_pre=2000, - nms_post=2000, - max_num=2000, - nms_thr=0.7, - min_bbox_size=0), - rcnn=[ - dict( - 
assigner=dict( - type='MaxIoUAssigner', - pos_iou_thr=0.5, - neg_iou_thr=0.5, - min_pos_iou=0.5, - ignore_iof_thr=-1), - sampler=dict( - type='RandomSampler', - num=512, - pos_fraction=0.25, - neg_pos_ub=-1, - add_gt_as_proposals=True), - mask_size=28, - pos_weight=-1, - debug=False), - dict( - assigner=dict( - type='MaxIoUAssigner', - pos_iou_thr=0.6, - neg_iou_thr=0.6, - min_pos_iou=0.6, - ignore_iof_thr=-1), - sampler=dict( - type='RandomSampler', - num=512, - pos_fraction=0.25, - neg_pos_ub=-1, - add_gt_as_proposals=True), - mask_size=28, - pos_weight=-1, - debug=False), - dict( - assigner=dict( - type='MaxIoUAssigner', - pos_iou_thr=0.7, - neg_iou_thr=0.7, - min_pos_iou=0.7, - ignore_iof_thr=-1), - sampler=dict( - type='RandomSampler', - num=512, - pos_fraction=0.25, - neg_pos_ub=-1, - add_gt_as_proposals=True), - mask_size=28, - pos_weight=-1, - debug=False) - ], - stage_loss_weights=[1, 0.5, 0.25]) -test_cfg = dict( - rpn=dict( - nms_across_levels=False, - nms_pre=1000, - nms_post=1000, - max_num=1000, - nms_thr=0.7, - min_bbox_size=0), - rcnn=dict( - score_thr=0.001, - nms=dict(type='nms', iou_thr=0.5), - max_per_img=100, - mask_thr_binary=0.5)) -# dataset settings -dataset_type = 'CocoDataset' -data_root = 'data/coco/' -img_norm_cfg = dict( - mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) -train_pipeline = [ - dict(type='LoadImageFromFile'), - dict( - type='LoadAnnotations', with_bbox=True, with_mask=True, with_seg=True), - dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), - dict(type='RandomFlip', flip_ratio=0.5), - dict(type='Normalize', **img_norm_cfg), - dict(type='Pad', size_divisor=32), - dict(type='SegRescale', scale_factor=1 / 8), - dict(type='DefaultFormatBundle'), - dict( - type='Collect', - keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks', 'gt_semantic_seg']), -] -test_pipeline = [ - dict(type='LoadImageFromFile'), - dict( - type='MultiScaleFlipAug', - img_scale=(1333, 800), - flip=False, - transforms=[ - 
dict(type='Resize', keep_ratio=True), - dict(type='RandomFlip', flip_ratio=0.5), - dict(type='Normalize', **img_norm_cfg), - dict(type='Pad', size_divisor=32), - dict(type='ImageToTensor', keys=['img']), - dict(type='Collect', keys=['img']), - ]) -] -data = dict( - imgs_per_gpu=1, - workers_per_gpu=1, - train=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_train2017.json', - img_prefix=data_root + 'train2017/', - seg_prefix=data_root + 'stuffthingmaps/train2017/', - pipeline=train_pipeline), - val=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_val2017.json', - img_prefix=data_root + 'val2017/', - pipeline=test_pipeline), - test=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_val2017.json', - img_prefix=data_root + 'val2017/', - pipeline=test_pipeline)) -evaluation = dict(interval=1, metric=['bbox', 'segm']) -# optimizer -optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001) -optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) -# learning policy -lr_config = dict( - policy='step', - warmup='linear', - warmup_iters=500, - warmup_ratio=1.0 / 3, - step=[16, 19]) -checkpoint_config = dict(interval=1) -# yapf:disable -log_config = dict( - interval=50, - hooks=[ - dict(type='TextLoggerHook'), - # dict(type='TensorboardLoggerHook') - ]) -# yapf:enable -# runtime settings -total_epochs = 20 -dist_params = dict(backend='nccl') -log_level = 'INFO' -work_dir = './work_dirs/htc_x101_32x4d_fpn_20e' -load_from = None -resume_from = None -workflow = [('train', 1)] diff --git a/configs/htc/htc_x101_64x4d_fpn_16x1_20e_coco.py b/configs/htc/htc_x101_64x4d_fpn_16x1_20e_coco.py new file mode 100644 index 0000000000000000000000000000000000000000..797d219d3b79453413c2b71ec73759bf097db7bd --- /dev/null +++ b/configs/htc/htc_x101_64x4d_fpn_16x1_20e_coco.py @@ -0,0 +1,18 @@ +_base_ = './htc_r50_fpn_1x_coco.py' +model = dict( + pretrained='open-mmlab://resnext101_64x4d', + 
backbone=dict( + type='ResNeXt', + depth=101, + groups=64, + base_width=4, + num_stages=4, + out_indices=(0, 1, 2, 3), + frozen_stages=1, + norm_cfg=dict(type='BN', requires_grad=True), + style='pytorch')) +data = dict(imgs_per_gpu=1, workers_per_gpu=1) +# learning policy +lr_config = dict(step=[16, 19]) +total_epochs = 20 +work_dir = './work_dirs/htc_x101_64x4d_fpn_20e' diff --git a/configs/htc/htc_x101_64x4d_fpn_20e_16gpu.py b/configs/htc/htc_x101_64x4d_fpn_20e_16gpu.py deleted file mode 100644 index 574736b5741f9ba18e53e8709ec471b764f8acea..0000000000000000000000000000000000000000 --- a/configs/htc/htc_x101_64x4d_fpn_20e_16gpu.py +++ /dev/null @@ -1,297 +0,0 @@ -# model settings -model = dict( - type='HybridTaskCascade', - num_stages=3, - pretrained='open-mmlab://resnext101_64x4d', - interleaved=True, - mask_info_flow=True, - backbone=dict( - type='ResNeXt', - depth=101, - groups=64, - base_width=4, - num_stages=4, - out_indices=(0, 1, 2, 3), - frozen_stages=1, - norm_cfg=dict(type='BN', requires_grad=True), - style='pytorch'), - neck=dict( - type='FPN', - in_channels=[256, 512, 1024, 2048], - out_channels=256, - num_outs=5), - rpn_head=dict( - type='RPNHead', - in_channels=256, - feat_channels=256, - anchor_scales=[8], - anchor_ratios=[0.5, 1.0, 2.0], - anchor_strides=[4, 8, 16, 32, 64], - target_means=[.0, .0, .0, .0], - target_stds=[1.0, 1.0, 1.0, 1.0], - loss_cls=dict( - type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0), - loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0)), - bbox_roi_extractor=dict( - type='SingleRoIExtractor', - roi_layer=dict(type='RoIAlign', out_size=7, sample_num=2), - out_channels=256, - featmap_strides=[4, 8, 16, 32]), - bbox_head=[ - dict( - type='SharedFCBBoxHead', - num_fcs=2, - in_channels=256, - fc_out_channels=1024, - roi_feat_size=7, - num_classes=81, - target_means=[0., 0., 0., 0.], - target_stds=[0.1, 0.1, 0.2, 0.2], - reg_class_agnostic=True, - loss_cls=dict( - type='CrossEntropyLoss', 
use_sigmoid=False, loss_weight=1.0), - loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0)), - dict( - type='SharedFCBBoxHead', - num_fcs=2, - in_channels=256, - fc_out_channels=1024, - roi_feat_size=7, - num_classes=81, - target_means=[0., 0., 0., 0.], - target_stds=[0.05, 0.05, 0.1, 0.1], - reg_class_agnostic=True, - loss_cls=dict( - type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), - loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0)), - dict( - type='SharedFCBBoxHead', - num_fcs=2, - in_channels=256, - fc_out_channels=1024, - roi_feat_size=7, - num_classes=81, - target_means=[0., 0., 0., 0.], - target_stds=[0.033, 0.033, 0.067, 0.067], - reg_class_agnostic=True, - loss_cls=dict( - type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), - loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0)) - ], - mask_roi_extractor=dict( - type='SingleRoIExtractor', - roi_layer=dict(type='RoIAlign', out_size=14, sample_num=2), - out_channels=256, - featmap_strides=[4, 8, 16, 32]), - mask_head=[ - dict( - type='HTCMaskHead', - with_conv_res=False, - num_convs=4, - in_channels=256, - conv_out_channels=256, - num_classes=81, - loss_mask=dict( - type='CrossEntropyLoss', use_mask=True, loss_weight=1.0)), - dict( - type='HTCMaskHead', - num_convs=4, - in_channels=256, - conv_out_channels=256, - num_classes=81, - loss_mask=dict( - type='CrossEntropyLoss', use_mask=True, loss_weight=1.0)), - dict( - type='HTCMaskHead', - num_convs=4, - in_channels=256, - conv_out_channels=256, - num_classes=81, - loss_mask=dict( - type='CrossEntropyLoss', use_mask=True, loss_weight=1.0)) - ], - semantic_roi_extractor=dict( - type='SingleRoIExtractor', - roi_layer=dict(type='RoIAlign', out_size=14, sample_num=2), - out_channels=256, - featmap_strides=[8]), - semantic_head=dict( - type='FusedSemanticHead', - num_ins=5, - fusion_level=1, - num_convs=4, - in_channels=256, - conv_out_channels=256, - num_classes=183, - ignore_label=255, - loss_weight=0.2)) 
-# model training and testing settings -train_cfg = dict( - rpn=dict( - assigner=dict( - type='MaxIoUAssigner', - pos_iou_thr=0.7, - neg_iou_thr=0.3, - min_pos_iou=0.3, - ignore_iof_thr=-1), - sampler=dict( - type='RandomSampler', - num=256, - pos_fraction=0.5, - neg_pos_ub=-1, - add_gt_as_proposals=False), - allowed_border=0, - pos_weight=-1, - debug=False), - rpn_proposal=dict( - nms_across_levels=False, - nms_pre=2000, - nms_post=2000, - max_num=2000, - nms_thr=0.7, - min_bbox_size=0), - rcnn=[ - dict( - assigner=dict( - type='MaxIoUAssigner', - pos_iou_thr=0.5, - neg_iou_thr=0.5, - min_pos_iou=0.5, - ignore_iof_thr=-1), - sampler=dict( - type='RandomSampler', - num=512, - pos_fraction=0.25, - neg_pos_ub=-1, - add_gt_as_proposals=True), - mask_size=28, - pos_weight=-1, - debug=False), - dict( - assigner=dict( - type='MaxIoUAssigner', - pos_iou_thr=0.6, - neg_iou_thr=0.6, - min_pos_iou=0.6, - ignore_iof_thr=-1), - sampler=dict( - type='RandomSampler', - num=512, - pos_fraction=0.25, - neg_pos_ub=-1, - add_gt_as_proposals=True), - mask_size=28, - pos_weight=-1, - debug=False), - dict( - assigner=dict( - type='MaxIoUAssigner', - pos_iou_thr=0.7, - neg_iou_thr=0.7, - min_pos_iou=0.7, - ignore_iof_thr=-1), - sampler=dict( - type='RandomSampler', - num=512, - pos_fraction=0.25, - neg_pos_ub=-1, - add_gt_as_proposals=True), - mask_size=28, - pos_weight=-1, - debug=False) - ], - stage_loss_weights=[1, 0.5, 0.25]) -test_cfg = dict( - rpn=dict( - nms_across_levels=False, - nms_pre=1000, - nms_post=1000, - max_num=1000, - nms_thr=0.7, - min_bbox_size=0), - rcnn=dict( - score_thr=0.001, - nms=dict(type='nms', iou_thr=0.5), - max_per_img=100, - mask_thr_binary=0.5)) -# dataset settings -dataset_type = 'CocoDataset' -data_root = 'data/coco/' -img_norm_cfg = dict( - mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) -train_pipeline = [ - dict(type='LoadImageFromFile'), - dict( - type='LoadAnnotations', with_bbox=True, with_mask=True, with_seg=True), - 
dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), - dict(type='RandomFlip', flip_ratio=0.5), - dict(type='Normalize', **img_norm_cfg), - dict(type='Pad', size_divisor=32), - dict(type='SegRescale', scale_factor=1 / 8), - dict(type='DefaultFormatBundle'), - dict( - type='Collect', - keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks', 'gt_semantic_seg']), -] -test_pipeline = [ - dict(type='LoadImageFromFile'), - dict( - type='MultiScaleFlipAug', - img_scale=(1333, 800), - flip=False, - transforms=[ - dict(type='Resize', keep_ratio=True), - dict(type='RandomFlip', flip_ratio=0.5), - dict(type='Normalize', **img_norm_cfg), - dict(type='Pad', size_divisor=32), - dict(type='ImageToTensor', keys=['img']), - dict(type='Collect', keys=['img']), - ]) -] -data = dict( - imgs_per_gpu=1, - workers_per_gpu=1, - train=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_train2017.json', - img_prefix=data_root + 'train2017/', - seg_prefix=data_root + 'stuffthingmaps/train2017/', - pipeline=train_pipeline), - val=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_val2017.json', - img_prefix=data_root + 'val2017/', - pipeline=test_pipeline), - test=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_val2017.json', - img_prefix=data_root + 'val2017/', - pipeline=test_pipeline)) -evaluation = dict(interval=1, metric=['bbox', 'segm']) -# optimizer -optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001) -optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) -# learning policy -lr_config = dict( - policy='step', - warmup='linear', - warmup_iters=500, - warmup_ratio=1.0 / 3, - step=[16, 19]) -checkpoint_config = dict(interval=1) -# yapf:disable -log_config = dict( - interval=50, - hooks=[ - dict(type='TextLoggerHook'), - # dict(type='TensorboardLoggerHook') - ]) -# yapf:enable -# runtime settings -total_epochs = 20 -dist_params = dict(backend='nccl') -log_level = 'INFO' -work_dir = 
'./work_dirs/htc_x101_64x4d_fpn_20e' -load_from = None -resume_from = None -workflow = [('train', 1)] diff --git a/configs/htc/htc_x101_64x4d_fpn_dconv_c3-c5_mstrain_400_1400_16x1_20e_coco.py b/configs/htc/htc_x101_64x4d_fpn_dconv_c3-c5_mstrain_400_1400_16x1_20e_coco.py new file mode 100644 index 0000000000000000000000000000000000000000..45027582dd68a6ca1e6dd79a7e213559853868dd --- /dev/null +++ b/configs/htc/htc_x101_64x4d_fpn_dconv_c3-c5_mstrain_400_1400_16x1_20e_coco.py @@ -0,0 +1,42 @@ +_base_ = './htc_r50_fpn_1x_coco.py' +model = dict( + pretrained='open-mmlab://resnext101_64x4d', + backbone=dict( + type='ResNeXt', + depth=101, + groups=64, + base_width=4, + num_stages=4, + out_indices=(0, 1, 2, 3), + frozen_stages=1, + norm_cfg=dict(type='BN', requires_grad=True), + style='pytorch', + dcn=dict(type='DCN', deformable_groups=1, fallback_on_stride=False), + stage_with_dcn=(False, True, True, True))) +# dataset settings +img_norm_cfg = dict( + mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='LoadAnnotations', with_bbox=True, with_mask=True, with_seg=True), + dict( + type='Resize', + img_scale=[(1600, 400), (1600, 1400)], + multiscale_mode='range', + keep_ratio=True), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='SegRescale', scale_factor=1 / 8), + dict(type='DefaultFormatBundle'), + dict( + type='Collect', + keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks', 'gt_semantic_seg']), +] +data = dict( + imgs_per_gpu=1, workers_per_gpu=1, train=dict(pipeline=train_pipeline)) +# learning policy +lr_config = dict(step=[16, 19]) +total_epochs = 20 +work_dir = './work_dirs/htc_dconv_c3-c5_mstrain_400_1400_x101_64x4d_fpn_20e' diff --git a/configs/instaboost/cascade_mask_rcnn_r50_fpn_instaboost_4x.py b/configs/instaboost/cascade_mask_rcnn_r50_fpn_instaboost_4x.py deleted file mode 100644 index 
4ce744df3f375a288085ab4f48e13039df5a24b5..0000000000000000000000000000000000000000 --- a/configs/instaboost/cascade_mask_rcnn_r50_fpn_instaboost_4x.py +++ /dev/null @@ -1,265 +0,0 @@ -# model settings -model = dict( - type='CascadeRCNN', - num_stages=3, - pretrained='torchvision://resnet50', - backbone=dict( - type='ResNet', - depth=50, - num_stages=4, - out_indices=(0, 1, 2, 3), - frozen_stages=1, - norm_cfg=dict(type='BN', requires_grad=True), - style='pytorch'), - neck=dict( - type='FPN', - in_channels=[256, 512, 1024, 2048], - out_channels=256, - num_outs=5), - rpn_head=dict( - type='RPNHead', - in_channels=256, - feat_channels=256, - anchor_scales=[8], - anchor_ratios=[0.5, 1.0, 2.0], - anchor_strides=[4, 8, 16, 32, 64], - target_means=[.0, .0, .0, .0], - target_stds=[1.0, 1.0, 1.0, 1.0], - loss_cls=dict( - type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0), - loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0)), - bbox_roi_extractor=dict( - type='SingleRoIExtractor', - roi_layer=dict(type='RoIAlign', out_size=7, sample_num=2), - out_channels=256, - featmap_strides=[4, 8, 16, 32]), - bbox_head=[ - dict( - type='SharedFCBBoxHead', - num_fcs=2, - in_channels=256, - fc_out_channels=1024, - roi_feat_size=7, - num_classes=81, - target_means=[0., 0., 0., 0.], - target_stds=[0.1, 0.1, 0.2, 0.2], - reg_class_agnostic=True, - loss_cls=dict( - type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), - loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0)), - dict( - type='SharedFCBBoxHead', - num_fcs=2, - in_channels=256, - fc_out_channels=1024, - roi_feat_size=7, - num_classes=81, - target_means=[0., 0., 0., 0.], - target_stds=[0.05, 0.05, 0.1, 0.1], - reg_class_agnostic=True, - loss_cls=dict( - type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), - loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0)), - dict( - type='SharedFCBBoxHead', - num_fcs=2, - in_channels=256, - fc_out_channels=1024, - roi_feat_size=7, 
- num_classes=81, - target_means=[0., 0., 0., 0.], - target_stds=[0.033, 0.033, 0.067, 0.067], - reg_class_agnostic=True, - loss_cls=dict( - type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), - loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0)) - ], - mask_roi_extractor=dict( - type='SingleRoIExtractor', - roi_layer=dict(type='RoIAlign', out_size=14, sample_num=2), - out_channels=256, - featmap_strides=[4, 8, 16, 32]), - mask_head=dict( - type='FCNMaskHead', - num_convs=4, - in_channels=256, - conv_out_channels=256, - num_classes=81, - loss_mask=dict( - type='CrossEntropyLoss', use_mask=True, loss_weight=1.0))) -# model training and testing settings -train_cfg = dict( - rpn=dict( - assigner=dict( - type='MaxIoUAssigner', - pos_iou_thr=0.7, - neg_iou_thr=0.3, - min_pos_iou=0.3, - ignore_iof_thr=-1), - sampler=dict( - type='RandomSampler', - num=256, - pos_fraction=0.5, - neg_pos_ub=-1, - add_gt_as_proposals=False), - allowed_border=0, - pos_weight=-1, - debug=False), - rpn_proposal=dict( - nms_across_levels=False, - nms_pre=2000, - nms_post=2000, - max_num=2000, - nms_thr=0.7, - min_bbox_size=0), - rcnn=[ - dict( - assigner=dict( - type='MaxIoUAssigner', - pos_iou_thr=0.5, - neg_iou_thr=0.5, - min_pos_iou=0.5, - ignore_iof_thr=-1), - sampler=dict( - type='RandomSampler', - num=512, - pos_fraction=0.25, - neg_pos_ub=-1, - add_gt_as_proposals=True), - mask_size=28, - pos_weight=-1, - debug=False), - dict( - assigner=dict( - type='MaxIoUAssigner', - pos_iou_thr=0.6, - neg_iou_thr=0.6, - min_pos_iou=0.6, - ignore_iof_thr=-1), - sampler=dict( - type='RandomSampler', - num=512, - pos_fraction=0.25, - neg_pos_ub=-1, - add_gt_as_proposals=True), - mask_size=28, - pos_weight=-1, - debug=False), - dict( - assigner=dict( - type='MaxIoUAssigner', - pos_iou_thr=0.7, - neg_iou_thr=0.7, - min_pos_iou=0.7, - ignore_iof_thr=-1), - sampler=dict( - type='RandomSampler', - num=512, - pos_fraction=0.25, - neg_pos_ub=-1, - add_gt_as_proposals=True), - 
mask_size=28, - pos_weight=-1, - debug=False) - ], - stage_loss_weights=[1, 0.5, 0.25]) -test_cfg = dict( - rpn=dict( - nms_across_levels=False, - nms_pre=1000, - nms_post=1000, - max_num=1000, - nms_thr=0.7, - min_bbox_size=0), - rcnn=dict( - score_thr=0.05, - nms=dict(type='nms', iou_thr=0.5), - max_per_img=100, - mask_thr_binary=0.5)) -# dataset settings -dataset_type = 'CocoDataset' -data_root = 'data/coco/' -img_norm_cfg = dict( - mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) -train_pipeline = [ - dict(type='LoadImageFromFile'), - dict( - type='InstaBoost', - action_candidate=('normal', 'horizontal', 'skip'), - action_prob=(1, 0, 0), - scale=(0.8, 1.2), - dx=15, - dy=15, - theta=(-1, 1), - color_prob=0.5, - hflag=False, - aug_ratio=0.5), - dict(type='LoadAnnotations', with_bbox=True, with_mask=True), - dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), - dict(type='RandomFlip', flip_ratio=0.5), - dict(type='Normalize', **img_norm_cfg), - dict(type='Pad', size_divisor=32), - dict(type='DefaultFormatBundle'), - dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']), -] -test_pipeline = [ - dict(type='LoadImageFromFile'), - dict( - type='MultiScaleFlipAug', - img_scale=(1333, 800), - flip=False, - transforms=[ - dict(type='Resize', keep_ratio=True), - dict(type='RandomFlip'), - dict(type='Normalize', **img_norm_cfg), - dict(type='Pad', size_divisor=32), - dict(type='ImageToTensor', keys=['img']), - dict(type='Collect', keys=['img']), - ]) -] -data = dict( - imgs_per_gpu=2, - workers_per_gpu=2, - train=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_train2017.json', - img_prefix=data_root + 'train2017/', - pipeline=train_pipeline), - val=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_val2017.json', - img_prefix=data_root + 'val2017/', - pipeline=test_pipeline), - test=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_val2017.json', - 
img_prefix=data_root + 'val2017/', - pipeline=test_pipeline)) -evaluation = dict(interval=1, metric=['bbox', 'segm']) -# optimizer -optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001) -optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) -# learning policy -lr_config = dict( - policy='step', - warmup='linear', - warmup_iters=500, - warmup_ratio=1.0 / 3, - step=[32, 44]) -checkpoint_config = dict(interval=1) -# yapf:disable -log_config = dict( - interval=50, - hooks=[ - dict(type='TextLoggerHook'), - # dict(type='TensorboardLoggerHook') - ]) -# yapf:enable -# runtime settings -total_epochs = 48 -dist_params = dict(backend='nccl') -log_level = 'INFO' -work_dir = './work_dirs/cascade_mask_rcnn_r50_fpn_instaboost_4x' -load_from = None -resume_from = None -workflow = [('train', 1)] diff --git a/configs/instaboost/cascade_mask_rcnn_r50_fpn_instaboost_4x_coco.py b/configs/instaboost/cascade_mask_rcnn_r50_fpn_instaboost_4x_coco.py new file mode 100644 index 0000000000000000000000000000000000000000..80ae5bee3031cbc652fc79bd12c1ac138323ebad --- /dev/null +++ b/configs/instaboost/cascade_mask_rcnn_r50_fpn_instaboost_4x_coco.py @@ -0,0 +1,29 @@ +_base_ = '../cascade_rcnn/cascade_mask_rcnn_r50_fpn_1x_coco.py' +img_norm_cfg = dict( + mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='InstaBoost', + action_candidate=('normal', 'horizontal', 'skip'), + action_prob=(1, 0, 0), + scale=(0.8, 1.2), + dx=15, + dy=15, + theta=(-1, 1), + color_prob=0.5, + hflag=False, + aug_ratio=0.5), + dict(type='LoadAnnotations', with_bbox=True, with_mask=True), + dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']), +] +data = 
dict(train=dict(pipeline=train_pipeline)) +# learning policy +lr_config = dict(step=[32, 44]) +total_epochs = 48 +work_dir = './work_dirs/cascade_mask_rcnn_r50_fpn_instaboost_4x' diff --git a/configs/instaboost/mask_rcnn_r50_fpn_instaboost_4x.py b/configs/instaboost/mask_rcnn_r50_fpn_instaboost_4x.py deleted file mode 100644 index 169652dc7cc5e0c37010a234a669293306fcd078..0000000000000000000000000000000000000000 --- a/configs/instaboost/mask_rcnn_r50_fpn_instaboost_4x.py +++ /dev/null @@ -1,201 +0,0 @@ -# model settings -model = dict( - type='MaskRCNN', - pretrained='torchvision://resnet50', - backbone=dict( - type='ResNet', - depth=50, - num_stages=4, - out_indices=(0, 1, 2, 3), - frozen_stages=1, - norm_cfg=dict(type='BN', requires_grad=True), - style='pytorch'), - neck=dict( - type='FPN', - in_channels=[256, 512, 1024, 2048], - out_channels=256, - num_outs=5), - rpn_head=dict( - type='RPNHead', - in_channels=256, - feat_channels=256, - anchor_scales=[8], - anchor_ratios=[0.5, 1.0, 2.0], - anchor_strides=[4, 8, 16, 32, 64], - target_means=[.0, .0, .0, .0], - target_stds=[1.0, 1.0, 1.0, 1.0], - loss_cls=dict( - type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0), - loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0)), - bbox_roi_extractor=dict( - type='SingleRoIExtractor', - roi_layer=dict(type='RoIAlign', out_size=7, sample_num=2), - out_channels=256, - featmap_strides=[4, 8, 16, 32]), - bbox_head=dict( - type='SharedFCBBoxHead', - num_fcs=2, - in_channels=256, - fc_out_channels=1024, - roi_feat_size=7, - num_classes=81, - target_means=[0., 0., 0., 0.], - target_stds=[0.1, 0.1, 0.2, 0.2], - reg_class_agnostic=False, - loss_cls=dict( - type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), - loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0)), - mask_roi_extractor=dict( - type='SingleRoIExtractor', - roi_layer=dict(type='RoIAlign', out_size=14, sample_num=2), - out_channels=256, - featmap_strides=[4, 8, 16, 32]), - 
mask_head=dict( - type='FCNMaskHead', - num_convs=4, - in_channels=256, - conv_out_channels=256, - num_classes=81, - loss_mask=dict( - type='CrossEntropyLoss', use_mask=True, loss_weight=1.0))) -# model training and testing settings -train_cfg = dict( - rpn=dict( - assigner=dict( - type='MaxIoUAssigner', - pos_iou_thr=0.7, - neg_iou_thr=0.3, - min_pos_iou=0.3, - ignore_iof_thr=-1), - sampler=dict( - type='RandomSampler', - num=256, - pos_fraction=0.5, - neg_pos_ub=-1, - add_gt_as_proposals=False), - allowed_border=0, - pos_weight=-1, - debug=False), - rpn_proposal=dict( - nms_across_levels=False, - nms_pre=2000, - nms_post=2000, - max_num=2000, - nms_thr=0.7, - min_bbox_size=0), - rcnn=dict( - assigner=dict( - type='MaxIoUAssigner', - pos_iou_thr=0.5, - neg_iou_thr=0.5, - min_pos_iou=0.5, - ignore_iof_thr=-1), - sampler=dict( - type='RandomSampler', - num=512, - pos_fraction=0.25, - neg_pos_ub=-1, - add_gt_as_proposals=True), - mask_size=28, - pos_weight=-1, - debug=False)) -test_cfg = dict( - rpn=dict( - nms_across_levels=False, - nms_pre=1000, - nms_post=1000, - max_num=1000, - nms_thr=0.7, - min_bbox_size=0), - rcnn=dict( - score_thr=0.05, - nms=dict(type='nms', iou_thr=0.5), - max_per_img=100, - mask_thr_binary=0.5)) -# dataset settings -dataset_type = 'CocoDataset' -data_root = 'data/coco/' -img_norm_cfg = dict( - mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) -train_pipeline = [ - dict(type='LoadImageFromFile'), - dict( - type='InstaBoost', - action_candidate=('normal', 'horizontal', 'skip'), - action_prob=(1, 0, 0), - scale=(0.8, 1.2), - dx=15, - dy=15, - theta=(-1, 1), - color_prob=0.5, - hflag=False, - aug_ratio=0.5), - dict(type='LoadAnnotations', with_bbox=True, with_mask=True), - dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), - dict(type='RandomFlip', flip_ratio=0.5), - dict(type='Normalize', **img_norm_cfg), - dict(type='Pad', size_divisor=32), - dict(type='DefaultFormatBundle'), - dict(type='Collect', 
keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']), -] -test_pipeline = [ - dict(type='LoadImageFromFile'), - dict( - type='MultiScaleFlipAug', - img_scale=(1333, 800), - flip=False, - transforms=[ - dict(type='Resize', keep_ratio=True), - dict(type='RandomFlip'), - dict(type='Normalize', **img_norm_cfg), - dict(type='Pad', size_divisor=32), - dict(type='ImageToTensor', keys=['img']), - dict(type='Collect', keys=['img']), - ]) -] -data = dict( - imgs_per_gpu=2, - workers_per_gpu=2, - train=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_train2017.json', - img_prefix=data_root + 'train2017/', - pipeline=train_pipeline), - val=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_val2017.json', - img_prefix=data_root + 'val2017/', - pipeline=test_pipeline), - test=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_val2017.json', - img_prefix=data_root + 'val2017/', - pipeline=test_pipeline)) -evaluation = dict(interval=1, metric=['bbox', 'segm']) -# optimizer -optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001) -optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) -# learning policy -lr_config = dict( - policy='step', - warmup='linear', - warmup_iters=500, - warmup_ratio=1.0 / 3, - step=[32, 44]) -checkpoint_config = dict(interval=1) -# yapf:disable -log_config = dict( - interval=50, - hooks=[ - dict(type='TextLoggerHook'), - # dict(type='TensorboardLoggerHook') - ]) -# yapf:enable -# runtime settings -total_epochs = 48 -dist_params = dict(backend='nccl') -log_level = 'INFO' -work_dir = './work_dirs/mask_rcnn_r50_fpn_instaboost_4x' -load_from = None -resume_from = None -workflow = [('train', 1)] diff --git a/configs/instaboost/mask_rcnn_r50_fpn_instaboost_4x_coco.py b/configs/instaboost/mask_rcnn_r50_fpn_instaboost_4x_coco.py new file mode 100644 index 0000000000000000000000000000000000000000..8df3596225e00e26d7e17cd1029caf805c1790d1 --- /dev/null +++ 
b/configs/instaboost/mask_rcnn_r50_fpn_instaboost_4x_coco.py @@ -0,0 +1,29 @@ +_base_ = '../mask_rcnn/mask_rcnn_r50_fpn_1x_coco.py' +img_norm_cfg = dict( + mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='InstaBoost', + action_candidate=('normal', 'horizontal', 'skip'), + action_prob=(1, 0, 0), + scale=(0.8, 1.2), + dx=15, + dy=15, + theta=(-1, 1), + color_prob=0.5, + hflag=False, + aug_ratio=0.5), + dict(type='LoadAnnotations', with_bbox=True, with_mask=True), + dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']), +] +data = dict(train=dict(pipeline=train_pipeline)) +# learning policy +lr_config = dict(step=[32, 44]) +total_epochs = 48 +work_dir = './work_dirs/mask_rcnn_r50_fpn_instaboost_4x' diff --git a/configs/instaboost/ssd300_coco_instaboost_4x.py b/configs/instaboost/ssd300_coco_instaboost_4x.py deleted file mode 100644 index 3c7de8700b37b298ee3608f0471340c457afada5..0000000000000000000000000000000000000000 --- a/configs/instaboost/ssd300_coco_instaboost_4x.py +++ /dev/null @@ -1,146 +0,0 @@ -# model settings -input_size = 300 -model = dict( - type='SingleStageDetector', - pretrained='open-mmlab://vgg16_caffe', - backbone=dict( - type='SSDVGG', - input_size=input_size, - depth=16, - with_last_pool=False, - ceil_mode=True, - out_indices=(3, 4), - out_feature_indices=(22, 34), - l2_norm_scale=20), - neck=None, - bbox_head=dict( - type='SSDHead', - input_size=input_size, - in_channels=(512, 1024, 512, 256, 256, 256), - num_classes=81, - anchor_strides=(8, 16, 32, 64, 100, 300), - basesize_ratio_range=(0.15, 0.9), - anchor_ratios=([2], [2, 3], [2, 3], [2, 3], [2], [2]), - target_means=(.0, .0, .0, .0), - target_stds=(0.1, 0.1, 0.2, 
0.2))) -cudnn_benchmark = True -train_cfg = dict( - assigner=dict( - type='MaxIoUAssigner', - pos_iou_thr=0.5, - neg_iou_thr=0.5, - min_pos_iou=0., - ignore_iof_thr=-1, - gt_max_assign_all=False), - smoothl1_beta=1., - allowed_border=-1, - pos_weight=-1, - neg_pos_ratio=3, - debug=False) -test_cfg = dict( - nms=dict(type='nms', iou_thr=0.45), - min_bbox_size=0, - score_thr=0.02, - max_per_img=200) -# model training and testing settings -# dataset settings -dataset_type = 'CocoDataset' -data_root = 'data/coco/' -img_norm_cfg = dict(mean=[123.675, 116.28, 103.53], std=[1, 1, 1], to_rgb=True) -train_pipeline = [ - dict(type='LoadImageFromFile', to_float32=True), - dict( - type='InstaBoost', - action_candidate=('normal', 'horizontal', 'skip'), - action_prob=(1, 0, 0), - scale=(0.8, 1.2), - dx=15, - dy=15, - theta=(-1, 1), - color_prob=0.5, - hflag=False, - aug_ratio=0.5), - dict(type='LoadAnnotations', with_bbox=True), - dict( - type='PhotoMetricDistortion', - brightness_delta=32, - contrast_range=(0.5, 1.5), - saturation_range=(0.5, 1.5), - hue_delta=18), - dict( - type='Expand', - mean=img_norm_cfg['mean'], - to_rgb=img_norm_cfg['to_rgb'], - ratio_range=(1, 4)), - dict( - type='MinIoURandomCrop', - min_ious=(0.1, 0.3, 0.5, 0.7, 0.9), - min_crop_size=0.3), - dict(type='Resize', img_scale=(300, 300), keep_ratio=False), - dict(type='Normalize', **img_norm_cfg), - dict(type='RandomFlip', flip_ratio=0.5), - dict(type='DefaultFormatBundle'), - dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']), -] -test_pipeline = [ - dict(type='LoadImageFromFile'), - dict( - type='MultiScaleFlipAug', - img_scale=(300, 300), - flip=False, - transforms=[ - dict(type='Resize', keep_ratio=False), - dict(type='Normalize', **img_norm_cfg), - dict(type='ImageToTensor', keys=['img']), - dict(type='Collect', keys=['img']), - ]) -] -data = dict( - imgs_per_gpu=8, - workers_per_gpu=3, - train=dict( - type='RepeatDataset', - times=5, - dataset=dict( - type=dataset_type, - ann_file=data_root 
+ 'annotations/instances_train2017.json', - img_prefix=data_root + 'train2017/', - pipeline=train_pipeline)), - val=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_val2017.json', - img_prefix=data_root + 'val2017/', - pipeline=test_pipeline), - test=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_val2017.json', - img_prefix=data_root + 'val2017/', - pipeline=test_pipeline)) -evaluation = dict(interval=1, metric='bbox') -# optimizer -optimizer = dict(type='SGD', lr=2e-3, momentum=0.9, weight_decay=5e-4) -optimizer_config = dict() -# learning policy -lr_config = dict( - policy='step', - warmup='linear', - warmup_iters=500, - warmup_ratio=1.0 / 3, - step=[64, 88]) -checkpoint_config = dict(interval=1) -# yapf:disable -log_config = dict( - interval=50, - hooks=[ - dict(type='TextLoggerHook'), - # dict(type='TensorboardLoggerHook') - ]) -# yapf:enable -# runtime settings -total_epochs = 96 -dist_params = dict(backend='nccl') -log_level = 'INFO' -work_dir = './work_dirs/ssd300_coco_instaboost_4x' -load_from = None -resume_from = None -workflow = [('train', 1)] diff --git a/configs/instaboost/ssd300_coco_instaboost_4x_coco.py b/configs/instaboost/ssd300_coco_instaboost_4x_coco.py new file mode 100644 index 0000000000000000000000000000000000000000..472d18341e034221bcc247ad148d98f21f22607e --- /dev/null +++ b/configs/instaboost/ssd300_coco_instaboost_4x_coco.py @@ -0,0 +1,42 @@ +_base_ = '../ssd/ssd300_coco.py' +img_norm_cfg = dict(mean=[123.675, 116.28, 103.53], std=[1, 1, 1], to_rgb=True) +train_pipeline = [ + dict(type='LoadImageFromFile', to_float32=True), + dict( + type='InstaBoost', + action_candidate=('normal', 'horizontal', 'skip'), + action_prob=(1, 0, 0), + scale=(0.8, 1.2), + dx=15, + dy=15, + theta=(-1, 1), + color_prob=0.5, + hflag=False, + aug_ratio=0.5), + dict(type='LoadAnnotations', with_bbox=True), + dict( + type='PhotoMetricDistortion', + brightness_delta=32, + contrast_range=(0.5, 1.5), + 
saturation_range=(0.5, 1.5), + hue_delta=18), + dict( + type='Expand', + mean=img_norm_cfg['mean'], + to_rgb=img_norm_cfg['to_rgb'], + ratio_range=(1, 4)), + dict( + type='MinIoURandomCrop', + min_ious=(0.1, 0.3, 0.5, 0.7, 0.9), + min_crop_size=0.3), + dict(type='Resize', img_scale=(300, 300), keep_ratio=False), + dict(type='Normalize', **img_norm_cfg), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']), +] +data = dict(train=dict(dataset=dict(pipeline=train_pipeline))) +# learning policy +lr_config = dict(step=[64, 88]) +total_epochs = 96 +work_dir = './work_dirs/ssd300_coco_instaboost_4x' diff --git a/configs/libra_rcnn/libra_fast_rcnn_r50_fpn_1x.py b/configs/libra_rcnn/libra_fast_rcnn_r50_fpn_1x.py deleted file mode 100644 index 839225268c8a591eb337c65686b84a6f319b062d..0000000000000000000000000000000000000000 --- a/configs/libra_rcnn/libra_fast_rcnn_r50_fpn_1x.py +++ /dev/null @@ -1,155 +0,0 @@ -# model settings -model = dict( - type='FastRCNN', - pretrained='torchvision://resnet50', - backbone=dict( - type='ResNet', - depth=50, - num_stages=4, - out_indices=(0, 1, 2, 3), - frozen_stages=1, - norm_cfg=dict(type='BN', requires_grad=True), - style='pytorch'), - neck=[ - dict( - type='FPN', - in_channels=[256, 512, 1024, 2048], - out_channels=256, - num_outs=5), - dict( - type='BFP', - in_channels=256, - num_levels=5, - refine_level=2, - refine_type='non_local') - ], - bbox_roi_extractor=dict( - type='SingleRoIExtractor', - roi_layer=dict(type='RoIAlign', out_size=7, sample_num=2), - out_channels=256, - featmap_strides=[4, 8, 16, 32]), - bbox_head=dict( - type='SharedFCBBoxHead', - num_fcs=2, - in_channels=256, - fc_out_channels=1024, - roi_feat_size=7, - num_classes=81, - target_means=[0., 0., 0., 0.], - target_stds=[0.1, 0.1, 0.2, 0.2], - reg_class_agnostic=False, - loss_cls=dict( - type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), - loss_bbox=dict( - 
type='BalancedL1Loss', - alpha=0.5, - gamma=1.5, - beta=1.0, - loss_weight=1.0))) -# model training and testing settings -train_cfg = dict( - rcnn=dict( - assigner=dict( - type='MaxIoUAssigner', - pos_iou_thr=0.5, - neg_iou_thr=0.5, - min_pos_iou=0.5, - ignore_iof_thr=-1), - sampler=dict( - type='CombinedSampler', - num=512, - pos_fraction=0.25, - add_gt_as_proposals=True, - pos_sampler=dict(type='InstanceBalancedPosSampler'), - neg_sampler=dict( - type='IoUBalancedNegSampler', - floor_thr=-1, - floor_fraction=0, - num_bins=3)), - pos_weight=-1, - debug=False)) -test_cfg = dict( - rcnn=dict( - score_thr=0.05, nms=dict(type='nms', iou_thr=0.5), max_per_img=100)) -# dataset settings -dataset_type = 'CocoDataset' -data_root = 'data/coco/' -img_norm_cfg = dict( - mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) -train_pipeline = [ - dict(type='LoadImageFromFile'), - dict(type='LoadProposals', num_max_proposals=2000), - dict(type='LoadAnnotations', with_bbox=True), - dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), - dict(type='RandomFlip', flip_ratio=0.5), - dict(type='Normalize', **img_norm_cfg), - dict(type='Pad', size_divisor=32), - dict(type='DefaultFormatBundle'), - dict(type='Collect', keys=['img', 'proposals', 'gt_bboxes', 'gt_labels']), -] -test_pipeline = [ - dict(type='LoadImageFromFile'), - dict(type='LoadProposals', num_max_proposals=None), - dict( - type='MultiScaleFlipAug', - img_scale=(1333, 800), - flip=False, - transforms=[ - dict(type='Resize', keep_ratio=True), - dict(type='RandomFlip'), - dict(type='Normalize', **img_norm_cfg), - dict(type='Pad', size_divisor=32), - dict(type='ImageToTensor', keys=['img']), - dict(type='Collect', keys=['img', 'proposals']), - ]) -] -data = dict( - imgs_per_gpu=2, - workers_per_gpu=2, - train=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_train2017.json', - proposal_file=data_root + - 'libra_proposals/rpn_r50_fpn_1x_train2017.pkl', - img_prefix=data_root 
+ 'train2017/', - pipeline=train_pipeline), - val=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_val2017.json', - proposal_file=data_root + 'libra_proposals/rpn_r50_fpn_1x_val2017.pkl', - img_prefix=data_root + 'val2017/', - pipeline=test_pipeline), - test=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_val2017.json', - proposal_file=data_root + 'libra_proposals/rpn_r50_fpn_1x_val2017.pkl', - img_prefix=data_root + 'val2017/', - pipeline=test_pipeline)) -evaluation = dict(interval=1, metric='bbox') -# optimizer -optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001) -optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) -# learning policy -lr_config = dict( - policy='step', - warmup='linear', - warmup_iters=500, - warmup_ratio=1.0 / 3, - step=[8, 11]) -checkpoint_config = dict(interval=1) -# yapf:disable -log_config = dict( - interval=50, - hooks=[ - dict(type='TextLoggerHook'), - # dict(type='TensorboardLoggerHook') - ]) -# yapf:enable -# runtime settings -total_epochs = 12 -dist_params = dict(backend='nccl') -log_level = 'INFO' -work_dir = './work_dirs/libra_fast_rcnn_r50_fpn_1x' -load_from = None -resume_from = None -workflow = [('train', 1)] diff --git a/configs/libra_rcnn/libra_fast_rcnn_r50_fpn_1x_coco.py b/configs/libra_rcnn/libra_fast_rcnn_r50_fpn_1x_coco.py new file mode 100644 index 0000000000000000000000000000000000000000..440fe9785400da644c9219120b13639079faf00b --- /dev/null +++ b/configs/libra_rcnn/libra_fast_rcnn_r50_fpn_1x_coco.py @@ -0,0 +1,50 @@ +_base_ = '../fast_rcnn/fast_rcnn_r50_fpn_1x_coco.py' +# model settings +model = dict( + neck=[ + dict( + type='FPN', + in_channels=[256, 512, 1024, 2048], + out_channels=256, + num_outs=5), + dict( + type='BFP', + in_channels=256, + num_levels=5, + refine_level=2, + refine_type='non_local') + ], + bbox_head=dict( + loss_bbox=dict( + _delete_=True, + type='BalancedL1Loss', + alpha=0.5, + gamma=1.5, + beta=1.0, + 
loss_weight=1.0))) +# model training and testing settings +train_cfg = dict( + rcnn=dict( + sampler=dict( + _delete_=True, + type='CombinedSampler', + num=512, + pos_fraction=0.25, + add_gt_as_proposals=True, + pos_sampler=dict(type='InstanceBalancedPosSampler'), + neg_sampler=dict( + type='IoUBalancedNegSampler', + floor_thr=-1, + floor_fraction=0, + num_bins=3)))) +# dataset settings +dataset_type = 'CocoDataset' +data_root = 'data/coco/' +data = dict( + train=dict(proposal_file=data_root + + 'libra_proposals/rpn_r50_fpn_1x_train2017.pkl'), + val=dict(proposal_file=data_root + + 'libra_proposals/rpn_r50_fpn_1x_val2017.pkl'), + test=dict(proposal_file=data_root + + 'libra_proposals/rpn_r50_fpn_1x_val2017.pkl')) +work_dir = './work_dirs/libra_fast_rcnn_r50_fpn_1x' diff --git a/configs/libra_rcnn/libra_faster_rcnn_r101_fpn_1x.py b/configs/libra_rcnn/libra_faster_rcnn_r101_fpn_1x.py deleted file mode 100644 index af3a0b6749bfbf2d77ed0459b3a49c8f4aefb2fe..0000000000000000000000000000000000000000 --- a/configs/libra_rcnn/libra_faster_rcnn_r101_fpn_1x.py +++ /dev/null @@ -1,194 +0,0 @@ -# model settings -model = dict( - type='FasterRCNN', - pretrained='torchvision://resnet101', - backbone=dict( - type='ResNet', - depth=101, - num_stages=4, - out_indices=(0, 1, 2, 3), - frozen_stages=1, - norm_cfg=dict(type='BN', requires_grad=True), - style='pytorch'), - neck=[ - dict( - type='FPN', - in_channels=[256, 512, 1024, 2048], - out_channels=256, - num_outs=5), - dict( - type='BFP', - in_channels=256, - num_levels=5, - refine_level=2, - refine_type='non_local') - ], - rpn_head=dict( - type='RPNHead', - in_channels=256, - feat_channels=256, - anchor_scales=[8], - anchor_ratios=[0.5, 1.0, 2.0], - anchor_strides=[4, 8, 16, 32, 64], - target_means=[.0, .0, .0, .0], - target_stds=[1.0, 1.0, 1.0, 1.0], - loss_cls=dict( - type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0), - loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0)), - bbox_roi_extractor=dict( - 
type='SingleRoIExtractor', - roi_layer=dict(type='RoIAlign', out_size=7, sample_num=2), - out_channels=256, - featmap_strides=[4, 8, 16, 32]), - bbox_head=dict( - type='SharedFCBBoxHead', - num_fcs=2, - in_channels=256, - fc_out_channels=1024, - roi_feat_size=7, - num_classes=81, - target_means=[0., 0., 0., 0.], - target_stds=[0.1, 0.1, 0.2, 0.2], - reg_class_agnostic=False, - loss_cls=dict( - type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), - loss_bbox=dict( - type='BalancedL1Loss', - alpha=0.5, - gamma=1.5, - beta=1.0, - loss_weight=1.0))) -# model training and testing settings -train_cfg = dict( - rpn=dict( - assigner=dict( - type='MaxIoUAssigner', - pos_iou_thr=0.7, - neg_iou_thr=0.3, - min_pos_iou=0.3, - ignore_iof_thr=-1), - sampler=dict( - type='RandomSampler', - num=256, - pos_fraction=0.5, - neg_pos_ub=5, - add_gt_as_proposals=False), - allowed_border=-1, - pos_weight=-1, - debug=False), - rpn_proposal=dict( - nms_across_levels=False, - nms_pre=2000, - nms_post=2000, - max_num=2000, - nms_thr=0.7, - min_bbox_size=0), - rcnn=dict( - assigner=dict( - type='MaxIoUAssigner', - pos_iou_thr=0.5, - neg_iou_thr=0.5, - min_pos_iou=0.5, - ignore_iof_thr=-1), - sampler=dict( - type='CombinedSampler', - num=512, - pos_fraction=0.25, - add_gt_as_proposals=True, - pos_sampler=dict(type='InstanceBalancedPosSampler'), - neg_sampler=dict( - type='IoUBalancedNegSampler', - floor_thr=-1, - floor_fraction=0, - num_bins=3)), - pos_weight=-1, - debug=False)) -test_cfg = dict( - rpn=dict( - nms_across_levels=False, - nms_pre=1000, - nms_post=1000, - max_num=1000, - nms_thr=0.7, - min_bbox_size=0), - rcnn=dict( - score_thr=0.05, nms=dict(type='nms', iou_thr=0.5), max_per_img=100) - # soft-nms is also supported for rcnn testing - # e.g., nms=dict(type='soft_nms', iou_thr=0.5, min_score=0.05) -) -# dataset settings -dataset_type = 'CocoDataset' -data_root = 'data/coco/' -img_norm_cfg = dict( - mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) 
-train_pipeline = [ - dict(type='LoadImageFromFile'), - dict(type='LoadAnnotations', with_bbox=True), - dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), - dict(type='RandomFlip', flip_ratio=0.5), - dict(type='Normalize', **img_norm_cfg), - dict(type='Pad', size_divisor=32), - dict(type='DefaultFormatBundle'), - dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']), -] -test_pipeline = [ - dict(type='LoadImageFromFile'), - dict( - type='MultiScaleFlipAug', - img_scale=(1333, 800), - flip=False, - transforms=[ - dict(type='Resize', keep_ratio=True), - dict(type='RandomFlip'), - dict(type='Normalize', **img_norm_cfg), - dict(type='Pad', size_divisor=32), - dict(type='ImageToTensor', keys=['img']), - dict(type='Collect', keys=['img']), - ]) -] -data = dict( - imgs_per_gpu=2, - workers_per_gpu=2, - train=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_train2017.json', - img_prefix=data_root + 'train2017/', - pipeline=train_pipeline), - val=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_val2017.json', - img_prefix=data_root + 'val2017/', - pipeline=test_pipeline), - test=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_val2017.json', - img_prefix=data_root + 'val2017/', - pipeline=test_pipeline)) -evaluation = dict(interval=1, metric='bbox') -# optimizer -optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001) -optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) -# learning policy -lr_config = dict( - policy='step', - warmup='linear', - warmup_iters=500, - warmup_ratio=1.0 / 3, - step=[8, 11]) -checkpoint_config = dict(interval=1) -# yapf:disable -log_config = dict( - interval=50, - hooks=[ - dict(type='TextLoggerHook'), - # dict(type='TensorboardLoggerHook') - ]) -# yapf:enable -# runtime settings -total_epochs = 12 -dist_params = dict(backend='nccl') -log_level = 'INFO' -work_dir = './work_dirs/libra_faster_rcnn_r101_fpn_1x' -load_from = None 
-resume_from = None -workflow = [('train', 1)] diff --git a/configs/libra_rcnn/libra_faster_rcnn_r101_fpn_1x_coco.py b/configs/libra_rcnn/libra_faster_rcnn_r101_fpn_1x_coco.py new file mode 100644 index 0000000000000000000000000000000000000000..6b4c15a28899758e5371649fb728f6db46f91028 --- /dev/null +++ b/configs/libra_rcnn/libra_faster_rcnn_r101_fpn_1x_coco.py @@ -0,0 +1,3 @@ +_base_ = './libra_faster_rcnn_r50_fpn_1x_coco.py' +model = dict(pretrained='torchvision://resnet101', backbone=dict(depth=101)) +work_dir = './work_dirs/libra_faster_rcnn_r101_fpn_1x' diff --git a/configs/libra_rcnn/libra_faster_rcnn_r50_fpn_1x.py b/configs/libra_rcnn/libra_faster_rcnn_r50_fpn_1x.py deleted file mode 100644 index 41de0c62259ffc79584d3ef1e443c5e4e945c70d..0000000000000000000000000000000000000000 --- a/configs/libra_rcnn/libra_faster_rcnn_r50_fpn_1x.py +++ /dev/null @@ -1,194 +0,0 @@ -# model settings -model = dict( - type='FasterRCNN', - pretrained='torchvision://resnet50', - backbone=dict( - type='ResNet', - depth=50, - num_stages=4, - out_indices=(0, 1, 2, 3), - frozen_stages=1, - norm_cfg=dict(type='BN', requires_grad=True), - style='pytorch'), - neck=[ - dict( - type='FPN', - in_channels=[256, 512, 1024, 2048], - out_channels=256, - num_outs=5), - dict( - type='BFP', - in_channels=256, - num_levels=5, - refine_level=2, - refine_type='non_local') - ], - rpn_head=dict( - type='RPNHead', - in_channels=256, - feat_channels=256, - anchor_scales=[8], - anchor_ratios=[0.5, 1.0, 2.0], - anchor_strides=[4, 8, 16, 32, 64], - target_means=[.0, .0, .0, .0], - target_stds=[1.0, 1.0, 1.0, 1.0], - loss_cls=dict( - type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0), - loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0)), - bbox_roi_extractor=dict( - type='SingleRoIExtractor', - roi_layer=dict(type='RoIAlign', out_size=7, sample_num=2), - out_channels=256, - featmap_strides=[4, 8, 16, 32]), - bbox_head=dict( - type='SharedFCBBoxHead', - num_fcs=2, - 
in_channels=256, - fc_out_channels=1024, - roi_feat_size=7, - num_classes=81, - target_means=[0., 0., 0., 0.], - target_stds=[0.1, 0.1, 0.2, 0.2], - reg_class_agnostic=False, - loss_cls=dict( - type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), - loss_bbox=dict( - type='BalancedL1Loss', - alpha=0.5, - gamma=1.5, - beta=1.0, - loss_weight=1.0))) -# model training and testing settings -train_cfg = dict( - rpn=dict( - assigner=dict( - type='MaxIoUAssigner', - pos_iou_thr=0.7, - neg_iou_thr=0.3, - min_pos_iou=0.3, - ignore_iof_thr=-1), - sampler=dict( - type='RandomSampler', - num=256, - pos_fraction=0.5, - neg_pos_ub=5, - add_gt_as_proposals=False), - allowed_border=-1, - pos_weight=-1, - debug=False), - rpn_proposal=dict( - nms_across_levels=False, - nms_pre=2000, - nms_post=2000, - max_num=2000, - nms_thr=0.7, - min_bbox_size=0), - rcnn=dict( - assigner=dict( - type='MaxIoUAssigner', - pos_iou_thr=0.5, - neg_iou_thr=0.5, - min_pos_iou=0.5, - ignore_iof_thr=-1), - sampler=dict( - type='CombinedSampler', - num=512, - pos_fraction=0.25, - add_gt_as_proposals=True, - pos_sampler=dict(type='InstanceBalancedPosSampler'), - neg_sampler=dict( - type='IoUBalancedNegSampler', - floor_thr=-1, - floor_fraction=0, - num_bins=3)), - pos_weight=-1, - debug=False)) -test_cfg = dict( - rpn=dict( - nms_across_levels=False, - nms_pre=1000, - nms_post=1000, - max_num=1000, - nms_thr=0.7, - min_bbox_size=0), - rcnn=dict( - score_thr=0.05, nms=dict(type='nms', iou_thr=0.5), max_per_img=100) - # soft-nms is also supported for rcnn testing - # e.g., nms=dict(type='soft_nms', iou_thr=0.5, min_score=0.05) -) -# dataset settings -dataset_type = 'CocoDataset' -data_root = 'data/coco/' -img_norm_cfg = dict( - mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) -train_pipeline = [ - dict(type='LoadImageFromFile'), - dict(type='LoadAnnotations', with_bbox=True), - dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), - dict(type='RandomFlip', 
flip_ratio=0.5), - dict(type='Normalize', **img_norm_cfg), - dict(type='Pad', size_divisor=32), - dict(type='DefaultFormatBundle'), - dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']), -] -test_pipeline = [ - dict(type='LoadImageFromFile'), - dict( - type='MultiScaleFlipAug', - img_scale=(1333, 800), - flip=False, - transforms=[ - dict(type='Resize', keep_ratio=True), - dict(type='RandomFlip'), - dict(type='Normalize', **img_norm_cfg), - dict(type='Pad', size_divisor=32), - dict(type='ImageToTensor', keys=['img']), - dict(type='Collect', keys=['img']), - ]) -] -data = dict( - imgs_per_gpu=2, - workers_per_gpu=2, - train=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_train2017.json', - img_prefix=data_root + 'train2017/', - pipeline=train_pipeline), - val=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_val2017.json', - img_prefix=data_root + 'val2017/', - pipeline=test_pipeline), - test=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_val2017.json', - img_prefix=data_root + 'val2017/', - pipeline=test_pipeline)) -evaluation = dict(interval=1, metric='bbox') -# optimizer -optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001) -optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) -# learning policy -lr_config = dict( - policy='step', - warmup='linear', - warmup_iters=500, - warmup_ratio=1.0 / 3, - step=[8, 11]) -checkpoint_config = dict(interval=1) -# yapf:disable -log_config = dict( - interval=50, - hooks=[ - dict(type='TextLoggerHook'), - # dict(type='TensorboardLoggerHook') - ]) -# yapf:enable -# runtime settings -total_epochs = 12 -dist_params = dict(backend='nccl') -log_level = 'INFO' -work_dir = './work_dirs/libra_faster_rcnn_r50_fpn_1x' -load_from = None -resume_from = None -workflow = [('train', 1)] diff --git a/configs/libra_rcnn/libra_faster_rcnn_r50_fpn_1x_coco.py b/configs/libra_rcnn/libra_faster_rcnn_r50_fpn_1x_coco.py new file mode 100644 
index 0000000000000000000000000000000000000000..5e99672974d80d237e8bfea19f7243cc7724adb7 --- /dev/null +++ b/configs/libra_rcnn/libra_faster_rcnn_r50_fpn_1x_coco.py @@ -0,0 +1,41 @@ +_base_ = '../faster_rcnn/faster_rcnn_r50_fpn_1x_coco.py' +# model settings +model = dict( + neck=[ + dict( + type='FPN', + in_channels=[256, 512, 1024, 2048], + out_channels=256, + num_outs=5), + dict( + type='BFP', + in_channels=256, + num_levels=5, + refine_level=2, + refine_type='non_local') + ], + bbox_head=dict( + loss_bbox=dict( + _delete_=True, + type='BalancedL1Loss', + alpha=0.5, + gamma=1.5, + beta=1.0, + loss_weight=1.0))) +# model training and testing settings +train_cfg = dict( + rpn=dict(sampler=dict(neg_pos_ub=5), allowed_border=-1), + rcnn=dict( + sampler=dict( + _delete_=True, + type='CombinedSampler', + num=512, + pos_fraction=0.25, + add_gt_as_proposals=True, + pos_sampler=dict(type='InstanceBalancedPosSampler'), + neg_sampler=dict( + type='IoUBalancedNegSampler', + floor_thr=-1, + floor_fraction=0, + num_bins=3)))) +work_dir = './work_dirs/libra_faster_rcnn_r50_fpn_1x' diff --git a/configs/libra_rcnn/libra_faster_rcnn_x101_64x4d_fpn_1x.py b/configs/libra_rcnn/libra_faster_rcnn_x101_64x4d_fpn_1x.py deleted file mode 100644 index 50ec1df825ac0d853c02a08a7636ceb589e5c556..0000000000000000000000000000000000000000 --- a/configs/libra_rcnn/libra_faster_rcnn_x101_64x4d_fpn_1x.py +++ /dev/null @@ -1,196 +0,0 @@ -# model settings -model = dict( - type='FasterRCNN', - pretrained='open-mmlab://resnext101_64x4d', - backbone=dict( - type='ResNeXt', - depth=101, - groups=64, - base_width=4, - num_stages=4, - out_indices=(0, 1, 2, 3), - frozen_stages=1, - norm_cfg=dict(type='BN', requires_grad=True), - style='pytorch'), - neck=[ - dict( - type='FPN', - in_channels=[256, 512, 1024, 2048], - out_channels=256, - num_outs=5), - dict( - type='BFP', - in_channels=256, - num_levels=5, - refine_level=2, - refine_type='non_local') - ], - rpn_head=dict( - type='RPNHead', - in_channels=256, 
- feat_channels=256, - anchor_scales=[8], - anchor_ratios=[0.5, 1.0, 2.0], - anchor_strides=[4, 8, 16, 32, 64], - target_means=[.0, .0, .0, .0], - target_stds=[1.0, 1.0, 1.0, 1.0], - loss_cls=dict( - type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0), - loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0)), - bbox_roi_extractor=dict( - type='SingleRoIExtractor', - roi_layer=dict(type='RoIAlign', out_size=7, sample_num=2), - out_channels=256, - featmap_strides=[4, 8, 16, 32]), - bbox_head=dict( - type='SharedFCBBoxHead', - num_fcs=2, - in_channels=256, - fc_out_channels=1024, - roi_feat_size=7, - num_classes=81, - target_means=[0., 0., 0., 0.], - target_stds=[0.1, 0.1, 0.2, 0.2], - reg_class_agnostic=False, - loss_cls=dict( - type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), - loss_bbox=dict( - type='BalancedL1Loss', - alpha=0.5, - gamma=1.5, - beta=1.0, - loss_weight=1.0))) -# model training and testing settings -train_cfg = dict( - rpn=dict( - assigner=dict( - type='MaxIoUAssigner', - pos_iou_thr=0.7, - neg_iou_thr=0.3, - min_pos_iou=0.3, - ignore_iof_thr=-1), - sampler=dict( - type='RandomSampler', - num=256, - pos_fraction=0.5, - neg_pos_ub=5, - add_gt_as_proposals=False), - allowed_border=-1, - pos_weight=-1, - debug=False), - rpn_proposal=dict( - nms_across_levels=False, - nms_pre=2000, - nms_post=2000, - max_num=2000, - nms_thr=0.7, - min_bbox_size=0), - rcnn=dict( - assigner=dict( - type='MaxIoUAssigner', - pos_iou_thr=0.5, - neg_iou_thr=0.5, - min_pos_iou=0.5, - ignore_iof_thr=-1), - sampler=dict( - type='CombinedSampler', - num=512, - pos_fraction=0.25, - add_gt_as_proposals=True, - pos_sampler=dict(type='InstanceBalancedPosSampler'), - neg_sampler=dict( - type='IoUBalancedNegSampler', - floor_thr=-1, - floor_fraction=0, - num_bins=3)), - pos_weight=-1, - debug=False)) -test_cfg = dict( - rpn=dict( - nms_across_levels=False, - nms_pre=1000, - nms_post=1000, - max_num=1000, - nms_thr=0.7, - min_bbox_size=0), - rcnn=dict( 
- score_thr=0.05, nms=dict(type='nms', iou_thr=0.5), max_per_img=100) - # soft-nms is also supported for rcnn testing - # e.g., nms=dict(type='soft_nms', iou_thr=0.5, min_score=0.05) -) -# dataset settings -dataset_type = 'CocoDataset' -data_root = 'data/coco/' -img_norm_cfg = dict( - mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) -train_pipeline = [ - dict(type='LoadImageFromFile'), - dict(type='LoadAnnotations', with_bbox=True), - dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), - dict(type='RandomFlip', flip_ratio=0.5), - dict(type='Normalize', **img_norm_cfg), - dict(type='Pad', size_divisor=32), - dict(type='DefaultFormatBundle'), - dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']), -] -test_pipeline = [ - dict(type='LoadImageFromFile'), - dict( - type='MultiScaleFlipAug', - img_scale=(1333, 800), - flip=False, - transforms=[ - dict(type='Resize', keep_ratio=True), - dict(type='RandomFlip'), - dict(type='Normalize', **img_norm_cfg), - dict(type='Pad', size_divisor=32), - dict(type='ImageToTensor', keys=['img']), - dict(type='Collect', keys=['img']), - ]) -] -data = dict( - imgs_per_gpu=2, - workers_per_gpu=2, - train=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_train2017.json', - img_prefix=data_root + 'train2017/', - pipeline=train_pipeline), - val=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_val2017.json', - img_prefix=data_root + 'val2017/', - pipeline=test_pipeline), - test=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_val2017.json', - img_prefix=data_root + 'val2017/', - pipeline=test_pipeline)) -evaluation = dict(interval=1, metric='bbox') -# optimizer -optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001) -optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) -# learning policy -lr_config = dict( - policy='step', - warmup='linear', - warmup_iters=500, - warmup_ratio=1.0 / 3, - step=[8, 11]) 
-checkpoint_config = dict(interval=1) -# yapf:disable -log_config = dict( - interval=50, - hooks=[ - dict(type='TextLoggerHook'), - # dict(type='TensorboardLoggerHook') - ]) -# yapf:enable -# runtime settings -total_epochs = 12 -dist_params = dict(backend='nccl') -log_level = 'INFO' -work_dir = './work_dirs/libra_faster_rcnn_x101_64x4d_fpn_1x' -load_from = None -resume_from = None -workflow = [('train', 1)] diff --git a/configs/libra_rcnn/libra_faster_rcnn_x101_64x4d_fpn_1x_coco.py b/configs/libra_rcnn/libra_faster_rcnn_x101_64x4d_fpn_1x_coco.py new file mode 100644 index 0000000000000000000000000000000000000000..87674040644f45e7a49676e48a063d4ef338346a --- /dev/null +++ b/configs/libra_rcnn/libra_faster_rcnn_x101_64x4d_fpn_1x_coco.py @@ -0,0 +1,14 @@ +_base_ = './libra_faster_rcnn_r50_fpn_1x_coco.py' +model = dict( + pretrained='open-mmlab://resnext101_64x4d', + backbone=dict( + type='ResNeXt', + depth=101, + groups=64, + base_width=4, + num_stages=4, + out_indices=(0, 1, 2, 3), + frozen_stages=1, + norm_cfg=dict(type='BN', requires_grad=True), + style='pytorch')) +work_dir = './work_dirs/libra_faster_rcnn_x101_64x4d_fpn_1x' diff --git a/configs/libra_rcnn/libra_retinanet_r50_fpn_1x.py b/configs/libra_rcnn/libra_retinanet_r50_fpn_1x.py deleted file mode 100644 index 543ec4a99d6868a9f1041b80641bc758252388fd..0000000000000000000000000000000000000000 --- a/configs/libra_rcnn/libra_retinanet_r50_fpn_1x.py +++ /dev/null @@ -1,144 +0,0 @@ -# model settings -model = dict( - type='RetinaNet', - pretrained='torchvision://resnet50', - backbone=dict( - type='ResNet', - depth=50, - num_stages=4, - out_indices=(0, 1, 2, 3), - frozen_stages=1, - norm_cfg=dict(type='BN', requires_grad=True), - style='pytorch'), - neck=[ - dict( - type='FPN', - in_channels=[256, 512, 1024, 2048], - out_channels=256, - start_level=1, - add_extra_convs=True, - num_outs=5), - dict( - type='BFP', - in_channels=256, - num_levels=5, - refine_level=1, - refine_type='non_local') - ], - bbox_head=dict( - 
type='RetinaHead', - num_classes=81, - in_channels=256, - stacked_convs=4, - feat_channels=256, - octave_base_scale=4, - scales_per_octave=3, - anchor_ratios=[0.5, 1.0, 2.0], - anchor_strides=[8, 16, 32, 64, 128], - target_means=[.0, .0, .0, .0], - target_stds=[1.0, 1.0, 1.0, 1.0], - loss_cls=dict( - type='FocalLoss', - use_sigmoid=True, - gamma=2.0, - alpha=0.25, - loss_weight=1.0), - loss_bbox=dict( - type='BalancedL1Loss', - alpha=0.5, - gamma=1.5, - beta=0.11, - loss_weight=1.0))) -# training and testing settings -train_cfg = dict( - assigner=dict( - type='MaxIoUAssigner', - pos_iou_thr=0.5, - neg_iou_thr=0.4, - min_pos_iou=0, - ignore_iof_thr=-1), - allowed_border=-1, - pos_weight=-1, - debug=False) -test_cfg = dict( - nms_pre=1000, - min_bbox_size=0, - score_thr=0.05, - nms=dict(type='nms', iou_thr=0.5), - max_per_img=100) -# dataset settings -dataset_type = 'CocoDataset' -data_root = 'data/coco/' -img_norm_cfg = dict( - mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) -train_pipeline = [ - dict(type='LoadImageFromFile'), - dict(type='LoadAnnotations', with_bbox=True), - dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), - dict(type='RandomFlip', flip_ratio=0.5), - dict(type='Normalize', **img_norm_cfg), - dict(type='Pad', size_divisor=32), - dict(type='DefaultFormatBundle'), - dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']), -] -test_pipeline = [ - dict(type='LoadImageFromFile'), - dict( - type='MultiScaleFlipAug', - img_scale=(1333, 800), - flip=False, - transforms=[ - dict(type='Resize', keep_ratio=True), - dict(type='RandomFlip'), - dict(type='Normalize', **img_norm_cfg), - dict(type='Pad', size_divisor=32), - dict(type='ImageToTensor', keys=['img']), - dict(type='Collect', keys=['img']), - ]) -] -data = dict( - imgs_per_gpu=2, - workers_per_gpu=2, - train=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_train2017.json', - img_prefix=data_root + 'train2017/', - 
pipeline=train_pipeline), - val=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_val2017.json', - img_prefix=data_root + 'val2017/', - pipeline=test_pipeline), - test=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_val2017.json', - img_prefix=data_root + 'val2017/', - pipeline=test_pipeline)) -evaluation = dict(interval=1, metric='bbox') -# optimizer -optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001) -optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) -# learning policy -lr_config = dict( - policy='step', - warmup='linear', - warmup_iters=500, - warmup_ratio=1.0 / 3, - step=[8, 11]) -checkpoint_config = dict(interval=1) -# yapf:disable -log_config = dict( - interval=50, - hooks=[ - dict(type='TextLoggerHook'), - # dict(type='TensorboardLoggerHook') - ]) -# yapf:enable -# runtime settings -total_epochs = 12 -dist_params = dict(backend='nccl') -log_level = 'INFO' -work_dir = './work_dirs/libra_retinanet_r50_fpn_1x' -load_from = None -resume_from = None -workflow = [('train', 1)] diff --git a/configs/libra_rcnn/libra_retinanet_r50_fpn_1x_coco.py b/configs/libra_rcnn/libra_retinanet_r50_fpn_1x_coco.py new file mode 100644 index 0000000000000000000000000000000000000000..34f6222701eacec140f04e85ce14f6922494f4b6 --- /dev/null +++ b/configs/libra_rcnn/libra_retinanet_r50_fpn_1x_coco.py @@ -0,0 +1,27 @@ +_base_ = '../retinanet/retinanet_r50_fpn_1x_coco.py' +# model settings +model = dict( + neck=[ + dict( + type='FPN', + in_channels=[256, 512, 1024, 2048], + out_channels=256, + start_level=1, + add_extra_convs=True, + num_outs=5), + dict( + type='BFP', + in_channels=256, + num_levels=5, + refine_level=1, + refine_type='non_local') + ], + bbox_head=dict( + loss_bbox=dict( + _delete_=True, + type='BalancedL1Loss', + alpha=0.5, + gamma=1.5, + beta=0.11, + loss_weight=1.0))) +work_dir = './work_dirs/libra_retinanet_r50_fpn_1x' diff --git a/configs/mask_rcnn/mask_rcnn_r101_fpn_1x_coco.py 
b/configs/mask_rcnn/mask_rcnn_r101_fpn_1x_coco.py new file mode 100644 index 0000000000000000000000000000000000000000..92efafc29e5b258dc1163a0801bd025f1464925d --- /dev/null +++ b/configs/mask_rcnn/mask_rcnn_r101_fpn_1x_coco.py @@ -0,0 +1,3 @@ +_base_ = './mask_rcnn_r50_fpn_1x_coco.py' +model = dict(pretrained='torchvision://resnet101', backbone=dict(depth=101)) +work_dir = './work_dirs/mask_rcnn_r101_fpn_1x' diff --git a/configs/mask_rcnn/mask_rcnn_r50_caffe_c4_1x_coco.py b/configs/mask_rcnn/mask_rcnn_r50_caffe_c4_1x_coco.py new file mode 100644 index 0000000000000000000000000000000000000000..16a4fd648751149fb25992d1fc400a52e54ca15c --- /dev/null +++ b/configs/mask_rcnn/mask_rcnn_r50_caffe_c4_1x_coco.py @@ -0,0 +1,40 @@ +_base_ = [ + '../_base_/models/mask_rcnn_r50_caffe_c4.py', + '../_base_/datasets/coco_instance.py', + '../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py' +] +# use caffe img_norm +img_norm_cfg = dict( + mean=[102.9801, 115.9465, 122.7717], std=[1.0, 1.0, 1.0], to_rgb=False) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations', with_bbox=True, with_mask=True), + dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(1333, 800), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] +data = dict( + train=dict(pipeline=train_pipeline), + val=dict(pipeline=test_pipeline), + test=dict(pipeline=test_pipeline)) +# optimizer +optimizer = dict(type='SGD', lr=0.01, 
momentum=0.9, weight_decay=0.0001) +work_dir = './work_dirs/mask_rcnn_r50_caffe_c4_1x' diff --git a/configs/mask_rcnn/mask_rcnn_r50_caffe_fpn_1x_coco.py b/configs/mask_rcnn/mask_rcnn_r50_caffe_fpn_1x_coco.py new file mode 100644 index 0000000000000000000000000000000000000000..5e8a6d653f4b354d2e6e28acba5441aef653dbfd --- /dev/null +++ b/configs/mask_rcnn/mask_rcnn_r50_caffe_fpn_1x_coco.py @@ -0,0 +1,37 @@ +_base_ = './mask_rcnn_r50_fpn_1x_coco.py' +model = dict( + pretrained='open-mmlab://resnet50_caffe', + backbone=dict(norm_cfg=dict(requires_grad=False), style='caffe')) +# use caffe img_norm +img_norm_cfg = dict( + mean=[102.9801, 115.9465, 122.7717], std=[1.0, 1.0, 1.0], to_rgb=False) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations', with_bbox=True, with_mask=True), + dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(1333, 800), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] +data = dict( + train=dict(pipeline=train_pipeline), + val=dict(pipeline=test_pipeline), + test=dict(pipeline=test_pipeline)) +work_dir = './work_dirs/mask_rcnn_r50_caffe_fpn_1x' diff --git a/configs/mask_rcnn/mask_rcnn_r50_fpn_1x_coco.py b/configs/mask_rcnn/mask_rcnn_r50_fpn_1x_coco.py new file mode 100644 index 0000000000000000000000000000000000000000..6d84957d2aff0fb8da863c5d034520ebf036268f --- /dev/null +++ b/configs/mask_rcnn/mask_rcnn_r50_fpn_1x_coco.py @@ -0,0 +1,6 @@ +_base_ = [ + 
'../_base_/models/mask_rcnn_r50_fpn.py', + '../_base_/datasets/coco_instance.py', + '../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py' +] +work_dir = './work_dirs/mask_rcnn_r50_fpn_1x' diff --git a/configs/mask_rcnn/mask_rcnn_x101_32x4d_fpn_1x_coco.py b/configs/mask_rcnn/mask_rcnn_x101_32x4d_fpn_1x_coco.py new file mode 100644 index 0000000000000000000000000000000000000000..3092a71c042a81e52118935c2dfaaae8bd7d0279 --- /dev/null +++ b/configs/mask_rcnn/mask_rcnn_x101_32x4d_fpn_1x_coco.py @@ -0,0 +1,14 @@ +_base_ = './mask_rcnn_r101_fpn_1x_coco.py' +model = dict( + pretrained='open-mmlab://resnext101_32x4d', + backbone=dict( + type='ResNeXt', + depth=101, + groups=32, + base_width=4, + num_stages=4, + out_indices=(0, 1, 2, 3), + frozen_stages=1, + norm_cfg=dict(type='BN', requires_grad=True), + style='pytorch')) +work_dir = './work_dirs/mask_rcnn_x101_32x4d_fpn_1x' diff --git a/configs/mask_rcnn/mask_rcnn_x101_64x4d_fpn_1x_coco.py b/configs/mask_rcnn/mask_rcnn_x101_64x4d_fpn_1x_coco.py new file mode 100644 index 0000000000000000000000000000000000000000..35a56b07d67d4bf2f86b3b000130a3fd7d26b7a9 --- /dev/null +++ b/configs/mask_rcnn/mask_rcnn_x101_64x4d_fpn_1x_coco.py @@ -0,0 +1,14 @@ +_base_ = './mask_rcnn_x101_32x4d_fpn_1x_coco.py' +model = dict( + pretrained='open-mmlab://resnext101_64x4d', + backbone=dict( + type='ResNeXt', + depth=101, + groups=64, + base_width=4, + num_stages=4, + out_indices=(0, 1, 2, 3), + frozen_stages=1, + norm_cfg=dict(type='BN', requires_grad=True), + style='pytorch')) +work_dir = './work_dirs/mask_rcnn_x101_64x4d_fpn_1x' diff --git a/configs/mask_rcnn_r101_fpn_1x.py b/configs/mask_rcnn_r101_fpn_1x.py deleted file mode 100644 index 47451f09cf664ef786bd70225606f2a2d18c25ae..0000000000000000000000000000000000000000 --- a/configs/mask_rcnn_r101_fpn_1x.py +++ /dev/null @@ -1,190 +0,0 @@ -# model settings -model = dict( - type='MaskRCNN', - pretrained='torchvision://resnet101', - backbone=dict( - type='ResNet', - depth=101, - 
num_stages=4, - out_indices=(0, 1, 2, 3), - frozen_stages=1, - norm_cfg=dict(type='BN', requires_grad=True), - style='pytorch'), - neck=dict( - type='FPN', - in_channels=[256, 512, 1024, 2048], - out_channels=256, - num_outs=5), - rpn_head=dict( - type='RPNHead', - in_channels=256, - feat_channels=256, - anchor_scales=[8], - anchor_ratios=[0.5, 1.0, 2.0], - anchor_strides=[4, 8, 16, 32, 64], - target_means=[.0, .0, .0, .0], - target_stds=[1.0, 1.0, 1.0, 1.0], - loss_cls=dict( - type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0), - loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0)), - bbox_roi_extractor=dict( - type='SingleRoIExtractor', - roi_layer=dict(type='RoIAlign', out_size=7, sample_num=2), - out_channels=256, - featmap_strides=[4, 8, 16, 32]), - bbox_head=dict( - type='SharedFCBBoxHead', - num_fcs=2, - in_channels=256, - fc_out_channels=1024, - roi_feat_size=7, - num_classes=81, - target_means=[0., 0., 0., 0.], - target_stds=[0.1, 0.1, 0.2, 0.2], - reg_class_agnostic=False, - loss_cls=dict( - type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), - loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0)), - mask_roi_extractor=dict( - type='SingleRoIExtractor', - roi_layer=dict(type='RoIAlign', out_size=14, sample_num=2), - out_channels=256, - featmap_strides=[4, 8, 16, 32]), - mask_head=dict( - type='FCNMaskHead', - num_convs=4, - in_channels=256, - conv_out_channels=256, - num_classes=81, - loss_mask=dict( - type='CrossEntropyLoss', use_mask=True, loss_weight=1.0))) -# model training and testing settings -train_cfg = dict( - rpn=dict( - assigner=dict( - type='MaxIoUAssigner', - pos_iou_thr=0.7, - neg_iou_thr=0.3, - min_pos_iou=0.3, - ignore_iof_thr=-1), - sampler=dict( - type='RandomSampler', - num=256, - pos_fraction=0.5, - neg_pos_ub=-1, - add_gt_as_proposals=False), - allowed_border=0, - pos_weight=-1, - debug=False), - rpn_proposal=dict( - nms_across_levels=False, - nms_pre=2000, - nms_post=2000, - max_num=2000, 
- nms_thr=0.7, - min_bbox_size=0), - rcnn=dict( - assigner=dict( - type='MaxIoUAssigner', - pos_iou_thr=0.5, - neg_iou_thr=0.5, - min_pos_iou=0.5, - ignore_iof_thr=-1), - sampler=dict( - type='RandomSampler', - num=512, - pos_fraction=0.25, - neg_pos_ub=-1, - add_gt_as_proposals=True), - mask_size=28, - pos_weight=-1, - debug=False)) -test_cfg = dict( - rpn=dict( - nms_across_levels=False, - nms_pre=1000, - nms_post=1000, - max_num=1000, - nms_thr=0.7, - min_bbox_size=0), - rcnn=dict( - score_thr=0.05, - nms=dict(type='nms', iou_thr=0.5), - max_per_img=100, - mask_thr_binary=0.5)) -# dataset settings -dataset_type = 'CocoDataset' -data_root = 'data/coco/' -img_norm_cfg = dict( - mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) -train_pipeline = [ - dict(type='LoadImageFromFile'), - dict(type='LoadAnnotations', with_bbox=True, with_mask=True), - dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), - dict(type='RandomFlip', flip_ratio=0.5), - dict(type='Normalize', **img_norm_cfg), - dict(type='Pad', size_divisor=32), - dict(type='DefaultFormatBundle'), - dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']), -] -test_pipeline = [ - dict(type='LoadImageFromFile'), - dict( - type='MultiScaleFlipAug', - img_scale=(1333, 800), - flip=False, - transforms=[ - dict(type='Resize', keep_ratio=True), - dict(type='RandomFlip'), - dict(type='Normalize', **img_norm_cfg), - dict(type='Pad', size_divisor=32), - dict(type='ImageToTensor', keys=['img']), - dict(type='Collect', keys=['img']), - ]) -] -data = dict( - imgs_per_gpu=2, - workers_per_gpu=2, - train=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_train2017.json', - img_prefix=data_root + 'train2017/', - pipeline=train_pipeline), - val=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_val2017.json', - img_prefix=data_root + 'val2017/', - pipeline=test_pipeline), - test=dict( - type=dataset_type, - ann_file=data_root + 
'annotations/instances_val2017.json', - img_prefix=data_root + 'val2017/', - pipeline=test_pipeline)) -evaluation = dict(interval=1, metric=['bbox', 'segm']) -# optimizer -optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001) -optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) -# learning policy -lr_config = dict( - policy='step', - warmup='linear', - warmup_iters=500, - warmup_ratio=1.0 / 3, - step=[8, 11]) -checkpoint_config = dict(interval=1) -# yapf:disable -log_config = dict( - interval=50, - hooks=[ - dict(type='TextLoggerHook'), - # dict(type='TensorboardLoggerHook') - ]) -# yapf:enable -# runtime settings -total_epochs = 12 -dist_params = dict(backend='nccl') -log_level = 'INFO' -work_dir = './work_dirs/mask_rcnn_r101_fpn_1x' -load_from = None -resume_from = None -workflow = [('train', 1)] diff --git a/configs/mask_rcnn_x101_32x4d_fpn_1x.py b/configs/mask_rcnn_x101_32x4d_fpn_1x.py deleted file mode 100644 index 0e33ef4ea1b14ab7b3589a7bfe03d03205122901..0000000000000000000000000000000000000000 --- a/configs/mask_rcnn_x101_32x4d_fpn_1x.py +++ /dev/null @@ -1,192 +0,0 @@ -# model settings -model = dict( - type='MaskRCNN', - pretrained='open-mmlab://resnext101_32x4d', - backbone=dict( - type='ResNeXt', - depth=101, - groups=32, - base_width=4, - num_stages=4, - out_indices=(0, 1, 2, 3), - frozen_stages=1, - norm_cfg=dict(type='BN', requires_grad=True), - style='pytorch'), - neck=dict( - type='FPN', - in_channels=[256, 512, 1024, 2048], - out_channels=256, - num_outs=5), - rpn_head=dict( - type='RPNHead', - in_channels=256, - feat_channels=256, - anchor_scales=[8], - anchor_ratios=[0.5, 1.0, 2.0], - anchor_strides=[4, 8, 16, 32, 64], - target_means=[.0, .0, .0, .0], - target_stds=[1.0, 1.0, 1.0, 1.0], - loss_cls=dict( - type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0), - loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0)), - bbox_roi_extractor=dict( - type='SingleRoIExtractor', - 
roi_layer=dict(type='RoIAlign', out_size=7, sample_num=2), - out_channels=256, - featmap_strides=[4, 8, 16, 32]), - bbox_head=dict( - type='SharedFCBBoxHead', - num_fcs=2, - in_channels=256, - fc_out_channels=1024, - roi_feat_size=7, - num_classes=81, - target_means=[0., 0., 0., 0.], - target_stds=[0.1, 0.1, 0.2, 0.2], - reg_class_agnostic=False, - loss_cls=dict( - type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), - loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0)), - mask_roi_extractor=dict( - type='SingleRoIExtractor', - roi_layer=dict(type='RoIAlign', out_size=14, sample_num=2), - out_channels=256, - featmap_strides=[4, 8, 16, 32]), - mask_head=dict( - type='FCNMaskHead', - num_convs=4, - in_channels=256, - conv_out_channels=256, - num_classes=81, - loss_mask=dict( - type='CrossEntropyLoss', use_mask=True, loss_weight=1.0))) -# model training and testing settings -train_cfg = dict( - rpn=dict( - assigner=dict( - type='MaxIoUAssigner', - pos_iou_thr=0.7, - neg_iou_thr=0.3, - min_pos_iou=0.3, - ignore_iof_thr=-1), - sampler=dict( - type='RandomSampler', - num=256, - pos_fraction=0.5, - neg_pos_ub=-1, - add_gt_as_proposals=False), - allowed_border=0, - pos_weight=-1, - debug=False), - rpn_proposal=dict( - nms_across_levels=False, - nms_pre=2000, - nms_post=2000, - max_num=2000, - nms_thr=0.7, - min_bbox_size=0), - rcnn=dict( - assigner=dict( - type='MaxIoUAssigner', - pos_iou_thr=0.5, - neg_iou_thr=0.5, - min_pos_iou=0.5, - ignore_iof_thr=-1), - sampler=dict( - type='RandomSampler', - num=512, - pos_fraction=0.25, - neg_pos_ub=-1, - add_gt_as_proposals=True), - mask_size=28, - pos_weight=-1, - debug=False)) -test_cfg = dict( - rpn=dict( - nms_across_levels=False, - nms_pre=1000, - nms_post=1000, - max_num=1000, - nms_thr=0.7, - min_bbox_size=0), - rcnn=dict( - score_thr=0.05, - nms=dict(type='nms', iou_thr=0.5), - max_per_img=100, - mask_thr_binary=0.5)) -# dataset settings -dataset_type = 'CocoDataset' -data_root = 'data/coco/' 
-img_norm_cfg = dict( - mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) -train_pipeline = [ - dict(type='LoadImageFromFile'), - dict(type='LoadAnnotations', with_bbox=True, with_mask=True), - dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), - dict(type='RandomFlip', flip_ratio=0.5), - dict(type='Normalize', **img_norm_cfg), - dict(type='Pad', size_divisor=32), - dict(type='DefaultFormatBundle'), - dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']), -] -test_pipeline = [ - dict(type='LoadImageFromFile'), - dict( - type='MultiScaleFlipAug', - img_scale=(1333, 800), - flip=False, - transforms=[ - dict(type='Resize', keep_ratio=True), - dict(type='RandomFlip'), - dict(type='Normalize', **img_norm_cfg), - dict(type='Pad', size_divisor=32), - dict(type='ImageToTensor', keys=['img']), - dict(type='Collect', keys=['img']), - ]) -] -data = dict( - imgs_per_gpu=2, - workers_per_gpu=2, - train=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_train2017.json', - img_prefix=data_root + 'train2017/', - pipeline=train_pipeline), - val=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_val2017.json', - img_prefix=data_root + 'val2017/', - pipeline=test_pipeline), - test=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_val2017.json', - img_prefix=data_root + 'val2017/', - pipeline=test_pipeline)) -evaluation = dict(interval=1, metric=['bbox', 'segm']) -# optimizer -optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001) -optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) -# learning policy -lr_config = dict( - policy='step', - warmup='linear', - warmup_iters=500, - warmup_ratio=1.0 / 3, - step=[8, 11]) -checkpoint_config = dict(interval=1) -# yapf:disable -log_config = dict( - interval=50, - hooks=[ - dict(type='TextLoggerHook'), - # dict(type='TensorboardLoggerHook') - ]) -# yapf:enable -# runtime settings -total_epochs = 
12 -dist_params = dict(backend='nccl') -log_level = 'INFO' -work_dir = './work_dirs/mask_rcnn_x101_32x4d_fpn_1x' -load_from = None -resume_from = None -workflow = [('train', 1)] diff --git a/configs/mask_rcnn_x101_64x4d_fpn_1x.py b/configs/mask_rcnn_x101_64x4d_fpn_1x.py deleted file mode 100644 index 02ef2cee3cb14dbadcae00063f383142623c47d9..0000000000000000000000000000000000000000 --- a/configs/mask_rcnn_x101_64x4d_fpn_1x.py +++ /dev/null @@ -1,192 +0,0 @@ -# model settings -model = dict( - type='MaskRCNN', - pretrained='open-mmlab://resnext101_64x4d', - backbone=dict( - type='ResNeXt', - depth=101, - groups=64, - base_width=4, - num_stages=4, - out_indices=(0, 1, 2, 3), - frozen_stages=1, - norm_cfg=dict(type='BN', requires_grad=True), - style='pytorch'), - neck=dict( - type='FPN', - in_channels=[256, 512, 1024, 2048], - out_channels=256, - num_outs=5), - rpn_head=dict( - type='RPNHead', - in_channels=256, - feat_channels=256, - anchor_scales=[8], - anchor_ratios=[0.5, 1.0, 2.0], - anchor_strides=[4, 8, 16, 32, 64], - target_means=[.0, .0, .0, .0], - target_stds=[1.0, 1.0, 1.0, 1.0], - loss_cls=dict( - type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0), - loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0)), - bbox_roi_extractor=dict( - type='SingleRoIExtractor', - roi_layer=dict(type='RoIAlign', out_size=7, sample_num=2), - out_channels=256, - featmap_strides=[4, 8, 16, 32]), - bbox_head=dict( - type='SharedFCBBoxHead', - num_fcs=2, - in_channels=256, - fc_out_channels=1024, - roi_feat_size=7, - num_classes=81, - target_means=[0., 0., 0., 0.], - target_stds=[0.1, 0.1, 0.2, 0.2], - reg_class_agnostic=False, - loss_cls=dict( - type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), - loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0)), - mask_roi_extractor=dict( - type='SingleRoIExtractor', - roi_layer=dict(type='RoIAlign', out_size=14, sample_num=2), - out_channels=256, - featmap_strides=[4, 8, 16, 32]), - 
mask_head=dict( - type='FCNMaskHead', - num_convs=4, - in_channels=256, - conv_out_channels=256, - num_classes=81, - loss_mask=dict( - type='CrossEntropyLoss', use_mask=True, loss_weight=1.0))) -# model training and testing settings -train_cfg = dict( - rpn=dict( - assigner=dict( - type='MaxIoUAssigner', - pos_iou_thr=0.7, - neg_iou_thr=0.3, - min_pos_iou=0.3, - ignore_iof_thr=-1), - sampler=dict( - type='RandomSampler', - num=256, - pos_fraction=0.5, - neg_pos_ub=-1, - add_gt_as_proposals=False), - allowed_border=0, - pos_weight=-1, - debug=False), - rpn_proposal=dict( - nms_across_levels=False, - nms_pre=2000, - nms_post=2000, - max_num=2000, - nms_thr=0.7, - min_bbox_size=0), - rcnn=dict( - assigner=dict( - type='MaxIoUAssigner', - pos_iou_thr=0.5, - neg_iou_thr=0.5, - min_pos_iou=0.5, - ignore_iof_thr=-1), - sampler=dict( - type='RandomSampler', - num=512, - pos_fraction=0.25, - neg_pos_ub=-1, - add_gt_as_proposals=True), - mask_size=28, - pos_weight=-1, - debug=False)) -test_cfg = dict( - rpn=dict( - nms_across_levels=False, - nms_pre=1000, - nms_post=1000, - max_num=1000, - nms_thr=0.7, - min_bbox_size=0), - rcnn=dict( - score_thr=0.05, - nms=dict(type='nms', iou_thr=0.5), - max_per_img=100, - mask_thr_binary=0.5)) -# dataset settings -dataset_type = 'CocoDataset' -data_root = 'data/coco/' -img_norm_cfg = dict( - mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) -train_pipeline = [ - dict(type='LoadImageFromFile'), - dict(type='LoadAnnotations', with_bbox=True, with_mask=True), - dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), - dict(type='RandomFlip', flip_ratio=0.5), - dict(type='Normalize', **img_norm_cfg), - dict(type='Pad', size_divisor=32), - dict(type='DefaultFormatBundle'), - dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']), -] -test_pipeline = [ - dict(type='LoadImageFromFile'), - dict( - type='MultiScaleFlipAug', - img_scale=(1333, 800), - flip=False, - transforms=[ - dict(type='Resize', 
keep_ratio=True), - dict(type='RandomFlip'), - dict(type='Normalize', **img_norm_cfg), - dict(type='Pad', size_divisor=32), - dict(type='ImageToTensor', keys=['img']), - dict(type='Collect', keys=['img']), - ]) -] -data = dict( - imgs_per_gpu=2, - workers_per_gpu=2, - train=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_train2017.json', - img_prefix=data_root + 'train2017/', - pipeline=train_pipeline), - val=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_val2017.json', - img_prefix=data_root + 'val2017/', - pipeline=test_pipeline), - test=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_val2017.json', - img_prefix=data_root + 'val2017/', - pipeline=test_pipeline)) -evaluation = dict(interval=1, metric=['bbox', 'segm']) -# optimizer -optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001) -optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) -# learning policy -lr_config = dict( - policy='step', - warmup='linear', - warmup_iters=500, - warmup_ratio=1.0 / 3, - step=[8, 11]) -checkpoint_config = dict(interval=1) -# yapf:disable -log_config = dict( - interval=50, - hooks=[ - dict(type='TextLoggerHook'), - # dict(type='TensorboardLoggerHook') - ]) -# yapf:enable -# runtime settings -total_epochs = 12 -dist_params = dict(backend='nccl') -log_level = 'INFO' -work_dir = './work_dirs/mask_rcnn_x101_64x4d_fpn_1x' -load_from = None -resume_from = None -workflow = [('train', 1)] diff --git a/configs/ms_rcnn/ms_rcnn_r101_caffe_fpn_1x.py b/configs/ms_rcnn/ms_rcnn_r101_caffe_fpn_1x.py deleted file mode 100644 index c59a51316dfc853a43b8234993393a043b163e90..0000000000000000000000000000000000000000 --- a/configs/ms_rcnn/ms_rcnn_r101_caffe_fpn_1x.py +++ /dev/null @@ -1,200 +0,0 @@ -# model settings -model = dict( - type='MaskScoringRCNN', - pretrained='open-mmlab://resnet101_caffe', - backbone=dict( - type='ResNet', - depth=101, - num_stages=4, - out_indices=(0, 1, 2, 3), - 
frozen_stages=1, - norm_cfg=dict(type='BN', requires_grad=False), - style='caffe'), - neck=dict( - type='FPN', - in_channels=[256, 512, 1024, 2048], - out_channels=256, - num_outs=5), - rpn_head=dict( - type='RPNHead', - in_channels=256, - feat_channels=256, - anchor_scales=[8], - anchor_ratios=[0.5, 1.0, 2.0], - anchor_strides=[4, 8, 16, 32, 64], - target_means=[.0, .0, .0, .0], - target_stds=[1.0, 1.0, 1.0, 1.0], - loss_cls=dict( - type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0), - loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0)), - bbox_roi_extractor=dict( - type='SingleRoIExtractor', - roi_layer=dict(type='RoIAlign', out_size=7, sample_num=2), - out_channels=256, - featmap_strides=[4, 8, 16, 32]), - bbox_head=dict( - type='SharedFCBBoxHead', - num_fcs=2, - in_channels=256, - fc_out_channels=1024, - roi_feat_size=7, - num_classes=81, - target_means=[0., 0., 0., 0.], - target_stds=[0.1, 0.1, 0.2, 0.2], - reg_class_agnostic=False, - loss_cls=dict( - type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), - loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0)), - mask_roi_extractor=dict( - type='SingleRoIExtractor', - roi_layer=dict(type='RoIAlign', out_size=14, sample_num=2), - out_channels=256, - featmap_strides=[4, 8, 16, 32]), - mask_head=dict( - type='FCNMaskHead', - num_convs=4, - in_channels=256, - conv_out_channels=256, - num_classes=81, - loss_mask=dict( - type='CrossEntropyLoss', use_mask=True, loss_weight=1.0)), - mask_iou_head=dict( - type='MaskIoUHead', - num_convs=4, - num_fcs=2, - roi_feat_size=14, - in_channels=256, - conv_out_channels=256, - fc_out_channels=1024, - num_classes=81)) -# model training and testing settings -train_cfg = dict( - rpn=dict( - assigner=dict( - type='MaxIoUAssigner', - pos_iou_thr=0.7, - neg_iou_thr=0.3, - min_pos_iou=0.3, - ignore_iof_thr=-1), - sampler=dict( - type='RandomSampler', - num=256, - pos_fraction=0.5, - neg_pos_ub=-1, - add_gt_as_proposals=False), - 
allowed_border=0, - pos_weight=-1, - debug=False), - rpn_proposal=dict( - nms_across_levels=False, - nms_pre=2000, - nms_post=2000, - max_num=2000, - nms_thr=0.7, - min_bbox_size=0), - rcnn=dict( - assigner=dict( - type='MaxIoUAssigner', - pos_iou_thr=0.5, - neg_iou_thr=0.5, - min_pos_iou=0.5, - ignore_iof_thr=-1), - sampler=dict( - type='RandomSampler', - num=512, - pos_fraction=0.25, - neg_pos_ub=-1, - add_gt_as_proposals=True), - mask_size=28, - pos_weight=-1, - mask_thr_binary=0.5, - debug=False)) -test_cfg = dict( - rpn=dict( - nms_across_levels=False, - nms_pre=1000, - nms_post=1000, - max_num=1000, - nms_thr=0.7, - min_bbox_size=0), - rcnn=dict( - score_thr=0.05, - nms=dict(type='nms', iou_thr=0.5), - max_per_img=100, - mask_thr_binary=0.5)) -# dataset settings -dataset_type = 'CocoDataset' -data_root = 'data/coco/' -img_norm_cfg = dict( - mean=[102.9801, 115.9465, 122.7717], std=[1.0, 1.0, 1.0], to_rgb=False) -train_pipeline = [ - dict(type='LoadImageFromFile'), - dict(type='LoadAnnotations', with_bbox=True, with_mask=True), - dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), - dict(type='RandomFlip', flip_ratio=0.5), - dict(type='Normalize', **img_norm_cfg), - dict(type='Pad', size_divisor=32), - dict(type='DefaultFormatBundle'), - dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']), -] -test_pipeline = [ - dict(type='LoadImageFromFile'), - dict( - type='MultiScaleFlipAug', - img_scale=(1333, 800), - flip=False, - transforms=[ - dict(type='Resize', keep_ratio=True), - dict(type='RandomFlip'), - dict(type='Normalize', **img_norm_cfg), - dict(type='Pad', size_divisor=32), - dict(type='ImageToTensor', keys=['img']), - dict(type='Collect', keys=['img']), - ]) -] -data = dict( - imgs_per_gpu=2, - workers_per_gpu=2, - train=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_train2017.json', - img_prefix=data_root + 'train2017/', - pipeline=train_pipeline), - val=dict( - type=dataset_type, - ann_file=data_root 
+ 'annotations/instances_val2017.json', - img_prefix=data_root + 'val2017/', - pipeline=test_pipeline), - test=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_val2017.json', - img_prefix=data_root + 'val2017/', - pipeline=test_pipeline)) -evaluation = dict(interval=1, metric=['bbox', 'segm']) -# optimizer -optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001) -optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) -# learning policy -lr_config = dict( - policy='step', - warmup='linear', - warmup_iters=500, - warmup_ratio=1.0 / 3, - step=[8, 11]) -checkpoint_config = dict(interval=1) -# yapf:disable -log_config = dict( - interval=50, - hooks=[ - dict(type='TextLoggerHook'), - # dict(type='TensorboardLoggerHook') - ]) -# yapf:enable -# runtime settings -total_epochs = 12 -dist_params = dict(backend='nccl') -log_level = 'INFO' -work_dir = './work_dirs/ms_rcnn_r101_caffe_fpn_1x' -load_from = None -resume_from = None -workflow = [('train', 1)] diff --git a/configs/ms_rcnn/ms_rcnn_r101_caffe_fpn_1x_coco.py b/configs/ms_rcnn/ms_rcnn_r101_caffe_fpn_1x_coco.py new file mode 100644 index 0000000000000000000000000000000000000000..9f75a0b876da60197eecbb18d829dfd76eca7768 --- /dev/null +++ b/configs/ms_rcnn/ms_rcnn_r101_caffe_fpn_1x_coco.py @@ -0,0 +1,4 @@ +_base_ = './ms_rcnn_r50_caffe_fpn_1x_coco.py' +model = dict( + pretrained='open-mmlab://resnet101_caffe', backbone=dict(depth=101)) +work_dir = './work_dirs/ms_rcnn_r101_caffe_fpn_1x' diff --git a/configs/ms_rcnn/ms_rcnn_r50_caffe_fpn_1x.py b/configs/ms_rcnn/ms_rcnn_r50_caffe_fpn_1x.py deleted file mode 100644 index 3476a01a32c4c112c78e5853ea2e79fe60793ed1..0000000000000000000000000000000000000000 --- a/configs/ms_rcnn/ms_rcnn_r50_caffe_fpn_1x.py +++ /dev/null @@ -1,200 +0,0 @@ -# model settings -model = dict( - type='MaskScoringRCNN', - pretrained='open-mmlab://resnet50_caffe', - backbone=dict( - type='ResNet', - depth=50, - num_stages=4, - out_indices=(0, 1, 2, 3), - 
frozen_stages=1, - norm_cfg=dict(type='BN', requires_grad=False), - style='caffe'), - neck=dict( - type='FPN', - in_channels=[256, 512, 1024, 2048], - out_channels=256, - num_outs=5), - rpn_head=dict( - type='RPNHead', - in_channels=256, - feat_channels=256, - anchor_scales=[8], - anchor_ratios=[0.5, 1.0, 2.0], - anchor_strides=[4, 8, 16, 32, 64], - target_means=[.0, .0, .0, .0], - target_stds=[1.0, 1.0, 1.0, 1.0], - loss_cls=dict( - type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0), - loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0)), - bbox_roi_extractor=dict( - type='SingleRoIExtractor', - roi_layer=dict(type='RoIAlign', out_size=7, sample_num=2), - out_channels=256, - featmap_strides=[4, 8, 16, 32]), - bbox_head=dict( - type='SharedFCBBoxHead', - num_fcs=2, - in_channels=256, - fc_out_channels=1024, - roi_feat_size=7, - num_classes=81, - target_means=[0., 0., 0., 0.], - target_stds=[0.1, 0.1, 0.2, 0.2], - reg_class_agnostic=False, - loss_cls=dict( - type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), - loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0)), - mask_roi_extractor=dict( - type='SingleRoIExtractor', - roi_layer=dict(type='RoIAlign', out_size=14, sample_num=2), - out_channels=256, - featmap_strides=[4, 8, 16, 32]), - mask_head=dict( - type='FCNMaskHead', - num_convs=4, - in_channels=256, - conv_out_channels=256, - num_classes=81, - loss_mask=dict( - type='CrossEntropyLoss', use_mask=True, loss_weight=1.0)), - mask_iou_head=dict( - type='MaskIoUHead', - num_convs=4, - num_fcs=2, - roi_feat_size=14, - in_channels=256, - conv_out_channels=256, - fc_out_channels=1024, - num_classes=81)) -# model training and testing settings -train_cfg = dict( - rpn=dict( - assigner=dict( - type='MaxIoUAssigner', - pos_iou_thr=0.7, - neg_iou_thr=0.3, - min_pos_iou=0.3, - ignore_iof_thr=-1), - sampler=dict( - type='RandomSampler', - num=256, - pos_fraction=0.5, - neg_pos_ub=-1, - add_gt_as_proposals=False), - 
allowed_border=0, - pos_weight=-1, - debug=False), - rpn_proposal=dict( - nms_across_levels=False, - nms_pre=2000, - nms_post=2000, - max_num=2000, - nms_thr=0.7, - min_bbox_size=0), - rcnn=dict( - assigner=dict( - type='MaxIoUAssigner', - pos_iou_thr=0.5, - neg_iou_thr=0.5, - min_pos_iou=0.5, - ignore_iof_thr=-1), - sampler=dict( - type='RandomSampler', - num=512, - pos_fraction=0.25, - neg_pos_ub=-1, - add_gt_as_proposals=True), - mask_size=28, - pos_weight=-1, - mask_thr_binary=0.5, - debug=False)) -test_cfg = dict( - rpn=dict( - nms_across_levels=False, - nms_pre=1000, - nms_post=1000, - max_num=1000, - nms_thr=0.7, - min_bbox_size=0), - rcnn=dict( - score_thr=0.05, - nms=dict(type='nms', iou_thr=0.5), - max_per_img=100, - mask_thr_binary=0.5)) -# dataset settings -dataset_type = 'CocoDataset' -data_root = 'data/coco/' -img_norm_cfg = dict( - mean=[102.9801, 115.9465, 122.7717], std=[1.0, 1.0, 1.0], to_rgb=False) -train_pipeline = [ - dict(type='LoadImageFromFile'), - dict(type='LoadAnnotations', with_bbox=True, with_mask=True), - dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), - dict(type='RandomFlip', flip_ratio=0.5), - dict(type='Normalize', **img_norm_cfg), - dict(type='Pad', size_divisor=32), - dict(type='DefaultFormatBundle'), - dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']), -] -test_pipeline = [ - dict(type='LoadImageFromFile'), - dict( - type='MultiScaleFlipAug', - img_scale=(1333, 800), - flip=False, - transforms=[ - dict(type='Resize', keep_ratio=True), - dict(type='RandomFlip'), - dict(type='Normalize', **img_norm_cfg), - dict(type='Pad', size_divisor=32), - dict(type='ImageToTensor', keys=['img']), - dict(type='Collect', keys=['img']), - ]) -] -data = dict( - imgs_per_gpu=2, - workers_per_gpu=2, - train=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_train2017.json', - img_prefix=data_root + 'train2017/', - pipeline=train_pipeline), - val=dict( - type=dataset_type, - ann_file=data_root 
+ 'annotations/instances_val2017.json', - img_prefix=data_root + 'val2017/', - pipeline=test_pipeline), - test=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_val2017.json', - img_prefix=data_root + 'val2017/', - pipeline=test_pipeline)) -evaluation = dict(interval=1, metric=['bbox', 'segm']) -# optimizer -optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001) -optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) -# learning policy -lr_config = dict( - policy='step', - warmup='linear', - warmup_iters=500, - warmup_ratio=1.0 / 3, - step=[8, 11]) -checkpoint_config = dict(interval=1) -# yapf:disable -log_config = dict( - interval=50, - hooks=[ - dict(type='TextLoggerHook'), - # dict(type='TensorboardLoggerHook') - ]) -# yapf:enable -# runtime settings -total_epochs = 12 -dist_params = dict(backend='nccl') -log_level = 'INFO' -work_dir = './work_dirs/ms_rcnn_r50_caffe_fpn_1x' -load_from = None -resume_from = None -workflow = [('train', 1)] diff --git a/configs/ms_rcnn/ms_rcnn_r50_caffe_fpn_1x_coco.py b/configs/ms_rcnn/ms_rcnn_r50_caffe_fpn_1x_coco.py new file mode 100644 index 0000000000000000000000000000000000000000..e245892323b8e771bceec825b0aeb69f3e793f45 --- /dev/null +++ b/configs/ms_rcnn/ms_rcnn_r50_caffe_fpn_1x_coco.py @@ -0,0 +1,15 @@ +_base_ = '../mask_rcnn/mask_rcnn_r50_caffe_fpn_1x_coco.py' +model = dict( + type='MaskScoringRCNN', + mask_iou_head=dict( + type='MaskIoUHead', + num_convs=4, + num_fcs=2, + roi_feat_size=14, + in_channels=256, + conv_out_channels=256, + fc_out_channels=1024, + num_classes=81)) +# model training and testing settings +train_cfg = dict(rcnn=dict(mask_thr_binary=0.5)) +work_dir = './work_dirs/ms_rcnn_r50_caffe_fpn_1x' diff --git a/configs/ms_rcnn/ms_rcnn_r50_fpn_1x_coco.py b/configs/ms_rcnn/ms_rcnn_r50_fpn_1x_coco.py new file mode 100644 index 0000000000000000000000000000000000000000..6911ef01601fdc1f489873cbde157959ce0b77dc --- /dev/null +++ 
b/configs/ms_rcnn/ms_rcnn_r50_fpn_1x_coco.py @@ -0,0 +1,15 @@ +_base_ = '../mask_rcnn/mask_rcnn_r50_fpn_1x_coco.py' +model = dict( + type='MaskScoringRCNN', + mask_iou_head=dict( + type='MaskIoUHead', + num_convs=4, + num_fcs=2, + roi_feat_size=14, + in_channels=256, + conv_out_channels=256, + fc_out_channels=1024, + num_classes=81)) +# model training and testing settings +train_cfg = dict(rcnn=dict(mask_thr_binary=0.5)) +work_dir = './work_dirs/ms_rcnn_r50_fpn_1x' diff --git a/configs/ms_rcnn/ms_rcnn_x101_64x4d_fpn_1x.py b/configs/ms_rcnn/ms_rcnn_x101_64x4d_fpn_1x.py deleted file mode 100644 index 856a9307015baada7bb6c4a0576ae71d9eea6462..0000000000000000000000000000000000000000 --- a/configs/ms_rcnn/ms_rcnn_x101_64x4d_fpn_1x.py +++ /dev/null @@ -1,202 +0,0 @@ -# model settings -model = dict( - type='MaskScoringRCNN', - pretrained='open-mmlab://resnext101_64x4d', - backbone=dict( - type='ResNeXt', - depth=101, - groups=64, - base_width=4, - num_stages=4, - out_indices=(0, 1, 2, 3), - frozen_stages=1, - norm_cfg=dict(type='BN', requires_grad=True), - style='pytorch'), - neck=dict( - type='FPN', - in_channels=[256, 512, 1024, 2048], - out_channels=256, - num_outs=5), - rpn_head=dict( - type='RPNHead', - in_channels=256, - feat_channels=256, - anchor_scales=[8], - anchor_ratios=[0.5, 1.0, 2.0], - anchor_strides=[4, 8, 16, 32, 64], - target_means=[.0, .0, .0, .0], - target_stds=[1.0, 1.0, 1.0, 1.0], - loss_cls=dict( - type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0), - loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0)), - bbox_roi_extractor=dict( - type='SingleRoIExtractor', - roi_layer=dict(type='RoIAlign', out_size=7, sample_num=2), - out_channels=256, - featmap_strides=[4, 8, 16, 32]), - bbox_head=dict( - type='SharedFCBBoxHead', - num_fcs=2, - in_channels=256, - fc_out_channels=1024, - roi_feat_size=7, - num_classes=81, - target_means=[0., 0., 0., 0.], - target_stds=[0.1, 0.1, 0.2, 0.2], - reg_class_agnostic=False, - loss_cls=dict( - 
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), - loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0)), - mask_roi_extractor=dict( - type='SingleRoIExtractor', - roi_layer=dict(type='RoIAlign', out_size=14, sample_num=2), - out_channels=256, - featmap_strides=[4, 8, 16, 32]), - mask_head=dict( - type='FCNMaskHead', - num_convs=4, - in_channels=256, - conv_out_channels=256, - num_classes=81, - loss_mask=dict( - type='CrossEntropyLoss', use_mask=True, loss_weight=1.0)), - mask_iou_head=dict( - type='MaskIoUHead', - num_convs=4, - num_fcs=2, - roi_feat_size=14, - in_channels=256, - conv_out_channels=256, - fc_out_channels=1024, - num_classes=81)) -# model training and testing settings -train_cfg = dict( - rpn=dict( - assigner=dict( - type='MaxIoUAssigner', - pos_iou_thr=0.7, - neg_iou_thr=0.3, - min_pos_iou=0.3, - ignore_iof_thr=-1), - sampler=dict( - type='RandomSampler', - num=256, - pos_fraction=0.5, - neg_pos_ub=-1, - add_gt_as_proposals=False), - allowed_border=0, - pos_weight=-1, - debug=False), - rpn_proposal=dict( - nms_across_levels=False, - nms_pre=2000, - nms_post=2000, - max_num=2000, - nms_thr=0.7, - min_bbox_size=0), - rcnn=dict( - assigner=dict( - type='MaxIoUAssigner', - pos_iou_thr=0.5, - neg_iou_thr=0.5, - min_pos_iou=0.5, - ignore_iof_thr=-1), - sampler=dict( - type='RandomSampler', - num=512, - pos_fraction=0.25, - neg_pos_ub=-1, - add_gt_as_proposals=True), - mask_size=28, - pos_weight=-1, - mask_thr_binary=0.5, - debug=False)) -test_cfg = dict( - rpn=dict( - nms_across_levels=False, - nms_pre=1000, - nms_post=1000, - max_num=1000, - nms_thr=0.7, - min_bbox_size=0), - rcnn=dict( - score_thr=0.05, - nms=dict(type='nms', iou_thr=0.5), - max_per_img=100, - mask_thr_binary=0.5)) -# dataset settings -dataset_type = 'CocoDataset' -data_root = 'data/coco/' -img_norm_cfg = dict( - mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) -train_pipeline = [ - dict(type='LoadImageFromFile'), - 
dict(type='LoadAnnotations', with_bbox=True, with_mask=True), - dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), - dict(type='RandomFlip', flip_ratio=0.5), - dict(type='Normalize', **img_norm_cfg), - dict(type='Pad', size_divisor=32), - dict(type='DefaultFormatBundle'), - dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']), -] -test_pipeline = [ - dict(type='LoadImageFromFile'), - dict( - type='MultiScaleFlipAug', - img_scale=(1333, 800), - flip=False, - transforms=[ - dict(type='Resize', keep_ratio=True), - dict(type='RandomFlip'), - dict(type='Normalize', **img_norm_cfg), - dict(type='Pad', size_divisor=32), - dict(type='ImageToTensor', keys=['img']), - dict(type='Collect', keys=['img']), - ]) -] -data = dict( - imgs_per_gpu=2, - workers_per_gpu=2, - train=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_train2017.json', - img_prefix=data_root + 'train2017/', - pipeline=train_pipeline), - val=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_val2017.json', - img_prefix=data_root + 'val2017/', - pipeline=test_pipeline), - test=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_val2017.json', - img_prefix=data_root + 'val2017/', - pipeline=test_pipeline)) -evaluation = dict(interval=1, metric=['bbox', 'segm']) -# optimizer -optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001) -optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) -# learning policy -lr_config = dict( - policy='step', - warmup='linear', - warmup_iters=500, - warmup_ratio=1.0 / 3, - step=[8, 11]) -checkpoint_config = dict(interval=1) -# yapf:disable -log_config = dict( - interval=50, - hooks=[ - dict(type='TextLoggerHook'), - # dict(type='TensorboardLoggerHook') - ]) -# yapf:enable -# runtime settings -total_epochs = 12 -dist_params = dict(backend='nccl') -log_level = 'INFO' -work_dir = './work_dirs/ms_rcnn_x101_64x4d_fpn_1x' -load_from = None -resume_from = None 
-workflow = [('train', 1)] diff --git a/configs/ms_rcnn/ms_rcnn_x101_64x4d_fpn_1x_coco.py b/configs/ms_rcnn/ms_rcnn_x101_64x4d_fpn_1x_coco.py new file mode 100644 index 0000000000000000000000000000000000000000..786c08f730fe42fb58e27adfd14388385df7dcc8 --- /dev/null +++ b/configs/ms_rcnn/ms_rcnn_x101_64x4d_fpn_1x_coco.py @@ -0,0 +1,14 @@ +_base_ = './ms_rcnn_r50_fpn_1x_coco.py' +model = dict( + pretrained='open-mmlab://resnext101_64x4d', + backbone=dict( + type='ResNeXt', + depth=101, + groups=64, + base_width=4, + num_stages=4, + out_indices=(0, 1, 2, 3), + frozen_stages=1, + norm_cfg=dict(type='BN', requires_grad=True), + style='pytorch')) +work_dir = './work_dirs/ms_rcnn_x101_64x4d_fpn_1x' diff --git a/configs/nas_fpn/retinanet_crop640_r50_fpn_50e.py b/configs/nas_fpn/retinanet_crop640_r50_fpn_50e.py deleted file mode 100644 index a921c5b8771e90b747f25f07c776a3421a850e12..0000000000000000000000000000000000000000 --- a/configs/nas_fpn/retinanet_crop640_r50_fpn_50e.py +++ /dev/null @@ -1,149 +0,0 @@ -cudnn_benchmark = True -# model settings -norm_cfg = dict(type='BN', requires_grad=True) -model = dict( - type='RetinaNet', - pretrained='torchvision://resnet50', - backbone=dict( - type='ResNet', - depth=50, - num_stages=4, - out_indices=(0, 1, 2, 3), - frozen_stages=1, - norm_cfg=norm_cfg, - norm_eval=False, - style='pytorch'), - neck=dict( - type='FPN', - in_channels=[256, 512, 1024, 2048], - out_channels=256, - start_level=1, - add_extra_convs=True, - relu_before_extra_convs=True, - no_norm_on_lateral=True, - norm_cfg=norm_cfg, - num_outs=5), - bbox_head=dict( - type='RetinaSepBNHead', - num_classes=81, - num_ins=5, - in_channels=256, - stacked_convs=4, - feat_channels=256, - octave_base_scale=4, - scales_per_octave=3, - anchor_ratios=[0.5, 1.0, 2.0], - anchor_strides=[8, 16, 32, 64, 128], - target_means=[.0, .0, .0, .0], - target_stds=[1.0, 1.0, 1.0, 1.0], - norm_cfg=norm_cfg, - loss_cls=dict( - type='FocalLoss', - use_sigmoid=True, - gamma=2.0, - alpha=0.25, - 
loss_weight=1.0), - loss_bbox=dict(type='SmoothL1Loss', beta=0.11, loss_weight=1.0))) -# training and testing settings -train_cfg = dict( - assigner=dict( - type='MaxIoUAssigner', - pos_iou_thr=0.5, - neg_iou_thr=0.5, - min_pos_iou=0, - ignore_iof_thr=-1), - allowed_border=-1, - pos_weight=-1, - debug=False) -test_cfg = dict( - nms_pre=1000, - min_bbox_size=0, - score_thr=0.05, - nms=dict(type='nms', iou_thr=0.5), - max_per_img=100) -# dataset settings -dataset_type = 'CocoDataset' -data_root = 'data/coco/' -img_norm_cfg = dict( - mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) -train_pipeline = [ - dict(type='LoadImageFromFile'), - dict(type='LoadAnnotations', with_bbox=True), - dict( - type='Resize', - img_scale=(640, 640), - ratio_range=(0.8, 1.2), - keep_ratio=True), - dict(type='RandomCrop', crop_size=(640, 640)), - dict(type='RandomFlip', flip_ratio=0.5), - dict(type='Normalize', **img_norm_cfg), - dict(type='Pad', size=(640, 640)), - dict(type='DefaultFormatBundle'), - dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']), -] -test_pipeline = [ - dict(type='LoadImageFromFile'), - dict( - type='MultiScaleFlipAug', - img_scale=(640, 640), - flip=False, - transforms=[ - dict(type='Resize', keep_ratio=True), - dict(type='RandomFlip'), - dict(type='Normalize', **img_norm_cfg), - dict(type='Pad', size_divisor=64), - dict(type='ImageToTensor', keys=['img']), - dict(type='Collect', keys=['img']), - ]) -] -data = dict( - imgs_per_gpu=8, - workers_per_gpu=4, - train=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_train2017.json', - img_prefix=data_root + 'train2017/', - pipeline=train_pipeline), - val=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_val2017.json', - img_prefix=data_root + 'val2017/', - pipeline=test_pipeline), - test=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_val2017.json', - img_prefix=data_root + 'val2017/', - pipeline=test_pipeline)) 
-evaluation = dict(interval=1, metric='bbox') -# optimizer -optimizer = dict( - type='SGD', - lr=0.08, - momentum=0.9, - weight_decay=0.0001, - paramwise_options=dict(norm_decay_mult=0)) -optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) -# learning policy -lr_config = dict( - policy='step', - warmup='linear', - warmup_iters=1000, - warmup_ratio=0.1, - step=[30, 40]) -checkpoint_config = dict(interval=1) -# yapf:disable -log_config = dict( - interval=50, - hooks=[ - dict(type='TextLoggerHook'), - # dict(type='TensorboardLoggerHook') - ]) -# yapf:enable -# runtime settings -total_epochs = 50 -dist_params = dict(backend='nccl') -log_level = 'INFO' -work_dir = './work_dirs/retinanet_crop640_r50_fpn_50e' -load_from = None -resume_from = None -workflow = [('train', 1)] diff --git a/configs/nas_fpn/retinanet_crop640_r50_nasfpn_50e.py b/configs/nas_fpn/retinanet_crop640_r50_nasfpn_50e.py deleted file mode 100644 index 8797430428ec394a1ee5e0d6c2100a128da15fd8..0000000000000000000000000000000000000000 --- a/configs/nas_fpn/retinanet_crop640_r50_nasfpn_50e.py +++ /dev/null @@ -1,148 +0,0 @@ -cudnn_benchmark = True -# model settings -norm_cfg = dict(type='BN', requires_grad=True) -model = dict( - type='RetinaNet', - pretrained='torchvision://resnet50', - backbone=dict( - type='ResNet', - depth=50, - num_stages=4, - out_indices=(0, 1, 2, 3), - frozen_stages=1, - norm_cfg=norm_cfg, - norm_eval=False, - style='pytorch'), - neck=dict( - type='NASFPN', - in_channels=[256, 512, 1024, 2048], - out_channels=256, - num_outs=5, - stack_times=7, - start_level=1, - add_extra_convs=True, - norm_cfg=norm_cfg), - bbox_head=dict( - type='RetinaSepBNHead', - num_classes=81, - num_ins=5, - in_channels=256, - stacked_convs=4, - feat_channels=256, - octave_base_scale=4, - scales_per_octave=3, - anchor_ratios=[0.5, 1.0, 2.0], - anchor_strides=[8, 16, 32, 64, 128], - target_means=[.0, .0, .0, .0], - target_stds=[1.0, 1.0, 1.0, 1.0], - norm_cfg=norm_cfg, - loss_cls=dict( - 
type='FocalLoss', - use_sigmoid=True, - gamma=2.0, - alpha=0.25, - loss_weight=1.0), - loss_bbox=dict(type='SmoothL1Loss', beta=0.11, loss_weight=1.0))) -# training and testing settings -train_cfg = dict( - assigner=dict( - type='MaxIoUAssigner', - pos_iou_thr=0.5, - neg_iou_thr=0.5, - min_pos_iou=0, - ignore_iof_thr=-1), - allowed_border=-1, - pos_weight=-1, - debug=False) -test_cfg = dict( - nms_pre=1000, - min_bbox_size=0, - score_thr=0.05, - nms=dict(type='nms', iou_thr=0.5), - max_per_img=100) -# dataset settings -dataset_type = 'CocoDataset' -data_root = 'data/coco/' -img_norm_cfg = dict( - mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) -train_pipeline = [ - dict(type='LoadImageFromFile'), - dict(type='LoadAnnotations', with_bbox=True), - dict( - type='Resize', - img_scale=(640, 640), - ratio_range=(0.8, 1.2), - keep_ratio=True), - dict(type='RandomCrop', crop_size=(640, 640)), - dict(type='RandomFlip', flip_ratio=0.5), - dict(type='Normalize', **img_norm_cfg), - dict(type='Pad', size=(640, 640)), - dict(type='DefaultFormatBundle'), - dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']), -] -test_pipeline = [ - dict(type='LoadImageFromFile'), - dict( - type='MultiScaleFlipAug', - img_scale=(640, 640), - flip=False, - transforms=[ - dict(type='Resize', keep_ratio=True), - dict(type='RandomFlip'), - dict(type='Normalize', **img_norm_cfg), - dict(type='Pad', size_divisor=128), - dict(type='ImageToTensor', keys=['img']), - dict(type='Collect', keys=['img']), - ]) -] -data = dict( - imgs_per_gpu=8, - workers_per_gpu=4, - train=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_train2017.json', - img_prefix=data_root + 'train2017/', - pipeline=train_pipeline), - val=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_val2017.json', - img_prefix=data_root + 'val2017/', - pipeline=test_pipeline), - test=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_val2017.json', 
- img_prefix=data_root + 'val2017/', - pipeline=test_pipeline)) -evaluation = dict(interval=1, metric='bbox') -# optimizer -optimizer = dict( - type='SGD', - lr=0.08, - momentum=0.9, - weight_decay=0.0001, - paramwise_options=dict(norm_decay_mult=0)) -optimizer_config = dict(grad_clip=None) -# learning policy -lr_config = dict( - policy='step', - warmup='linear', - warmup_iters=1000, - warmup_ratio=0.1, - step=[30, 40]) -checkpoint_config = dict(interval=1) -# yapf:disable -log_config = dict( - interval=50, - hooks=[ - dict(type='TextLoggerHook'), - # dict(type='TensorboardLoggerHook') - ]) -# yapf:enable -# runtime settings -total_epochs = 50 -dist_params = dict(backend='nccl') -log_level = 'INFO' -work_dir = './work_dirs/retinanet_crop640_r50_nasfpn_50e' -load_from = None -resume_from = None -workflow = [('train', 1)] diff --git a/configs/nas_fpn/retinanet_r50_fpn_crop640_50e_coco.py b/configs/nas_fpn/retinanet_r50_fpn_crop640_50e_coco.py new file mode 100644 index 0000000000000000000000000000000000000000..069d5a8bbe5babc994ccf04f09db41c1ed9a22bb --- /dev/null +++ b/configs/nas_fpn/retinanet_r50_fpn_crop640_50e_coco.py @@ -0,0 +1,81 @@ +_base_ = [ + '../_base_/models/retinanet_r50_fpn.py', + '../_base_/datasets/coco_detection.py', '../_base_/default_runtime.py' +] +cudnn_benchmark = True +norm_cfg = dict(type='BN', requires_grad=True) +model = dict( + pretrained='torchvision://resnet50', + backbone=dict( + type='ResNet', + depth=50, + num_stages=4, + out_indices=(0, 1, 2, 3), + frozen_stages=1, + norm_cfg=norm_cfg, + norm_eval=False, + style='pytorch'), + neck=dict( + relu_before_extra_convs=True, + no_norm_on_lateral=True, + norm_cfg=norm_cfg), + bbox_head=dict(type='RetinaSepBNHead', num_ins=5, norm_cfg=norm_cfg)) +# training and testing settings +train_cfg = dict(assigner=dict(neg_iou_thr=0.5)) +# dataset settings +img_norm_cfg = dict( + mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) +train_pipeline = [ + 
dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations', with_bbox=True), + dict( + type='Resize', + img_scale=(640, 640), + ratio_range=(0.8, 1.2), + keep_ratio=True), + dict(type='RandomCrop', crop_size=(640, 640)), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size=(640, 640)), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(640, 640), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=64), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] +data = dict( + imgs_per_gpu=8, + workers_per_gpu=4, + train=dict(pipeline=train_pipeline), + val=dict(pipeline=test_pipeline), + test=dict(pipeline=test_pipeline)) +# optimizer +optimizer = dict( + type='SGD', + lr=0.08, + momentum=0.9, + weight_decay=0.0001, + paramwise_options=dict(norm_decay_mult=0)) +optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) +# learning policy +lr_config = dict( + policy='step', + warmup='linear', + warmup_iters=1000, + warmup_ratio=0.1, + step=[30, 40]) +# runtime settings +total_epochs = 50 +work_dir = './work_dirs/retinanet_crop640_r50_fpn_50e' diff --git a/configs/nas_fpn/retinanet_r50_nasfpn_crop640_50e_coco.py b/configs/nas_fpn/retinanet_r50_nasfpn_crop640_50e_coco.py new file mode 100644 index 0000000000000000000000000000000000000000..8f76590ba177c7b250f73068755ed58b68291df4 --- /dev/null +++ b/configs/nas_fpn/retinanet_r50_nasfpn_crop640_50e_coco.py @@ -0,0 +1,80 @@ +_base_ = [ + '../_base_/models/retinanet_r50_fpn.py', + '../_base_/datasets/coco_detection.py', '../_base_/default_runtime.py' +] +cudnn_benchmark = True +# model settings +norm_cfg = dict(type='BN', requires_grad=True) +model = dict( + 
type='RetinaNet', + pretrained='torchvision://resnet50', + backbone=dict( + type='ResNet', + depth=50, + num_stages=4, + out_indices=(0, 1, 2, 3), + frozen_stages=1, + norm_cfg=norm_cfg, + norm_eval=False, + style='pytorch'), + neck=dict(type='NASFPN', stack_times=7, norm_cfg=norm_cfg), + bbox_head=dict(type='RetinaSepBNHead', num_ins=5, norm_cfg=norm_cfg)) +# training and testing settings +train_cfg = dict(assigner=dict(neg_iou_thr=0.5)) +# dataset settings +img_norm_cfg = dict( + mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations', with_bbox=True), + dict( + type='Resize', + img_scale=(640, 640), + ratio_range=(0.8, 1.2), + keep_ratio=True), + dict(type='RandomCrop', crop_size=(640, 640)), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size=(640, 640)), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(640, 640), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=128), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] +data = dict( + imgs_per_gpu=8, + workers_per_gpu=4, + train=dict(pipeline=train_pipeline), + val=dict(pipeline=test_pipeline), + test=dict(pipeline=test_pipeline)) +# optimizer +optimizer = dict( + type='SGD', + lr=0.08, + momentum=0.9, + weight_decay=0.0001, + paramwise_options=dict(norm_decay_mult=0)) +optimizer_config = dict(grad_clip=None) +# learning policy +lr_config = dict( + policy='step', + warmup='linear', + warmup_iters=1000, + warmup_ratio=0.1, + step=[30, 40]) +# runtime settings +total_epochs = 50 +work_dir = './work_dirs/retinanet_crop640_r50_nasfpn_50e' diff --git 
a/configs/pascal_voc/faster_rcnn_r50_fpn_1x_voc0712.py b/configs/pascal_voc/faster_rcnn_r50_fpn_1x_voc0712.py index 041cd583462f23bbc0fdaa6428799751d950e70e..1e487606867ba563afabddfafade5335ea4e7163 100644 --- a/configs/pascal_voc/faster_rcnn_r50_fpn_1x_voc0712.py +++ b/configs/pascal_voc/faster_rcnn_r50_fpn_1x_voc0712.py @@ -1,177 +1,14 @@ -# model settings -model = dict( - type='FasterRCNN', - pretrained='torchvision://resnet50', - backbone=dict( - type='ResNet', - depth=50, - num_stages=4, - out_indices=(0, 1, 2, 3), - frozen_stages=1, - norm_cfg=dict(type='BN', requires_grad=True), - style='pytorch'), - neck=dict( - type='FPN', - in_channels=[256, 512, 1024, 2048], - out_channels=256, - num_outs=5), - rpn_head=dict( - type='RPNHead', - in_channels=256, - feat_channels=256, - anchor_scales=[8], - anchor_ratios=[0.5, 1.0, 2.0], - anchor_strides=[4, 8, 16, 32, 64], - target_means=[.0, .0, .0, .0], - target_stds=[1.0, 1.0, 1.0, 1.0], - loss_cls=dict( - type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0), - loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0)), - bbox_roi_extractor=dict( - type='SingleRoIExtractor', - roi_layer=dict(type='RoIAlign', out_size=7, sample_num=2), - out_channels=256, - featmap_strides=[4, 8, 16, 32]), - bbox_head=dict( - type='SharedFCBBoxHead', - num_fcs=2, - in_channels=256, - fc_out_channels=1024, - roi_feat_size=7, - num_classes=21, - target_means=[0., 0., 0., 0.], - target_stds=[0.1, 0.1, 0.2, 0.2], - reg_class_agnostic=False, - loss_cls=dict( - type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), - loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0))) -# model training and testing settings -train_cfg = dict( - rpn=dict( - assigner=dict( - type='MaxIoUAssigner', - pos_iou_thr=0.7, - neg_iou_thr=0.3, - min_pos_iou=0.3, - ignore_iof_thr=-1), - sampler=dict( - type='RandomSampler', - num=256, - pos_fraction=0.5, - neg_pos_ub=-1, - add_gt_as_proposals=False), - allowed_border=0, - 
pos_weight=-1, - debug=False), - rpn_proposal=dict( - nms_across_levels=False, - nms_pre=2000, - nms_post=2000, - max_num=2000, - nms_thr=0.7, - min_bbox_size=0), - rcnn=dict( - assigner=dict( - type='MaxIoUAssigner', - pos_iou_thr=0.5, - neg_iou_thr=0.5, - min_pos_iou=0.5, - ignore_iof_thr=-1), - sampler=dict( - type='RandomSampler', - num=512, - pos_fraction=0.25, - neg_pos_ub=-1, - add_gt_as_proposals=True), - pos_weight=-1, - debug=False)) -test_cfg = dict( - rpn=dict( - nms_across_levels=False, - nms_pre=1000, - nms_post=1000, - max_num=1000, - nms_thr=0.7, - min_bbox_size=0), - rcnn=dict( - score_thr=0.05, nms=dict(type='nms', iou_thr=0.5), max_per_img=100) - # soft-nms is also supported for rcnn testing - # e.g., nms=dict(type='soft_nms', iou_thr=0.5, min_score=0.05) -) -# dataset settings -dataset_type = 'VOCDataset' -data_root = 'data/VOCdevkit/' -img_norm_cfg = dict( - mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) -train_pipeline = [ - dict(type='LoadImageFromFile'), - dict(type='LoadAnnotations', with_bbox=True), - dict(type='Resize', img_scale=(1000, 600), keep_ratio=True), - dict(type='RandomFlip', flip_ratio=0.5), - dict(type='Normalize', **img_norm_cfg), - dict(type='Pad', size_divisor=32), - dict(type='DefaultFormatBundle'), - dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']), +_base_ = [ + '../_base_/models/faster_rcnn_r50_fpn.py', '../_base_/datasets/voc0712.py', + '../_base_/default_runtime.py' ] -test_pipeline = [ - dict(type='LoadImageFromFile'), - dict( - type='MultiScaleFlipAug', - img_scale=(1000, 600), - flip=False, - transforms=[ - dict(type='Resize', keep_ratio=True), - dict(type='RandomFlip'), - dict(type='Normalize', **img_norm_cfg), - dict(type='Pad', size_divisor=32), - dict(type='ImageToTensor', keys=['img']), - dict(type='Collect', keys=['img']), - ]) -] -data = dict( - imgs_per_gpu=2, - workers_per_gpu=2, - train=dict( - type='RepeatDataset', - times=3, - dataset=dict( - type=dataset_type, - 
ann_file=[ - data_root + 'VOC2007/ImageSets/Main/trainval.txt', - data_root + 'VOC2012/ImageSets/Main/trainval.txt' - ], - img_prefix=[data_root + 'VOC2007/', data_root + 'VOC2012/'], - pipeline=train_pipeline)), - val=dict( - type=dataset_type, - ann_file=data_root + 'VOC2007/ImageSets/Main/test.txt', - img_prefix=data_root + 'VOC2007/', - pipeline=test_pipeline), - test=dict( - type=dataset_type, - ann_file=data_root + 'VOC2007/ImageSets/Main/test.txt', - img_prefix=data_root + 'VOC2007/', - pipeline=test_pipeline)) -evaluation = dict(interval=1, metric='mAP') +model = dict(bbox_head=dict(num_classes=21)) # optimizer optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001) optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) # learning policy -lr_config = dict(policy='step', step=[3]) # actual epoch = 3 * 3 = 9 -checkpoint_config = dict(interval=1) -# yapf:disable -log_config = dict( - interval=50, - hooks=[ - dict(type='TextLoggerHook'), - # dict(type='TensorboardLoggerHook') - ]) -# yapf:enable +# actual epoch = 3 * 3 = 9 +lr_config = dict(policy='step', step=[3]) # runtime settings total_epochs = 4 # actual epoch = 4 * 3 = 12 -dist_params = dict(backend='nccl') -log_level = 'INFO' work_dir = './work_dirs/faster_rcnn_r50_fpn_1x_voc0712' -load_from = None -resume_from = None -workflow = [('train', 1)] diff --git a/configs/pascal_voc/ssd300_voc.py b/configs/pascal_voc/ssd300_voc.py deleted file mode 100644 index 4589424d3d3924e37dbb8ec564ede41b6225e26b..0000000000000000000000000000000000000000 --- a/configs/pascal_voc/ssd300_voc.py +++ /dev/null @@ -1,138 +0,0 @@ -# model settings -input_size = 300 -model = dict( - type='SingleStageDetector', - pretrained='open-mmlab://vgg16_caffe', - backbone=dict( - type='SSDVGG', - input_size=input_size, - depth=16, - with_last_pool=False, - ceil_mode=True, - out_indices=(3, 4), - out_feature_indices=(22, 34), - l2_norm_scale=20), - neck=None, - bbox_head=dict( - type='SSDHead', - 
input_size=input_size, - in_channels=(512, 1024, 512, 256, 256, 256), - num_classes=21, - anchor_strides=(8, 16, 32, 64, 100, 300), - basesize_ratio_range=(0.2, 0.9), - anchor_ratios=([2], [2, 3], [2, 3], [2, 3], [2], [2]), - target_means=(.0, .0, .0, .0), - target_stds=(0.1, 0.1, 0.2, 0.2))) -# model training and testing settings -cudnn_benchmark = True -train_cfg = dict( - assigner=dict( - type='MaxIoUAssigner', - pos_iou_thr=0.5, - neg_iou_thr=0.5, - min_pos_iou=0., - ignore_iof_thr=-1, - gt_max_assign_all=False), - smoothl1_beta=1., - allowed_border=-1, - pos_weight=-1, - neg_pos_ratio=3, - debug=False) -test_cfg = dict( - nms=dict(type='nms', iou_thr=0.45), - min_bbox_size=0, - score_thr=0.02, - max_per_img=200) -# dataset settings -dataset_type = 'VOCDataset' -data_root = 'data/VOCdevkit/' -img_norm_cfg = dict(mean=[123.675, 116.28, 103.53], std=[1, 1, 1], to_rgb=True) -train_pipeline = [ - dict(type='LoadImageFromFile', to_float32=True), - dict(type='LoadAnnotations', with_bbox=True), - dict( - type='PhotoMetricDistortion', - brightness_delta=32, - contrast_range=(0.5, 1.5), - saturation_range=(0.5, 1.5), - hue_delta=18), - dict( - type='Expand', - mean=img_norm_cfg['mean'], - to_rgb=img_norm_cfg['to_rgb'], - ratio_range=(1, 4)), - dict( - type='MinIoURandomCrop', - min_ious=(0.1, 0.3, 0.5, 0.7, 0.9), - min_crop_size=0.3), - dict(type='Resize', img_scale=(300, 300), keep_ratio=False), - dict(type='Normalize', **img_norm_cfg), - dict(type='RandomFlip', flip_ratio=0.5), - dict(type='DefaultFormatBundle'), - dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']), -] -test_pipeline = [ - dict(type='LoadImageFromFile'), - dict( - type='MultiScaleFlipAug', - img_scale=(300, 300), - flip=False, - transforms=[ - dict(type='Resize', keep_ratio=False), - dict(type='Normalize', **img_norm_cfg), - dict(type='ImageToTensor', keys=['img']), - dict(type='Collect', keys=['img']), - ]) -] -data = dict( - imgs_per_gpu=8, - workers_per_gpu=3, - train=dict( - 
type='RepeatDataset', - times=10, - dataset=dict( - type=dataset_type, - ann_file=[ - data_root + 'VOC2007/ImageSets/Main/trainval.txt', - data_root + 'VOC2012/ImageSets/Main/trainval.txt' - ], - img_prefix=[data_root + 'VOC2007/', data_root + 'VOC2012/'], - pipeline=train_pipeline)), - val=dict( - type=dataset_type, - ann_file=data_root + 'VOC2007/ImageSets/Main/test.txt', - img_prefix=data_root + 'VOC2007/', - pipeline=test_pipeline), - test=dict( - type=dataset_type, - ann_file=data_root + 'VOC2007/ImageSets/Main/test.txt', - img_prefix=data_root + 'VOC2007/', - pipeline=test_pipeline)) -evaluation = dict(interval=1, metric='mAP') -# optimizer -optimizer = dict(type='SGD', lr=1e-3, momentum=0.9, weight_decay=5e-4) -optimizer_config = dict() -# learning policy -lr_config = dict( - policy='step', - warmup='linear', - warmup_iters=500, - warmup_ratio=1.0 / 3, - step=[16, 20]) -checkpoint_config = dict(interval=1) -# yapf:disable -log_config = dict( - interval=50, - hooks=[ - dict(type='TextLoggerHook'), - # dict(type='TensorboardLoggerHook') - ]) -# yapf:enable -# runtime settings -total_epochs = 24 -dist_params = dict(backend='nccl') -log_level = 'INFO' -work_dir = './work_dirs/ssd300_voc' -load_from = None -resume_from = None -workflow = [('train', 1)] diff --git a/configs/pascal_voc/ssd300_voc0712.py b/configs/pascal_voc/ssd300_voc0712.py new file mode 100644 index 0000000000000000000000000000000000000000..5e59091ea638d1059fc0544fc7748be20b3f3f80 --- /dev/null +++ b/configs/pascal_voc/ssd300_voc0712.py @@ -0,0 +1,67 @@ +_base_ = [ + '../_base_/models/ssd300.py', '../_base_/datasets/voc0712.py', + '../_base_/default_runtime.py' +] +model = dict(bbox_head=dict(num_classes=21, basesize_ratio_range=(0.2, 0.9))) +# dataset settings +dataset_type = 'VOCDataset' +data_root = 'data/VOCdevkit/' +img_norm_cfg = dict(mean=[123.675, 116.28, 103.53], std=[1, 1, 1], to_rgb=True) +train_pipeline = [ + dict(type='LoadImageFromFile', to_float32=True), + 
dict(type='LoadAnnotations', with_bbox=True), + dict( + type='PhotoMetricDistortion', + brightness_delta=32, + contrast_range=(0.5, 1.5), + saturation_range=(0.5, 1.5), + hue_delta=18), + dict( + type='Expand', + mean=img_norm_cfg['mean'], + to_rgb=img_norm_cfg['to_rgb'], + ratio_range=(1, 4)), + dict( + type='MinIoURandomCrop', + min_ious=(0.1, 0.3, 0.5, 0.7, 0.9), + min_crop_size=0.3), + dict(type='Resize', img_scale=(300, 300), keep_ratio=False), + dict(type='Normalize', **img_norm_cfg), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(300, 300), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=False), + dict(type='Normalize', **img_norm_cfg), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] +data = dict( + imgs_per_gpu=8, + workers_per_gpu=3, + train=dict( + type='RepeatDataset', times=10, dataset=dict(pipeline=train_pipeline)), + val=dict(pipeline=test_pipeline), + test=dict(pipeline=test_pipeline)) +# optimizer +optimizer = dict(type='SGD', lr=1e-3, momentum=0.9, weight_decay=5e-4) +optimizer_config = dict() +# learning policy +lr_config = dict( + policy='step', + warmup='linear', + warmup_iters=500, + warmup_ratio=1.0 / 3, + step=[16, 20]) +checkpoint_config = dict(interval=1) +# runtime settings +total_epochs = 24 +work_dir = './work_dirs/ssd300_voc' diff --git a/configs/pascal_voc/ssd512_voc.py b/configs/pascal_voc/ssd512_voc.py deleted file mode 100644 index c81e5473ad82b343fe09df3ccd95771a75597027..0000000000000000000000000000000000000000 --- a/configs/pascal_voc/ssd512_voc.py +++ /dev/null @@ -1,138 +0,0 @@ -# model settings -input_size = 512 -model = dict( - type='SingleStageDetector', - pretrained='open-mmlab://vgg16_caffe', - backbone=dict( - type='SSDVGG', - input_size=input_size, - depth=16, - 
with_last_pool=False, - ceil_mode=True, - out_indices=(3, 4), - out_feature_indices=(22, 34), - l2_norm_scale=20), - neck=None, - bbox_head=dict( - type='SSDHead', - input_size=input_size, - in_channels=(512, 1024, 512, 256, 256, 256, 256), - num_classes=21, - anchor_strides=(8, 16, 32, 64, 128, 256, 512), - basesize_ratio_range=(0.15, 0.9), - anchor_ratios=([2], [2, 3], [2, 3], [2, 3], [2, 3], [2], [2]), - target_means=(.0, .0, .0, .0), - target_stds=(0.1, 0.1, 0.2, 0.2))) -# model training and testing settings -cudnn_benchmark = True -train_cfg = dict( - assigner=dict( - type='MaxIoUAssigner', - pos_iou_thr=0.5, - neg_iou_thr=0.5, - min_pos_iou=0., - ignore_iof_thr=-1, - gt_max_assign_all=False), - smoothl1_beta=1., - allowed_border=-1, - pos_weight=-1, - neg_pos_ratio=3, - debug=False) -test_cfg = dict( - nms=dict(type='nms', iou_thr=0.45), - min_bbox_size=0, - score_thr=0.02, - max_per_img=200) -# dataset settings -dataset_type = 'VOCDataset' -data_root = 'data/VOCdevkit/' -img_norm_cfg = dict(mean=[123.675, 116.28, 103.53], std=[1, 1, 1], to_rgb=True) -train_pipeline = [ - dict(type='LoadImageFromFile', to_float32=True), - dict(type='LoadAnnotations', with_bbox=True), - dict( - type='PhotoMetricDistortion', - brightness_delta=32, - contrast_range=(0.5, 1.5), - saturation_range=(0.5, 1.5), - hue_delta=18), - dict( - type='Expand', - mean=img_norm_cfg['mean'], - to_rgb=img_norm_cfg['to_rgb'], - ratio_range=(1, 4)), - dict( - type='MinIoURandomCrop', - min_ious=(0.1, 0.3, 0.5, 0.7, 0.9), - min_crop_size=0.3), - dict(type='Resize', img_scale=(512, 512), keep_ratio=False), - dict(type='Normalize', **img_norm_cfg), - dict(type='RandomFlip', flip_ratio=0.5), - dict(type='DefaultFormatBundle'), - dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']), -] -test_pipeline = [ - dict(type='LoadImageFromFile'), - dict( - type='MultiScaleFlipAug', - img_scale=(512, 512), - flip=False, - transforms=[ - dict(type='Resize', keep_ratio=False), - dict(type='Normalize', 
**img_norm_cfg), - dict(type='ImageToTensor', keys=['img']), - dict(type='Collect', keys=['img']), - ]) -] -data = dict( - imgs_per_gpu=8, - workers_per_gpu=3, - train=dict( - type='RepeatDataset', - times=10, - dataset=dict( - type=dataset_type, - ann_file=[ - data_root + 'VOC2007/ImageSets/Main/trainval.txt', - data_root + 'VOC2012/ImageSets/Main/trainval.txt' - ], - img_prefix=[data_root + 'VOC2007/', data_root + 'VOC2012/'], - pipeline=train_pipeline)), - val=dict( - type=dataset_type, - ann_file=data_root + 'VOC2007/ImageSets/Main/test.txt', - img_prefix=data_root + 'VOC2007/', - pipeline=test_pipeline), - test=dict( - type=dataset_type, - ann_file=data_root + 'VOC2007/ImageSets/Main/test.txt', - img_prefix=data_root + 'VOC2007/', - pipeline=test_pipeline)) -evaluation = dict(interval=1, metric='mAP') -# optimizer -optimizer = dict(type='SGD', lr=1e-3, momentum=0.9, weight_decay=5e-4) -optimizer_config = dict() -# learning policy -lr_config = dict( - policy='step', - warmup='linear', - warmup_iters=500, - warmup_ratio=1.0 / 3, - step=[16, 20]) -checkpoint_config = dict(interval=1) -# yapf:disable -log_config = dict( - interval=50, - hooks=[ - dict(type='TextLoggerHook'), - # dict(type='TensorboardLoggerHook') - ]) -# yapf:enable -# runtime settings -total_epochs = 24 -dist_params = dict(backend='nccl') -log_level = 'INFO' -work_dir = './work_dirs/ssd512_voc' -load_from = None -resume_from = None -workflow = [('train', 1)] diff --git a/configs/pascal_voc/ssd512_voc0712.py b/configs/pascal_voc/ssd512_voc0712.py new file mode 100644 index 0000000000000000000000000000000000000000..a308e00e0e5c50a9b7777dd2a63675db22c748ed --- /dev/null +++ b/configs/pascal_voc/ssd512_voc0712.py @@ -0,0 +1,53 @@ +_base_ = 'ssd300_voc0712.py' +input_size = 512 +model = dict( + backbone=dict(input_size=input_size), + bbox_head=dict( + input_size=input_size, + in_channels=(512, 1024, 512, 256, 256, 256, 256), + anchor_strides=(8, 16, 32, 64, 128, 256, 512), + 
basesize_ratio_range=(0.15, 0.9), + anchor_ratios=([2], [2, 3], [2, 3], [2, 3], [2, 3], [2], [2]))) +img_norm_cfg = dict(mean=[123.675, 116.28, 103.53], std=[1, 1, 1], to_rgb=True) +train_pipeline = [ + dict(type='LoadImageFromFile', to_float32=True), + dict(type='LoadAnnotations', with_bbox=True), + dict( + type='PhotoMetricDistortion', + brightness_delta=32, + contrast_range=(0.5, 1.5), + saturation_range=(0.5, 1.5), + hue_delta=18), + dict( + type='Expand', + mean=img_norm_cfg['mean'], + to_rgb=img_norm_cfg['to_rgb'], + ratio_range=(1, 4)), + dict( + type='MinIoURandomCrop', + min_ious=(0.1, 0.3, 0.5, 0.7, 0.9), + min_crop_size=0.3), + dict(type='Resize', img_scale=(512, 512), keep_ratio=False), + dict(type='Normalize', **img_norm_cfg), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(512, 512), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=False), + dict(type='Normalize', **img_norm_cfg), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] +data = dict( + train=dict(dataset=dict(pipeline=train_pipeline)), + val=dict(pipeline=test_pipeline), + test=dict(pipeline=test_pipeline)) +work_dir = './work_dirs/ssd512_voc' diff --git a/configs/reppoints/bbox_r50_grid_center_fpn_1x.py b/configs/reppoints/bbox_r50_grid_center_fpn_1x.py deleted file mode 100644 index 8a94bb9f1b0c302005a38f5f6c5f440b54bf5c30..0000000000000000000000000000000000000000 --- a/configs/reppoints/bbox_r50_grid_center_fpn_1x.py +++ /dev/null @@ -1,143 +0,0 @@ -# model settings -norm_cfg = dict(type='GN', num_groups=32, requires_grad=True) - -model = dict( - type='RepPointsDetector', - pretrained='torchvision://resnet50', - backbone=dict( - type='ResNet', - depth=50, - num_stages=4, - out_indices=(0, 1, 2, 3), - frozen_stages=1, - 
norm_cfg=dict(type='BN', requires_grad=True), - style='pytorch'), - neck=dict( - type='FPN', - in_channels=[256, 512, 1024, 2048], - out_channels=256, - start_level=1, - add_extra_convs=True, - num_outs=5, - norm_cfg=norm_cfg), - bbox_head=dict( - type='RepPointsHead', - num_classes=81, - in_channels=256, - feat_channels=256, - point_feat_channels=256, - stacked_convs=3, - num_points=9, - gradient_mul=0.1, - point_strides=[8, 16, 32, 64, 128], - point_base_scale=4, - norm_cfg=norm_cfg, - loss_cls=dict( - type='FocalLoss', - use_sigmoid=True, - gamma=2.0, - alpha=0.25, - loss_weight=1.0), - loss_bbox_init=dict(type='SmoothL1Loss', beta=0.11, loss_weight=0.5), - loss_bbox_refine=dict(type='SmoothL1Loss', beta=0.11, loss_weight=1.0), - transform_method='minmax', - use_grid_points=True)) -# training and testing settings -train_cfg = dict( - init=dict( - assigner=dict(type='PointAssigner', scale=4, pos_num=1), - allowed_border=-1, - pos_weight=-1, - debug=False), - refine=dict( - assigner=dict( - type='MaxIoUAssigner', - pos_iou_thr=0.5, - neg_iou_thr=0.4, - min_pos_iou=0, - ignore_iof_thr=-1), - allowed_border=-1, - pos_weight=-1, - debug=False)) -test_cfg = dict( - nms_pre=1000, - min_bbox_size=0, - score_thr=0.05, - nms=dict(type='nms', iou_thr=0.5), - max_per_img=100) -# dataset settings -dataset_type = 'CocoDataset' -data_root = 'data/coco/' -img_norm_cfg = dict( - mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) -train_pipeline = [ - dict(type='LoadImageFromFile'), - dict(type='LoadAnnotations', with_bbox=True), - dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), - dict(type='RandomFlip', flip_ratio=0.5), - dict(type='Normalize', **img_norm_cfg), - dict(type='Pad', size_divisor=32), - dict(type='DefaultFormatBundle'), - dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']), -] -test_pipeline = [ - dict(type='LoadImageFromFile'), - dict( - type='MultiScaleFlipAug', - img_scale=(1333, 800), - flip=False, - transforms=[ - 
dict(type='Resize', keep_ratio=True), - dict(type='RandomFlip'), - dict(type='Normalize', **img_norm_cfg), - dict(type='Pad', size_divisor=32), - dict(type='ImageToTensor', keys=['img']), - dict(type='Collect', keys=['img']), - ]) -] -data = dict( - imgs_per_gpu=2, - workers_per_gpu=2, - train=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_train2017.json', - img_prefix=data_root + 'train2017/', - pipeline=train_pipeline), - val=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_val2017.json', - img_prefix=data_root + 'val2017/', - pipeline=test_pipeline), - test=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_val2017.json', - img_prefix=data_root + 'val2017/', - pipeline=test_pipeline)) -evaluation = dict(interval=1, metric='bbox') -# optimizer -optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001) -optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) -# learning policy -lr_config = dict( - policy='step', - warmup='linear', - warmup_iters=500, - warmup_ratio=1.0 / 3, - step=[8, 11]) -checkpoint_config = dict(interval=1) -# yapf:disable -log_config = dict( - interval=50, - hooks=[ - dict(type='TextLoggerHook'), - # dict(type='TensorboardLoggerHook') - ]) -# yapf:enable -# runtime settings -total_epochs = 12 -dist_params = dict(backend='nccl') -log_level = 'INFO' -work_dir = './work_dirs/bbox_r50_grid_center_fpn_1x' -load_from = None -resume_from = None -workflow = [('train', 1)] diff --git a/configs/reppoints/bbox_r50_grid_center_fpn_gn-neck+head_1x_coco.py b/configs/reppoints/bbox_r50_grid_center_fpn_gn-neck+head_1x_coco.py new file mode 100644 index 0000000000000000000000000000000000000000..7bdcb8a2437790aa3c988122454c0c1b22ad2eea --- /dev/null +++ b/configs/reppoints/bbox_r50_grid_center_fpn_gn-neck+head_1x_coco.py @@ -0,0 +1,3 @@ +_base_ = './reppoints_moment_r50_fpn_gn-neck+head_1x_coco.py' +model = dict(bbox_head=dict(transform_method='minmax', 
use_grid_points=True)) +work_dir = './work_dirs/bbox_r50_grid_center_fpn_1x' diff --git a/configs/reppoints/bbox_r50_grid_fpn_1x.py b/configs/reppoints/bbox_r50_grid_fpn_1x.py deleted file mode 100644 index 5fe186a32fb14f101ce9d8a000c0d87e5cd4166e..0000000000000000000000000000000000000000 --- a/configs/reppoints/bbox_r50_grid_fpn_1x.py +++ /dev/null @@ -1,148 +0,0 @@ -# model settings -norm_cfg = dict(type='GN', num_groups=32, requires_grad=True) - -model = dict( - type='RepPointsDetector', - pretrained='torchvision://resnet50', - backbone=dict( - type='ResNet', - depth=50, - num_stages=4, - out_indices=(0, 1, 2, 3), - frozen_stages=1, - norm_cfg=dict(type='BN', requires_grad=True), - style='pytorch'), - neck=dict( - type='FPN', - in_channels=[256, 512, 1024, 2048], - out_channels=256, - start_level=1, - add_extra_convs=True, - num_outs=5, - norm_cfg=norm_cfg), - bbox_head=dict( - type='RepPointsHead', - num_classes=81, - in_channels=256, - feat_channels=256, - point_feat_channels=256, - stacked_convs=3, - num_points=9, - gradient_mul=0.1, - point_strides=[8, 16, 32, 64, 128], - point_base_scale=4, - norm_cfg=norm_cfg, - loss_cls=dict( - type='FocalLoss', - use_sigmoid=True, - gamma=2.0, - alpha=0.25, - loss_weight=1.0), - loss_bbox_init=dict(type='SmoothL1Loss', beta=0.11, loss_weight=0.5), - loss_bbox_refine=dict(type='SmoothL1Loss', beta=0.11, loss_weight=1.0), - transform_method='minmax', - use_grid_points=True)) -# training and testing settings -train_cfg = dict( - init=dict( - assigner=dict( - type='MaxIoUAssigner', - pos_iou_thr=0.5, - neg_iou_thr=0.4, - min_pos_iou=0, - ignore_iof_thr=-1), - allowed_border=-1, - pos_weight=-1, - debug=False), - refine=dict( - assigner=dict( - type='MaxIoUAssigner', - pos_iou_thr=0.5, - neg_iou_thr=0.4, - min_pos_iou=0, - ignore_iof_thr=-1), - allowed_border=-1, - pos_weight=-1, - debug=False)) -test_cfg = dict( - nms_pre=1000, - min_bbox_size=0, - score_thr=0.05, - nms=dict(type='nms', iou_thr=0.5), - max_per_img=100) -# 
dataset settings -dataset_type = 'CocoDataset' -data_root = 'data/coco/' -img_norm_cfg = dict( - mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) -train_pipeline = [ - dict(type='LoadImageFromFile'), - dict(type='LoadAnnotations', with_bbox=True), - dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), - dict(type='RandomFlip', flip_ratio=0.5), - dict(type='Normalize', **img_norm_cfg), - dict(type='Pad', size_divisor=32), - dict(type='DefaultFormatBundle'), - dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']), -] -test_pipeline = [ - dict(type='LoadImageFromFile'), - dict( - type='MultiScaleFlipAug', - img_scale=(1333, 800), - flip=False, - transforms=[ - dict(type='Resize', keep_ratio=True), - dict(type='RandomFlip'), - dict(type='Normalize', **img_norm_cfg), - dict(type='Pad', size_divisor=32), - dict(type='ImageToTensor', keys=['img']), - dict(type='Collect', keys=['img']), - ]) -] -data = dict( - imgs_per_gpu=2, - workers_per_gpu=2, - train=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_train2017.json', - img_prefix=data_root + 'train2017/', - pipeline=train_pipeline), - val=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_val2017.json', - img_prefix=data_root + 'val2017/', - pipeline=test_pipeline), - test=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_val2017.json', - img_prefix=data_root + 'val2017/', - pipeline=test_pipeline)) -evaluation = dict(interval=1, metric='bbox') -# optimizer -optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001) -optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) -# learning policy -lr_config = dict( - policy='step', - warmup='linear', - warmup_iters=500, - warmup_ratio=1.0 / 3, - step=[8, 11]) -checkpoint_config = dict(interval=1) -# yapf:disable -log_config = dict( - interval=50, - hooks=[ - dict(type='TextLoggerHook'), - # dict(type='TensorboardLoggerHook') - ]) -# yapf:enable 
-# runtime settings -total_epochs = 12 -dist_params = dict(backend='nccl') -log_level = 'INFO' -work_dir = './work_dirs/bbox_r50_grid_fpn_1x' -load_from = None -resume_from = None -workflow = [('train', 1)] diff --git a/configs/reppoints/bbox_r50_grid_fpn_gn-neck+head_1x_coco.py b/configs/reppoints/bbox_r50_grid_fpn_gn-neck+head_1x_coco.py new file mode 100644 index 0000000000000000000000000000000000000000..96bcfecb4932c125c2110f6f8fcfbea967165170 --- /dev/null +++ b/configs/reppoints/bbox_r50_grid_fpn_gn-neck+head_1x_coco.py @@ -0,0 +1,13 @@ +_base_ = './reppoints_moment_r50_fpn_gn-neck+head_1x_coco.py' +model = dict(bbox_head=dict(transform_method='minmax', use_grid_points=True)) +# training and testing settings +train_cfg = dict( + init=dict( + assigner=dict( + _delete_=True, + type='MaxIoUAssigner', + pos_iou_thr=0.5, + neg_iou_thr=0.4, + min_pos_iou=0, + ignore_iof_thr=-1))) +work_dir = './work_dirs/bbox_r50_grid_fpn_1x' diff --git a/configs/reppoints/reppoints_minmax_r50_fpn_1x.py b/configs/reppoints/reppoints_minmax_r50_fpn_1x.py deleted file mode 100644 index 974b749ed7ea537393edfc07598d92de263e04d2..0000000000000000000000000000000000000000 --- a/configs/reppoints/reppoints_minmax_r50_fpn_1x.py +++ /dev/null @@ -1,142 +0,0 @@ -# model settings -norm_cfg = dict(type='GN', num_groups=32, requires_grad=True) - -model = dict( - type='RepPointsDetector', - pretrained='torchvision://resnet50', - backbone=dict( - type='ResNet', - depth=50, - num_stages=4, - out_indices=(0, 1, 2, 3), - frozen_stages=1, - norm_cfg=dict(type='BN', requires_grad=True), - style='pytorch'), - neck=dict( - type='FPN', - in_channels=[256, 512, 1024, 2048], - out_channels=256, - start_level=1, - add_extra_convs=True, - num_outs=5, - norm_cfg=norm_cfg), - bbox_head=dict( - type='RepPointsHead', - num_classes=81, - in_channels=256, - feat_channels=256, - point_feat_channels=256, - stacked_convs=3, - num_points=9, - gradient_mul=0.1, - point_strides=[8, 16, 32, 64, 128], - point_base_scale=4, 
- norm_cfg=norm_cfg, - loss_cls=dict( - type='FocalLoss', - use_sigmoid=True, - gamma=2.0, - alpha=0.25, - loss_weight=1.0), - loss_bbox_init=dict(type='SmoothL1Loss', beta=0.11, loss_weight=0.5), - loss_bbox_refine=dict(type='SmoothL1Loss', beta=0.11, loss_weight=1.0), - transform_method='minmax')) -# training and testing settings -train_cfg = dict( - init=dict( - assigner=dict(type='PointAssigner', scale=4, pos_num=1), - allowed_border=-1, - pos_weight=-1, - debug=False), - refine=dict( - assigner=dict( - type='MaxIoUAssigner', - pos_iou_thr=0.5, - neg_iou_thr=0.4, - min_pos_iou=0, - ignore_iof_thr=-1), - allowed_border=-1, - pos_weight=-1, - debug=False)) -test_cfg = dict( - nms_pre=1000, - min_bbox_size=0, - score_thr=0.05, - nms=dict(type='nms', iou_thr=0.5), - max_per_img=100) -# dataset settings -dataset_type = 'CocoDataset' -data_root = 'data/coco/' -img_norm_cfg = dict( - mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) -train_pipeline = [ - dict(type='LoadImageFromFile'), - dict(type='LoadAnnotations', with_bbox=True), - dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), - dict(type='RandomFlip', flip_ratio=0.5), - dict(type='Normalize', **img_norm_cfg), - dict(type='Pad', size_divisor=32), - dict(type='DefaultFormatBundle'), - dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']), -] -test_pipeline = [ - dict(type='LoadImageFromFile'), - dict( - type='MultiScaleFlipAug', - img_scale=(1333, 800), - flip=False, - transforms=[ - dict(type='Resize', keep_ratio=True), - dict(type='RandomFlip'), - dict(type='Normalize', **img_norm_cfg), - dict(type='Pad', size_divisor=32), - dict(type='ImageToTensor', keys=['img']), - dict(type='Collect', keys=['img']), - ]) -] -data = dict( - imgs_per_gpu=2, - workers_per_gpu=2, - train=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_train2017.json', - img_prefix=data_root + 'train2017/', - pipeline=train_pipeline), - val=dict( - type=dataset_type, - 
ann_file=data_root + 'annotations/instances_val2017.json', - img_prefix=data_root + 'val2017/', - pipeline=test_pipeline), - test=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_val2017.json', - img_prefix=data_root + 'val2017/', - pipeline=test_pipeline)) -evaluation = dict(interval=1, metric='bbox') -# optimizer -optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001) -optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) -# learning policy -lr_config = dict( - policy='step', - warmup='linear', - warmup_iters=500, - warmup_ratio=1.0 / 3, - step=[8, 11]) -checkpoint_config = dict(interval=1) -# yapf:disable -log_config = dict( - interval=50, - hooks=[ - dict(type='TextLoggerHook'), - # dict(type='TensorboardLoggerHook') - ]) -# yapf:enable -# runtime settings -total_epochs = 12 -dist_params = dict(backend='nccl') -log_level = 'INFO' -work_dir = './work_dirs/reppoints_minmax_r50_fpn_1x' -load_from = None -resume_from = None -workflow = [('train', 1)] diff --git a/configs/reppoints/reppoints_minmax_r50_fpn_gn-neck+head_1x_coco.py b/configs/reppoints/reppoints_minmax_r50_fpn_gn-neck+head_1x_coco.py new file mode 100644 index 0000000000000000000000000000000000000000..4e3df686840c9ef2fbf92a3d298c4d9e5752d6fb --- /dev/null +++ b/configs/reppoints/reppoints_minmax_r50_fpn_gn-neck+head_1x_coco.py @@ -0,0 +1,3 @@ +_base_ = './reppoints_moment_r50_fpn_gn-neck+head_1x_coco.py' +model = dict(bbox_head=dict(transform_method='minmax')) +work_dir = './work_dirs/reppoints_minmax_r50_fpn_1x' diff --git a/configs/reppoints/reppoints_moment_r101_dcn_fpn_2x.py b/configs/reppoints/reppoints_moment_r101_dcn_fpn_2x.py deleted file mode 100644 index 44bf9fa576f194eef8be7c305e5380bee98e39ce..0000000000000000000000000000000000000000 --- a/configs/reppoints/reppoints_moment_r101_dcn_fpn_2x.py +++ /dev/null @@ -1,144 +0,0 @@ -# model settings -norm_cfg = dict(type='GN', num_groups=32, requires_grad=True) - -model = dict( - 
type='RepPointsDetector', - pretrained='torchvision://resnet101', - backbone=dict( - type='ResNet', - depth=101, - num_stages=4, - out_indices=(0, 1, 2, 3), - frozen_stages=1, - norm_cfg=dict(type='BN', requires_grad=True), - style='pytorch', - dcn=dict(type='DCN', deformable_groups=1, fallback_on_stride=False), - stage_with_dcn=(False, True, True, True)), - neck=dict( - type='FPN', - in_channels=[256, 512, 1024, 2048], - out_channels=256, - start_level=1, - add_extra_convs=True, - num_outs=5, - norm_cfg=norm_cfg), - bbox_head=dict( - type='RepPointsHead', - num_classes=81, - in_channels=256, - feat_channels=256, - point_feat_channels=256, - stacked_convs=3, - num_points=9, - gradient_mul=0.1, - point_strides=[8, 16, 32, 64, 128], - point_base_scale=4, - norm_cfg=norm_cfg, - loss_cls=dict( - type='FocalLoss', - use_sigmoid=True, - gamma=2.0, - alpha=0.25, - loss_weight=1.0), - loss_bbox_init=dict(type='SmoothL1Loss', beta=0.11, loss_weight=0.5), - loss_bbox_refine=dict(type='SmoothL1Loss', beta=0.11, loss_weight=1.0), - transform_method='moment')) -# training and testing settings -train_cfg = dict( - init=dict( - assigner=dict(type='PointAssigner', scale=4, pos_num=1), - allowed_border=-1, - pos_weight=-1, - debug=False), - refine=dict( - assigner=dict( - type='MaxIoUAssigner', - pos_iou_thr=0.5, - neg_iou_thr=0.4, - min_pos_iou=0, - ignore_iof_thr=-1), - allowed_border=-1, - pos_weight=-1, - debug=False)) -test_cfg = dict( - nms_pre=1000, - min_bbox_size=0, - score_thr=0.05, - nms=dict(type='nms', iou_thr=0.5), - max_per_img=100) -# dataset settings -dataset_type = 'CocoDataset' -data_root = 'data/coco/' -img_norm_cfg = dict( - mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) -train_pipeline = [ - dict(type='LoadImageFromFile'), - dict(type='LoadAnnotations', with_bbox=True), - dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), - dict(type='RandomFlip', flip_ratio=0.5), - dict(type='Normalize', **img_norm_cfg), - dict(type='Pad', 
size_divisor=32), - dict(type='DefaultFormatBundle'), - dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']), -] -test_pipeline = [ - dict(type='LoadImageFromFile'), - dict( - type='MultiScaleFlipAug', - img_scale=(1333, 800), - flip=False, - transforms=[ - dict(type='Resize', keep_ratio=True), - dict(type='RandomFlip'), - dict(type='Normalize', **img_norm_cfg), - dict(type='Pad', size_divisor=32), - dict(type='ImageToTensor', keys=['img']), - dict(type='Collect', keys=['img']), - ]) -] -data = dict( - imgs_per_gpu=2, - workers_per_gpu=2, - train=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_train2017.json', - img_prefix=data_root + 'train2017/', - pipeline=train_pipeline), - val=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_val2017.json', - img_prefix=data_root + 'val2017/', - pipeline=test_pipeline), - test=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_val2017.json', - img_prefix=data_root + 'val2017/', - pipeline=test_pipeline)) -evaluation = dict(interval=1, metric='bbox') -# optimizer -optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001) -optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) -# learning policy -lr_config = dict( - policy='step', - warmup='linear', - warmup_iters=500, - warmup_ratio=1.0 / 3, - step=[16, 22]) -checkpoint_config = dict(interval=1) -# yapf:disable -log_config = dict( - interval=50, - hooks=[ - dict(type='TextLoggerHook'), - # dict(type='TensorboardLoggerHook') - ]) -# yapf:enable -# runtime settings -total_epochs = 24 -dist_params = dict(backend='nccl') -log_level = 'INFO' -work_dir = './work_dirs/reppoints_moment_r101_dcn_fpn_2x' -load_from = None -resume_from = None -workflow = [('train', 1)] diff --git a/configs/reppoints/reppoints_moment_r101_dcn_fpn_2x_mt.py b/configs/reppoints/reppoints_moment_r101_dcn_fpn_2x_mt.py deleted file mode 100644 index 
633d1b199389d75cde98d111b1b95f06f9d66710..0000000000000000000000000000000000000000 --- a/configs/reppoints/reppoints_moment_r101_dcn_fpn_2x_mt.py +++ /dev/null @@ -1,148 +0,0 @@ -# model settings -norm_cfg = dict(type='GN', num_groups=32, requires_grad=True) - -model = dict( - type='RepPointsDetector', - pretrained='torchvision://resnet101', - backbone=dict( - type='ResNet', - depth=101, - num_stages=4, - out_indices=(0, 1, 2, 3), - frozen_stages=1, - norm_cfg=dict(type='BN', requires_grad=True), - style='pytorch', - dcn=dict(type='DCN', deformable_groups=1, fallback_on_stride=False), - stage_with_dcn=(False, True, True, True)), - neck=dict( - type='FPN', - in_channels=[256, 512, 1024, 2048], - out_channels=256, - start_level=1, - add_extra_convs=True, - num_outs=5, - norm_cfg=norm_cfg), - bbox_head=dict( - type='RepPointsHead', - num_classes=81, - in_channels=256, - feat_channels=256, - point_feat_channels=256, - stacked_convs=3, - num_points=9, - gradient_mul=0.1, - point_strides=[8, 16, 32, 64, 128], - point_base_scale=4, - norm_cfg=norm_cfg, - loss_cls=dict( - type='FocalLoss', - use_sigmoid=True, - gamma=2.0, - alpha=0.25, - loss_weight=1.0), - loss_bbox_init=dict(type='SmoothL1Loss', beta=0.11, loss_weight=0.5), - loss_bbox_refine=dict(type='SmoothL1Loss', beta=0.11, loss_weight=1.0), - transform_method='moment')) -# training and testing settings -train_cfg = dict( - init=dict( - assigner=dict(type='PointAssigner', scale=4, pos_num=1), - allowed_border=-1, - pos_weight=-1, - debug=False), - refine=dict( - assigner=dict( - type='MaxIoUAssigner', - pos_iou_thr=0.5, - neg_iou_thr=0.4, - min_pos_iou=0, - ignore_iof_thr=-1), - allowed_border=-1, - pos_weight=-1, - debug=False)) -test_cfg = dict( - nms_pre=1000, - min_bbox_size=0, - score_thr=0.05, - nms=dict(type='nms', iou_thr=0.5), - max_per_img=100) -# dataset settings -dataset_type = 'CocoDataset' -data_root = 'data/coco/' -img_norm_cfg = dict( - mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], 
to_rgb=True) -train_pipeline = [ - dict(type='LoadImageFromFile'), - dict(type='LoadAnnotations', with_bbox=True), - dict( - type='Resize', - img_scale=[(1333, 480), (1333, 960)], - keep_ratio=True, - multiscale_mode='range'), - dict(type='RandomFlip', flip_ratio=0.5), - dict(type='Normalize', **img_norm_cfg), - dict(type='Pad', size_divisor=32), - dict(type='DefaultFormatBundle'), - dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']), -] -test_pipeline = [ - dict(type='LoadImageFromFile'), - dict( - type='MultiScaleFlipAug', - img_scale=(1333, 800), - flip=False, - transforms=[ - dict(type='Resize', keep_ratio=True), - dict(type='RandomFlip'), - dict(type='Normalize', **img_norm_cfg), - dict(type='Pad', size_divisor=32), - dict(type='ImageToTensor', keys=['img']), - dict(type='Collect', keys=['img']), - ]) -] -data = dict( - imgs_per_gpu=2, - workers_per_gpu=2, - train=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_train2017.json', - img_prefix=data_root + 'train2017/', - pipeline=train_pipeline), - val=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_val2017.json', - img_prefix=data_root + 'val2017/', - pipeline=test_pipeline), - test=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_val2017.json', - img_prefix=data_root + 'val2017/', - pipeline=test_pipeline)) -evaluation = dict(interval=1, metric='bbox') -# optimizer -optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001) -optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) -# learning policy -lr_config = dict( - policy='step', - warmup='linear', - warmup_iters=500, - warmup_ratio=1.0 / 3, - step=[16, 22]) -checkpoint_config = dict(interval=1) -# yapf:disable -log_config = dict( - interval=50, - hooks=[ - dict(type='TextLoggerHook'), - # dict(type='TensorboardLoggerHook') - ]) -# yapf:enable -# runtime settings -total_epochs = 24 -dist_params = dict(backend='nccl') -log_level = 'INFO' -work_dir = 
'./work_dirs/reppoints_moment_r101_dcn_fpn_2x_mt' -load_from = None -resume_from = None -workflow = [('train', 1)] diff --git a/configs/reppoints/reppoints_moment_r101_fpn_2x.py b/configs/reppoints/reppoints_moment_r101_fpn_2x.py deleted file mode 100644 index b610c3f21fb375061b4e2de54a0d099c53a7d3be..0000000000000000000000000000000000000000 --- a/configs/reppoints/reppoints_moment_r101_fpn_2x.py +++ /dev/null @@ -1,142 +0,0 @@ -# model settings -norm_cfg = dict(type='GN', num_groups=32, requires_grad=True) - -model = dict( - type='RepPointsDetector', - pretrained='torchvision://resnet101', - backbone=dict( - type='ResNet', - depth=101, - num_stages=4, - out_indices=(0, 1, 2, 3), - frozen_stages=1, - norm_cfg=dict(type='BN', requires_grad=True), - style='pytorch'), - neck=dict( - type='FPN', - in_channels=[256, 512, 1024, 2048], - out_channels=256, - start_level=1, - add_extra_convs=True, - num_outs=5, - norm_cfg=norm_cfg), - bbox_head=dict( - type='RepPointsHead', - num_classes=81, - in_channels=256, - feat_channels=256, - point_feat_channels=256, - stacked_convs=3, - num_points=9, - gradient_mul=0.1, - point_strides=[8, 16, 32, 64, 128], - point_base_scale=4, - norm_cfg=norm_cfg, - loss_cls=dict( - type='FocalLoss', - use_sigmoid=True, - gamma=2.0, - alpha=0.25, - loss_weight=1.0), - loss_bbox_init=dict(type='SmoothL1Loss', beta=0.11, loss_weight=0.5), - loss_bbox_refine=dict(type='SmoothL1Loss', beta=0.11, loss_weight=1.0), - transform_method='moment')) -# training and testing settings -train_cfg = dict( - init=dict( - assigner=dict(type='PointAssigner', scale=4, pos_num=1), - allowed_border=-1, - pos_weight=-1, - debug=False), - refine=dict( - assigner=dict( - type='MaxIoUAssigner', - pos_iou_thr=0.5, - neg_iou_thr=0.4, - min_pos_iou=0, - ignore_iof_thr=-1), - allowed_border=-1, - pos_weight=-1, - debug=False)) -test_cfg = dict( - nms_pre=1000, - min_bbox_size=0, - score_thr=0.05, - nms=dict(type='nms', iou_thr=0.5), - max_per_img=100) -# dataset settings 
-dataset_type = 'CocoDataset' -data_root = 'data/coco/' -img_norm_cfg = dict( - mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) -train_pipeline = [ - dict(type='LoadImageFromFile'), - dict(type='LoadAnnotations', with_bbox=True), - dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), - dict(type='RandomFlip', flip_ratio=0.5), - dict(type='Normalize', **img_norm_cfg), - dict(type='Pad', size_divisor=32), - dict(type='DefaultFormatBundle'), - dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']), -] -test_pipeline = [ - dict(type='LoadImageFromFile'), - dict( - type='MultiScaleFlipAug', - img_scale=(1333, 800), - flip=False, - transforms=[ - dict(type='Resize', keep_ratio=True), - dict(type='RandomFlip'), - dict(type='Normalize', **img_norm_cfg), - dict(type='Pad', size_divisor=32), - dict(type='ImageToTensor', keys=['img']), - dict(type='Collect', keys=['img']), - ]) -] -data = dict( - imgs_per_gpu=2, - workers_per_gpu=2, - train=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_train2017.json', - img_prefix=data_root + 'train2017/', - pipeline=train_pipeline), - val=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_val2017.json', - img_prefix=data_root + 'val2017/', - pipeline=test_pipeline), - test=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_val2017.json', - img_prefix=data_root + 'val2017/', - pipeline=test_pipeline)) -evaluation = dict(interval=1, metric='bbox') -# optimizer -optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001) -optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) -# learning policy -lr_config = dict( - policy='step', - warmup='linear', - warmup_iters=500, - warmup_ratio=1.0 / 3, - step=[16, 22]) -checkpoint_config = dict(interval=1) -# yapf:disable -log_config = dict( - interval=50, - hooks=[ - dict(type='TextLoggerHook'), - # dict(type='TensorboardLoggerHook') - ]) -# yapf:enable -# runtime 
settings -total_epochs = 24 -dist_params = dict(backend='nccl') -log_level = 'INFO' -work_dir = './work_dirs/reppoints_moment_r101_fpn_2x' -load_from = None -resume_from = None -workflow = [('train', 1)] diff --git a/configs/reppoints/reppoints_moment_r101_fpn_2x_mt.py b/configs/reppoints/reppoints_moment_r101_fpn_2x_mt.py deleted file mode 100644 index 75dcc4606019d8909723b90c1df2ffd15e12f57a..0000000000000000000000000000000000000000 --- a/configs/reppoints/reppoints_moment_r101_fpn_2x_mt.py +++ /dev/null @@ -1,146 +0,0 @@ -# model settings -norm_cfg = dict(type='GN', num_groups=32, requires_grad=True) - -model = dict( - type='RepPointsDetector', - pretrained='torchvision://resnet101', - backbone=dict( - type='ResNet', - depth=101, - num_stages=4, - out_indices=(0, 1, 2, 3), - frozen_stages=1, - norm_cfg=dict(type='BN', requires_grad=True), - style='pytorch'), - neck=dict( - type='FPN', - in_channels=[256, 512, 1024, 2048], - out_channels=256, - start_level=1, - add_extra_convs=True, - num_outs=5, - norm_cfg=norm_cfg), - bbox_head=dict( - type='RepPointsHead', - num_classes=81, - in_channels=256, - feat_channels=256, - point_feat_channels=256, - stacked_convs=3, - num_points=9, - gradient_mul=0.1, - point_strides=[8, 16, 32, 64, 128], - point_base_scale=4, - norm_cfg=norm_cfg, - loss_cls=dict( - type='FocalLoss', - use_sigmoid=True, - gamma=2.0, - alpha=0.25, - loss_weight=1.0), - loss_bbox_init=dict(type='SmoothL1Loss', beta=0.11, loss_weight=0.5), - loss_bbox_refine=dict(type='SmoothL1Loss', beta=0.11, loss_weight=1.0), - transform_method='moment')) -# training and testing settings -train_cfg = dict( - init=dict( - assigner=dict(type='PointAssigner', scale=4, pos_num=1), - allowed_border=-1, - pos_weight=-1, - debug=False), - refine=dict( - assigner=dict( - type='MaxIoUAssigner', - pos_iou_thr=0.5, - neg_iou_thr=0.4, - min_pos_iou=0, - ignore_iof_thr=-1), - allowed_border=-1, - pos_weight=-1, - debug=False)) -test_cfg = dict( - nms_pre=1000, - min_bbox_size=0, - 
score_thr=0.05, - nms=dict(type='nms', iou_thr=0.5), - max_per_img=100) -# dataset settings -dataset_type = 'CocoDataset' -data_root = 'data/coco/' -img_norm_cfg = dict( - mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) -train_pipeline = [ - dict(type='LoadImageFromFile'), - dict(type='LoadAnnotations', with_bbox=True), - dict( - type='Resize', - img_scale=[(1333, 480), (1333, 960)], - keep_ratio=True, - multiscale_mode='range'), - dict(type='RandomFlip', flip_ratio=0.5), - dict(type='Normalize', **img_norm_cfg), - dict(type='Pad', size_divisor=32), - dict(type='DefaultFormatBundle'), - dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']), -] -test_pipeline = [ - dict(type='LoadImageFromFile'), - dict( - type='MultiScaleFlipAug', - img_scale=(1333, 800), - flip=False, - transforms=[ - dict(type='Resize', keep_ratio=True), - dict(type='RandomFlip'), - dict(type='Normalize', **img_norm_cfg), - dict(type='Pad', size_divisor=32), - dict(type='ImageToTensor', keys=['img']), - dict(type='Collect', keys=['img']), - ]) -] -data = dict( - imgs_per_gpu=2, - workers_per_gpu=2, - train=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_train2017.json', - img_prefix=data_root + 'train2017/', - pipeline=train_pipeline), - val=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_val2017.json', - img_prefix=data_root + 'val2017/', - pipeline=test_pipeline), - test=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_val2017.json', - img_prefix=data_root + 'val2017/', - pipeline=test_pipeline)) -evaluation = dict(interval=1, metric='bbox') -# optimizer -optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001) -optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) -# learning policy -lr_config = dict( - policy='step', - warmup='linear', - warmup_iters=500, - warmup_ratio=1.0 / 3, - step=[16, 22]) -checkpoint_config = dict(interval=1) -# yapf:disable -log_config 
= dict( - interval=50, - hooks=[ - dict(type='TextLoggerHook'), - # dict(type='TensorboardLoggerHook') - ]) -# yapf:enable -# runtime settings -total_epochs = 24 -dist_params = dict(backend='nccl') -log_level = 'INFO' -work_dir = './work_dirs/reppoints_moment_r101_fpn_2x_mt' -load_from = None -resume_from = None -workflow = [('train', 1)] diff --git a/configs/reppoints/reppoints_moment_r101_fpn_dconv_c3-c5_gn-neck+head_2x_coco.py b/configs/reppoints/reppoints_moment_r101_fpn_dconv_c3-c5_gn-neck+head_2x_coco.py new file mode 100644 index 0000000000000000000000000000000000000000..e25d8dce8c97dd4e0f4262990e7adb0eeb2ece09 --- /dev/null +++ b/configs/reppoints/reppoints_moment_r101_fpn_dconv_c3-c5_gn-neck+head_2x_coco.py @@ -0,0 +1,8 @@ +_base_ = './reppoints_moment_r50_fpn_gn-neck+head_2x_coco.py' +model = dict( + pretrained='torchvision://resnet101', + backbone=dict( + depth=101, + dcn=dict(type='DCN', deformable_groups=1, fallback_on_stride=False), + stage_with_dcn=(False, True, True, True))) +work_dir = './work_dirs/reppoints_moment_r101_dcn_fpn_2x' diff --git a/configs/reppoints/reppoints_moment_r101_fpn_dconv_c3-c5_gn-neck+head_mstrain_480-960_2x_coco.py b/configs/reppoints/reppoints_moment_r101_fpn_dconv_c3-c5_gn-neck+head_mstrain_480-960_2x_coco.py new file mode 100644 index 0000000000000000000000000000000000000000..2511fd3e2b6ed3bd9262b327701f7f0cc363fe99 --- /dev/null +++ b/configs/reppoints/reppoints_moment_r101_fpn_dconv_c3-c5_gn-neck+head_mstrain_480-960_2x_coco.py @@ -0,0 +1,8 @@ +_base_ = './reppoints_moment_r50_fpn_gn-neck+head_mstrain_480-960_2x_coco.py' +model = dict( + pretrained='torchvision://resnet101', + backbone=dict( + depth=101, + dcn=dict(type='DCN', deformable_groups=1, fallback_on_stride=False), + stage_with_dcn=(False, True, True, True))) +work_dir = './work_dirs/reppoints_moment_r101_dcn_fpn_2x_mt' diff --git a/configs/reppoints/reppoints_moment_r101_fpn_gn-neck+head_2x_coco.py 
b/configs/reppoints/reppoints_moment_r101_fpn_gn-neck+head_2x_coco.py new file mode 100644 index 0000000000000000000000000000000000000000..7b0e031a085df11d7d414b975b8305620a56434c --- /dev/null +++ b/configs/reppoints/reppoints_moment_r101_fpn_gn-neck+head_2x_coco.py @@ -0,0 +1,3 @@ +_base_ = './reppoints_moment_r50_fpn_gn-neck+head_2x_coco.py' +model = dict(pretrained='torchvision://resnet101', backbone=dict(depth=101)) +work_dir = './work_dirs/reppoints_moment_r101_fpn_2x' diff --git a/configs/reppoints/reppoints_moment_r101_fpn_gn-neck+head_mstrain_480-960_2x_coco.py b/configs/reppoints/reppoints_moment_r101_fpn_gn-neck+head_mstrain_480-960_2x_coco.py new file mode 100644 index 0000000000000000000000000000000000000000..fddd62da6a68eaeb7c9830fb5efbe841fcf76689 --- /dev/null +++ b/configs/reppoints/reppoints_moment_r101_fpn_gn-neck+head_mstrain_480-960_2x_coco.py @@ -0,0 +1,3 @@ +_base_ = './reppoints_moment_r50_fpn_gn-neck+head_mstrain_480-960_2x_coco.py' +model = dict(pretrained='torchvision://resnet101', backbone=dict(depth=101)) +work_dir = './work_dirs/reppoints_moment_r101_fpn_2x_mt' diff --git a/configs/reppoints/reppoints_moment_r50_fpn_1x.py b/configs/reppoints/reppoints_moment_r50_fpn_1x.py deleted file mode 100644 index a2ce0284fc5c7ad141bc9f57fb706ea53628df64..0000000000000000000000000000000000000000 --- a/configs/reppoints/reppoints_moment_r50_fpn_1x.py +++ /dev/null @@ -1,142 +0,0 @@ -# model settings -norm_cfg = dict(type='GN', num_groups=32, requires_grad=True) - -model = dict( - type='RepPointsDetector', - pretrained='torchvision://resnet50', - backbone=dict( - type='ResNet', - depth=50, - num_stages=4, - out_indices=(0, 1, 2, 3), - frozen_stages=1, - norm_cfg=dict(type='BN', requires_grad=True), - style='pytorch'), - neck=dict( - type='FPN', - in_channels=[256, 512, 1024, 2048], - out_channels=256, - start_level=1, - add_extra_convs=True, - num_outs=5, - norm_cfg=norm_cfg), - bbox_head=dict( - type='RepPointsHead', - num_classes=81, - 
in_channels=256, - feat_channels=256, - point_feat_channels=256, - stacked_convs=3, - num_points=9, - gradient_mul=0.1, - point_strides=[8, 16, 32, 64, 128], - point_base_scale=4, - norm_cfg=norm_cfg, - loss_cls=dict( - type='FocalLoss', - use_sigmoid=True, - gamma=2.0, - alpha=0.25, - loss_weight=1.0), - loss_bbox_init=dict(type='SmoothL1Loss', beta=0.11, loss_weight=0.5), - loss_bbox_refine=dict(type='SmoothL1Loss', beta=0.11, loss_weight=1.0), - transform_method='moment')) -# training and testing settings -train_cfg = dict( - init=dict( - assigner=dict(type='PointAssigner', scale=4, pos_num=1), - allowed_border=-1, - pos_weight=-1, - debug=False), - refine=dict( - assigner=dict( - type='MaxIoUAssigner', - pos_iou_thr=0.5, - neg_iou_thr=0.4, - min_pos_iou=0, - ignore_iof_thr=-1), - allowed_border=-1, - pos_weight=-1, - debug=False)) -test_cfg = dict( - nms_pre=1000, - min_bbox_size=0, - score_thr=0.05, - nms=dict(type='nms', iou_thr=0.5), - max_per_img=100) -# dataset settings -dataset_type = 'CocoDataset' -data_root = 'data/coco/' -img_norm_cfg = dict( - mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) -train_pipeline = [ - dict(type='LoadImageFromFile'), - dict(type='LoadAnnotations', with_bbox=True), - dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), - dict(type='RandomFlip', flip_ratio=0.5), - dict(type='Normalize', **img_norm_cfg), - dict(type='Pad', size_divisor=32), - dict(type='DefaultFormatBundle'), - dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']), -] -test_pipeline = [ - dict(type='LoadImageFromFile'), - dict( - type='MultiScaleFlipAug', - img_scale=(1333, 800), - flip=False, - transforms=[ - dict(type='Resize', keep_ratio=True), - dict(type='RandomFlip'), - dict(type='Normalize', **img_norm_cfg), - dict(type='Pad', size_divisor=32), - dict(type='ImageToTensor', keys=['img']), - dict(type='Collect', keys=['img']), - ]) -] -data = dict( - imgs_per_gpu=2, - workers_per_gpu=2, - train=dict( - 
type=dataset_type, - ann_file=data_root + 'annotations/instances_train2017.json', - img_prefix=data_root + 'train2017/', - pipeline=train_pipeline), - val=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_val2017.json', - img_prefix=data_root + 'val2017/', - pipeline=test_pipeline), - test=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_val2017.json', - img_prefix=data_root + 'val2017/', - pipeline=test_pipeline)) -evaluation = dict(interval=1, metric='bbox') -# optimizer -optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001) -optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) -# learning policy -lr_config = dict( - policy='step', - warmup='linear', - warmup_iters=500, - warmup_ratio=1.0 / 3, - step=[8, 11]) -checkpoint_config = dict(interval=1) -# yapf:disable -log_config = dict( - interval=50, - hooks=[ - dict(type='TextLoggerHook'), - # dict(type='TensorboardLoggerHook') - ]) -# yapf:enable -# runtime settings -total_epochs = 12 -dist_params = dict(backend='nccl') -log_level = 'INFO' -work_dir = './work_dirs/reppoints_moment_r50_fpn_1x' -load_from = None -resume_from = None -workflow = [('train', 1)] diff --git a/configs/reppoints/reppoints_moment_r50_fpn_1x_coco.py b/configs/reppoints/reppoints_moment_r50_fpn_1x_coco.py new file mode 100644 index 0000000000000000000000000000000000000000..1e25c91ab7bb20dfb54be9012eb156208986fae2 --- /dev/null +++ b/configs/reppoints/reppoints_moment_r50_fpn_1x_coco.py @@ -0,0 +1,67 @@ +_base_ = [ + '../_base_/datasets/coco_detection.py', + '../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py' +] +model = dict( + type='RepPointsDetector', + pretrained='torchvision://resnet50', + backbone=dict( + type='ResNet', + depth=50, + num_stages=4, + out_indices=(0, 1, 2, 3), + frozen_stages=1, + norm_cfg=dict(type='BN', requires_grad=True), + style='pytorch'), + neck=dict( + type='FPN', + in_channels=[256, 512, 1024, 2048], + out_channels=256, 
+ start_level=1, + add_extra_convs=True, + num_outs=5), + bbox_head=dict( + type='RepPointsHead', + num_classes=81, + in_channels=256, + feat_channels=256, + point_feat_channels=256, + stacked_convs=3, + num_points=9, + gradient_mul=0.1, + point_strides=[8, 16, 32, 64, 128], + point_base_scale=4, + loss_cls=dict( + type='FocalLoss', + use_sigmoid=True, + gamma=2.0, + alpha=0.25, + loss_weight=1.0), + loss_bbox_init=dict(type='SmoothL1Loss', beta=0.11, loss_weight=0.5), + loss_bbox_refine=dict(type='SmoothL1Loss', beta=0.11, loss_weight=1.0), + transform_method='moment')) +# training and testing settings +train_cfg = dict( + init=dict( + assigner=dict(type='PointAssigner', scale=4, pos_num=1), + allowed_border=-1, + pos_weight=-1, + debug=False), + refine=dict( + assigner=dict( + type='MaxIoUAssigner', + pos_iou_thr=0.5, + neg_iou_thr=0.4, + min_pos_iou=0, + ignore_iof_thr=-1), + allowed_border=-1, + pos_weight=-1, + debug=False)) +test_cfg = dict( + nms_pre=1000, + min_bbox_size=0, + score_thr=0.05, + nms=dict(type='nms', iou_thr=0.5), + max_per_img=100) +optimizer = dict(lr=0.01) +work_dir = './work_dirs/reppoints_moment_r50_no_gn_fpn_1x' diff --git a/configs/reppoints/reppoints_moment_r50_fpn_2x.py b/configs/reppoints/reppoints_moment_r50_fpn_2x.py deleted file mode 100644 index cc51f3afe2b8fe66fc0878f6ab741b4fdfe6ba5d..0000000000000000000000000000000000000000 --- a/configs/reppoints/reppoints_moment_r50_fpn_2x.py +++ /dev/null @@ -1,142 +0,0 @@ -# model settings -norm_cfg = dict(type='GN', num_groups=32, requires_grad=True) - -model = dict( - type='RepPointsDetector', - pretrained='torchvision://resnet50', - backbone=dict( - type='ResNet', - depth=50, - num_stages=4, - out_indices=(0, 1, 2, 3), - frozen_stages=1, - norm_cfg=dict(type='BN', requires_grad=True), - style='pytorch'), - neck=dict( - type='FPN', - in_channels=[256, 512, 1024, 2048], - out_channels=256, - start_level=1, - add_extra_convs=True, - num_outs=5, - norm_cfg=norm_cfg), - bbox_head=dict( - 
type='RepPointsHead', - num_classes=81, - in_channels=256, - feat_channels=256, - point_feat_channels=256, - stacked_convs=3, - num_points=9, - gradient_mul=0.1, - point_strides=[8, 16, 32, 64, 128], - point_base_scale=4, - norm_cfg=norm_cfg, - loss_cls=dict( - type='FocalLoss', - use_sigmoid=True, - gamma=2.0, - alpha=0.25, - loss_weight=1.0), - loss_bbox_init=dict(type='SmoothL1Loss', beta=0.11, loss_weight=0.5), - loss_bbox_refine=dict(type='SmoothL1Loss', beta=0.11, loss_weight=1.0), - transform_method='moment')) -# training and testing settings -train_cfg = dict( - init=dict( - assigner=dict(type='PointAssigner', scale=4, pos_num=1), - allowed_border=-1, - pos_weight=-1, - debug=False), - refine=dict( - assigner=dict( - type='MaxIoUAssigner', - pos_iou_thr=0.5, - neg_iou_thr=0.4, - min_pos_iou=0, - ignore_iof_thr=-1), - allowed_border=-1, - pos_weight=-1, - debug=False)) -test_cfg = dict( - nms_pre=1000, - min_bbox_size=0, - score_thr=0.05, - nms=dict(type='nms', iou_thr=0.5), - max_per_img=100) -# dataset settings -dataset_type = 'CocoDataset' -data_root = 'data/coco/' -img_norm_cfg = dict( - mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) -train_pipeline = [ - dict(type='LoadImageFromFile'), - dict(type='LoadAnnotations', with_bbox=True), - dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), - dict(type='RandomFlip', flip_ratio=0.5), - dict(type='Normalize', **img_norm_cfg), - dict(type='Pad', size_divisor=32), - dict(type='DefaultFormatBundle'), - dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']), -] -test_pipeline = [ - dict(type='LoadImageFromFile'), - dict( - type='MultiScaleFlipAug', - img_scale=(1333, 800), - flip=False, - transforms=[ - dict(type='Resize', keep_ratio=True), - dict(type='RandomFlip'), - dict(type='Normalize', **img_norm_cfg), - dict(type='Pad', size_divisor=32), - dict(type='ImageToTensor', keys=['img']), - dict(type='Collect', keys=['img']), - ]) -] -data = dict( - imgs_per_gpu=2, - 
workers_per_gpu=2, - train=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_train2017.json', - img_prefix=data_root + 'train2017/', - pipeline=train_pipeline), - val=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_val2017.json', - img_prefix=data_root + 'val2017/', - pipeline=test_pipeline), - test=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_val2017.json', - img_prefix=data_root + 'val2017/', - pipeline=test_pipeline)) -evaluation = dict(interval=1, metric='bbox') -# optimizer -optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001) -optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) -# learning policy -lr_config = dict( - policy='step', - warmup='linear', - warmup_iters=500, - warmup_ratio=1.0 / 3, - step=[16, 22]) -checkpoint_config = dict(interval=1) -# yapf:disable -log_config = dict( - interval=50, - hooks=[ - dict(type='TextLoggerHook'), - # dict(type='TensorboardLoggerHook') - ]) -# yapf:enable -# runtime settings -total_epochs = 24 -dist_params = dict(backend='nccl') -log_level = 'INFO' -work_dir = './work_dirs/reppoints_moment_r50_fpn_2x' -load_from = None -resume_from = None -workflow = [('train', 1)] diff --git a/configs/reppoints/reppoints_moment_r50_fpn_2x_mt.py b/configs/reppoints/reppoints_moment_r50_fpn_2x_mt.py deleted file mode 100644 index 62ad7b0d7566a4f9098f26cf56f2f3ea1abce346..0000000000000000000000000000000000000000 --- a/configs/reppoints/reppoints_moment_r50_fpn_2x_mt.py +++ /dev/null @@ -1,146 +0,0 @@ -# model settings -norm_cfg = dict(type='GN', num_groups=32, requires_grad=True) - -model = dict( - type='RepPointsDetector', - pretrained='torchvision://resnet50', - backbone=dict( - type='ResNet', - depth=50, - num_stages=4, - out_indices=(0, 1, 2, 3), - frozen_stages=1, - norm_cfg=dict(type='BN', requires_grad=True), - style='pytorch'), - neck=dict( - type='FPN', - in_channels=[256, 512, 1024, 2048], - out_channels=256, - 
start_level=1, - add_extra_convs=True, - num_outs=5, - norm_cfg=norm_cfg), - bbox_head=dict( - type='RepPointsHead', - num_classes=81, - in_channels=256, - feat_channels=256, - point_feat_channels=256, - stacked_convs=3, - num_points=9, - gradient_mul=0.1, - point_strides=[8, 16, 32, 64, 128], - point_base_scale=4, - norm_cfg=norm_cfg, - loss_cls=dict( - type='FocalLoss', - use_sigmoid=True, - gamma=2.0, - alpha=0.25, - loss_weight=1.0), - loss_bbox_init=dict(type='SmoothL1Loss', beta=0.11, loss_weight=0.5), - loss_bbox_refine=dict(type='SmoothL1Loss', beta=0.11, loss_weight=1.0), - transform_method='moment')) -# training and testing settings -train_cfg = dict( - init=dict( - assigner=dict(type='PointAssigner', scale=4, pos_num=1), - allowed_border=-1, - pos_weight=-1, - debug=False), - refine=dict( - assigner=dict( - type='MaxIoUAssigner', - pos_iou_thr=0.5, - neg_iou_thr=0.4, - min_pos_iou=0, - ignore_iof_thr=-1), - allowed_border=-1, - pos_weight=-1, - debug=False)) -test_cfg = dict( - nms_pre=1000, - min_bbox_size=0, - score_thr=0.05, - nms=dict(type='nms', iou_thr=0.5), - max_per_img=100) -# dataset settings -dataset_type = 'CocoDataset' -data_root = 'data/coco/' -img_norm_cfg = dict( - mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) -train_pipeline = [ - dict(type='LoadImageFromFile'), - dict(type='LoadAnnotations', with_bbox=True), - dict( - type='Resize', - img_scale=[(1333, 480), (1333, 960)], - keep_ratio=True, - multiscale_mode='range'), - dict(type='RandomFlip', flip_ratio=0.5), - dict(type='Normalize', **img_norm_cfg), - dict(type='Pad', size_divisor=32), - dict(type='DefaultFormatBundle'), - dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']), -] -test_pipeline = [ - dict(type='LoadImageFromFile'), - dict( - type='MultiScaleFlipAug', - img_scale=(1333, 800), - flip=False, - transforms=[ - dict(type='Resize', keep_ratio=True), - dict(type='RandomFlip'), - dict(type='Normalize', **img_norm_cfg), - dict(type='Pad', 
size_divisor=32), - dict(type='ImageToTensor', keys=['img']), - dict(type='Collect', keys=['img']), - ]) -] -data = dict( - imgs_per_gpu=2, - workers_per_gpu=2, - train=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_train2017.json', - img_prefix=data_root + 'train2017/', - pipeline=train_pipeline), - val=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_val2017.json', - img_prefix=data_root + 'val2017/', - pipeline=test_pipeline), - test=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_val2017.json', - img_prefix=data_root + 'val2017/', - pipeline=test_pipeline)) -evaluation = dict(interval=1, metric='bbox') -# optimizer -optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001) -optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) -# learning policy -lr_config = dict( - policy='step', - warmup='linear', - warmup_iters=500, - warmup_ratio=1.0 / 3, - step=[16, 22]) -checkpoint_config = dict(interval=1) -# yapf:disable -log_config = dict( - interval=50, - hooks=[ - dict(type='TextLoggerHook'), - # dict(type='TensorboardLoggerHook') - ]) -# yapf:enable -# runtime settings -total_epochs = 24 -dist_params = dict(backend='nccl') -log_level = 'INFO' -work_dir = './work_dirs/reppoints_moment_r50_fpn_2x_mt' -load_from = None -resume_from = None -workflow = [('train', 1)] diff --git a/configs/reppoints/reppoints_moment_r50_fpn_gn-neck+head_1x_coco.py b/configs/reppoints/reppoints_moment_r50_fpn_gn-neck+head_1x_coco.py new file mode 100644 index 0000000000000000000000000000000000000000..fb41fe519faead7839bc3496da486eef55a7881b --- /dev/null +++ b/configs/reppoints/reppoints_moment_r50_fpn_gn-neck+head_1x_coco.py @@ -0,0 +1,5 @@ +_base_ = './reppoints_moment_r50_fpn_1x_coco.py' +norm_cfg = dict(type='GN', num_groups=32, requires_grad=True) +model = dict(neck=dict(norm_cfg=norm_cfg), bbox_head=dict(norm_cfg=norm_cfg)) +optimizer = dict(lr=0.01) +work_dir = 
'./work_dirs/reppoints_moment_r50_fpn_1x' diff --git a/configs/reppoints/reppoints_moment_r50_fpn_gn-neck+head_2x_coco.py b/configs/reppoints/reppoints_moment_r50_fpn_gn-neck+head_2x_coco.py new file mode 100644 index 0000000000000000000000000000000000000000..d5bdbe7ffdb13ab6eee9b84c257d8307c850b660 --- /dev/null +++ b/configs/reppoints/reppoints_moment_r50_fpn_gn-neck+head_2x_coco.py @@ -0,0 +1,4 @@ +_base_ = './reppoints_moment_r50_fpn_gn-neck+head_1x_coco.py' +lr_config = dict(step=[16, 22]) +total_epochs = 24 +work_dir = './work_dirs/reppoints_moment_r50_fpn_2x' diff --git a/configs/reppoints/reppoints_moment_r50_fpn_gn-neck+head_mstrain_480-960_2x_coco.py b/configs/reppoints/reppoints_moment_r50_fpn_gn-neck+head_mstrain_480-960_2x_coco.py new file mode 100644 index 0000000000000000000000000000000000000000..c72ff9acbe361881a0962629cbe3fb40312f7192 --- /dev/null +++ b/configs/reppoints/reppoints_moment_r50_fpn_gn-neck+head_mstrain_480-960_2x_coco.py @@ -0,0 +1,19 @@ +_base_ = './reppoints_moment_r50_fpn_gn-neck+head_2x_coco.py' +img_norm_cfg = dict( + mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations', with_bbox=True), + dict( + type='Resize', + img_scale=[(1333, 480), (1333, 960)], + keep_ratio=True, + multiscale_mode='range'), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']), +] +data = dict(train=dict(pipeline=train_pipeline)) +work_dir = './work_dirs/reppoints_moment_r50_fpn_2x_mt' diff --git a/configs/reppoints/reppoints_moment_r50_no_gn_fpn_1x.py b/configs/reppoints/reppoints_moment_r50_no_gn_fpn_1x.py deleted file mode 100644 index bdaf07eefcb40aa762a98ed8aa49a49729b48ea1..0000000000000000000000000000000000000000 --- a/configs/reppoints/reppoints_moment_r50_no_gn_fpn_1x.py +++ 
/dev/null @@ -1,138 +0,0 @@ -# model settings -model = dict( - type='RepPointsDetector', - pretrained='torchvision://resnet50', - backbone=dict( - type='ResNet', - depth=50, - num_stages=4, - out_indices=(0, 1, 2, 3), - frozen_stages=1, - norm_cfg=dict(type='BN', requires_grad=True), - style='pytorch'), - neck=dict( - type='FPN', - in_channels=[256, 512, 1024, 2048], - out_channels=256, - start_level=1, - add_extra_convs=True, - num_outs=5), - bbox_head=dict( - type='RepPointsHead', - num_classes=81, - in_channels=256, - feat_channels=256, - point_feat_channels=256, - stacked_convs=3, - num_points=9, - gradient_mul=0.1, - point_strides=[8, 16, 32, 64, 128], - point_base_scale=4, - loss_cls=dict( - type='FocalLoss', - use_sigmoid=True, - gamma=2.0, - alpha=0.25, - loss_weight=1.0), - loss_bbox_init=dict(type='SmoothL1Loss', beta=0.11, loss_weight=0.5), - loss_bbox_refine=dict(type='SmoothL1Loss', beta=0.11, loss_weight=1.0), - transform_method='moment')) -# training and testing settings -train_cfg = dict( - init=dict( - assigner=dict(type='PointAssigner', scale=4, pos_num=1), - allowed_border=-1, - pos_weight=-1, - debug=False), - refine=dict( - assigner=dict( - type='MaxIoUAssigner', - pos_iou_thr=0.5, - neg_iou_thr=0.4, - min_pos_iou=0, - ignore_iof_thr=-1), - allowed_border=-1, - pos_weight=-1, - debug=False)) -test_cfg = dict( - nms_pre=1000, - min_bbox_size=0, - score_thr=0.05, - nms=dict(type='nms', iou_thr=0.5), - max_per_img=100) -# dataset settings -dataset_type = 'CocoDataset' -data_root = 'data/coco/' -img_norm_cfg = dict( - mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) -train_pipeline = [ - dict(type='LoadImageFromFile'), - dict(type='LoadAnnotations', with_bbox=True), - dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), - dict(type='RandomFlip', flip_ratio=0.5), - dict(type='Normalize', **img_norm_cfg), - dict(type='Pad', size_divisor=32), - dict(type='DefaultFormatBundle'), - dict(type='Collect', keys=['img', 
'gt_bboxes', 'gt_labels']), -] -test_pipeline = [ - dict(type='LoadImageFromFile'), - dict( - type='MultiScaleFlipAug', - img_scale=(1333, 800), - flip=False, - transforms=[ - dict(type='Resize', keep_ratio=True), - dict(type='RandomFlip'), - dict(type='Normalize', **img_norm_cfg), - dict(type='Pad', size_divisor=32), - dict(type='ImageToTensor', keys=['img']), - dict(type='Collect', keys=['img']), - ]) -] -data = dict( - imgs_per_gpu=2, - workers_per_gpu=2, - train=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_train2017.json', - img_prefix=data_root + 'train2017/', - pipeline=train_pipeline), - val=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_val2017.json', - img_prefix=data_root + 'val2017/', - pipeline=test_pipeline), - test=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_val2017.json', - img_prefix=data_root + 'val2017/', - pipeline=test_pipeline)) -evaluation = dict(interval=1, metric='bbox') -# optimizer -optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001) -optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) -# learning policy -lr_config = dict( - policy='step', - warmup='linear', - warmup_iters=500, - warmup_ratio=1.0 / 3, - step=[8, 11]) -checkpoint_config = dict(interval=1) -# yapf:disable -log_config = dict( - interval=50, - hooks=[ - dict(type='TextLoggerHook'), - # dict(type='TensorboardLoggerHook') - ]) -# yapf:enable -# runtime settings -total_epochs = 12 -dist_params = dict(backend='nccl') -log_level = 'INFO' -work_dir = './work_dirs/reppoints_moment_r50_no_gn_fpn_1x' -load_from = None -resume_from = None -workflow = [('train', 1)] diff --git a/configs/reppoints/reppoints_moment_x101_dcn_fpn_2x.py b/configs/reppoints/reppoints_moment_x101_dcn_fpn_2x.py deleted file mode 100644 index acc23969c90f175521f43a5c336acd407f5dbeef..0000000000000000000000000000000000000000 --- a/configs/reppoints/reppoints_moment_x101_dcn_fpn_2x.py +++ /dev/null 
@@ -1,146 +0,0 @@ -# model settings -norm_cfg = dict(type='GN', num_groups=32, requires_grad=True) - -model = dict( - type='RepPointsDetector', - pretrained='open-mmlab://resnext101_32x4d', - backbone=dict( - type='ResNeXt', - depth=101, - groups=32, - base_width=4, - num_stages=4, - out_indices=(0, 1, 2, 3), - frozen_stages=1, - norm_cfg=dict(type='BN', requires_grad=True), - style='pytorch', - dcn=dict(type='DCN', deformable_groups=1, fallback_on_stride=False), - stage_with_dcn=(False, True, True, True)), - neck=dict( - type='FPN', - in_channels=[256, 512, 1024, 2048], - out_channels=256, - start_level=1, - add_extra_convs=True, - num_outs=5, - norm_cfg=norm_cfg), - bbox_head=dict( - type='RepPointsHead', - num_classes=81, - in_channels=256, - feat_channels=256, - point_feat_channels=256, - stacked_convs=3, - num_points=9, - gradient_mul=0.1, - point_strides=[8, 16, 32, 64, 128], - point_base_scale=4, - norm_cfg=norm_cfg, - loss_cls=dict( - type='FocalLoss', - use_sigmoid=True, - gamma=2.0, - alpha=0.25, - loss_weight=1.0), - loss_bbox_init=dict(type='SmoothL1Loss', beta=0.11, loss_weight=0.5), - loss_bbox_refine=dict(type='SmoothL1Loss', beta=0.11, loss_weight=1.0), - transform_method='moment')) -# training and testing settings -train_cfg = dict( - init=dict( - assigner=dict(type='PointAssigner', scale=4, pos_num=1), - allowed_border=-1, - pos_weight=-1, - debug=False), - refine=dict( - assigner=dict( - type='MaxIoUAssigner', - pos_iou_thr=0.5, - neg_iou_thr=0.4, - min_pos_iou=0, - ignore_iof_thr=-1), - allowed_border=-1, - pos_weight=-1, - debug=False)) -test_cfg = dict( - nms_pre=1000, - min_bbox_size=0, - score_thr=0.05, - nms=dict(type='nms', iou_thr=0.5), - max_per_img=100) -# dataset settings -dataset_type = 'CocoDataset' -data_root = 'data/coco/' -img_norm_cfg = dict( - mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) -train_pipeline = [ - dict(type='LoadImageFromFile'), - dict(type='LoadAnnotations', with_bbox=True), - 
dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), - dict(type='RandomFlip', flip_ratio=0.5), - dict(type='Normalize', **img_norm_cfg), - dict(type='Pad', size_divisor=32), - dict(type='DefaultFormatBundle'), - dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']), -] -test_pipeline = [ - dict(type='LoadImageFromFile'), - dict( - type='MultiScaleFlipAug', - img_scale=(1333, 800), - flip=False, - transforms=[ - dict(type='Resize', keep_ratio=True), - dict(type='RandomFlip'), - dict(type='Normalize', **img_norm_cfg), - dict(type='Pad', size_divisor=32), - dict(type='ImageToTensor', keys=['img']), - dict(type='Collect', keys=['img']), - ]) -] -data = dict( - imgs_per_gpu=2, - workers_per_gpu=2, - train=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_train2017.json', - img_prefix=data_root + 'train2017/', - pipeline=train_pipeline), - val=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_val2017.json', - img_prefix=data_root + 'val2017/', - pipeline=test_pipeline), - test=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_val2017.json', - img_prefix=data_root + 'val2017/', - pipeline=test_pipeline)) -evaluation = dict(interval=1, metric='bbox') -# optimizer -optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001) -optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) -# learning policy -lr_config = dict( - policy='step', - warmup='linear', - warmup_iters=500, - warmup_ratio=1.0 / 3, - step=[16, 22]) -checkpoint_config = dict(interval=1) -# yapf:disable -log_config = dict( - interval=50, - hooks=[ - dict(type='TextLoggerHook'), - # dict(type='TensorboardLoggerHook') - ]) -# yapf:enable -# runtime settings -total_epochs = 24 -dist_params = dict(backend='nccl') -log_level = 'INFO' -work_dir = './work_dirs/reppoints_moment_x101_dcn_fpn_2x' -load_from = None -resume_from = None -workflow = [('train', 1)] diff --git 
a/configs/reppoints/reppoints_moment_x101_dcn_fpn_2x_mt.py b/configs/reppoints/reppoints_moment_x101_dcn_fpn_2x_mt.py deleted file mode 100644 index 8fa745366befaca9e32185d43c2af0c25dd69348..0000000000000000000000000000000000000000 --- a/configs/reppoints/reppoints_moment_x101_dcn_fpn_2x_mt.py +++ /dev/null @@ -1,150 +0,0 @@ -# model settings -norm_cfg = dict(type='GN', num_groups=32, requires_grad=True) - -model = dict( - type='RepPointsDetector', - pretrained='open-mmlab://resnext101_32x4d', - backbone=dict( - type='ResNeXt', - depth=101, - groups=32, - base_width=4, - num_stages=4, - out_indices=(0, 1, 2, 3), - frozen_stages=1, - norm_cfg=dict(type='BN', requires_grad=True), - style='pytorch', - dcn=dict(type='DCN', deformable_groups=1, fallback_on_stride=False), - stage_with_dcn=(False, True, True, True)), - neck=dict( - type='FPN', - in_channels=[256, 512, 1024, 2048], - out_channels=256, - start_level=1, - add_extra_convs=True, - num_outs=5, - norm_cfg=norm_cfg), - bbox_head=dict( - type='RepPointsHead', - num_classes=81, - in_channels=256, - feat_channels=256, - point_feat_channels=256, - stacked_convs=3, - num_points=9, - gradient_mul=0.1, - point_strides=[8, 16, 32, 64, 128], - point_base_scale=4, - norm_cfg=norm_cfg, - loss_cls=dict( - type='FocalLoss', - use_sigmoid=True, - gamma=2.0, - alpha=0.25, - loss_weight=1.0), - loss_bbox_init=dict(type='SmoothL1Loss', beta=0.11, loss_weight=0.5), - loss_bbox_refine=dict(type='SmoothL1Loss', beta=0.11, loss_weight=1.0), - transform_method='moment')) -# training and testing settings -train_cfg = dict( - init=dict( - assigner=dict(type='PointAssigner', scale=4, pos_num=1), - allowed_border=-1, - pos_weight=-1, - debug=False), - refine=dict( - assigner=dict( - type='MaxIoUAssigner', - pos_iou_thr=0.5, - neg_iou_thr=0.4, - min_pos_iou=0, - ignore_iof_thr=-1), - allowed_border=-1, - pos_weight=-1, - debug=False)) -test_cfg = dict( - nms_pre=1000, - min_bbox_size=0, - score_thr=0.05, - nms=dict(type='nms', 
iou_thr=0.5), - max_per_img=100) -# dataset settings -dataset_type = 'CocoDataset' -data_root = 'data/coco/' -img_norm_cfg = dict( - mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) -train_pipeline = [ - dict(type='LoadImageFromFile'), - dict(type='LoadAnnotations', with_bbox=True), - dict( - type='Resize', - img_scale=[(1333, 480), (1333, 960)], - keep_ratio=True, - multiscale_mode='range'), - dict(type='RandomFlip', flip_ratio=0.5), - dict(type='Normalize', **img_norm_cfg), - dict(type='Pad', size_divisor=32), - dict(type='DefaultFormatBundle'), - dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']), -] -test_pipeline = [ - dict(type='LoadImageFromFile'), - dict( - type='MultiScaleFlipAug', - img_scale=(1333, 800), - flip=False, - transforms=[ - dict(type='Resize', keep_ratio=True), - dict(type='RandomFlip'), - dict(type='Normalize', **img_norm_cfg), - dict(type='Pad', size_divisor=32), - dict(type='ImageToTensor', keys=['img']), - dict(type='Collect', keys=['img']), - ]) -] -data = dict( - imgs_per_gpu=2, - workers_per_gpu=2, - train=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_train2017.json', - img_prefix=data_root + 'train2017/', - pipeline=train_pipeline), - val=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_val2017.json', - img_prefix=data_root + 'val2017/', - pipeline=test_pipeline), - test=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_val2017.json', - img_prefix=data_root + 'val2017/', - pipeline=test_pipeline)) -evaluation = dict(interval=1, metric='bbox') -# optimizer -optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001) -optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) -# learning policy -lr_config = dict( - policy='step', - warmup='linear', - warmup_iters=500, - warmup_ratio=1.0 / 3, - step=[16, 22]) -checkpoint_config = dict(interval=1) -# yapf:disable -log_config = dict( - interval=50, - hooks=[ - 
dict(type='TextLoggerHook'), - # dict(type='TensorboardLoggerHook') - ]) -# yapf:enable -# runtime settings -total_epochs = 24 -dist_params = dict(backend='nccl') -log_level = 'INFO' -work_dir = './work_dirs/reppoints_moment_x101_dcn_fpn_2x_mt' -load_from = None -resume_from = None -workflow = [('train', 1)] diff --git a/configs/reppoints/reppoints_moment_x101_fpn_dconv_c3-c5_gn-neck+head_2x_coco.py b/configs/reppoints/reppoints_moment_x101_fpn_dconv_c3-c5_gn-neck+head_2x_coco.py new file mode 100644 index 0000000000000000000000000000000000000000..707b8f979e60aeb0466f3418270e40839361a732 --- /dev/null +++ b/configs/reppoints/reppoints_moment_x101_fpn_dconv_c3-c5_gn-neck+head_2x_coco.py @@ -0,0 +1,16 @@ +_base_ = './reppoints_moment_r50_fpn_gn-neck+head_2x_coco.py' +model = dict( + pretrained='open-mmlab://resnext101_32x4d', + backbone=dict( + type='ResNeXt', + depth=101, + groups=32, + base_width=4, + num_stages=4, + out_indices=(0, 1, 2, 3), + frozen_stages=1, + norm_cfg=dict(type='BN', requires_grad=True), + style='pytorch', + dcn=dict(type='DCN', deformable_groups=1, fallback_on_stride=False), + stage_with_dcn=(False, True, True, True))) +work_dir = './work_dirs/reppoints_moment_x101_dcn_fpn_2x' diff --git a/configs/reppoints/reppoints_moment_x101_fpn_dconv_c3-c5_gn-neck+head_mstrain_480-960_2x_coco.py b/configs/reppoints/reppoints_moment_x101_fpn_dconv_c3-c5_gn-neck+head_mstrain_480-960_2x_coco.py new file mode 100644 index 0000000000000000000000000000000000000000..82521489171668a58ebc5b0c39b5d4d202668b31 --- /dev/null +++ b/configs/reppoints/reppoints_moment_x101_fpn_dconv_c3-c5_gn-neck+head_mstrain_480-960_2x_coco.py @@ -0,0 +1,16 @@ +_base_ = './reppoints_moment_r50_fpn_gn-neck+head_mstrain_480-960_2x_coco.py' +model = dict( + pretrained='open-mmlab://resnext101_32x4d', + backbone=dict( + type='ResNeXt', + depth=101, + groups=32, + base_width=4, + num_stages=4, + out_indices=(0, 1, 2, 3), + frozen_stages=1, + norm_cfg=dict(type='BN', requires_grad=True), + 
style='pytorch', + dcn=dict(type='DCN', deformable_groups=1, fallback_on_stride=False), + stage_with_dcn=(False, True, True, True))) +work_dir = './work_dirs/reppoints_moment_x101_dcn_fpn_2x_mt' diff --git a/configs/reppoints/reppoints_partial_minmax_r50_fpn_1x.py b/configs/reppoints/reppoints_partial_minmax_r50_fpn_1x.py deleted file mode 100644 index 659706133873ad9f17cfcbd8cde8d94ee91ec3d1..0000000000000000000000000000000000000000 --- a/configs/reppoints/reppoints_partial_minmax_r50_fpn_1x.py +++ /dev/null @@ -1,142 +0,0 @@ -# model settings -norm_cfg = dict(type='GN', num_groups=32, requires_grad=True) - -model = dict( - type='RepPointsDetector', - pretrained='torchvision://resnet50', - backbone=dict( - type='ResNet', - depth=50, - num_stages=4, - out_indices=(0, 1, 2, 3), - frozen_stages=1, - norm_cfg=dict(type='BN', requires_grad=True), - style='pytorch'), - neck=dict( - type='FPN', - in_channels=[256, 512, 1024, 2048], - out_channels=256, - start_level=1, - add_extra_convs=True, - num_outs=5, - norm_cfg=norm_cfg), - bbox_head=dict( - type='RepPointsHead', - num_classes=81, - in_channels=256, - feat_channels=256, - point_feat_channels=256, - stacked_convs=3, - num_points=9, - gradient_mul=0.1, - point_strides=[8, 16, 32, 64, 128], - point_base_scale=4, - norm_cfg=norm_cfg, - loss_cls=dict( - type='FocalLoss', - use_sigmoid=True, - gamma=2.0, - alpha=0.25, - loss_weight=1.0), - loss_bbox_init=dict(type='SmoothL1Loss', beta=0.11, loss_weight=0.5), - loss_bbox_refine=dict(type='SmoothL1Loss', beta=0.11, loss_weight=1.0), - transform_method='partial_minmax')) -# training and testing settings -train_cfg = dict( - init=dict( - assigner=dict(type='PointAssigner', scale=4, pos_num=1), - allowed_border=-1, - pos_weight=-1, - debug=False), - refine=dict( - assigner=dict( - type='MaxIoUAssigner', - pos_iou_thr=0.5, - neg_iou_thr=0.4, - min_pos_iou=0, - ignore_iof_thr=-1), - allowed_border=-1, - pos_weight=-1, - debug=False)) -test_cfg = dict( - nms_pre=1000, - 
min_bbox_size=0, - score_thr=0.05, - nms=dict(type='nms', iou_thr=0.5), - max_per_img=100) -# dataset settings -dataset_type = 'CocoDataset' -data_root = 'data/coco/' -img_norm_cfg = dict( - mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) -train_pipeline = [ - dict(type='LoadImageFromFile'), - dict(type='LoadAnnotations', with_bbox=True), - dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), - dict(type='RandomFlip', flip_ratio=0.5), - dict(type='Normalize', **img_norm_cfg), - dict(type='Pad', size_divisor=32), - dict(type='DefaultFormatBundle'), - dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']), -] -test_pipeline = [ - dict(type='LoadImageFromFile'), - dict( - type='MultiScaleFlipAug', - img_scale=(1333, 800), - flip=False, - transforms=[ - dict(type='Resize', keep_ratio=True), - dict(type='RandomFlip'), - dict(type='Normalize', **img_norm_cfg), - dict(type='Pad', size_divisor=32), - dict(type='ImageToTensor', keys=['img']), - dict(type='Collect', keys=['img']), - ]) -] -data = dict( - imgs_per_gpu=2, - workers_per_gpu=2, - train=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_train2017.json', - img_prefix=data_root + 'train2017/', - pipeline=train_pipeline), - val=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_val2017.json', - img_prefix=data_root + 'val2017/', - pipeline=test_pipeline), - test=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_val2017.json', - img_prefix=data_root + 'val2017/', - pipeline=test_pipeline)) -evaluation = dict(interval=1, metric='bbox') -# optimizer -optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001) -optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) -# learning policy -lr_config = dict( - policy='step', - warmup='linear', - warmup_iters=500, - warmup_ratio=1.0 / 3, - step=[8, 11]) -checkpoint_config = dict(interval=1) -# yapf:disable -log_config = dict( - interval=50, - 
hooks=[ - dict(type='TextLoggerHook'), - # dict(type='TensorboardLoggerHook') - ]) -# yapf:enable -# runtime settings -total_epochs = 12 -dist_params = dict(backend='nccl') -log_level = 'INFO' -work_dir = './work_dirs/reppoints_partial_minmax_r50_fpn_1x' -load_from = None -resume_from = None -workflow = [('train', 1)] diff --git a/configs/reppoints/reppoints_partial_minmax_r50_fpn_gn-neck+head_1x_coco.py b/configs/reppoints/reppoints_partial_minmax_r50_fpn_gn-neck+head_1x_coco.py new file mode 100644 index 0000000000000000000000000000000000000000..b09c7b8a6af303d908fa456eae2d1eaf4122daa3 --- /dev/null +++ b/configs/reppoints/reppoints_partial_minmax_r50_fpn_gn-neck+head_1x_coco.py @@ -0,0 +1,3 @@ +_base_ = './reppoints_moment_r50_fpn_gn-neck+head_1x_coco.py' +model = dict(bbox_head=dict(transform_method='partial_minmax')) +work_dir = './work_dirs/reppoints_partial_minmax_r50_fpn_1x' diff --git a/configs/retinanet/retinanet_r101_fpn_1x_coco.py b/configs/retinanet/retinanet_r101_fpn_1x_coco.py new file mode 100644 index 0000000000000000000000000000000000000000..bcce4d8883e7c8399554a1a9b3e24ed003ff1cc4 --- /dev/null +++ b/configs/retinanet/retinanet_r101_fpn_1x_coco.py @@ -0,0 +1,3 @@ +_base_ = './retinanet_r50_fpn_1x_coco.py' +model = dict(pretrained='torchvision://resnet101', backbone=dict(depth=101)) +work_dir = './work_dirs/retinanet_r101_fpn_1x' diff --git a/configs/retinanet/retinanet_r50_caffe_fpn_1x_coco.py b/configs/retinanet/retinanet_r50_caffe_fpn_1x_coco.py new file mode 100644 index 0000000000000000000000000000000000000000..e56d2ec6abbd9ccda2ef3aba9aa8564c39b347ea --- /dev/null +++ b/configs/retinanet/retinanet_r50_caffe_fpn_1x_coco.py @@ -0,0 +1,37 @@ +_base_ = './retinanet_r50_fpn_1x_coco.py' +model = dict( + pretrained='open-mmlab://resnet50_caffe', + backbone=dict(norm_cfg=dict(requires_grad=False), style='caffe')) +# use caffe img_norm +img_norm_cfg = dict( + mean=[102.9801, 115.9465, 122.7717], std=[1.0, 1.0, 1.0], to_rgb=False) +train_pipeline = [ 
+ dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations', with_bbox=True), + dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(1333, 800), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] +data = dict( + train=dict(pipeline=train_pipeline), + val=dict(pipeline=test_pipeline), + test=dict(pipeline=test_pipeline)) +work_dir = './work_dirs/retinanet_r50_fpn_1x' diff --git a/configs/retinanet/retinanet_r50_fpn_1x_coco.py b/configs/retinanet/retinanet_r50_fpn_1x_coco.py new file mode 100644 index 0000000000000000000000000000000000000000..88166c227f9780e19935cc0745f55ae618744ee1 --- /dev/null +++ b/configs/retinanet/retinanet_r50_fpn_1x_coco.py @@ -0,0 +1,8 @@ +_base_ = [ + '../_base_/models/retinanet_r50_fpn.py', + '../_base_/datasets/coco_detection.py', + '../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py' +] +# optimizer +optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001) +work_dir = './work_dirs/retinanet_r50_fpn_1x' diff --git a/configs/retinanet/retinanet_x101_32x4d_fpn_1x_coco.py b/configs/retinanet/retinanet_x101_32x4d_fpn_1x_coco.py new file mode 100644 index 0000000000000000000000000000000000000000..aad5f119426783603e2dc1680b5b12ce5fe8bb86 --- /dev/null +++ b/configs/retinanet/retinanet_x101_32x4d_fpn_1x_coco.py @@ -0,0 +1,14 @@ +_base_ = './retinanet_r50_fpn_1x_coco.py' +model = dict( + pretrained='open-mmlab://resnext101_32x4d', + backbone=dict( + type='ResNeXt', + 
depth=101, + groups=32, + base_width=4, + num_stages=4, + out_indices=(0, 1, 2, 3), + frozen_stages=1, + norm_cfg=dict(type='BN', requires_grad=True), + style='pytorch')) +work_dir = './work_dirs/retinanet_x101_32x4d_fpn_1x' diff --git a/configs/retinanet/retinanet_x101_64x4d_fpn_1x_coco.py b/configs/retinanet/retinanet_x101_64x4d_fpn_1x_coco.py new file mode 100644 index 0000000000000000000000000000000000000000..31eb21ff3fe9811c8644b007c5a4bdef8c05ef4c --- /dev/null +++ b/configs/retinanet/retinanet_x101_64x4d_fpn_1x_coco.py @@ -0,0 +1,14 @@ +_base_ = './retinanet_r50_fpn_1x_coco.py' +model = dict( + pretrained='open-mmlab://resnext101_64x4d', + backbone=dict( + type='ResNeXt', + depth=101, + groups=64, + base_width=4, + num_stages=4, + out_indices=(0, 1, 2, 3), + frozen_stages=1, + norm_cfg=dict(type='BN', requires_grad=True), + style='pytorch')) +work_dir = './work_dirs/retinanet_x101_64x4d_fpn_1x' diff --git a/configs/retinanet_r101_fpn_1x.py b/configs/retinanet_r101_fpn_1x.py deleted file mode 100644 index 7e4e9919ab975d308266b6ccfbf1aa03d68c40c5..0000000000000000000000000000000000000000 --- a/configs/retinanet_r101_fpn_1x.py +++ /dev/null @@ -1,131 +0,0 @@ -# model settings -model = dict( - type='RetinaNet', - pretrained='torchvision://resnet101', - backbone=dict( - type='ResNet', - depth=101, - num_stages=4, - out_indices=(0, 1, 2, 3), - frozen_stages=1, - norm_cfg=dict(type='BN', requires_grad=True), - style='pytorch'), - neck=dict( - type='FPN', - in_channels=[256, 512, 1024, 2048], - out_channels=256, - start_level=1, - add_extra_convs=True, - num_outs=5), - bbox_head=dict( - type='RetinaHead', - num_classes=81, - in_channels=256, - stacked_convs=4, - feat_channels=256, - octave_base_scale=4, - scales_per_octave=3, - anchor_ratios=[0.5, 1.0, 2.0], - anchor_strides=[8, 16, 32, 64, 128], - target_means=[.0, .0, .0, .0], - target_stds=[1.0, 1.0, 1.0, 1.0], - loss_cls=dict( - type='FocalLoss', - use_sigmoid=True, - gamma=2.0, - alpha=0.25, - loss_weight=1.0), 
- loss_bbox=dict(type='SmoothL1Loss', beta=0.11, loss_weight=1.0))) -# training and testing settings -train_cfg = dict( - assigner=dict( - type='MaxIoUAssigner', - pos_iou_thr=0.5, - neg_iou_thr=0.4, - min_pos_iou=0, - ignore_iof_thr=-1), - allowed_border=-1, - pos_weight=-1, - debug=False) -test_cfg = dict( - nms_pre=1000, - min_bbox_size=0, - score_thr=0.05, - nms=dict(type='nms', iou_thr=0.5), - max_per_img=100) -# dataset settings -dataset_type = 'CocoDataset' -data_root = 'data/coco/' -img_norm_cfg = dict( - mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) -train_pipeline = [ - dict(type='LoadImageFromFile'), - dict(type='LoadAnnotations', with_bbox=True), - dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), - dict(type='RandomFlip', flip_ratio=0.5), - dict(type='Normalize', **img_norm_cfg), - dict(type='Pad', size_divisor=32), - dict(type='DefaultFormatBundle'), - dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']), -] -test_pipeline = [ - dict(type='LoadImageFromFile'), - dict( - type='MultiScaleFlipAug', - img_scale=(1333, 800), - flip=False, - transforms=[ - dict(type='Resize', keep_ratio=True), - dict(type='RandomFlip'), - dict(type='Normalize', **img_norm_cfg), - dict(type='Pad', size_divisor=32), - dict(type='ImageToTensor', keys=['img']), - dict(type='Collect', keys=['img']), - ]) -] -data = dict( - imgs_per_gpu=2, - workers_per_gpu=2, - train=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_train2017.json', - img_prefix=data_root + 'train2017/', - pipeline=train_pipeline), - val=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_val2017.json', - img_prefix=data_root + 'val2017/', - pipeline=test_pipeline), - test=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_val2017.json', - img_prefix=data_root + 'val2017/', - pipeline=test_pipeline)) -evaluation = dict(interval=1, metric='bbox') -# optimizer -optimizer = dict(type='SGD', lr=0.01, 
momentum=0.9, weight_decay=0.0001) -optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) -# learning policy -lr_config = dict( - policy='step', - warmup='linear', - warmup_iters=500, - warmup_ratio=1.0 / 3, - step=[8, 11]) -checkpoint_config = dict(interval=1) -# yapf:disable -log_config = dict( - interval=50, - hooks=[ - dict(type='TextLoggerHook'), - # dict(type='TensorboardLoggerHook') - ]) -# yapf:enable -# runtime settings -total_epochs = 12 -dist_params = dict(backend='nccl') -log_level = 'INFO' -work_dir = './work_dirs/retinanet_r101_fpn_1x' -load_from = None -resume_from = None -workflow = [('train', 1)] diff --git a/configs/retinanet_r50_fpn_1x.py b/configs/retinanet_r50_fpn_1x.py deleted file mode 100644 index 37f851ca0a7115cad5561272587274f0ca618e6d..0000000000000000000000000000000000000000 --- a/configs/retinanet_r50_fpn_1x.py +++ /dev/null @@ -1,131 +0,0 @@ -# model settings -model = dict( - type='RetinaNet', - pretrained='torchvision://resnet50', - backbone=dict( - type='ResNet', - depth=50, - num_stages=4, - out_indices=(0, 1, 2, 3), - frozen_stages=1, - norm_cfg=dict(type='BN', requires_grad=True), - style='pytorch'), - neck=dict( - type='FPN', - in_channels=[256, 512, 1024, 2048], - out_channels=256, - start_level=1, - add_extra_convs=True, - num_outs=5), - bbox_head=dict( - type='RetinaHead', - num_classes=81, - in_channels=256, - stacked_convs=4, - feat_channels=256, - octave_base_scale=4, - scales_per_octave=3, - anchor_ratios=[0.5, 1.0, 2.0], - anchor_strides=[8, 16, 32, 64, 128], - target_means=[.0, .0, .0, .0], - target_stds=[1.0, 1.0, 1.0, 1.0], - loss_cls=dict( - type='FocalLoss', - use_sigmoid=True, - gamma=2.0, - alpha=0.25, - loss_weight=1.0), - loss_bbox=dict(type='SmoothL1Loss', beta=0.11, loss_weight=1.0))) -# training and testing settings -train_cfg = dict( - assigner=dict( - type='MaxIoUAssigner', - pos_iou_thr=0.5, - neg_iou_thr=0.4, - min_pos_iou=0, - ignore_iof_thr=-1), - allowed_border=-1, - pos_weight=-1, - 
debug=False) -test_cfg = dict( - nms_pre=1000, - min_bbox_size=0, - score_thr=0.05, - nms=dict(type='nms', iou_thr=0.5), - max_per_img=100) -# dataset settings -dataset_type = 'CocoDataset' -data_root = 'data/coco/' -img_norm_cfg = dict( - mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) -train_pipeline = [ - dict(type='LoadImageFromFile'), - dict(type='LoadAnnotations', with_bbox=True), - dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), - dict(type='RandomFlip', flip_ratio=0.5), - dict(type='Normalize', **img_norm_cfg), - dict(type='Pad', size_divisor=32), - dict(type='DefaultFormatBundle'), - dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']), -] -test_pipeline = [ - dict(type='LoadImageFromFile'), - dict( - type='MultiScaleFlipAug', - img_scale=(1333, 800), - flip=False, - transforms=[ - dict(type='Resize', keep_ratio=True), - dict(type='RandomFlip'), - dict(type='Normalize', **img_norm_cfg), - dict(type='Pad', size_divisor=32), - dict(type='ImageToTensor', keys=['img']), - dict(type='Collect', keys=['img']), - ]) -] -data = dict( - imgs_per_gpu=2, - workers_per_gpu=2, - train=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_train2017.json', - img_prefix=data_root + 'train2017/', - pipeline=train_pipeline), - val=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_val2017.json', - img_prefix=data_root + 'val2017/', - pipeline=test_pipeline), - test=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_val2017.json', - img_prefix=data_root + 'val2017/', - pipeline=test_pipeline)) -evaluation = dict(interval=1, metric='bbox') -# optimizer -optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001) -optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) -# learning policy -lr_config = dict( - policy='step', - warmup='linear', - warmup_iters=500, - warmup_ratio=1.0 / 3, - step=[8, 11]) -checkpoint_config = dict(interval=1) -# 
yapf:disable -log_config = dict( - interval=50, - hooks=[ - dict(type='TextLoggerHook'), - # dict(type='TensorboardLoggerHook') - ]) -# yapf:enable -# runtime settings -total_epochs = 12 -dist_params = dict(backend='nccl') -log_level = 'INFO' -work_dir = './work_dirs/retinanet_r50_fpn_1x' -load_from = None -resume_from = None -workflow = [('train', 1)] diff --git a/configs/retinanet_x101_32x4d_fpn_1x.py b/configs/retinanet_x101_32x4d_fpn_1x.py deleted file mode 100644 index 3120f1c124da0e6a16c7931270d7e191e4f0e73e..0000000000000000000000000000000000000000 --- a/configs/retinanet_x101_32x4d_fpn_1x.py +++ /dev/null @@ -1,133 +0,0 @@ -# model settings -model = dict( - type='RetinaNet', - pretrained='open-mmlab://resnext101_32x4d', - backbone=dict( - type='ResNeXt', - depth=101, - groups=32, - base_width=4, - num_stages=4, - out_indices=(0, 1, 2, 3), - frozen_stages=1, - norm_cfg=dict(type='BN', requires_grad=True), - style='pytorch'), - neck=dict( - type='FPN', - in_channels=[256, 512, 1024, 2048], - out_channels=256, - start_level=1, - add_extra_convs=True, - num_outs=5), - bbox_head=dict( - type='RetinaHead', - num_classes=81, - in_channels=256, - stacked_convs=4, - feat_channels=256, - octave_base_scale=4, - scales_per_octave=3, - anchor_ratios=[0.5, 1.0, 2.0], - anchor_strides=[8, 16, 32, 64, 128], - target_means=[.0, .0, .0, .0], - target_stds=[1.0, 1.0, 1.0, 1.0], - loss_cls=dict( - type='FocalLoss', - use_sigmoid=True, - gamma=2.0, - alpha=0.25, - loss_weight=1.0), - loss_bbox=dict(type='SmoothL1Loss', beta=0.11, loss_weight=1.0))) -# training and testing settings -train_cfg = dict( - assigner=dict( - type='MaxIoUAssigner', - pos_iou_thr=0.5, - neg_iou_thr=0.4, - min_pos_iou=0, - ignore_iof_thr=-1), - allowed_border=-1, - pos_weight=-1, - debug=False) -test_cfg = dict( - nms_pre=1000, - min_bbox_size=0, - score_thr=0.05, - nms=dict(type='nms', iou_thr=0.5), - max_per_img=100) -# dataset settings -dataset_type = 'CocoDataset' -data_root = 'data/coco/' 
-img_norm_cfg = dict( - mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) -train_pipeline = [ - dict(type='LoadImageFromFile'), - dict(type='LoadAnnotations', with_bbox=True), - dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), - dict(type='RandomFlip', flip_ratio=0.5), - dict(type='Normalize', **img_norm_cfg), - dict(type='Pad', size_divisor=32), - dict(type='DefaultFormatBundle'), - dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']), -] -test_pipeline = [ - dict(type='LoadImageFromFile'), - dict( - type='MultiScaleFlipAug', - img_scale=(1333, 800), - flip=False, - transforms=[ - dict(type='Resize', keep_ratio=True), - dict(type='RandomFlip'), - dict(type='Normalize', **img_norm_cfg), - dict(type='Pad', size_divisor=32), - dict(type='ImageToTensor', keys=['img']), - dict(type='Collect', keys=['img']), - ]) -] -data = dict( - imgs_per_gpu=2, - workers_per_gpu=2, - train=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_train2017.json', - img_prefix=data_root + 'train2017/', - pipeline=train_pipeline), - val=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_val2017.json', - img_prefix=data_root + 'val2017/', - pipeline=test_pipeline), - test=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_val2017.json', - img_prefix=data_root + 'val2017/', - pipeline=test_pipeline)) -evaluation = dict(interval=1, metric='bbox') -# optimizer -optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001) -optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) -# learning policy -lr_config = dict( - policy='step', - warmup='linear', - warmup_iters=500, - warmup_ratio=1.0 / 3, - step=[8, 11]) -checkpoint_config = dict(interval=1) -# yapf:disable -log_config = dict( - interval=50, - hooks=[ - dict(type='TextLoggerHook'), - # dict(type='TensorboardLoggerHook') - ]) -# yapf:enable -# runtime settings -total_epochs = 12 -dist_params = dict(backend='nccl') 
-log_level = 'INFO' -work_dir = './work_dirs/retinanet_x101_32x4d_fpn_1x' -load_from = None -resume_from = None -workflow = [('train', 1)] diff --git a/configs/retinanet_x101_64x4d_fpn_1x.py b/configs/retinanet_x101_64x4d_fpn_1x.py deleted file mode 100644 index 8c7e73da88ebc5e850d091ff1646c0bdab57f9d2..0000000000000000000000000000000000000000 --- a/configs/retinanet_x101_64x4d_fpn_1x.py +++ /dev/null @@ -1,133 +0,0 @@ -# model settings -model = dict( - type='RetinaNet', - pretrained='open-mmlab://resnext101_64x4d', - backbone=dict( - type='ResNeXt', - depth=101, - groups=64, - base_width=4, - num_stages=4, - out_indices=(0, 1, 2, 3), - frozen_stages=1, - norm_cfg=dict(type='BN', requires_grad=True), - style='pytorch'), - neck=dict( - type='FPN', - in_channels=[256, 512, 1024, 2048], - out_channels=256, - start_level=1, - add_extra_convs=True, - num_outs=5), - bbox_head=dict( - type='RetinaHead', - num_classes=81, - in_channels=256, - stacked_convs=4, - feat_channels=256, - octave_base_scale=4, - scales_per_octave=3, - anchor_ratios=[0.5, 1.0, 2.0], - anchor_strides=[8, 16, 32, 64, 128], - target_means=[.0, .0, .0, .0], - target_stds=[1.0, 1.0, 1.0, 1.0], - loss_cls=dict( - type='FocalLoss', - use_sigmoid=True, - gamma=2.0, - alpha=0.25, - loss_weight=1.0), - loss_bbox=dict(type='SmoothL1Loss', beta=0.11, loss_weight=1.0))) -# training and testing settings -train_cfg = dict( - assigner=dict( - type='MaxIoUAssigner', - pos_iou_thr=0.5, - neg_iou_thr=0.4, - min_pos_iou=0, - ignore_iof_thr=-1), - allowed_border=-1, - pos_weight=-1, - debug=False) -test_cfg = dict( - nms_pre=1000, - min_bbox_size=0, - score_thr=0.05, - nms=dict(type='nms', iou_thr=0.5), - max_per_img=100) -# dataset settings -dataset_type = 'CocoDataset' -data_root = 'data/coco/' -img_norm_cfg = dict( - mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) -train_pipeline = [ - dict(type='LoadImageFromFile'), - dict(type='LoadAnnotations', with_bbox=True), - dict(type='Resize', 
img_scale=(1333, 800), keep_ratio=True), - dict(type='RandomFlip', flip_ratio=0.5), - dict(type='Normalize', **img_norm_cfg), - dict(type='Pad', size_divisor=32), - dict(type='DefaultFormatBundle'), - dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']), -] -test_pipeline = [ - dict(type='LoadImageFromFile'), - dict( - type='MultiScaleFlipAug', - img_scale=(1333, 800), - flip=False, - transforms=[ - dict(type='Resize', keep_ratio=True), - dict(type='RandomFlip'), - dict(type='Normalize', **img_norm_cfg), - dict(type='Pad', size_divisor=32), - dict(type='ImageToTensor', keys=['img']), - dict(type='Collect', keys=['img']), - ]) -] -data = dict( - imgs_per_gpu=2, - workers_per_gpu=2, - train=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_train2017.json', - img_prefix=data_root + 'train2017/', - pipeline=train_pipeline), - val=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_val2017.json', - img_prefix=data_root + 'val2017/', - pipeline=test_pipeline), - test=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_val2017.json', - img_prefix=data_root + 'val2017/', - pipeline=test_pipeline)) -evaluation = dict(interval=1, metric='bbox') -# optimizer -optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001) -optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) -# learning policy -lr_config = dict( - policy='step', - warmup='linear', - warmup_iters=500, - warmup_ratio=1.0 / 3, - step=[8, 11]) -checkpoint_config = dict(interval=1) -# yapf:disable -log_config = dict( - interval=50, - hooks=[ - dict(type='TextLoggerHook'), - # dict(type='TensorboardLoggerHook') - ]) -# yapf:enable -# runtime settings -total_epochs = 12 -dist_params = dict(backend='nccl') -log_level = 'INFO' -work_dir = './work_dirs/retinanet_x101_64x4d_fpn_1x' -load_from = None -resume_from = None -workflow = [('train', 1)] diff --git a/configs/rpn/rpn_r101_fpn_1x_coco.py 
b/configs/rpn/rpn_r101_fpn_1x_coco.py new file mode 100644 index 0000000000000000000000000000000000000000..7e3223d97e9fb42d7383450bf92dfc4429c1b65b --- /dev/null +++ b/configs/rpn/rpn_r101_fpn_1x_coco.py @@ -0,0 +1,3 @@ +_base_ = './rpn_r50_fpn_1x_coco.py' +model = dict(pretrained='torchvision://resnet101', backbone=dict(depth=101)) +work_dir = './work_dirs/rpn_r101_fpn_1x' diff --git a/configs/rpn/rpn_r50_caffe_c4_1x_coco.py b/configs/rpn/rpn_r50_caffe_c4_1x_coco.py new file mode 100644 index 0000000000000000000000000000000000000000..1697fed538a5f74cf4a98243b715e26ae8bbc5f9 --- /dev/null +++ b/configs/rpn/rpn_r50_caffe_c4_1x_coco.py @@ -0,0 +1,39 @@ +_base_ = [ + '../_base_/models/rpn_r50_caffe_c4.py', + '../_base_/datasets/coco_detection.py', + '../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py' +] +# dataset settings +img_norm_cfg = dict( + mean=[102.9801, 115.9465, 122.7717], std=[1.0, 1.0, 1.0], to_rgb=False) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations', with_bbox=True, with_label=False), + dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_bboxes']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(1333, 800), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] +data = dict( + train=dict(pipeline=train_pipeline), + val=dict(pipeline=test_pipeline), + test=dict(pipeline=test_pipeline)) +evaluation = dict(interval=1, metric='proposal_fast') +work_dir = './work_dirs/rpn_r50_caffe_c4_1x' diff --git a/configs/rpn/rpn_r50_caffe_fpn_1x_coco.py 
b/configs/rpn/rpn_r50_caffe_fpn_1x_coco.py new file mode 100644 index 0000000000000000000000000000000000000000..b07a42f453225ca192af22b8a86fcfc69a0af633 --- /dev/null +++ b/configs/rpn/rpn_r50_caffe_fpn_1x_coco.py @@ -0,0 +1,37 @@ +_base_ = './rpn_r50_fpn_1x_coco.py' +model = dict( + pretrained='open-mmlab://resnet50_caffe', + backbone=dict(norm_cfg=dict(requires_grad=False), style='caffe')) +# use caffe img_norm +img_norm_cfg = dict( + mean=[102.9801, 115.9465, 122.7717], std=[1.0, 1.0, 1.0], to_rgb=False) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations', with_bbox=True, with_label=False), + dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_bboxes']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(1333, 800), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] +data = dict( + train=dict(pipeline=train_pipeline), + val=dict(pipeline=test_pipeline), + test=dict(pipeline=test_pipeline)) +work_dir = './work_dirs/rpn_r50_caffe_fpn_1x' diff --git a/configs/rpn/rpn_r50_fpn_1x_coco.py b/configs/rpn/rpn_r50_fpn_1x_coco.py new file mode 100644 index 0000000000000000000000000000000000000000..5e459d88d2188479e579a85047aebb48d4cfc829 --- /dev/null +++ b/configs/rpn/rpn_r50_fpn_1x_coco.py @@ -0,0 +1,19 @@ +_base_ = [ + '../_base_/models/rpn_r50_fpn.py', '../_base_/datasets/coco_detection.py', + '../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py' +] +img_norm_cfg = dict( + mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) 
+train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations', with_bbox=True, with_label=False), + dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_bboxes']), +] +data = dict(train=dict(pipeline=train_pipeline)) +evaluation = dict(interval=1, metric='proposal_fast') +work_dir = './work_dirs/rpn_r50_fpn_1x' diff --git a/configs/rpn/rpn_x101_32x4d_fpn_1x_coco.py b/configs/rpn/rpn_x101_32x4d_fpn_1x_coco.py new file mode 100644 index 0000000000000000000000000000000000000000..7dc2bd6a49d72b4e5f58139d4da58fe4e48cac98 --- /dev/null +++ b/configs/rpn/rpn_x101_32x4d_fpn_1x_coco.py @@ -0,0 +1,14 @@ +_base_ = './rpn_r50_fpn_1x_coco.py' +model = dict( + pretrained='open-mmlab://resnext101_32x4d', + backbone=dict( + type='ResNeXt', + depth=101, + groups=32, + base_width=4, + num_stages=4, + out_indices=(0, 1, 2, 3), + frozen_stages=1, + norm_cfg=dict(type='BN', requires_grad=True), + style='pytorch')) +work_dir = './work_dirs/rpn_x101_32x4d_fpn_1x' diff --git a/configs/rpn/rpn_x101_64x4d_fpn_1x_coco.py b/configs/rpn/rpn_x101_64x4d_fpn_1x_coco.py new file mode 100644 index 0000000000000000000000000000000000000000..b25888c62ae6bfa758bc685e406c4cc63fa63e29 --- /dev/null +++ b/configs/rpn/rpn_x101_64x4d_fpn_1x_coco.py @@ -0,0 +1,14 @@ +_base_ = './rpn_r50_fpn_1x_coco.py' +model = dict( + pretrained='open-mmlab://resnext101_64x4d', + backbone=dict( + type='ResNeXt', + depth=101, + groups=64, + base_width=4, + num_stages=4, + out_indices=(0, 1, 2, 3), + frozen_stages=1, + norm_cfg=dict(type='BN', requires_grad=True), + style='pytorch')) +work_dir = './work_dirs/rpn_x101_64x4d_fpn_1x' diff --git a/configs/rpn_r101_fpn_1x.py b/configs/rpn_r101_fpn_1x.py deleted file mode 100644 index 
498dff4eecfa578be458bb457cd4aa49bd77b2cb..0000000000000000000000000000000000000000 --- a/configs/rpn_r101_fpn_1x.py +++ /dev/null @@ -1,131 +0,0 @@ -# model settings -model = dict( - type='RPN', - pretrained='torchvision://resnet101', - backbone=dict( - type='ResNet', - depth=101, - num_stages=4, - out_indices=(0, 1, 2, 3), - frozen_stages=1, - norm_cfg=dict(type='BN', requires_grad=True), - style='pytorch'), - neck=dict( - type='FPN', - in_channels=[256, 512, 1024, 2048], - out_channels=256, - num_outs=5), - rpn_head=dict( - type='RPNHead', - in_channels=256, - feat_channels=256, - anchor_scales=[8], - anchor_ratios=[0.5, 1.0, 2.0], - anchor_strides=[4, 8, 16, 32, 64], - target_means=[.0, .0, .0, .0], - target_stds=[1.0, 1.0, 1.0, 1.0], - loss_cls=dict( - type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0), - loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0))) -# model training and testing settings -train_cfg = dict( - rpn=dict( - assigner=dict( - type='MaxIoUAssigner', - pos_iou_thr=0.7, - neg_iou_thr=0.3, - min_pos_iou=0.3, - ignore_iof_thr=-1), - sampler=dict( - type='RandomSampler', - num=256, - pos_fraction=0.5, - neg_pos_ub=-1, - add_gt_as_proposals=False), - allowed_border=0, - pos_weight=-1, - debug=False)) -test_cfg = dict( - rpn=dict( - nms_across_levels=False, - nms_pre=2000, - nms_post=2000, - max_num=2000, - nms_thr=0.7, - min_bbox_size=0)) -# dataset settings -dataset_type = 'CocoDataset' -data_root = 'data/coco/' -img_norm_cfg = dict( - mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) -train_pipeline = [ - dict(type='LoadImageFromFile'), - dict(type='LoadAnnotations', with_bbox=True, with_label=False), - dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), - dict(type='RandomFlip', flip_ratio=0.5), - dict(type='Normalize', **img_norm_cfg), - dict(type='Pad', size_divisor=32), - dict(type='DefaultFormatBundle'), - dict(type='Collect', keys=['img', 'gt_bboxes']), -] -test_pipeline = [ - 
dict(type='LoadImageFromFile'), - dict( - type='MultiScaleFlipAug', - img_scale=(1333, 800), - flip=False, - transforms=[ - dict(type='Resize', keep_ratio=True), - dict(type='RandomFlip'), - dict(type='Normalize', **img_norm_cfg), - dict(type='Pad', size_divisor=32), - dict(type='ImageToTensor', keys=['img']), - dict(type='Collect', keys=['img']), - ]) -] -data = dict( - imgs_per_gpu=2, - workers_per_gpu=2, - train=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_train2017.json', - img_prefix=data_root + 'train2017/', - pipeline=train_pipeline), - val=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_val2017.json', - img_prefix=data_root + 'val2017/', - pipeline=test_pipeline), - test=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_val2017.json', - img_prefix=data_root + 'val2017/', - pipeline=test_pipeline)) -evaluation = dict(interval=1, metric='proposal_fast') -# optimizer -optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001) -# runner configs -optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) -lr_config = dict( - policy='step', - warmup='linear', - warmup_iters=500, - warmup_ratio=1.0 / 3, - step=[8, 11]) -checkpoint_config = dict(interval=1) -# yapf:disable -log_config = dict( - interval=50, - hooks=[ - dict(type='TextLoggerHook'), - # dict(type='TensorboardLoggerHook') - ]) -# yapf:enable -# runtime settings -total_epochs = 12 -dist_params = dict(backend='nccl') -log_level = 'INFO' -work_dir = './work_dirs/rpn_r101_fpn_1x' -load_from = None -resume_from = None -workflow = [('train', 1)] diff --git a/configs/rpn_r50_caffe_c4_1x.py b/configs/rpn_r50_caffe_c4_1x.py deleted file mode 100644 index c0b05a8a994d137131a6ac5abd4d1970786c3925..0000000000000000000000000000000000000000 --- a/configs/rpn_r50_caffe_c4_1x.py +++ /dev/null @@ -1,130 +0,0 @@ -# model settings -model = dict( - type='RPN', - pretrained='open-mmlab://resnet50_caffe', - backbone=dict( - 
type='ResNet', - depth=50, - num_stages=3, - strides=(1, 2, 2), - dilations=(1, 1, 1), - out_indices=(2, ), - frozen_stages=1, - norm_cfg=dict(type='BN', requires_grad=False), - norm_eval=True, - style='caffe'), - neck=None, - rpn_head=dict( - type='RPNHead', - in_channels=1024, - feat_channels=1024, - anchor_scales=[2, 4, 8, 16, 32], - anchor_ratios=[0.5, 1.0, 2.0], - anchor_strides=[16], - target_means=[.0, .0, .0, .0], - target_stds=[1.0, 1.0, 1.0, 1.0], - loss_cls=dict( - type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0), - loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0))) -# model training and testing settings -train_cfg = dict( - rpn=dict( - assigner=dict( - type='MaxIoUAssigner', - pos_iou_thr=0.7, - neg_iou_thr=0.3, - min_pos_iou=0.3, - ignore_iof_thr=-1), - sampler=dict( - type='RandomSampler', - num=256, - pos_fraction=0.5, - neg_pos_ub=-1, - add_gt_as_proposals=False), - allowed_border=0, - pos_weight=-1, - debug=False)) -test_cfg = dict( - rpn=dict( - nms_across_levels=False, - nms_pre=12000, - nms_post=2000, - max_num=2000, - nms_thr=0.7, - min_bbox_size=0)) -# dataset settings -dataset_type = 'CocoDataset' -data_root = 'data/coco/' -img_norm_cfg = dict( - mean=[102.9801, 115.9465, 122.7717], std=[1.0, 1.0, 1.0], to_rgb=False) -train_pipeline = [ - dict(type='LoadImageFromFile'), - dict(type='LoadAnnotations', with_bbox=True, with_label=False), - dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), - dict(type='RandomFlip', flip_ratio=0.5), - dict(type='Normalize', **img_norm_cfg), - dict(type='Pad', size_divisor=32), - dict(type='DefaultFormatBundle'), - dict(type='Collect', keys=['img', 'gt_bboxes']), -] -test_pipeline = [ - dict(type='LoadImageFromFile'), - dict( - type='MultiScaleFlipAug', - img_scale=(1333, 800), - flip=False, - transforms=[ - dict(type='Resize', keep_ratio=True), - dict(type='RandomFlip'), - dict(type='Normalize', **img_norm_cfg), - dict(type='Pad', size_divisor=32), - 
dict(type='ImageToTensor', keys=['img']), - dict(type='Collect', keys=['img']), - ]) -] -data = dict( - imgs_per_gpu=2, - workers_per_gpu=2, - train=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_train2017.json', - img_prefix=data_root + 'train2017/', - pipeline=train_pipeline), - val=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_val2017.json', - img_prefix=data_root + 'val2017/', - pipeline=test_pipeline), - test=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_val2017.json', - img_prefix=data_root + 'val2017/', - pipeline=test_pipeline)) -evaluation = dict(interval=1, metric='proposal_fast') -# optimizer -optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001) -# runner configs -optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) -lr_config = dict( - policy='step', - warmup='linear', - warmup_iters=500, - warmup_ratio=1.0 / 3, - step=[8, 11]) -checkpoint_config = dict(interval=1) -# yapf:disable -log_config = dict( - interval=50, - hooks=[ - dict(type='TextLoggerHook'), - # dict(type='TensorboardLoggerHook') - ]) -# yapf:enable -# runtime settings -total_epochs = 12 -dist_params = dict(backend='nccl') -log_level = 'INFO' -work_dir = './work_dirs/rpn_r50_caffe_c4_1x' -load_from = None -resume_from = None -workflow = [('train', 1)] diff --git a/configs/rpn_r50_fpn_1x.py b/configs/rpn_r50_fpn_1x.py deleted file mode 100644 index d0bedd6bc07f0fbc7732adf9ddff44fcfd0fcc9f..0000000000000000000000000000000000000000 --- a/configs/rpn_r50_fpn_1x.py +++ /dev/null @@ -1,131 +0,0 @@ -# model settings -model = dict( - type='RPN', - pretrained='torchvision://resnet50', - backbone=dict( - type='ResNet', - depth=50, - num_stages=4, - out_indices=(0, 1, 2, 3), - frozen_stages=1, - norm_cfg=dict(type='BN', requires_grad=True), - style='pytorch'), - neck=dict( - type='FPN', - in_channels=[256, 512, 1024, 2048], - out_channels=256, - num_outs=5), - rpn_head=dict( - 
type='RPNHead', - in_channels=256, - feat_channels=256, - anchor_scales=[8], - anchor_ratios=[0.5, 1.0, 2.0], - anchor_strides=[4, 8, 16, 32, 64], - target_means=[.0, .0, .0, .0], - target_stds=[1.0, 1.0, 1.0, 1.0], - loss_cls=dict( - type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0), - loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0))) -# model training and testing settings -train_cfg = dict( - rpn=dict( - assigner=dict( - type='MaxIoUAssigner', - pos_iou_thr=0.7, - neg_iou_thr=0.3, - min_pos_iou=0.3, - ignore_iof_thr=-1), - sampler=dict( - type='RandomSampler', - num=256, - pos_fraction=0.5, - neg_pos_ub=-1, - add_gt_as_proposals=False), - allowed_border=0, - pos_weight=-1, - debug=False)) -test_cfg = dict( - rpn=dict( - nms_across_levels=False, - nms_pre=2000, - nms_post=2000, - max_num=2000, - nms_thr=0.7, - min_bbox_size=0)) -# dataset settings -dataset_type = 'CocoDataset' -data_root = 'data/coco/' -img_norm_cfg = dict( - mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) -train_pipeline = [ - dict(type='LoadImageFromFile'), - dict(type='LoadAnnotations', with_bbox=True, with_label=False), - dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), - dict(type='RandomFlip', flip_ratio=0.5), - dict(type='Normalize', **img_norm_cfg), - dict(type='Pad', size_divisor=32), - dict(type='DefaultFormatBundle'), - dict(type='Collect', keys=['img', 'gt_bboxes']), -] -test_pipeline = [ - dict(type='LoadImageFromFile'), - dict( - type='MultiScaleFlipAug', - img_scale=(1333, 800), - flip=False, - transforms=[ - dict(type='Resize', keep_ratio=True), - dict(type='RandomFlip'), - dict(type='Normalize', **img_norm_cfg), - dict(type='Pad', size_divisor=32), - dict(type='ImageToTensor', keys=['img']), - dict(type='Collect', keys=['img']), - ]) -] -data = dict( - imgs_per_gpu=2, - workers_per_gpu=2, - train=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_train2017.json', - img_prefix=data_root + 
'train2017/', - pipeline=train_pipeline), - val=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_val2017.json', - img_prefix=data_root + 'val2017/', - pipeline=test_pipeline), - test=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_val2017.json', - img_prefix=data_root + 'val2017/', - pipeline=test_pipeline)) -evaluation = dict(interval=1, metric='proposal_fast') -# optimizer -optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001) -# runner configs -optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) -lr_config = dict( - policy='step', - warmup='linear', - warmup_iters=500, - warmup_ratio=1.0 / 3, - step=[8, 11]) -checkpoint_config = dict(interval=1) -# yapf:disable -log_config = dict( - interval=50, - hooks=[ - dict(type='TextLoggerHook'), - # dict(type='TensorboardLoggerHook') - ]) -# yapf:enable -# runtime settings -total_epochs = 12 -dist_params = dict(backend='nccl') -log_level = 'INFO' -work_dir = './work_dirs/rpn_r50_fpn_1x' -load_from = None -resume_from = None -workflow = [('train', 1)] diff --git a/configs/rpn_x101_32x4d_fpn_1x.py b/configs/rpn_x101_32x4d_fpn_1x.py deleted file mode 100644 index ab24b69f3fc33f1a466e3846f62ac256c0f3862d..0000000000000000000000000000000000000000 --- a/configs/rpn_x101_32x4d_fpn_1x.py +++ /dev/null @@ -1,133 +0,0 @@ -# model settings -model = dict( - type='RPN', - pretrained='open-mmlab://resnext101_32x4d', - backbone=dict( - type='ResNeXt', - depth=101, - groups=32, - base_width=4, - num_stages=4, - out_indices=(0, 1, 2, 3), - frozen_stages=1, - norm_cfg=dict(type='BN', requires_grad=True), - style='pytorch'), - neck=dict( - type='FPN', - in_channels=[256, 512, 1024, 2048], - out_channels=256, - num_outs=5), - rpn_head=dict( - type='RPNHead', - in_channels=256, - feat_channels=256, - anchor_scales=[8], - anchor_ratios=[0.5, 1.0, 2.0], - anchor_strides=[4, 8, 16, 32, 64], - target_means=[.0, .0, .0, .0], - target_stds=[1.0, 1.0, 1.0, 1.0], - 
loss_cls=dict( - type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0), - loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0))) -# model training and testing settings -train_cfg = dict( - rpn=dict( - assigner=dict( - type='MaxIoUAssigner', - pos_iou_thr=0.7, - neg_iou_thr=0.3, - min_pos_iou=0.3, - ignore_iof_thr=-1), - sampler=dict( - type='RandomSampler', - num=256, - pos_fraction=0.5, - neg_pos_ub=-1, - add_gt_as_proposals=False), - allowed_border=0, - pos_weight=-1, - debug=False)) -test_cfg = dict( - rpn=dict( - nms_across_levels=False, - nms_pre=2000, - nms_post=2000, - max_num=2000, - nms_thr=0.7, - min_bbox_size=0)) -# dataset settings -dataset_type = 'CocoDataset' -data_root = 'data/coco/' -img_norm_cfg = dict( - mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) -train_pipeline = [ - dict(type='LoadImageFromFile'), - dict(type='LoadAnnotations', with_bbox=True, with_label=False), - dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), - dict(type='RandomFlip', flip_ratio=0.5), - dict(type='Normalize', **img_norm_cfg), - dict(type='Pad', size_divisor=32), - dict(type='DefaultFormatBundle'), - dict(type='Collect', keys=['img', 'gt_bboxes']), -] -test_pipeline = [ - dict(type='LoadImageFromFile'), - dict( - type='MultiScaleFlipAug', - img_scale=(1333, 800), - flip=False, - transforms=[ - dict(type='Resize', keep_ratio=True), - dict(type='RandomFlip'), - dict(type='Normalize', **img_norm_cfg), - dict(type='Pad', size_divisor=32), - dict(type='ImageToTensor', keys=['img']), - dict(type='Collect', keys=['img']), - ]) -] -data = dict( - imgs_per_gpu=2, - workers_per_gpu=2, - train=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_train2017.json', - img_prefix=data_root + 'train2017/', - pipeline=train_pipeline), - val=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_val2017.json', - img_prefix=data_root + 'val2017/', - pipeline=test_pipeline), - test=dict( - 
type=dataset_type, - ann_file=data_root + 'annotations/instances_val2017.json', - img_prefix=data_root + 'val2017/', - pipeline=test_pipeline)) -evaluation = dict(interval=1, metric='proposal_fast') -# optimizer -optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001) -# runner configs -optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) -lr_config = dict( - policy='step', - warmup='linear', - warmup_iters=500, - warmup_ratio=1.0 / 3, - step=[8, 11]) -checkpoint_config = dict(interval=1) -# yapf:disable -log_config = dict( - interval=50, - hooks=[ - dict(type='TextLoggerHook'), - # dict(type='TensorboardLoggerHook') - ]) -# yapf:enable -# runtime settings -total_epochs = 12 -dist_params = dict(backend='nccl') -log_level = 'INFO' -work_dir = './work_dirs/rpn_x101_32x4d_fpn_1x' -load_from = None -resume_from = None -workflow = [('train', 1)] diff --git a/configs/rpn_x101_64x4d_fpn_1x.py b/configs/rpn_x101_64x4d_fpn_1x.py deleted file mode 100644 index 98d61531063aff5a8e20cdabb22853e08bb15f57..0000000000000000000000000000000000000000 --- a/configs/rpn_x101_64x4d_fpn_1x.py +++ /dev/null @@ -1,133 +0,0 @@ -# model settings -model = dict( - type='RPN', - pretrained='open-mmlab://resnext101_64x4d', - backbone=dict( - type='ResNeXt', - depth=101, - groups=64, - base_width=4, - num_stages=4, - out_indices=(0, 1, 2, 3), - frozen_stages=1, - norm_cfg=dict(type='BN', requires_grad=True), - style='pytorch'), - neck=dict( - type='FPN', - in_channels=[256, 512, 1024, 2048], - out_channels=256, - num_outs=5), - rpn_head=dict( - type='RPNHead', - in_channels=256, - feat_channels=256, - anchor_scales=[8], - anchor_ratios=[0.5, 1.0, 2.0], - anchor_strides=[4, 8, 16, 32, 64], - target_means=[.0, .0, .0, .0], - target_stds=[1.0, 1.0, 1.0, 1.0], - loss_cls=dict( - type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0), - loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0))) -# model training and testing settings -train_cfg = dict( - 
rpn=dict( - assigner=dict( - type='MaxIoUAssigner', - pos_iou_thr=0.7, - neg_iou_thr=0.3, - min_pos_iou=0.3, - ignore_iof_thr=-1), - sampler=dict( - type='RandomSampler', - num=256, - pos_fraction=0.5, - neg_pos_ub=-1, - add_gt_as_proposals=False), - allowed_border=0, - pos_weight=-1, - debug=False)) -test_cfg = dict( - rpn=dict( - nms_across_levels=False, - nms_pre=2000, - nms_post=2000, - max_num=2000, - nms_thr=0.7, - min_bbox_size=0)) -# dataset settings -dataset_type = 'CocoDataset' -data_root = 'data/coco/' -img_norm_cfg = dict( - mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) -train_pipeline = [ - dict(type='LoadImageFromFile'), - dict(type='LoadAnnotations', with_bbox=True, with_label=False), - dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), - dict(type='RandomFlip', flip_ratio=0.5), - dict(type='Normalize', **img_norm_cfg), - dict(type='Pad', size_divisor=32), - dict(type='DefaultFormatBundle'), - dict(type='Collect', keys=['img', 'gt_bboxes']), -] -test_pipeline = [ - dict(type='LoadImageFromFile'), - dict( - type='MultiScaleFlipAug', - img_scale=(1333, 800), - flip=False, - transforms=[ - dict(type='Resize', keep_ratio=True), - dict(type='RandomFlip'), - dict(type='Normalize', **img_norm_cfg), - dict(type='Pad', size_divisor=32), - dict(type='ImageToTensor', keys=['img']), - dict(type='Collect', keys=['img']), - ]) -] -data = dict( - imgs_per_gpu=2, - workers_per_gpu=2, - train=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_train2017.json', - img_prefix=data_root + 'train2017/', - pipeline=train_pipeline), - val=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_val2017.json', - img_prefix=data_root + 'val2017/', - pipeline=test_pipeline), - test=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_val2017.json', - img_prefix=data_root + 'val2017/', - pipeline=test_pipeline)) -evaluation = dict(interval=1, metric='proposal_fast') -# optimizer 
-optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001) -# runner configs -optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) -lr_config = dict( - policy='step', - warmup='linear', - warmup_iters=500, - warmup_ratio=1.0 / 3, - step=[8, 11]) -checkpoint_config = dict(interval=1) -# yapf:disable -log_config = dict( - interval=50, - hooks=[ - dict(type='TextLoggerHook'), - # dict(type='TensorboardLoggerHook') - ]) -# yapf:enable -# runtime settings -total_epochs = 12 -dist_params = dict(backend='nccl') -log_level = 'INFO' -work_dir = './work_dirs/rpn_x101_64x4d_fpn_1x' -load_from = None -resume_from = None -workflow = [('train', 1)] diff --git a/configs/scratch/faster_rcnn_r50_fpn_gn-all_scratch_6x_coco.py b/configs/scratch/faster_rcnn_r50_fpn_gn-all_scratch_6x_coco.py new file mode 100644 index 0000000000000000000000000000000000000000..e939670ca3625e893d912bda091a75aa06b7db3a --- /dev/null +++ b/configs/scratch/faster_rcnn_r50_fpn_gn-all_scratch_6x_coco.py @@ -0,0 +1,22 @@ +_base_ = [ + '../_base_/models/faster_rcnn_r50_fpn.py', + '../_base_/datasets/coco_detection.py', + '../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py' +] +norm_cfg = dict(type='GN', num_groups=32, requires_grad=True) +model = dict( + pretrained=None, + backbone=dict( + frozen_stages=-1, zero_init_residual=False, norm_cfg=norm_cfg), + neck=dict(norm_cfg=norm_cfg), + bbox_head=dict( + type='Shared4Conv1FCBBoxHead', + conv_out_channels=256, + norm_cfg=norm_cfg)) +# optimizer +optimizer = dict(paramwise_options=dict(norm_decay_mult=0)) +optimizer_config = dict(_delete_=True, grad_clip=None) +# learning policy +lr_config = dict(warmup_ratio=0.1, step=[65, 71]) +total_epochs = 73 +work_dir = './work_dirs/scratch_faster_rcnn_r50_fpn_gn_6x' diff --git a/configs/scratch/mask_rcnn_r50_fpn_gn-all_scratch_6x_coco.py b/configs/scratch/mask_rcnn_r50_fpn_gn-all_scratch_6x_coco.py new file mode 100644 index 
0000000000000000000000000000000000000000..30f1ef02da168155bf8aaf2751935122e90dd9e9 --- /dev/null +++ b/configs/scratch/mask_rcnn_r50_fpn_gn-all_scratch_6x_coco.py @@ -0,0 +1,23 @@ +_base_ = [ + '../_base_/models/mask_rcnn_r50_fpn.py', + '../_base_/datasets/coco_instance.py', + '../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py' +] +norm_cfg = dict(type='GN', num_groups=32, requires_grad=True) +model = dict( + pretrained=None, + backbone=dict( + frozen_stages=-1, zero_init_residual=False, norm_cfg=norm_cfg), + neck=dict(norm_cfg=norm_cfg), + bbox_head=dict( + type='Shared4Conv1FCBBoxHead', + conv_out_channels=256, + norm_cfg=norm_cfg), + mask_head=dict(norm_cfg=norm_cfg)) +# optimizer +optimizer = dict(paramwise_options=dict(norm_decay_mult=0)) +optimizer_config = dict(_delete_=True, grad_clip=None) +# learning policy +lr_config = dict(warmup_ratio=0.1, step=[65, 71]) +total_epochs = 73 +work_dir = './work_dirs/scratch_mask_rcnn_r50_fpn_gn_6x' diff --git a/configs/scratch/scratch_faster_rcnn_r50_fpn_gn_6x.py b/configs/scratch/scratch_faster_rcnn_r50_fpn_gn_6x.py deleted file mode 100644 index 2a5935148d6d0531946bf8d783a0b23717484c99..0000000000000000000000000000000000000000 --- a/configs/scratch/scratch_faster_rcnn_r50_fpn_gn_6x.py +++ /dev/null @@ -1,185 +0,0 @@ -# model settings -norm_cfg = dict(type='GN', num_groups=32, requires_grad=True) -model = dict( - type='FasterRCNN', - pretrained=None, - backbone=dict( - type='ResNet', - depth=50, - num_stages=4, - out_indices=(0, 1, 2, 3), - frozen_stages=-1, - style='pytorch', - zero_init_residual=False, - norm_cfg=norm_cfg), - neck=dict( - type='FPN', - in_channels=[256, 512, 1024, 2048], - out_channels=256, - num_outs=5, - norm_cfg=norm_cfg), - rpn_head=dict( - type='RPNHead', - in_channels=256, - feat_channels=256, - anchor_scales=[8], - anchor_ratios=[0.5, 1.0, 2.0], - anchor_strides=[4, 8, 16, 32, 64], - target_means=[.0, .0, .0, .0], - target_stds=[1.0, 1.0, 1.0, 1.0], - loss_cls=dict( - 
type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0), - loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0)), - bbox_roi_extractor=dict( - type='SingleRoIExtractor', - roi_layer=dict(type='RoIAlign', out_size=7, sample_num=2), - out_channels=256, - featmap_strides=[4, 8, 16, 32]), - bbox_head=dict( - type='ConvFCBBoxHead', - num_shared_convs=4, - num_shared_fcs=1, - in_channels=256, - conv_out_channels=256, - fc_out_channels=1024, - roi_feat_size=7, - num_classes=81, - target_means=[0., 0., 0., 0.], - target_stds=[0.1, 0.1, 0.2, 0.2], - reg_class_agnostic=False, - norm_cfg=norm_cfg, - loss_cls=dict( - type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), - loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0))) -# model training and testing settings -train_cfg = dict( - rpn=dict( - assigner=dict( - type='MaxIoUAssigner', - pos_iou_thr=0.7, - neg_iou_thr=0.3, - min_pos_iou=0.3, - ignore_iof_thr=-1), - sampler=dict( - type='RandomSampler', - num=256, - pos_fraction=0.5, - neg_pos_ub=-1, - add_gt_as_proposals=False), - allowed_border=0, - pos_weight=-1, - debug=False), - rpn_proposal=dict( - nms_across_levels=False, - nms_pre=2000, - nms_post=2000, - max_num=2000, - nms_thr=0.7, - min_bbox_size=0), - rcnn=dict( - assigner=dict( - type='MaxIoUAssigner', - pos_iou_thr=0.5, - neg_iou_thr=0.5, - min_pos_iou=0.5, - ignore_iof_thr=-1), - sampler=dict( - type='RandomSampler', - num=512, - pos_fraction=0.25, - neg_pos_ub=-1, - add_gt_as_proposals=True), - mask_size=28, - pos_weight=-1, - debug=False)) -test_cfg = dict( - rpn=dict( - nms_across_levels=False, - nms_pre=1000, - nms_post=1000, - max_num=1000, - nms_thr=0.7, - min_bbox_size=0), - rcnn=dict( - score_thr=0.05, nms=dict(type='nms', iou_thr=0.5), max_per_img=100)) -# dataset settings -dataset_type = 'CocoDataset' -data_root = 'data/coco/' -img_norm_cfg = dict( - mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) -train_pipeline = [ - 
dict(type='LoadImageFromFile'), - dict(type='LoadAnnotations', with_bbox=True), - dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), - dict(type='RandomFlip', flip_ratio=0.5), - dict(type='Normalize', **img_norm_cfg), - dict(type='Pad', size_divisor=32), - dict(type='DefaultFormatBundle'), - dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']), -] -test_pipeline = [ - dict(type='LoadImageFromFile'), - dict( - type='MultiScaleFlipAug', - img_scale=(1333, 800), - flip=False, - transforms=[ - dict(type='Resize', keep_ratio=True), - dict(type='RandomFlip'), - dict(type='Normalize', **img_norm_cfg), - dict(type='Pad', size_divisor=32), - dict(type='ImageToTensor', keys=['img']), - dict(type='Collect', keys=['img']), - ]) -] -data = dict( - imgs_per_gpu=2, - workers_per_gpu=2, - train=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_train2017.json', - img_prefix=data_root + 'train2017/', - pipeline=train_pipeline), - val=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_val2017.json', - img_prefix=data_root + 'val2017/', - pipeline=test_pipeline), - test=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_val2017.json', - img_prefix=data_root + 'val2017/', - pipeline=test_pipeline)) -evaluation = dict(interval=1, metric='bbox') -# optimizer -optimizer = dict( - type='SGD', - lr=0.02, - momentum=0.9, - weight_decay=0.0001, - paramwise_options=dict(norm_decay_mult=0)) -optimizer_config = dict(grad_clip=None) -# learning policy -lr_config = dict( - policy='step', - warmup='linear', - warmup_iters=500, - warmup_ratio=0.1, - step=[65, 71]) -checkpoint_config = dict(interval=1) -# yapf:disable -log_config = dict( - interval=50, - hooks=[ - dict(type='TextLoggerHook'), - # dict(type='TensorboardLoggerHook') - ]) -# yapf:enable -# runtime settings -total_epochs = 73 -dist_params = dict(backend='nccl') -log_level = 'INFO' -work_dir = './work_dirs/scratch_faster_rcnn_r50_fpn_gn_6x' -load_from = 
None -resume_from = None -workflow = [('train', 1)] diff --git a/configs/scratch/scratch_mask_rcnn_r50_fpn_gn_6x.py b/configs/scratch/scratch_mask_rcnn_r50_fpn_gn_6x.py deleted file mode 100644 index 1f389cd58fd05fe35e83c344b63924bfb8014a85..0000000000000000000000000000000000000000 --- a/configs/scratch/scratch_mask_rcnn_r50_fpn_gn_6x.py +++ /dev/null @@ -1,202 +0,0 @@ -# model settings -norm_cfg = dict(type='GN', num_groups=32, requires_grad=True) -model = dict( - type='MaskRCNN', - pretrained=None, - backbone=dict( - type='ResNet', - depth=50, - num_stages=4, - out_indices=(0, 1, 2, 3), - frozen_stages=-1, - style='pytorch', - zero_init_residual=False, - norm_cfg=norm_cfg), - neck=dict( - type='FPN', - in_channels=[256, 512, 1024, 2048], - out_channels=256, - num_outs=5, - norm_cfg=norm_cfg), - rpn_head=dict( - type='RPNHead', - in_channels=256, - feat_channels=256, - anchor_scales=[8], - anchor_ratios=[0.5, 1.0, 2.0], - anchor_strides=[4, 8, 16, 32, 64], - target_means=[.0, .0, .0, .0], - target_stds=[1.0, 1.0, 1.0, 1.0], - loss_cls=dict( - type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0), - loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0)), - bbox_roi_extractor=dict( - type='SingleRoIExtractor', - roi_layer=dict(type='RoIAlign', out_size=7, sample_num=2), - out_channels=256, - featmap_strides=[4, 8, 16, 32]), - bbox_head=dict( - type='ConvFCBBoxHead', - num_shared_convs=4, - num_shared_fcs=1, - in_channels=256, - conv_out_channels=256, - fc_out_channels=1024, - roi_feat_size=7, - num_classes=81, - target_means=[0., 0., 0., 0.], - target_stds=[0.1, 0.1, 0.2, 0.2], - reg_class_agnostic=False, - norm_cfg=norm_cfg, - loss_cls=dict( - type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), - loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0)), - mask_roi_extractor=dict( - type='SingleRoIExtractor', - roi_layer=dict(type='RoIAlign', out_size=14, sample_num=2), - out_channels=256, - featmap_strides=[4, 8, 16, 32]), - 
mask_head=dict( - type='FCNMaskHead', - num_convs=4, - in_channels=256, - conv_out_channels=256, - num_classes=81, - norm_cfg=norm_cfg, - loss_mask=dict( - type='CrossEntropyLoss', use_mask=True, loss_weight=1.0))) -# model training and testing settings -train_cfg = dict( - rpn=dict( - assigner=dict( - type='MaxIoUAssigner', - pos_iou_thr=0.7, - neg_iou_thr=0.3, - min_pos_iou=0.3, - ignore_iof_thr=-1), - sampler=dict( - type='RandomSampler', - num=256, - pos_fraction=0.5, - neg_pos_ub=-1, - add_gt_as_proposals=False), - allowed_border=0, - pos_weight=-1, - debug=False), - rpn_proposal=dict( - nms_across_levels=False, - nms_pre=2000, - nms_post=2000, - max_num=2000, - nms_thr=0.7, - min_bbox_size=0), - rcnn=dict( - assigner=dict( - type='MaxIoUAssigner', - pos_iou_thr=0.5, - neg_iou_thr=0.5, - min_pos_iou=0.5, - ignore_iof_thr=-1), - sampler=dict( - type='RandomSampler', - num=512, - pos_fraction=0.25, - neg_pos_ub=-1, - add_gt_as_proposals=True), - mask_size=28, - pos_weight=-1, - debug=False)) -test_cfg = dict( - rpn=dict( - nms_across_levels=False, - nms_pre=1000, - nms_post=1000, - max_num=1000, - nms_thr=0.7, - min_bbox_size=0), - rcnn=dict( - score_thr=0.05, - nms=dict(type='nms', iou_thr=0.5), - max_per_img=100, - mask_thr_binary=0.5)) -# dataset settings -dataset_type = 'CocoDataset' -data_root = 'data/coco/' -img_norm_cfg = dict( - mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) -train_pipeline = [ - dict(type='LoadImageFromFile'), - dict(type='LoadAnnotations', with_bbox=True, with_mask=True), - dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), - dict(type='RandomFlip', flip_ratio=0.5), - dict(type='Normalize', **img_norm_cfg), - dict(type='Pad', size_divisor=32), - dict(type='DefaultFormatBundle'), - dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']), -] -test_pipeline = [ - dict(type='LoadImageFromFile'), - dict( - type='MultiScaleFlipAug', - img_scale=(1333, 800), - flip=False, - transforms=[ - 
dict(type='Resize', keep_ratio=True), - dict(type='RandomFlip'), - dict(type='Normalize', **img_norm_cfg), - dict(type='Pad', size_divisor=32), - dict(type='ImageToTensor', keys=['img']), - dict(type='Collect', keys=['img']), - ]) -] -data = dict( - imgs_per_gpu=2, - workers_per_gpu=2, - train=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_train2017.json', - img_prefix=data_root + 'train2017/', - pipeline=train_pipeline), - val=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_val2017.json', - img_prefix=data_root + 'val2017/', - pipeline=test_pipeline), - test=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_val2017.json', - img_prefix=data_root + 'val2017/', - pipeline=test_pipeline)) -evaluation = dict(interval=1, metric=['bbox', 'segm']) -# optimizer -optimizer = dict( - type='SGD', - lr=0.02, - momentum=0.9, - weight_decay=0.0001, - paramwise_options=dict(norm_decay_mult=0)) -optimizer_config = dict(grad_clip=None) -# learning policy -lr_config = dict( - policy='step', - warmup='linear', - warmup_iters=500, - warmup_ratio=0.1, - step=[65, 71]) -checkpoint_config = dict(interval=1) -# yapf:disable -log_config = dict( - interval=50, - hooks=[ - dict(type='TextLoggerHook'), - # dict(type='TensorboardLoggerHook') - ]) -# yapf:enable -# runtime settings -total_epochs = 73 -dist_params = dict(backend='nccl') -log_level = 'INFO' -work_dir = './work_dirs/scratch_mask_rcnn_r50_fpn_gn_6x' -load_from = None -resume_from = None -workflow = [('train', 1)] diff --git a/configs/ssd/ssd300_coco.py b/configs/ssd/ssd300_coco.py new file mode 100644 index 0000000000000000000000000000000000000000..3b367d3c02c5fb50b246d8ade4357e15f8120391 --- /dev/null +++ b/configs/ssd/ssd300_coco.py @@ -0,0 +1,63 @@ +_base_ = [ + '../_base_/models/ssd300.py', '../_base_/datasets/coco_detection.py', + '../_base_/schedules/schedule_2x.py', '../_base_/default_runtime.py' +] +# dataset settings +dataset_type = 'CocoDataset' 
+data_root = 'data/coco/' +img_norm_cfg = dict(mean=[123.675, 116.28, 103.53], std=[1, 1, 1], to_rgb=True) +train_pipeline = [ + dict(type='LoadImageFromFile', to_float32=True), + dict(type='LoadAnnotations', with_bbox=True), + dict( + type='PhotoMetricDistortion', + brightness_delta=32, + contrast_range=(0.5, 1.5), + saturation_range=(0.5, 1.5), + hue_delta=18), + dict( + type='Expand', + mean=img_norm_cfg['mean'], + to_rgb=img_norm_cfg['to_rgb'], + ratio_range=(1, 4)), + dict( + type='MinIoURandomCrop', + min_ious=(0.1, 0.3, 0.5, 0.7, 0.9), + min_crop_size=0.3), + dict(type='Resize', img_scale=(300, 300), keep_ratio=False), + dict(type='Normalize', **img_norm_cfg), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(300, 300), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=False), + dict(type='Normalize', **img_norm_cfg), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] +data = dict( + imgs_per_gpu=8, + workers_per_gpu=3, + train=dict( + _delete_=True, + type='RepeatDataset', + times=5, + dataset=dict( + type=dataset_type, + ann_file=data_root + 'annotations/instances_train2017.json', + img_prefix=data_root + 'train2017/', + pipeline=train_pipeline)), + val=dict(pipeline=test_pipeline), + test=dict(pipeline=test_pipeline)) +# optimizer +optimizer = dict(type='SGD', lr=2e-3, momentum=0.9, weight_decay=5e-4) +optimizer_config = dict(_delete_=True) +work_dir = './work_dirs/ssd300_coco' diff --git a/configs/ssd512_coco.py b/configs/ssd/ssd512_coco.py similarity index 52% rename from configs/ssd512_coco.py rename to configs/ssd/ssd512_coco.py index e6534c516168fe7e9d91ff7f60436b2e283f84cc..13ea14f3f5576db5f9aea3a28ee7654fd6606de8 100644 --- a/configs/ssd512_coco.py +++ b/configs/ssd/ssd512_coco.py @@ -1,48 +1,13 @@ -# 
model settings +_base_ = 'ssd300_coco.py' input_size = 512 model = dict( - type='SingleStageDetector', - pretrained='open-mmlab://vgg16_caffe', - backbone=dict( - type='SSDVGG', - input_size=input_size, - depth=16, - with_last_pool=False, - ceil_mode=True, - out_indices=(3, 4), - out_feature_indices=(22, 34), - l2_norm_scale=20), - neck=None, + backbone=dict(input_size=input_size), bbox_head=dict( - type='SSDHead', input_size=input_size, in_channels=(512, 1024, 512, 256, 256, 256, 256), - num_classes=81, anchor_strides=(8, 16, 32, 64, 128, 256, 512), basesize_ratio_range=(0.1, 0.9), - anchor_ratios=([2], [2, 3], [2, 3], [2, 3], [2, 3], [2], [2]), - target_means=(.0, .0, .0, .0), - target_stds=(0.1, 0.1, 0.2, 0.2))) -cudnn_benchmark = True -train_cfg = dict( - assigner=dict( - type='MaxIoUAssigner', - pos_iou_thr=0.5, - neg_iou_thr=0.5, - min_pos_iou=0., - ignore_iof_thr=-1, - gt_max_assign_all=False), - smoothl1_beta=1., - allowed_border=-1, - pos_weight=-1, - neg_pos_ratio=3, - debug=False) -test_cfg = dict( - nms=dict(type='nms', iou_thr=0.45), - min_bbox_size=0, - score_thr=0.02, - max_per_img=200) -# model training and testing settings + anchor_ratios=([2], [2, 3], [2, 3], [2, 3], [2, 3], [2], [2]))) # dataset settings dataset_type = 'CocoDataset' data_root = 'data/coco/' @@ -88,6 +53,7 @@ data = dict( imgs_per_gpu=8, workers_per_gpu=3, train=dict( + _delete_=True, type='RepeatDataset', times=5, dataset=dict( @@ -95,41 +61,9 @@ data = dict( ann_file=data_root + 'annotations/instances_train2017.json', img_prefix=data_root + 'train2017/', pipeline=train_pipeline)), - val=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_val2017.json', - img_prefix=data_root + 'val2017/', - pipeline=test_pipeline), - test=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_val2017.json', - img_prefix=data_root + 'val2017/', - pipeline=test_pipeline)) -evaluation = dict(interval=1, metric='bbox') + val=dict(pipeline=test_pipeline), + 
test=dict(pipeline=test_pipeline)) # optimizer optimizer = dict(type='SGD', lr=2e-3, momentum=0.9, weight_decay=5e-4) -optimizer_config = dict() -# learning policy -lr_config = dict( - policy='step', - warmup='linear', - warmup_iters=500, - warmup_ratio=1.0 / 3, - step=[16, 22]) -checkpoint_config = dict(interval=1) -# yapf:disable -log_config = dict( - interval=50, - hooks=[ - dict(type='TextLoggerHook'), - # dict(type='TensorboardLoggerHook') - ]) -# yapf:enable -# runtime settings -total_epochs = 24 -dist_params = dict(backend='nccl') -log_level = 'INFO' +optimizer_config = dict(_delete_=True) work_dir = './work_dirs/ssd512_coco' -load_from = None -resume_from = None -workflow = [('train', 1)] diff --git a/configs/ssd300_coco.py b/configs/ssd300_coco.py deleted file mode 100644 index 35219fac316e419149f78c6a0be27d34600e951a..0000000000000000000000000000000000000000 --- a/configs/ssd300_coco.py +++ /dev/null @@ -1,135 +0,0 @@ -# model settings -input_size = 300 -model = dict( - type='SingleStageDetector', - pretrained='open-mmlab://vgg16_caffe', - backbone=dict( - type='SSDVGG', - input_size=input_size, - depth=16, - with_last_pool=False, - ceil_mode=True, - out_indices=(3, 4), - out_feature_indices=(22, 34), - l2_norm_scale=20), - neck=None, - bbox_head=dict( - type='SSDHead', - input_size=input_size, - in_channels=(512, 1024, 512, 256, 256, 256), - num_classes=81, - anchor_strides=(8, 16, 32, 64, 100, 300), - basesize_ratio_range=(0.15, 0.9), - anchor_ratios=([2], [2, 3], [2, 3], [2, 3], [2], [2]), - target_means=(.0, .0, .0, .0), - target_stds=(0.1, 0.1, 0.2, 0.2))) -cudnn_benchmark = True -train_cfg = dict( - assigner=dict( - type='MaxIoUAssigner', - pos_iou_thr=0.5, - neg_iou_thr=0.5, - min_pos_iou=0., - ignore_iof_thr=-1, - gt_max_assign_all=False), - smoothl1_beta=1., - allowed_border=-1, - pos_weight=-1, - neg_pos_ratio=3, - debug=False) -test_cfg = dict( - nms=dict(type='nms', iou_thr=0.45), - min_bbox_size=0, - score_thr=0.02, - max_per_img=200) -# 
model training and testing settings -# dataset settings -dataset_type = 'CocoDataset' -data_root = 'data/coco/' -img_norm_cfg = dict(mean=[123.675, 116.28, 103.53], std=[1, 1, 1], to_rgb=True) -train_pipeline = [ - dict(type='LoadImageFromFile', to_float32=True), - dict(type='LoadAnnotations', with_bbox=True), - dict( - type='PhotoMetricDistortion', - brightness_delta=32, - contrast_range=(0.5, 1.5), - saturation_range=(0.5, 1.5), - hue_delta=18), - dict( - type='Expand', - mean=img_norm_cfg['mean'], - to_rgb=img_norm_cfg['to_rgb'], - ratio_range=(1, 4)), - dict( - type='MinIoURandomCrop', - min_ious=(0.1, 0.3, 0.5, 0.7, 0.9), - min_crop_size=0.3), - dict(type='Resize', img_scale=(300, 300), keep_ratio=False), - dict(type='Normalize', **img_norm_cfg), - dict(type='RandomFlip', flip_ratio=0.5), - dict(type='DefaultFormatBundle'), - dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']), -] -test_pipeline = [ - dict(type='LoadImageFromFile'), - dict( - type='MultiScaleFlipAug', - img_scale=(300, 300), - flip=False, - transforms=[ - dict(type='Resize', keep_ratio=False), - dict(type='Normalize', **img_norm_cfg), - dict(type='ImageToTensor', keys=['img']), - dict(type='Collect', keys=['img']), - ]) -] -data = dict( - imgs_per_gpu=8, - workers_per_gpu=3, - train=dict( - type='RepeatDataset', - times=5, - dataset=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_train2017.json', - img_prefix=data_root + 'train2017/', - pipeline=train_pipeline)), - val=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_val2017.json', - img_prefix=data_root + 'val2017/', - pipeline=test_pipeline), - test=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_val2017.json', - img_prefix=data_root + 'val2017/', - pipeline=test_pipeline)) -evaluation = dict(interval=1, metric='bbox') -# optimizer -optimizer = dict(type='SGD', lr=2e-3, momentum=0.9, weight_decay=5e-4) -optimizer_config = dict() -# learning policy -lr_config = 
dict( - policy='step', - warmup='linear', - warmup_iters=500, - warmup_ratio=1.0 / 3, - step=[16, 22]) -checkpoint_config = dict(interval=1) -# yapf:disable -log_config = dict( - interval=50, - hooks=[ - dict(type='TextLoggerHook'), - # dict(type='TensorboardLoggerHook') - ]) -# yapf:enable -# runtime settings -total_epochs = 24 -dist_params = dict(backend='nccl') -log_level = 'INFO' -work_dir = './work_dirs/ssd300_coco' -load_from = None -resume_from = None -workflow = [('train', 1)] diff --git a/configs/wider_face/ssd300_wider_face.py b/configs/wider_face/ssd300_wider_face.py index 9c0ce79d0bc4213f551179094274cc83abb19767..3f65ec1adf64276a8333a92db32dd4ee4809c5df 100644 --- a/configs/wider_face/ssd300_wider_face.py +++ b/configs/wider_face/ssd300_wider_face.py @@ -1,111 +1,8 @@ -# model settings -input_size = 300 -model = dict( - type='SingleStageDetector', - pretrained='open-mmlab://vgg16_caffe', - backbone=dict( - type='SSDVGG', - input_size=input_size, - depth=16, - with_last_pool=False, - ceil_mode=True, - out_indices=(3, 4), - out_feature_indices=(22, 34), - l2_norm_scale=20), - neck=None, - bbox_head=dict( - type='SSDHead', - input_size=input_size, - in_channels=(512, 1024, 512, 256, 256, 256), - num_classes=2, - anchor_strides=(8, 16, 32, 64, 100, 300), - basesize_ratio_range=(0.15, 0.9), - anchor_ratios=([2], [2, 3], [2, 3], [2, 3], [2], [2]), - target_means=(.0, .0, .0, .0), - target_stds=(0.1, 0.1, 0.2, 0.2))) -# model training and testing settings -cudnn_benchmark = True -train_cfg = dict( - assigner=dict( - type='MaxIoUAssigner', - pos_iou_thr=0.5, - neg_iou_thr=0.5, - min_pos_iou=0., - ignore_iof_thr=-1, - gt_max_assign_all=False), - smoothl1_beta=1., - allowed_border=-1, - pos_weight=-1, - neg_pos_ratio=3, - debug=False) -test_cfg = dict( - nms=dict(type='nms', iou_thr=0.45), - min_bbox_size=0, - score_thr=0.02, - max_per_img=200) -# dataset settings -dataset_type = 'WIDERFaceDataset' -data_root = 'data/WIDERFace/' -img_norm_cfg = 
dict(mean=[123.675, 116.28, 103.53], std=[1, 1, 1], to_rgb=True) -train_pipeline = [ - dict(type='LoadImageFromFile', to_float32=True), - dict(type='LoadAnnotations', with_bbox=True), - dict( - type='PhotoMetricDistortion', - brightness_delta=32, - contrast_range=(0.5, 1.5), - saturation_range=(0.5, 1.5), - hue_delta=18), - dict( - type='Expand', - mean=img_norm_cfg['mean'], - to_rgb=img_norm_cfg['to_rgb'], - ratio_range=(1, 4)), - dict( - type='MinIoURandomCrop', - min_ious=(0.1, 0.3, 0.5, 0.7, 0.9), - min_crop_size=0.3), - dict(type='Resize', img_scale=(300, 300), keep_ratio=False), - dict(type='Normalize', **img_norm_cfg), - dict(type='RandomFlip', flip_ratio=0.5), - dict(type='DefaultFormatBundle'), - dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']), +_base_ = [ + '../_base_/models/ssd300.py', '../_base_/datasets/wider_face.py', + '../_base_/default_runtime.py' ] -test_pipeline = [ - dict(type='LoadImageFromFile'), - dict( - type='MultiScaleFlipAug', - img_scale=(300, 300), - flip=False, - transforms=[ - dict(type='Resize', keep_ratio=False), - dict(type='Normalize', **img_norm_cfg), - dict(type='ImageToTensor', keys=['img']), - dict(type='Collect', keys=['img']), - ]) -] -data = dict( - imgs_per_gpu=60, - workers_per_gpu=2, - train=dict( - type='RepeatDataset', - times=2, - dataset=dict( - type=dataset_type, - ann_file=data_root + 'train.txt', - img_prefix=data_root + 'WIDER_train/', - min_size=17, - pipeline=train_pipeline)), - val=dict( - type=dataset_type, - ann_file=data_root + 'val.txt', - img_prefix=data_root + 'WIDER_val/', - pipeline=test_pipeline), - test=dict( - type=dataset_type, - ann_file=data_root + 'val.txt', - img_prefix=data_root + 'WIDER_val/', - pipeline=test_pipeline)) +model = dict(bbox_head=dict(num_classes=2)) # optimizer optimizer = dict(type='SGD', lr=0.012, momentum=0.9, weight_decay=5e-4) optimizer_config = dict() @@ -116,20 +13,7 @@ lr_config = dict( warmup_iters=1000, warmup_ratio=1.0 / 3, step=[16, 20]) 
-checkpoint_config = dict(interval=1) -# yapf:disable -log_config = dict( - interval=1, - hooks=[ - dict(type='TextLoggerHook'), - # dict(type='TensorboardLoggerHook') - ]) -# yapf:enable # runtime settings total_epochs = 24 -dist_params = dict(backend='nccl') -log_level = 'INFO' +log_config = dict(interval=1) work_dir = './work_dirs/ssd300_wider' -load_from = None -resume_from = None -workflow = [('train', 1)] diff --git a/demo/inference_demo.ipynb b/demo/inference_demo.ipynb index ffe4765de5ffb037fcba89c20275c35fc08ffc77..fa6d513c5c0c4cc040b9639a96cb9e2ba89c1d51 100644 --- a/demo/inference_demo.ipynb +++ b/demo/inference_demo.ipynb @@ -16,7 +16,7 @@ "metadata": {}, "outputs": [], "source": [ - "config_file = '../configs/faster_rcnn_r50_fpn_1x.py'\n", + "config_file = '../configs/faster_rcnn_r50_fpn_1x_coco.py'\n", "# download the checkpoint from model zoo and put it in `checkpoints/`\n", "checkpoint_file = '../checkpoints/faster_rcnn_r50_fpn_1x_20181010-3d1b3351.pth'" ] @@ -85,8 +85,17 @@ "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.7.3" + }, + "pycharm": { + "stem_cell": { + "cell_type": "raw", + "source": [], + "metadata": { + "collapsed": false + } + } } }, "nbformat": 4, "nbformat_minor": 2 -} +} \ No newline at end of file diff --git a/docs/GETTING_STARTED.md b/docs/GETTING_STARTED.md index 38f677c6597c1d009cb38a1c085f9f332338409a..55cd16595d4473448a6250ddd77d92ffe95dc12c 100644 --- a/docs/GETTING_STARTED.md +++ b/docs/GETTING_STARTED.md @@ -38,7 +38,7 @@ Assume that you have already downloaded the checkpoints to the directory `checkp 1. Test Faster R-CNN and visualize the results. Press any key for the next image. ```shell -python tools/test.py configs/faster_rcnn_r50_fpn_1x.py \ +python tools/test.py configs/faster_rcnn_r50_fpn_1x_coco.py \ checkpoints/faster_rcnn_r50_fpn_1x_20181010-3d1b3351.pth \ --show ``` @@ -54,7 +54,7 @@ python tools/test.py configs/pascal_voc/faster_rcnn_r50_fpn_1x_voc.py \ 3. 
Test Mask R-CNN with 8 GPUs, and evaluate the bbox and mask AP. ```shell -./tools/dist_test.sh configs/mask_rcnn_r50_fpn_1x.py \ +./tools/dist_test.sh configs/mask_rcnn_r50_fpn_1x_coco.py \ checkpoints/mask_rcnn_r50_fpn_1x_20181010-069fa190.pth \ 8 --out results.pkl --eval bbox segm ``` @@ -62,7 +62,7 @@ python tools/test.py configs/pascal_voc/faster_rcnn_r50_fpn_1x_voc.py \ 4. Test Mask R-CNN on COCO test-dev with 8 GPUs, and generate the json file to be submit to the official evaluation server. ```shell -./tools/dist_test.sh configs/mask_rcnn_r50_fpn_1x.py \ +./tools/dist_test.sh configs/mask_rcnn_r50_fpn_1x_coco.py \ checkpoints/mask_rcnn_r50_fpn_1x_20181010-069fa190.pth \ 8 --format_only --options "jsonfile_prefix=./mask_rcnn_test-dev_results" ``` @@ -90,7 +90,7 @@ python demo/webcam_demo.py ${CONFIG_FILE} ${CHECKPOINT_FILE} [--device ${GPU_ID} Examples: ```shell -python demo/webcam_demo.py configs/faster_rcnn_r50_fpn_1x.py \ +python demo/webcam_demo.py configs/faster_rcnn_r50_fpn_1x_coco.py \ checkpoints/faster_rcnn_r50_fpn_1x_20181010-3d1b3351.pth ``` @@ -103,7 +103,7 @@ Here is an example of building the model and test given images. 
from mmdet.apis import init_detector, inference_detector, show_result import mmcv -config_file = 'configs/faster_rcnn_r50_fpn_1x.py' +config_file = 'configs/faster_rcnn_r50_fpn_1x_coco.py' checkpoint_file = 'checkpoints/faster_rcnn_r50_fpn_1x_20181010-3d1b3351.pth' # build the model from a config file and a checkpoint file @@ -139,7 +139,7 @@ from mmdet.apis import init_detector, async_inference_detector, show_result from mmdet.utils.contextmanagers import concurrent async def main(): - config_file = 'configs/faster_rcnn_r50_fpn_1x.py' + config_file = 'configs/faster_rcnn_r50_fpn_1x_coco.py' checkpoint_file = 'checkpoints/faster_rcnn_r50_fpn_1x_20181010-3d1b3351.pth' device = 'cuda:0' model = init_detector(config_file, checkpoint=checkpoint_file, device=device) @@ -201,7 +201,7 @@ If you want to specify the working directory in the command, you can add an argu Optional arguments are: -- `--validate` (**strongly recommended**): Perform evaluation at every k (default value is 1, which can be modified like [this](https://github.com/open-mmlab/mmdetection/blob/master/configs/mask_rcnn_r50_fpn_1x.py#L174)) epochs during the training. +- `--validate` (**strongly recommended**): Perform evaluation at every k (default value is 1, which can be modified like [this](https://github.com/open-mmlab/mmdetection/blob/master/configs/mask_rcnn_r50_fpn_1x_coco.py#L174)) epochs during the training. - `--work_dir ${WORK_DIR}`: Override the working directory specified in the config file. - `--resume_from ${CHECKPOINT_FILE}`: Resume from a previous checkpoint file. @@ -220,7 +220,7 @@ If you run MMDetection on a cluster managed with [slurm](https://slurm.schedmd.c Here is an example of using 16 GPUs to train Mask R-CNN on the dev partition. 
```shell -./tools/slurm_train.sh dev mask_r50_1x configs/mask_rcnn_r50_fpn_1x.py /nfs/xxxx/mask_rcnn_r50_fpn_1x 16 +./tools/slurm_train.sh dev mask_r50_1x configs/mask_rcnn_r50_fpn_1x_coco.py /nfs/xxxx/mask_rcnn_r50_fpn_1x 16 ``` You can check [slurm_train.sh](https://github.com/open-mmlab/mmdetection/blob/master/tools/slurm_train.sh) for full arguments and environment variables. diff --git a/mmdet/models/bbox_heads/__init__.py b/mmdet/models/bbox_heads/__init__.py index a668bdb0182361b5213adb877a21246e23ab4ede..708f8ae643831d4e06a41f68aa63aec84b018b44 100644 --- a/mmdet/models/bbox_heads/__init__.py +++ b/mmdet/models/bbox_heads/__init__.py @@ -1,7 +1,9 @@ from .bbox_head import BBoxHead -from .convfc_bbox_head import ConvFCBBoxHead, SharedFCBBoxHead +from .convfc_bbox_head import (ConvFCBBoxHead, Shared2FCBBoxHead, + Shared4Conv1FCBBoxHead) from .double_bbox_head import DoubleConvFCBBoxHead __all__ = [ - 'BBoxHead', 'ConvFCBBoxHead', 'SharedFCBBoxHead', 'DoubleConvFCBBoxHead' + 'BBoxHead', 'ConvFCBBoxHead', 'Shared2FCBBoxHead', + 'Shared4Conv1FCBBoxHead', 'DoubleConvFCBBoxHead' ] diff --git a/mmdet/models/bbox_heads/convfc_bbox_head.py b/mmdet/models/bbox_heads/convfc_bbox_head.py index c43707da09e49ad6903712d0cb9a912055f8d96f..a30765e25b6eca922c40ca7f0c88662b97946d89 100644 --- a/mmdet/models/bbox_heads/convfc_bbox_head.py +++ b/mmdet/models/bbox_heads/convfc_bbox_head.py @@ -172,13 +172,28 @@ class ConvFCBBoxHead(BBoxHead): @HEADS.register_module -class SharedFCBBoxHead(ConvFCBBoxHead): +class Shared2FCBBoxHead(ConvFCBBoxHead): - def __init__(self, num_fcs=2, fc_out_channels=1024, *args, **kwargs): - assert num_fcs >= 1 - super(SharedFCBBoxHead, self).__init__( + def __init__(self, fc_out_channels=1024, *args, **kwargs): + super(Shared2FCBBoxHead, self).__init__( num_shared_convs=0, - num_shared_fcs=num_fcs, + num_shared_fcs=2, + num_cls_convs=0, + num_cls_fcs=0, + num_reg_convs=0, + num_reg_fcs=0, + fc_out_channels=fc_out_channels, + *args, + **kwargs) + + 
+@HEADS.register_module +class Shared4Conv1FCBBoxHead(ConvFCBBoxHead): + + def __init__(self, fc_out_channels=1024, *args, **kwargs): + super(Shared4Conv1FCBBoxHead, self).__init__( + num_shared_convs=4, + num_shared_fcs=1, num_cls_convs=0, num_cls_fcs=0, num_reg_convs=0, diff --git a/tests/async_benchmark.py b/tests/async_benchmark.py index b4d885e203f89c67cfb8308f68b19feef177a535..d63c56bf18f17a6f7fc4fc310ec961385b7efbde 100644 --- a/tests/async_benchmark.py +++ b/tests/async_benchmark.py @@ -31,7 +31,8 @@ async def main(): """ project_dir = os.path.abspath(os.path.dirname(os.path.dirname(__file__))) - config_file = os.path.join(project_dir, 'configs/mask_rcnn_r50_fpn_1x.py') + config_file = os.path.join(project_dir, + 'configs/mask_rcnn_r50_fpn_1x_coco.py') checkpoint_file = os.path.join( project_dir, 'checkpoints/mask_rcnn_r50_fpn_1x_20181010-069fa190.pth') diff --git a/tests/test_async.py b/tests/test_async.py index 2733c7f5ef747dd03144aaac5d018256403dc60d..f14d03c5db0f6680baa320306f3b2e8ba237dd82 100644 --- a/tests/test_async.py +++ b/tests/test_async.py @@ -69,8 +69,8 @@ class AsyncInferenceTestCase(AsyncTestCase): pytest.skip('test requires GPU and torch+cuda') root_dir = os.path.dirname(os.path.dirname(__name__)) - model_config = os.path.join(root_dir, - 'configs/mask_rcnn_r50_fpn_1x.py') + model_config = os.path.join( + root_dir, 'configs/mask_rcnn/mask_rcnn_r50_fpn_1x_coco.py') detector = MaskRCNNDetector(model_config) await detector.init() img_path = os.path.join(root_dir, 'demo/demo.jpg') diff --git a/tests/test_config.py b/tests/test_config.py index 599a1cf59a5dcddd7fe8250c743fd24604f9ce25..968b0ed3b03f91b59b1a1d60ff63bd0225c53df6 100644 --- a/tests/test_config.py +++ b/tests/test_config.py @@ -20,7 +20,7 @@ def test_config_build_detector(): """ Test that all detection models defined in the configs can be initialized. 
""" - from xdoctest.utils import import_module_from_path + from mmcv import Config from mmdet.models import build_detector config_dpath = _get_config_directory() @@ -33,128 +33,130 @@ def test_config_build_detector(): # Only tests a representative subset of configurations config_names = [ - # 'dcn/faster_rcnn_dconv_c3-c5_r50_fpn_1x.py', - # 'dcn/cascade_mask_rcnn_dconv_c3-c5_r50_fpn_1x.py', - # 'dcn/faster_rcnn_dpool_r50_fpn_1x.py', - 'dcn/mask_rcnn_dconv_c3-c5_r50_fpn_1x.py', - # 'dcn/faster_rcnn_dconv_c3-c5_x101_32x4d_fpn_1x.py', - # 'dcn/cascade_rcnn_dconv_c3-c5_r50_fpn_1x.py', - # 'dcn/faster_rcnn_mdpool_r50_fpn_1x.py', - # 'dcn/faster_rcnn_mdconv_c3-c5_group4_r50_fpn_1x.py', - # 'dcn/faster_rcnn_mdconv_c3-c5_r50_fpn_1x.py', + # 'dcn/faster_rcnn_dconv_c3-c5_r50_fpn_1x_coco.py', + # 'dcn/cascade_mask_rcnn_dconv_c3-c5_r50_fpn_1x_coco.py', + # 'dcn/faster_rcnn_dpool_r50_fpn_1x_coco.py', + 'dcn/mask_rcnn_r50_fpn_dconv_c3-c5_1x_coco.py', + # 'dcn/faster_rcnn_dconv_c3-c5_x101_32x4d_fpn_1x_coco.py', + # 'dcn/cascade_rcnn_dconv_c3-c5_r50_fpn_1x_coco.py', + # 'dcn/faster_rcnn_mdpool_r50_fpn_1x_coco.py', + # 'dcn/faster_rcnn_mdconv_c3-c5_group4_r50_fpn_1x_coco.py', + # 'dcn/faster_rcnn_mdconv_c3-c5_r50_fpn_1x_coco.py', # --- - # 'htc/htc_x101_32x4d_fpn_20e_16gpu.py', - 'htc/htc_without_semantic_r50_fpn_1x.py', - # 'htc/htc_dconv_c3-c5_mstrain_400_1400_x101_64x4d_fpn_20e.py', - # 'htc/htc_x101_64x4d_fpn_20e_16gpu.py', - # 'htc/htc_r50_fpn_1x.py', + # 'htc/htc_x101_32x4d_fpn_20e_16gpu_coco.py', + 'htc/htc_without_semantic_r50_fpn_1x_coco.py', + # 'htc/htc_dconv_c3-c5_mstrain_400_1400_x101_64x4d_fpn_20e_coco.py', + # 'htc/htc_x101_64x4d_fpn_20e_16gpu_coco.py', + # 'htc/htc_r50_fpn_1x_coco.py', # 'htc/htc_r101_fpn_20e.py', # 'htc/htc_r50_fpn_20e.py', # --- 'cityscapes/mask_rcnn_r50_fpn_1x_cityscapes.py', # 'cityscapes/faster_rcnn_r50_fpn_1x_cityscapes.py', # --- - # 'scratch/scratch_faster_rcnn_r50_fpn_gn_6x.py', - # 'scratch/scratch_mask_rcnn_r50_fpn_gn_6x.py', + # 
'scratch/scratch_faster_rcnn_r50_fpn_gn_6x_coco.py', + # 'scratch/scratch_mask_rcnn_r50_fpn_gn_6x_coco.py', # --- - # 'grid_rcnn/grid_rcnn_gn_head_x101_32x4d_fpn_2x.py', - 'grid_rcnn/grid_rcnn_gn_head_r50_fpn_2x.py', + # 'grid_rcnn/grid_rcnn_gn_head_x101_32x4d_fpn_2x_coco.py', + 'grid_rcnn/grid_rcnn_r50_fpn_gn-head_2x_coco.py', # --- - 'double_heads/dh_faster_rcnn_r50_fpn_1x.py', + 'double_heads/dh_faster_rcnn_r50_fpn_1x_coco.py', # --- - 'empirical_attention/faster_rcnn_r50_fpn_attention_0010_dcn_1x.py', - # 'empirical_attention/faster_rcnn_r50_fpn_attention_1111_1x.py', - # 'empirical_attention/faster_rcnn_r50_fpn_attention_0010_1x.py', - # 'empirical_attention/faster_rcnn_r50_fpn_attention_1111_dcn_1x.py', + 'empirical_attention/faster_rcnn_r50_fpn_attention_0010_dcn_1x_coco' + '.py', + # 'empirical_attention/faster_rcnn_r50_fpn_attention_1111_1x_coco.py', + # 'empirical_attention/faster_rcnn_r50_fpn_attention_0010_1x_coco.py', + # 'empirical_attention/faster_rcnn_r50_fpn_attention_1111_dcn_1x_coco + # .py', # --- - # 'ms_rcnn/ms_rcnn_r101_caffe_fpn_1x.py', - # 'ms_rcnn/ms_rcnn_x101_64x4d_fpn_1x.py', - # 'ms_rcnn/ms_rcnn_r50_caffe_fpn_1x.py', + # 'ms_rcnn/ms_rcnn_r101_caffe_fpn_1x_coco.py', + # 'ms_rcnn/ms_rcnn_x101_64x4d_fpn_1x_coco.py', + # 'ms_rcnn/ms_rcnn_r50_caffe_fpn_1x_coco.py', # --- - # 'guided_anchoring/ga_faster_x101_32x4d_fpn_1x.py', - # 'guided_anchoring/ga_rpn_x101_32x4d_fpn_1x.py', - # 'guided_anchoring/ga_retinanet_r50_caffe_fpn_1x.py', - # 'guided_anchoring/ga_fast_r50_caffe_fpn_1x.py', - # 'guided_anchoring/ga_retinanet_x101_32x4d_fpn_1x.py', - # 'guided_anchoring/ga_rpn_r101_caffe_rpn_1x.py', - # 'guided_anchoring/ga_faster_r50_caffe_fpn_1x.py', - 'guided_anchoring/ga_rpn_r50_caffe_fpn_1x.py', + # 'guided_anchoring/ga_faster_x101_32x4d_fpn_1x_coco.py', + # 'guided_anchoring/ga_rpn_x101_32x4d_fpn_1x_coco.py', + # 'guided_anchoring/ga_retinanet_r50_caffe_fpn_1x_coco.py', + # 'guided_anchoring/ga_fast_r50_caffe_fpn_1x_coco.py', + # 
'guided_anchoring/ga_retinanet_x101_32x4d_fpn_1x_coco.py', + # 'guided_anchoring/ga_rpn_r101_caffe_rpn_1x_coco.py', + # 'guided_anchoring/ga_faster_r50_caffe_fpn_1x_coco.py', + 'guided_anchoring/ga_rpn_r50_caffe_fpn_1x_coco.py', # --- - 'foveabox/fovea_r50_fpn_4gpu_1x.py', - # 'foveabox/fovea_align_gn_ms_r101_fpn_4gpu_2x.py', - # 'foveabox/fovea_align_gn_r50_fpn_4gpu_2x.py', - # 'foveabox/fovea_align_gn_r101_fpn_4gpu_2x.py', - 'foveabox/fovea_align_gn_ms_r50_fpn_4gpu_2x.py', + 'foveabox/fovea_r50_fpn_4x4_1x_coco.py', + # 'foveabox/fovea_align_gn_ms_r101_fpn_4gpu_2x_coco.py', + # 'foveabox/fovea_align_gn_r50_fpn_4gpu_2x_coco.py', + # 'foveabox/fovea_align_gn_r101_fpn_4gpu_2x_coco.py', + 'foveabox/fovea_align_r50_fpn_gn-head_mstrain_640-800_4x4_2x_coco.py', # --- - # 'hrnet/cascade_rcnn_hrnetv2p_w32_20e.py', - # 'hrnet/mask_rcnn_hrnetv2p_w32_1x.py', - # 'hrnet/cascade_mask_rcnn_hrnetv2p_w32_20e.py', - # 'hrnet/htc_hrnetv2p_w32_20e.py', - # 'hrnet/faster_rcnn_hrnetv2p_w18_1x.py', - # 'hrnet/mask_rcnn_hrnetv2p_w18_1x.py', - # 'hrnet/faster_rcnn_hrnetv2p_w32_1x.py', - # 'hrnet/faster_rcnn_hrnetv2p_w40_1x.py', - 'hrnet/fcos_hrnetv2p_w32_gn_1x_4gpu.py', + # 'hrnet/cascade_rcnn_hrnetv2p_w32_20e_coco.py', + # 'hrnet/mask_rcnn_hrnetv2p_w32_1x_coco.py', + # 'hrnet/cascade_mask_rcnn_hrnetv2p_w32_20e_coco.py', + # 'hrnet/htc_hrnetv2p_w32_20e_coco.py', + # 'hrnet/faster_rcnn_hrnetv2p_w18_1x_coco.py', + # 'hrnet/mask_rcnn_hrnetv2p_w18_1x_coco.py', + # 'hrnet/faster_rcnn_hrnetv2p_w32_1x_coco.py', + # 'hrnet/faster_rcnn_hrnetv2p_w40_1x_coco.py', + 'hrnet/fcos_hrnetv2p_w32_gn-head_4x4_1x_coco.py', # --- - # 'gn+ws/faster_rcnn_r50_fpn_gn_ws_1x.py', - # 'gn+ws/mask_rcnn_x101_32x4d_fpn_gn_ws_2x.py', - 'gn+ws/mask_rcnn_r50_fpn_gn_ws_2x.py', - # 'gn+ws/mask_rcnn_r50_fpn_gn_ws_20_23_24e.py', + # 'gn+ws/faster_rcnn_r50_fpn_gn_ws_1x_coco.py', + # 'gn+ws/mask_rcnn_x101_32x4d_fpn_gn_ws_2x_coco.py', + 'gn+ws/mask_rcnn_r50_fpn_gn_ws-all_2x_coco.py', + # 
'gn+ws/mask_rcnn_r50_fpn_gn_ws_20_23_24e_coco.py', # --- # 'wider_face/ssd300_wider_face.py', # --- - 'pascal_voc/ssd300_voc.py', + 'pascal_voc/ssd300_voc0712.py', 'pascal_voc/faster_rcnn_r50_fpn_1x_voc0712.py', - 'pascal_voc/ssd512_voc.py', + 'pascal_voc/ssd512_voc0712.py', # --- - # 'gcnet/mask_rcnn_r4_gcb_c3-c5_r50_fpn_syncbn_1x.py', - # 'gcnet/mask_rcnn_r16_gcb_c3-c5_r50_fpn_syncbn_1x.py', - # 'gcnet/mask_rcnn_r4_gcb_c3-c5_r50_fpn_1x.py', - # 'gcnet/mask_rcnn_r16_gcb_c3-c5_r50_fpn_1x.py', - 'gcnet/mask_rcnn_r50_fpn_sbn_1x.py', + # 'gcnet/mask_rcnn_r4_gcb_c3-c5_r50_fpn_syncbn_1x_coco.py', + # 'gcnet/mask_rcnn_r16_gcb_c3-c5_r50_fpn_syncbn_1x_coco.py', + # 'gcnet/mask_rcnn_r4_gcb_c3-c5_r50_fpn_1x_coco.py', + # 'gcnet/mask_rcnn_r16_gcb_c3-c5_r50_fpn_1x_coco.py', + 'gcnet/mask_rcnn_r50_fpn_syncbn-backbone_1x_coco.py', # --- - 'gn/mask_rcnn_r50_fpn_gn_contrib_2x.py', - # 'gn/mask_rcnn_r50_fpn_gn_2x.py', - # 'gn/mask_rcnn_r101_fpn_gn_2x.py', + 'gn/mask_rcnn_r50_fpn_gn-all_contrib_2x_coco.py', + # 'gn/mask_rcnn_r50_fpn_gn_2x_coco.py', + # 'gn/mask_rcnn_r101_fpn_gn_2x_coco.py', # --- # 'reppoints/reppoints_moment_x101_dcn_fpn_2x.py', - 'reppoints/reppoints_moment_r50_fpn_2x.py', - # 'reppoints/reppoints_moment_x101_dcn_fpn_2x_mt.py', - 'reppoints/reppoints_partial_minmax_r50_fpn_1x.py', - 'reppoints/bbox_r50_grid_center_fpn_1x.py', - # 'reppoints/reppoints_moment_r101_dcn_fpn_2x.py', - # 'reppoints/reppoints_moment_r101_fpn_2x_mt.py', - # 'reppoints/reppoints_moment_r50_fpn_2x_mt.py', - 'reppoints/reppoints_minmax_r50_fpn_1x.py', - # 'reppoints/reppoints_moment_r50_fpn_1x.py', - # 'reppoints/reppoints_moment_r101_fpn_2x.py', - # 'reppoints/reppoints_moment_r101_dcn_fpn_2x_mt.py', - 'reppoints/bbox_r50_grid_fpn_1x.py', + 'reppoints/reppoints_moment_r50_fpn_gn-neck+head_2x_coco.py', + # 'reppoints/reppoints_moment_x101_dcn_fpn_2x_mt_coco.py', + 'reppoints/reppoints_partial_minmax_r50_fpn_gn-neck+head_1x_coco.py', + 
'reppoints/bbox_r50_grid_center_fpn_gn-neck+head_1x_coco.py', + # 'reppoints/reppoints_moment_r101_dcn_fpn_2x_coco.py', + # 'reppoints/reppoints_moment_r101_fpn_2x_mt_coco.py', + # 'reppoints/reppoints_moment_r50_fpn_2x_mt_coco.py', + 'reppoints/reppoints_minmax_r50_fpn_gn-neck+head_1x_coco.py', + # 'reppoints/reppoints_moment_r50_fpn_1x_coco.py', + # 'reppoints/reppoints_moment_r101_fpn_2x_coco.py', + # 'reppoints/reppoints_moment_r101_dcn_fpn_2x_mt_coco.py', + 'reppoints/bbox_r50_grid_fpn_gn-neck+head_1x_coco.py', # --- - # 'fcos/fcos_mstrain_640_800_x101_64x4d_fpn_gn_2x.py', - # 'fcos/fcos_mstrain_640_800_r101_caffe_fpn_gn_2x_4gpu.py', - 'fcos/fcos_r50_caffe_fpn_gn_1x_4gpu.py', + # 'fcos/fcos_mstrain_640_800_x101_64x4d_fpn_gn_2x_coco.py', + # 'fcos/fcos_mstrain_640_800_r101_caffe_fpn_gn_2x_4gpu_coco.py', + 'fcos/fcos_r50_caffe_fpn_gn-head_4x4_1x_coco.py', # --- - 'albu_example/mask_rcnn_r50_fpn_1x.py', + 'albu_example/mask_rcnn_r50_fpn_albu_1x_coco.py', # --- - 'libra_rcnn/libra_faster_rcnn_r50_fpn_1x.py', - # 'libra_rcnn/libra_retinanet_r50_fpn_1x.py', - # 'libra_rcnn/libra_faster_rcnn_r101_fpn_1x.py', - # 'libra_rcnn/libra_faster_rcnn_x101_64x4d_fpn_1x.py', - # 'libra_rcnn/libra_fast_rcnn_r50_fpn_1x.py', + 'libra_rcnn/libra_faster_rcnn_r50_fpn_1x_coco.py', + # 'libra_rcnn/libra_retinanet_r50_fpn_1x_coco.py', + # 'libra_rcnn/libra_faster_rcnn_r101_fpn_1x_coco.py', + # 'libra_rcnn/libra_faster_rcnn_x101_64x4d_fpn_1x_coco.py', + # 'libra_rcnn/libra_fast_rcnn_r50_fpn_1x_coco.py', # --- - # 'ghm/retinanet_ghm_r50_fpn_1x.py', + # 'ghm/retinanet_ghm_r50_fpn_1x_coco.py', # --- - # 'fp16/retinanet_r50_fpn_fp16_1x.py', - 'fp16/mask_rcnn_r50_fpn_fp16_1x.py', - 'fp16/faster_rcnn_r50_fpn_fp16_1x.py' + # 'fp16/retinanet_r50_fpn_fp16_1x_coco.py', + 'fp16/mask_rcnn_r50_fpn_fp16_1x_coco.py', + 'fp16/faster_rcnn_r50_fpn_fp16_1x_coco.py' ] print('Using {} config files'.format(len(config_names))) for config_fname in config_names: config_fpath = join(config_dpath, config_fname) - 
config_mod = import_module_from_path(config_fpath) + config_mod = Config.fromfile(config_fpath) config_mod.model config_mod.train_cfg @@ -178,7 +180,7 @@ def test_config_data_pipeline(): CommandLine: xdoctest -m tests/test_config.py test_config_build_data_pipeline """ - from xdoctest.utils import import_module_from_path + from mmcv import Config from mmdet.datasets.pipelines import Compose import numpy as np @@ -189,17 +191,17 @@ def test_config_data_pipeline(): # TODO: test pipelines using Albu, current Albu throw None given empty GT config_names = [ 'wider_face/ssd300_wider_face.py', - 'pascal_voc/ssd300_voc.py', - 'pascal_voc/ssd512_voc.py', + 'pascal_voc/ssd300_voc0712.py', + 'pascal_voc/ssd512_voc0712.py', # 'albu_example/mask_rcnn_r50_fpn_1x.py', - 'fp16/mask_rcnn_r50_fpn_fp16_1x.py', + 'fp16/mask_rcnn_r50_fpn_fp16_1x_coco.py', ] print('Using {} config files'.format(len(config_names))) for config_fname in config_names: config_fpath = join(config_dpath, config_fname) - config_mod = import_module_from_path(config_fpath) + config_mod = Config.fromfile(config_fpath) # remove loading pipeline loading_pipeline = config_mod.train_pipeline.pop(0) diff --git a/tests/test_forward.py b/tests/test_forward.py index 5ba56bf24ff51861e3e897a34aa526d702d520a2..2599fb115b92b44bf98e28ff89b6576e5df80bb1 100644 --- a/tests/test_forward.py +++ b/tests/test_forward.py @@ -27,10 +27,10 @@ def _get_config_module(fname): """ Load a configuration as a python module """ - from xdoctest.utils import import_module_from_path + from mmcv import Config config_dpath = _get_config_directory() config_fpath = join(config_dpath, fname) - config_mod = import_module_from_path(config_fpath) + config_mod = Config.fromfile(config_fpath) return config_mod @@ -48,7 +48,7 @@ def _get_detector_cfg(fname): def test_ssd300_forward(): - model, train_cfg, test_cfg = _get_detector_cfg('ssd300_coco.py') + model, train_cfg, test_cfg = _get_detector_cfg('ssd/ssd300_coco.py') model['pretrained'] = None from 
mmdet.models import build_detector @@ -82,7 +82,8 @@ def test_ssd300_forward(): def test_rpn_forward(): - model, train_cfg, test_cfg = _get_detector_cfg('rpn_r50_fpn_1x.py') + model, train_cfg, test_cfg = _get_detector_cfg( + 'rpn/rpn_r50_fpn_1x_coco.py') model['pretrained'] = None from mmdet.models import build_detector @@ -112,7 +113,7 @@ def test_rpn_forward(): def test_retina_ghm_forward(): model, train_cfg, test_cfg = _get_detector_cfg( - 'ghm/retinanet_ghm_r50_fpn_1x.py') + 'ghm/retinanet_ghm_r50_fpn_1x_coco.py') model['pretrained'] = None from mmdet.models import build_detector @@ -176,7 +177,7 @@ def test_cascade_forward(): raise pytest.skip('requires torchvision on cpu') model, train_cfg, test_cfg = _get_detector_cfg( - 'cascade_rcnn_r50_fpn_1x.py') + 'cascade_rcnn/cascade_rcnn_r50_fpn_1x_coco.py') model['pretrained'] = None # torchvision roi align supports CPU model['bbox_roi_extractor']['roi_layer']['use_torchvision'] = True @@ -228,7 +229,8 @@ def test_faster_rcnn_forward(): import pytest raise pytest.skip('requires torchvision on cpu') - model, train_cfg, test_cfg = _get_detector_cfg('faster_rcnn_r50_fpn_1x.py') + model, train_cfg, test_cfg = _get_detector_cfg( + 'faster_rcnn/faster_rcnn_r50_fpn_1x_coco.py') model['pretrained'] = None # torchvision roi align supports CPU model['bbox_roi_extractor']['roi_layer']['use_torchvision'] = True @@ -281,7 +283,7 @@ def test_faster_rcnn_ohem_forward(): raise pytest.skip('requires torchvision on cpu') model, train_cfg, test_cfg = _get_detector_cfg( - 'faster_rcnn_ohem_r50_fpn_1x.py') + 'faster_rcnn/faster_rcnn_r50_fpn_ohem_1x_coco.py') model['pretrained'] = None # torchvision roi align supports CPU model['bbox_roi_extractor']['roi_layer']['use_torchvision'] = True diff --git a/tests/test_heads.py b/tests/test_heads.py index b1e4ceebf45d284e5a96731fe247abaebd6fe242..e835c9e391e4e3697179cd43088e0be216b2d540 100644 --- a/tests/test_heads.py +++ b/tests/test_heads.py @@ -241,7 +241,7 @@ def test_refine_boxes(): 
'rng': 699807 }, - # Corner case: similar num rois and images + # Corner case: similar num rois and images { 'n_roi': 20, 'n_img': 20, diff --git a/tests/test_sampler.py b/tests/test_sampler.py index c75360268e6fd3afd6cbfb4ecf09a4de5cbf138a..cdae8f4821cfc929e779f55077bc929a7e113818 100644 --- a/tests/test_sampler.py +++ b/tests/test_sampler.py @@ -100,7 +100,7 @@ def _context_for_ohem(): sys.path.insert(0, dirname(__file__)) from test_forward import _get_detector_cfg model, train_cfg, test_cfg = _get_detector_cfg( - 'faster_rcnn_ohem_r50_fpn_1x.py') + 'faster_rcnn/faster_rcnn_r50_fpn_ohem_1x_coco.py') model['pretrained'] = None # torchvision roi align supports CPU model['bbox_roi_extractor']['roi_layer']['use_torchvision'] = True diff --git a/tools/train.py b/tools/train.py index 01df1caef34098041fc080d1a5e4a336ff45a62a..9ce7fa2f4a64772b54a97a2eb269613c67d13f83 100644 --- a/tools/train.py +++ b/tools/train.py @@ -65,6 +65,10 @@ def main(): # update configs according to CLI args if args.work_dir is not None: cfg.work_dir = args.work_dir + elif cfg.get('work_dir', None) is None: + # use filename as default work_dir + cfg.work_dir = osp.join('./work_dirs', + osp.splitext(osp.basename(args.config))[0]) if args.resume_from is not None: cfg.resume_from = args.resume_from cfg.gpus = args.gpus