diff --git a/configs/albu_example/mask_rcnn_r50_fpn_1x.py b/configs/albu_example/mask_rcnn_r50_fpn_1x.py
index c84988a50860c79926078cf8a1299b3266e73aa1..da3e23ae1ac6bdcac81863df91ab8d809e2ae321 100644
--- a/configs/albu_example/mask_rcnn_r50_fpn_1x.py
+++ b/configs/albu_example/mask_rcnn_r50_fpn_1x.py
@@ -8,6 +8,7 @@ model = dict(
         num_stages=4,
         out_indices=(0, 1, 2, 3),
         frozen_stages=1,
+        norm_cfg=dict(type='BN', requires_grad=True),
         style='pytorch'),
     neck=dict(
         type='FPN',
diff --git a/configs/atss/atss_r50_fpn_1x.py b/configs/atss/atss_r50_fpn_1x.py
index e8c7253e196e52b10cb525b537693a58e271beab..f65e7ef386b28f1f44085d29be734892374d23d6 100644
--- a/configs/atss/atss_r50_fpn_1x.py
+++ b/configs/atss/atss_r50_fpn_1x.py
@@ -8,6 +8,7 @@ model = dict(
         num_stages=4,
         out_indices=(0, 1, 2, 3),
         frozen_stages=1,
+        norm_cfg=dict(type='BN', requires_grad=True),
         style='pytorch'),
     neck=dict(
         type='FPN',
diff --git a/configs/carafe/faster_rcnn_r50_fpn_carafe_1x.py b/configs/carafe/faster_rcnn_r50_fpn_carafe_1x.py
index 94c8a0fc1a56bc1a6601421de7e9a46df277b5d6..605d13cc0164f17211e69b8070c004dff310aecd 100644
--- a/configs/carafe/faster_rcnn_r50_fpn_carafe_1x.py
+++ b/configs/carafe/faster_rcnn_r50_fpn_carafe_1x.py
@@ -8,6 +8,7 @@ model = dict(
         num_stages=4,
         out_indices=(0, 1, 2, 3),
         frozen_stages=1,
+        norm_cfg=dict(type='BN', requires_grad=True),
         style='pytorch'),
     neck=dict(
         type='FPN_CARAFE',
@@ -177,7 +178,6 @@ log_config = dict(
         # dict(type='TensorboardLoggerHook')
     ])
 # yapf:enable
-evaluation = dict(interval=1)
 # runtime settings
 total_epochs = 12
 dist_params = dict(backend='nccl')
diff --git a/configs/carafe/mask_rcnn_r50_fpn_carafe_1x.py b/configs/carafe/mask_rcnn_r50_fpn_carafe_1x.py
index 656bd7c6a7c53a15959919b6a45d88cfd529cf84..d2bc4bdaac050490a1e2de76aec3a39d992fb17b 100644
--- a/configs/carafe/mask_rcnn_r50_fpn_carafe_1x.py
+++ b/configs/carafe/mask_rcnn_r50_fpn_carafe_1x.py
@@ -8,6 +8,7 @@ model = dict(
         num_stages=4,
         out_indices=(0, 1, 2, 3),
         frozen_stages=1,
+        norm_cfg=dict(type='BN', requires_grad=True),
         style='pytorch'),
     neck=dict(
         type='FPN_CARAFE',
@@ -199,7 +200,6 @@ log_config = dict(
         # dict(type='TensorboardLoggerHook')
     ])
 # yapf:enable
-evaluation = dict(interval=1)
 # runtime settings
 total_epochs = 12
 dist_params = dict(backend='nccl')
diff --git a/configs/cascade_mask_rcnn_r101_fpn_1x.py b/configs/cascade_mask_rcnn_r101_fpn_1x.py
index 2d7209a09edbf40cb79e2f1a9d5f6e09aa23f6d0..d5de4a56e7721ed013c7a1c0285e044a5ed136c7 100644
--- a/configs/cascade_mask_rcnn_r101_fpn_1x.py
+++ b/configs/cascade_mask_rcnn_r101_fpn_1x.py
@@ -9,6 +9,7 @@ model = dict(
         num_stages=4,
         out_indices=(0, 1, 2, 3),
         frozen_stages=1,
+        norm_cfg=dict(type='BN', requires_grad=True),
         style='pytorch'),
     neck=dict(
         type='FPN',
diff --git a/configs/cascade_mask_rcnn_r50_fpn_1x.py b/configs/cascade_mask_rcnn_r50_fpn_1x.py
index 10463a5f7e33f6e31dc1320c748711112f6cd75c..459eb17ae6d97804df623ad81aab777d8e06724c 100644
--- a/configs/cascade_mask_rcnn_r50_fpn_1x.py
+++ b/configs/cascade_mask_rcnn_r50_fpn_1x.py
@@ -9,6 +9,7 @@ model = dict(
         num_stages=4,
         out_indices=(0, 1, 2, 3),
         frozen_stages=1,
+        norm_cfg=dict(type='BN', requires_grad=True),
         style='pytorch'),
     neck=dict(
         type='FPN',
diff --git a/configs/cascade_mask_rcnn_x101_32x4d_fpn_1x.py b/configs/cascade_mask_rcnn_x101_32x4d_fpn_1x.py
index 38a38bdd54fb055cbe1d4b1628989f161e2c37b1..4a1edbfd0a3fee44c30a321122252b23d97c0299 100644
--- a/configs/cascade_mask_rcnn_x101_32x4d_fpn_1x.py
+++ b/configs/cascade_mask_rcnn_x101_32x4d_fpn_1x.py
@@ -11,6 +11,7 @@ model = dict(
         num_stages=4,
         out_indices=(0, 1, 2, 3),
         frozen_stages=1,
+        norm_cfg=dict(type='BN', requires_grad=True),
         style='pytorch'),
     neck=dict(
         type='FPN',
diff --git a/configs/cascade_mask_rcnn_x101_64x4d_fpn_1x.py b/configs/cascade_mask_rcnn_x101_64x4d_fpn_1x.py
index 80a5ed6a2b31ff06816ce74d04570d82517229a0..d4d1f09bb90396c36e21ef4f8ac47a3921f42396 100644
--- a/configs/cascade_mask_rcnn_x101_64x4d_fpn_1x.py
+++ b/configs/cascade_mask_rcnn_x101_64x4d_fpn_1x.py
@@ -11,6 +11,7 @@ model = dict(
         num_stages=4,
         out_indices=(0, 1, 2, 3),
         frozen_stages=1,
+        norm_cfg=dict(type='BN', requires_grad=True),
         style='pytorch'),
     neck=dict(
         type='FPN',
diff --git a/configs/cascade_rcnn_r101_fpn_1x.py b/configs/cascade_rcnn_r101_fpn_1x.py
index e9e5a46aa8f765ca5ac6499093c2aaff71d30240..29fd1b7076eb8a8fecd5d12a7e4be107aa214cfe 100644
--- a/configs/cascade_rcnn_r101_fpn_1x.py
+++ b/configs/cascade_rcnn_r101_fpn_1x.py
@@ -9,6 +9,7 @@ model = dict(
         num_stages=4,
         out_indices=(0, 1, 2, 3),
         frozen_stages=1,
+        norm_cfg=dict(type='BN', requires_grad=True),
         style='pytorch'),
     neck=dict(
         type='FPN',
diff --git a/configs/cascade_rcnn_r50_fpn_1x.py b/configs/cascade_rcnn_r50_fpn_1x.py
index c4f80c52cab2995f99520fb38bf55bde5a8c9548..6f7940307a70eee63822be18b85dc8a94e8dd4f6 100644
--- a/configs/cascade_rcnn_r50_fpn_1x.py
+++ b/configs/cascade_rcnn_r50_fpn_1x.py
@@ -9,6 +9,7 @@ model = dict(
         num_stages=4,
         out_indices=(0, 1, 2, 3),
         frozen_stages=1,
+        norm_cfg=dict(type='BN', requires_grad=True),
         style='pytorch'),
     neck=dict(
         type='FPN',
diff --git a/configs/cascade_rcnn_x101_32x4d_fpn_1x.py b/configs/cascade_rcnn_x101_32x4d_fpn_1x.py
index 1e76574a151a9c4d225cd22928a0bedf7f7f725a..4806dea90b7a3e1bbeeafc52f1b7529d82b4bee1 100644
--- a/configs/cascade_rcnn_x101_32x4d_fpn_1x.py
+++ b/configs/cascade_rcnn_x101_32x4d_fpn_1x.py
@@ -11,6 +11,7 @@ model = dict(
         num_stages=4,
         out_indices=(0, 1, 2, 3),
         frozen_stages=1,
+        norm_cfg=dict(type='BN', requires_grad=True),
         style='pytorch'),
     neck=dict(
         type='FPN',
diff --git a/configs/cascade_rcnn_x101_64x4d_fpn_1x.py b/configs/cascade_rcnn_x101_64x4d_fpn_1x.py
index 67b0ff600a5751264768ebc6e0d5015c6fe22f4e..1d2667a85bc773122b3a3464e87f0a1b4259a4ea 100644
--- a/configs/cascade_rcnn_x101_64x4d_fpn_1x.py
+++ b/configs/cascade_rcnn_x101_64x4d_fpn_1x.py
@@ -11,6 +11,7 @@ model = dict(
         num_stages=4,
         out_indices=(0, 1, 2, 3),
         frozen_stages=1,
+        norm_cfg=dict(type='BN', requires_grad=True),
         style='pytorch'),
     neck=dict(
         type='FPN',
diff --git a/configs/cityscapes/faster_rcnn_r50_fpn_1x_cityscapes.py b/configs/cityscapes/faster_rcnn_r50_fpn_1x_cityscapes.py
index 37e991d28fd288afd32179afa4fa2f912559d35d..4fd17e302187f5d6524a16354c6114a4b84ac469 100644
--- a/configs/cityscapes/faster_rcnn_r50_fpn_1x_cityscapes.py
+++ b/configs/cityscapes/faster_rcnn_r50_fpn_1x_cityscapes.py
@@ -8,6 +8,7 @@ model = dict(
         num_stages=4,
         out_indices=(0, 1, 2, 3),
         frozen_stages=1,
+        norm_cfg=dict(type='BN', requires_grad=True),
         style='pytorch'),
     neck=dict(
         type='FPN',
diff --git a/configs/cityscapes/mask_rcnn_r50_fpn_1x_cityscapes.py b/configs/cityscapes/mask_rcnn_r50_fpn_1x_cityscapes.py
index ac7fb190389317bc7c968c10d6eba363a143bae4..e6481c7aa8ec5b1e6db0563310a628e8a3ccc731 100644
--- a/configs/cityscapes/mask_rcnn_r50_fpn_1x_cityscapes.py
+++ b/configs/cityscapes/mask_rcnn_r50_fpn_1x_cityscapes.py
@@ -8,6 +8,7 @@ model = dict(
         num_stages=4,
         out_indices=(0, 1, 2, 3),
         frozen_stages=1,
+        norm_cfg=dict(type='BN', requires_grad=True),
         style='pytorch'),
     neck=dict(
         type='FPN',
diff --git a/configs/dcn/cascade_mask_rcnn_dconv_c3-c5_r50_fpn_1x.py b/configs/dcn/cascade_mask_rcnn_dconv_c3-c5_r50_fpn_1x.py
index 820d40907bdc04bbc809b9dfc2d09b27067b8ad6..7d82c6b2b989f4599fbe1b59793052dade1a8e1d 100644
--- a/configs/dcn/cascade_mask_rcnn_dconv_c3-c5_r50_fpn_1x.py
+++ b/configs/dcn/cascade_mask_rcnn_dconv_c3-c5_r50_fpn_1x.py
@@ -9,6 +9,7 @@ model = dict(
         num_stages=4,
         out_indices=(0, 1, 2, 3),
         frozen_stages=1,
+        norm_cfg=dict(type='BN', requires_grad=True),
         style='pytorch',
         dcn=dict(type='DCN', deformable_groups=1, fallback_on_stride=False),
         stage_with_dcn=(False, True, True, True)),
diff --git a/configs/dcn/cascade_rcnn_dconv_c3-c5_r50_fpn_1x.py b/configs/dcn/cascade_rcnn_dconv_c3-c5_r50_fpn_1x.py
index ae76ddde05aaec01ca0049b545f981b863c9355f..9ac1f985fa1e64407ae0883d9752d1ca3fc2f3bb 100644
--- a/configs/dcn/cascade_rcnn_dconv_c3-c5_r50_fpn_1x.py
+++ b/configs/dcn/cascade_rcnn_dconv_c3-c5_r50_fpn_1x.py
@@ -9,6 +9,7 @@ model = dict(
         num_stages=4,
         out_indices=(0, 1, 2, 3),
         frozen_stages=1,
+        norm_cfg=dict(type='BN', requires_grad=True),
         style='pytorch',
         dcn=dict(type='DCN', deformable_groups=1, fallback_on_stride=False),
         stage_with_dcn=(False, True, True, True)),
diff --git a/configs/dcn/faster_rcnn_dconv_c3-c5_r50_fpn_1x.py b/configs/dcn/faster_rcnn_dconv_c3-c5_r50_fpn_1x.py
index 7f92c2cc1bcae50e06df818fd253ca09c7be8c01..b252251f435cffe7de031af2d4ebe2f614461fee 100644
--- a/configs/dcn/faster_rcnn_dconv_c3-c5_r50_fpn_1x.py
+++ b/configs/dcn/faster_rcnn_dconv_c3-c5_r50_fpn_1x.py
@@ -8,6 +8,7 @@ model = dict(
         num_stages=4,
         out_indices=(0, 1, 2, 3),
         frozen_stages=1,
+        norm_cfg=dict(type='BN', requires_grad=True),
         style='pytorch',
         dcn=dict(type='DCN', deformable_groups=1, fallback_on_stride=False),
         stage_with_dcn=(False, True, True, True)),
diff --git a/configs/dcn/faster_rcnn_dconv_c3-c5_x101_32x4d_fpn_1x.py b/configs/dcn/faster_rcnn_dconv_c3-c5_x101_32x4d_fpn_1x.py
index d924e7fa3324d6605ddbb97dda4f201a9a91dd49..53a07c3345ce9383559c92736ea7d7b776abcb97 100644
--- a/configs/dcn/faster_rcnn_dconv_c3-c5_x101_32x4d_fpn_1x.py
+++ b/configs/dcn/faster_rcnn_dconv_c3-c5_x101_32x4d_fpn_1x.py
@@ -10,6 +10,7 @@ model = dict(
         num_stages=4,
         out_indices=(0, 1, 2, 3),
         frozen_stages=1,
+        norm_cfg=dict(type='BN', requires_grad=True),
         style='pytorch',
         dcn=dict(type='DCN', deformable_groups=1, fallback_on_stride=False),
         stage_with_dcn=(False, True, True, True)),
diff --git a/configs/dcn/faster_rcnn_dpool_r50_fpn_1x.py b/configs/dcn/faster_rcnn_dpool_r50_fpn_1x.py
index 62fb85c65a9b54525941a2a80e2caacf70df7e59..124ba04653f2b0c5abf6dc6c73c7ad55a6b488f0 100644
--- a/configs/dcn/faster_rcnn_dpool_r50_fpn_1x.py
+++ b/configs/dcn/faster_rcnn_dpool_r50_fpn_1x.py
@@ -8,6 +8,7 @@ model = dict(
         num_stages=4,
         out_indices=(0, 1, 2, 3),
         frozen_stages=1,
+        norm_cfg=dict(type='BN', requires_grad=True),
         style='pytorch'),
     neck=dict(
         type='FPN',
diff --git a/configs/dcn/faster_rcnn_mdconv_c3-c5_group4_r50_fpn_1x.py b/configs/dcn/faster_rcnn_mdconv_c3-c5_group4_r50_fpn_1x.py
index b520238b0595855c15b5528ab8c40c68f0c420a7..e1d2cddcb142ab3f5792041e233a951b1352da90 100644
--- a/configs/dcn/faster_rcnn_mdconv_c3-c5_group4_r50_fpn_1x.py
+++ b/configs/dcn/faster_rcnn_mdconv_c3-c5_group4_r50_fpn_1x.py
@@ -8,6 +8,7 @@ model = dict(
         num_stages=4,
         out_indices=(0, 1, 2, 3),
         frozen_stages=1,
+        norm_cfg=dict(type='BN', requires_grad=True),
         style='pytorch',
         dcn=dict(type='DCNv2', deformable_groups=4, fallback_on_stride=False),
         stage_with_dcn=(False, True, True, True)),
diff --git a/configs/dcn/faster_rcnn_mdconv_c3-c5_r50_fpn_1x.py b/configs/dcn/faster_rcnn_mdconv_c3-c5_r50_fpn_1x.py
index c35d4d1dd471d1c47a6090707b8f6a142d904757..e22dbd4391212f3113cc1e98b25af66198411f35 100644
--- a/configs/dcn/faster_rcnn_mdconv_c3-c5_r50_fpn_1x.py
+++ b/configs/dcn/faster_rcnn_mdconv_c3-c5_r50_fpn_1x.py
@@ -8,6 +8,7 @@ model = dict(
         num_stages=4,
         out_indices=(0, 1, 2, 3),
         frozen_stages=1,
+        norm_cfg=dict(type='BN', requires_grad=True),
         style='pytorch',
         dcn=dict(type='DCNv2', deformable_groups=1, fallback_on_stride=False),
         stage_with_dcn=(False, True, True, True)),
diff --git a/configs/dcn/faster_rcnn_mdpool_r50_fpn_1x.py b/configs/dcn/faster_rcnn_mdpool_r50_fpn_1x.py
index e2a37f3311bac415fa6c195f50baee32931efb72..c36e65c499314fc787336749a1d6e5602906ee7b 100644
--- a/configs/dcn/faster_rcnn_mdpool_r50_fpn_1x.py
+++ b/configs/dcn/faster_rcnn_mdpool_r50_fpn_1x.py
@@ -8,6 +8,7 @@ model = dict(
         num_stages=4,
         out_indices=(0, 1, 2, 3),
         frozen_stages=1,
+        norm_cfg=dict(type='BN', requires_grad=True),
         style='pytorch'),
     neck=dict(
         type='FPN',
diff --git a/configs/dcn/mask_rcnn_dconv_c3-c5_r50_fpn_1x.py b/configs/dcn/mask_rcnn_dconv_c3-c5_r50_fpn_1x.py
index 180d58ecf61ce4034b5cbdf616977f7a13793b09..802f4e79049f39265f3af4fc997e1fc378e994cd 100644
--- a/configs/dcn/mask_rcnn_dconv_c3-c5_r50_fpn_1x.py
+++ b/configs/dcn/mask_rcnn_dconv_c3-c5_r50_fpn_1x.py
@@ -8,6 +8,7 @@ model = dict(
         num_stages=4,
         out_indices=(0, 1, 2, 3),
         frozen_stages=1,
+        norm_cfg=dict(type='BN', requires_grad=True),
         style='pytorch',
         dcn=dict(type='DCN', deformable_groups=1, fallback_on_stride=False),
         stage_with_dcn=(False, True, True, True)),
diff --git a/configs/dcn/mask_rcnn_mdconv_c3-c5_r50_fpn_1x.py b/configs/dcn/mask_rcnn_mdconv_c3-c5_r50_fpn_1x.py
index 238a07a93d4ce7bc276c8e798581db0af2741a03..1f2c703b3b174f1ac738da22468ae2ea7f70f232 100644
--- a/configs/dcn/mask_rcnn_mdconv_c3-c5_r50_fpn_1x.py
+++ b/configs/dcn/mask_rcnn_mdconv_c3-c5_r50_fpn_1x.py
@@ -8,6 +8,7 @@ model = dict(
         num_stages=4,
         out_indices=(0, 1, 2, 3),
         frozen_stages=1,
+        norm_cfg=dict(type='BN', requires_grad=True),
         style='pytorch',
         dcn=dict(type='DCNv2', deformable_groups=1, fallback_on_stride=False),
         stage_with_dcn=(False, True, True, True)),
diff --git a/configs/double_heads/dh_faster_rcnn_r50_fpn_1x.py b/configs/double_heads/dh_faster_rcnn_r50_fpn_1x.py
index 2e5086617a3795b79120773b8b12173bdf990fb1..0f3143687373be9636020888ceb9a5f0549f4812 100644
--- a/configs/double_heads/dh_faster_rcnn_r50_fpn_1x.py
+++ b/configs/double_heads/dh_faster_rcnn_r50_fpn_1x.py
@@ -1,13 +1,14 @@
 # model settings
 model = dict(
     type='DoubleHeadRCNN',
-    pretrained='modelzoo://resnet50',
+    pretrained='torchvision://resnet50',
     backbone=dict(
         type='ResNet',
         depth=50,
         num_stages=4,
         out_indices=(0, 1, 2, 3),
         frozen_stages=1,
+        norm_cfg=dict(type='BN', requires_grad=True),
         style='pytorch'),
     neck=dict(
         type='FPN',
diff --git a/configs/empirical_attention/faster_rcnn_r50_fpn_attention_0010_1x.py b/configs/empirical_attention/faster_rcnn_r50_fpn_attention_0010_1x.py
index 772530b19f35c16756f3ec349d48da7f25b6e835..9e196b24d28886e82cf1152d63eab6f00f651d50 100644
--- a/configs/empirical_attention/faster_rcnn_r50_fpn_attention_0010_1x.py
+++ b/configs/empirical_attention/faster_rcnn_r50_fpn_attention_0010_1x.py
@@ -8,6 +8,7 @@ model = dict(
         num_stages=4,
         out_indices=(0, 1, 2, 3),
         frozen_stages=1,
+        norm_cfg=dict(type='BN', requires_grad=True),
         style='pytorch',
         gen_attention=dict(
             spatial_range=-1, num_heads=8, attention_type='0010', kv_stride=2),
diff --git a/configs/empirical_attention/faster_rcnn_r50_fpn_attention_0010_dcn_1x.py b/configs/empirical_attention/faster_rcnn_r50_fpn_attention_0010_dcn_1x.py
index aa53d436299e6effadb7f4ac6397a40d5a3f4e5e..51827b404947446732b36899d2a86dc813a33a5a 100644
--- a/configs/empirical_attention/faster_rcnn_r50_fpn_attention_0010_dcn_1x.py
+++ b/configs/empirical_attention/faster_rcnn_r50_fpn_attention_0010_dcn_1x.py
@@ -8,6 +8,7 @@ model = dict(
         num_stages=4,
         out_indices=(0, 1, 2, 3),
         frozen_stages=1,
+        norm_cfg=dict(type='BN', requires_grad=True),
         style='pytorch',
         gen_attention=dict(
             spatial_range=-1, num_heads=8, attention_type='0010', kv_stride=2),
diff --git a/configs/empirical_attention/faster_rcnn_r50_fpn_attention_1111_1x.py b/configs/empirical_attention/faster_rcnn_r50_fpn_attention_1111_1x.py
index d945ac0ad33c32eefda9532dc0a8512a88b708d7..d76d599b2c40703a4857ddb0704eb9ce9395281a 100644
--- a/configs/empirical_attention/faster_rcnn_r50_fpn_attention_1111_1x.py
+++ b/configs/empirical_attention/faster_rcnn_r50_fpn_attention_1111_1x.py
@@ -8,6 +8,7 @@ model = dict(
         num_stages=4,
         out_indices=(0, 1, 2, 3),
         frozen_stages=1,
+        norm_cfg=dict(type='BN', requires_grad=True),
         style='pytorch',
         gen_attention=dict(
             spatial_range=-1, num_heads=8, attention_type='1111', kv_stride=2),
diff --git a/configs/empirical_attention/faster_rcnn_r50_fpn_attention_1111_dcn_1x.py b/configs/empirical_attention/faster_rcnn_r50_fpn_attention_1111_dcn_1x.py
index c98b3b94af2d7ed574004612da35011775db46a8..8132e696100cebae03b2ae554f3ec57ad47fb206 100644
--- a/configs/empirical_attention/faster_rcnn_r50_fpn_attention_1111_dcn_1x.py
+++ b/configs/empirical_attention/faster_rcnn_r50_fpn_attention_1111_dcn_1x.py
@@ -8,6 +8,7 @@ model = dict(
         num_stages=4,
         out_indices=(0, 1, 2, 3),
         frozen_stages=1,
+        norm_cfg=dict(type='BN', requires_grad=True),
         style='pytorch',
         gen_attention=dict(
             spatial_range=-1, num_heads=8, attention_type='1111', kv_stride=2),
diff --git a/configs/fast_mask_rcnn_r101_fpn_1x.py b/configs/fast_mask_rcnn_r101_fpn_1x.py
index 0ea15a9eefe6b025f2ac8bf48ef0d0d27ae895a3..533bfb313fd30ce58d93252e39243f02199395a3 100644
--- a/configs/fast_mask_rcnn_r101_fpn_1x.py
+++ b/configs/fast_mask_rcnn_r101_fpn_1x.py
@@ -8,6 +8,7 @@ model = dict(
         num_stages=4,
         out_indices=(0, 1, 2, 3),
         frozen_stages=1,
+        norm_cfg=dict(type='BN', requires_grad=True),
         style='pytorch'),
     neck=dict(
         type='FPN',
diff --git a/configs/fast_mask_rcnn_r50_fpn_1x.py b/configs/fast_mask_rcnn_r50_fpn_1x.py
index 6c70a72c3e4f77e9e56459bb3bd5d663c2c43ba0..7425527d3f2eacf8145c1f36808e7219c7b0c0f2 100644
--- a/configs/fast_mask_rcnn_r50_fpn_1x.py
+++ b/configs/fast_mask_rcnn_r50_fpn_1x.py
@@ -8,6 +8,7 @@ model = dict(
         num_stages=4,
         out_indices=(0, 1, 2, 3),
         frozen_stages=1,
+        norm_cfg=dict(type='BN', requires_grad=True),
         style='pytorch'),
     neck=dict(
         type='FPN',
diff --git a/configs/fast_rcnn_r101_fpn_1x.py b/configs/fast_rcnn_r101_fpn_1x.py
index 2aafdb47bf0cbbe48b58d7d2764516ee1ae406d5..b9c3cbf9de2a4e49d36d8956e4d598a0159c1190 100644
--- a/configs/fast_rcnn_r101_fpn_1x.py
+++ b/configs/fast_rcnn_r101_fpn_1x.py
@@ -8,6 +8,7 @@ model = dict(
         num_stages=4,
         out_indices=(0, 1, 2, 3),
         frozen_stages=1,
+        norm_cfg=dict(type='BN', requires_grad=True),
         style='pytorch'),
     neck=dict(
         type='FPN',
diff --git a/configs/fast_rcnn_r50_fpn_1x.py b/configs/fast_rcnn_r50_fpn_1x.py
index 1695186aab4b6b860fde72f68ef9f09434bf62d9..cb54fad145781094e876972752594a4d2f3d9d90 100644
--- a/configs/fast_rcnn_r50_fpn_1x.py
+++ b/configs/fast_rcnn_r50_fpn_1x.py
@@ -8,6 +8,7 @@ model = dict(
         num_stages=4,
         out_indices=(0, 1, 2, 3),
         frozen_stages=1,
+        norm_cfg=dict(type='BN', requires_grad=True),
         style='pytorch'),
     neck=dict(
         type='FPN',
diff --git a/configs/faster_rcnn_ohem_r50_fpn_1x.py b/configs/faster_rcnn_ohem_r50_fpn_1x.py
index 315aa613621baebd31f114cda6b752f871da5ba8..baad9c85736d66d60173a24d0ce59a59b589e9f8 100644
--- a/configs/faster_rcnn_ohem_r50_fpn_1x.py
+++ b/configs/faster_rcnn_ohem_r50_fpn_1x.py
@@ -8,6 +8,7 @@ model = dict(
         num_stages=4,
         out_indices=(0, 1, 2, 3),
         frozen_stages=1,
+        norm_cfg=dict(type='BN', requires_grad=True),
         style='pytorch'),
     neck=dict(
         type='FPN',
diff --git a/configs/faster_rcnn_r101_fpn_1x.py b/configs/faster_rcnn_r101_fpn_1x.py
index df81b701ccf6f1b6e272c70cd30193f66159f13d..d3735e5fdfb6cb055ae66c7d1e70d1565ce4d755 100644
--- a/configs/faster_rcnn_r101_fpn_1x.py
+++ b/configs/faster_rcnn_r101_fpn_1x.py
@@ -8,6 +8,7 @@ model = dict(
         num_stages=4,
         out_indices=(0, 1, 2, 3),
         frozen_stages=1,
+        norm_cfg=dict(type='BN', requires_grad=True),
         style='pytorch'),
     neck=dict(
         type='FPN',
diff --git a/configs/faster_rcnn_r50_fpn_1x.py b/configs/faster_rcnn_r50_fpn_1x.py
index 3dd19f7a0fc186363452f60419c81444e126414d..e1659fedb6138e2f9e7877a2948c52bc7ff24cff 100644
--- a/configs/faster_rcnn_r50_fpn_1x.py
+++ b/configs/faster_rcnn_r50_fpn_1x.py
@@ -8,6 +8,7 @@ model = dict(
         num_stages=4,
         out_indices=(0, 1, 2, 3),
         frozen_stages=1,
+        norm_cfg=dict(type='BN', requires_grad=True),
         style='pytorch'),
     neck=dict(
         type='FPN',
diff --git a/configs/faster_rcnn_x101_32x4d_fpn_1x.py b/configs/faster_rcnn_x101_32x4d_fpn_1x.py
index c1cd45168fcc27f98f3e14869c1caf73ed84af41..9e14dc9f5ea4d3ceeb07a9e3564ffbca6c5a3657 100644
--- a/configs/faster_rcnn_x101_32x4d_fpn_1x.py
+++ b/configs/faster_rcnn_x101_32x4d_fpn_1x.py
@@ -10,6 +10,7 @@ model = dict(
         num_stages=4,
         out_indices=(0, 1, 2, 3),
         frozen_stages=1,
+        norm_cfg=dict(type='BN', requires_grad=True),
         style='pytorch'),
     neck=dict(
         type='FPN',
diff --git a/configs/faster_rcnn_x101_64x4d_fpn_1x.py b/configs/faster_rcnn_x101_64x4d_fpn_1x.py
index fb09f2e1bed0540d0f7aacdcafbe37d012eed7b8..5454cfe170583d310352798c1eb0ea6c67a2b1a9 100644
--- a/configs/faster_rcnn_x101_64x4d_fpn_1x.py
+++ b/configs/faster_rcnn_x101_64x4d_fpn_1x.py
@@ -10,6 +10,7 @@ model = dict(
         num_stages=4,
         out_indices=(0, 1, 2, 3),
         frozen_stages=1,
+        norm_cfg=dict(type='BN', requires_grad=True),
         style='pytorch'),
     neck=dict(
         type='FPN',
diff --git a/configs/fcos/fcos_mstrain_640_800_x101_64x4d_fpn_gn_2x.py b/configs/fcos/fcos_mstrain_640_800_x101_64x4d_fpn_gn_2x.py
index 950873915c07d767e57aef28e534e2114117e3b8..3ba6505024b346514c6f8fe3913b81906b9bf073 100644
--- a/configs/fcos/fcos_mstrain_640_800_x101_64x4d_fpn_gn_2x.py
+++ b/configs/fcos/fcos_mstrain_640_800_x101_64x4d_fpn_gn_2x.py
@@ -10,6 +10,7 @@ model = dict(
         num_stages=4,
         out_indices=(0, 1, 2, 3),
         frozen_stages=1,
+        norm_cfg=dict(type='BN', requires_grad=True),
         style='pytorch'),
     neck=dict(
         type='FPN',
diff --git a/configs/foveabox/fovea_align_gn_ms_r101_fpn_4gpu_2x.py b/configs/foveabox/fovea_align_gn_ms_r101_fpn_4gpu_2x.py
index 2869a57d5fc30a67a71e5274a77ab196e2b56a9f..502ff3ee76705ea23064706eee0df0971d095e11 100644
--- a/configs/foveabox/fovea_align_gn_ms_r101_fpn_4gpu_2x.py
+++ b/configs/foveabox/fovea_align_gn_ms_r101_fpn_4gpu_2x.py
@@ -8,6 +8,7 @@ model = dict(
         num_stages=4,
         out_indices=(0, 1, 2, 3),
         frozen_stages=1,
+        norm_cfg=dict(type='BN', requires_grad=True),
         style='pytorch'),
     neck=dict(
         type='FPN',
diff --git a/configs/foveabox/fovea_align_gn_ms_r50_fpn_4gpu_2x.py b/configs/foveabox/fovea_align_gn_ms_r50_fpn_4gpu_2x.py
index 42691a46184ff281f7fa55a0eb7d3cecb07af3fd..a4960660ccf39d35b89a660b95973c239f816880 100644
--- a/configs/foveabox/fovea_align_gn_ms_r50_fpn_4gpu_2x.py
+++ b/configs/foveabox/fovea_align_gn_ms_r50_fpn_4gpu_2x.py
@@ -8,6 +8,7 @@ model = dict(
         num_stages=4,
         out_indices=(0, 1, 2, 3),
         frozen_stages=1,
+        norm_cfg=dict(type='BN', requires_grad=True),
         style='pytorch'),
     neck=dict(
         type='FPN',
diff --git a/configs/foveabox/fovea_align_gn_r101_fpn_4gpu_2x.py b/configs/foveabox/fovea_align_gn_r101_fpn_4gpu_2x.py
index 0f5ef2ca16921e695b291dd2fae25204237cbb3e..8b7acaf61e7250e6855d0589b98cb39f1637704f 100644
--- a/configs/foveabox/fovea_align_gn_r101_fpn_4gpu_2x.py
+++ b/configs/foveabox/fovea_align_gn_r101_fpn_4gpu_2x.py
@@ -8,6 +8,7 @@ model = dict(
         num_stages=4,
         out_indices=(0, 1, 2, 3),
         frozen_stages=1,
+        norm_cfg=dict(type='BN', requires_grad=True),
         style='pytorch'),
     neck=dict(
         type='FPN',
diff --git a/configs/foveabox/fovea_align_gn_r50_fpn_4gpu_2x.py b/configs/foveabox/fovea_align_gn_r50_fpn_4gpu_2x.py
index b2f58a9ec2f5ef723330fe5714a3a7b076cffee6..15efc4c5a8738ba8c519cadb624fb1086a7d3ba3 100644
--- a/configs/foveabox/fovea_align_gn_r50_fpn_4gpu_2x.py
+++ b/configs/foveabox/fovea_align_gn_r50_fpn_4gpu_2x.py
@@ -8,6 +8,7 @@ model = dict(
         num_stages=4,
         out_indices=(0, 1, 2, 3),
         frozen_stages=1,
+        norm_cfg=dict(type='BN', requires_grad=True),
         style='pytorch'),
     neck=dict(
         type='FPN',
diff --git a/configs/foveabox/fovea_r50_fpn_4gpu_1x.py b/configs/foveabox/fovea_r50_fpn_4gpu_1x.py
index bb67c646d311d4e1ccfb88963da33c8d49085c6f..724a8aab327b7da75497ad0125919712c332b2d2 100644
--- a/configs/foveabox/fovea_r50_fpn_4gpu_1x.py
+++ b/configs/foveabox/fovea_r50_fpn_4gpu_1x.py
@@ -8,6 +8,7 @@ model = dict(
         num_stages=4,
         out_indices=(0, 1, 2, 3),
         frozen_stages=1,
+        norm_cfg=dict(type='BN', requires_grad=True),
         style='pytorch'),
     neck=dict(
         type='FPN',
diff --git a/configs/fp16/faster_rcnn_r50_fpn_fp16_1x.py b/configs/fp16/faster_rcnn_r50_fpn_fp16_1x.py
index 9b90538ecaa3b74f81e355eac283420c0f0fbdb0..ba3a4db66e2a4603d3bdc17c88b1802bc14ca2df 100644
--- a/configs/fp16/faster_rcnn_r50_fpn_fp16_1x.py
+++ b/configs/fp16/faster_rcnn_r50_fpn_fp16_1x.py
@@ -11,6 +11,7 @@ model = dict(
         num_stages=4,
         out_indices=(0, 1, 2, 3),
         frozen_stages=1,
+        norm_cfg=dict(type='BN', requires_grad=True),
         style='pytorch'),
     neck=dict(
         type='FPN',
diff --git a/configs/fp16/mask_rcnn_r50_fpn_fp16_1x.py b/configs/fp16/mask_rcnn_r50_fpn_fp16_1x.py
index 4d747fcaea0a2b9ef74f936722a1191aa1015f19..34bb1489235d19bae21538d9969ecb6b72268793 100644
--- a/configs/fp16/mask_rcnn_r50_fpn_fp16_1x.py
+++ b/configs/fp16/mask_rcnn_r50_fpn_fp16_1x.py
@@ -11,6 +11,7 @@ model = dict(
         num_stages=4,
         out_indices=(0, 1, 2, 3),
         frozen_stages=1,
+        norm_cfg=dict(type='BN', requires_grad=True),
         style='pytorch'),
     neck=dict(
         type='FPN',
diff --git a/configs/fp16/retinanet_r50_fpn_fp16_1x.py b/configs/fp16/retinanet_r50_fpn_fp16_1x.py
index c3e2ffaf0e93fb4118ee7fbf1629f8752e1a7d6f..3eebebb64e953e3e12c380d8ed058be1de08353d 100644
--- a/configs/fp16/retinanet_r50_fpn_fp16_1x.py
+++ b/configs/fp16/retinanet_r50_fpn_fp16_1x.py
@@ -11,6 +11,7 @@ model = dict(
         num_stages=4,
         out_indices=(0, 1, 2, 3),
         frozen_stages=1,
+        norm_cfg=dict(type='BN', requires_grad=True),
         style='pytorch'),
     neck=dict(
         type='FPN',
diff --git a/configs/free_anchor/retinanet_free_anchor_r101_fpn_1x.py b/configs/free_anchor/retinanet_free_anchor_r101_fpn_1x.py
index 76c16004f1bd3e54b4b98ee290527b7058fcc28d..134de95153ebb99a84f832eb856be04585635c02 100644
--- a/configs/free_anchor/retinanet_free_anchor_r101_fpn_1x.py
+++ b/configs/free_anchor/retinanet_free_anchor_r101_fpn_1x.py
@@ -8,6 +8,7 @@ model = dict(
         num_stages=4,
         out_indices=(0, 1, 2, 3),
         frozen_stages=1,
+        norm_cfg=dict(type='BN', requires_grad=True),
         style='pytorch'),
     neck=dict(
         type='FPN',
@@ -116,7 +117,6 @@ log_config = dict(
 # yapf:enable
 # runtime settings
 total_epochs = 12
-device_ids = range(8)
 dist_params = dict(backend='nccl')
 log_level = 'INFO'
 work_dir = './work_dirs/retinanet_free_anchor_r101_fpn_1x'
diff --git a/configs/free_anchor/retinanet_free_anchor_r50_fpn_1x.py b/configs/free_anchor/retinanet_free_anchor_r50_fpn_1x.py
index 141137d87eb9d27cd94284b265e6739c0a726a0f..93a211a6146fe1725b544af3e4a2b9fb33ede7b6 100644
--- a/configs/free_anchor/retinanet_free_anchor_r50_fpn_1x.py
+++ b/configs/free_anchor/retinanet_free_anchor_r50_fpn_1x.py
@@ -8,6 +8,7 @@ model = dict(
         num_stages=4,
         out_indices=(0, 1, 2, 3),
         frozen_stages=1,
+        norm_cfg=dict(type='BN', requires_grad=True),
         style='pytorch'),
     neck=dict(
         type='FPN',
@@ -116,7 +117,6 @@ log_config = dict(
 # yapf:enable
 # runtime settings
 total_epochs = 12
-device_ids = range(8)
 dist_params = dict(backend='nccl')
 log_level = 'INFO'
 work_dir = './work_dirs/retinanet_free_anchor_r50_fpn_1x'
diff --git a/configs/free_anchor/retinanet_free_anchor_x101-32x4d_fpn_1x.py b/configs/free_anchor/retinanet_free_anchor_x101-32x4d_fpn_1x.py
index 2f00adb400ec040de6bd851b0ef9100a69cefb90..5edec0d29920c8b2b0cab8008961e7aeb77e6782 100644
--- a/configs/free_anchor/retinanet_free_anchor_x101-32x4d_fpn_1x.py
+++ b/configs/free_anchor/retinanet_free_anchor_x101-32x4d_fpn_1x.py
@@ -10,6 +10,7 @@ model = dict(
         num_stages=4,
         out_indices=(0, 1, 2, 3),
         frozen_stages=1,
+        norm_cfg=dict(type='BN', requires_grad=True),
         style='pytorch'),
     neck=dict(
         type='FPN',
@@ -118,7 +119,6 @@ log_config = dict(
 # yapf:enable
 # runtime settings
 total_epochs = 12
-device_ids = range(8)
 dist_params = dict(backend='nccl')
 log_level = 'INFO'
 work_dir = './work_dirs/retinanet_free_anchor_x101-32x4d_fpn_1x'
diff --git a/configs/gcnet/mask_rcnn_r16_gcb_c3-c5_r50_fpn_1x.py b/configs/gcnet/mask_rcnn_r16_gcb_c3-c5_r50_fpn_1x.py
index e1d653a9e454c0dcc10e5a7ba1264c4eadcd5529..bce18b940a59b29f3d053f1c00e24db7f86e5bee 100644
--- a/configs/gcnet/mask_rcnn_r16_gcb_c3-c5_r50_fpn_1x.py
+++ b/configs/gcnet/mask_rcnn_r16_gcb_c3-c5_r50_fpn_1x.py
@@ -8,6 +8,7 @@ model = dict(
         num_stages=4,
         out_indices=(0, 1, 2, 3),
         frozen_stages=1,
+        norm_cfg=dict(type='BN', requires_grad=True),
         style='pytorch',
         gcb=dict(ratio=1. / 16., ),
         stage_with_gcb=(False, True, True, True)),
diff --git a/configs/gcnet/mask_rcnn_r16_gcb_c3-c5_r50_fpn_syncbn_1x.py b/configs/gcnet/mask_rcnn_r16_gcb_c3-c5_r50_fpn_syncbn_1x.py
index abd079e0abb19e9c3566db89e925fd8120f0c676..acb15eb33a8c21b9d6eac549505f435ef69fe9e0 100644
--- a/configs/gcnet/mask_rcnn_r16_gcb_c3-c5_r50_fpn_syncbn_1x.py
+++ b/configs/gcnet/mask_rcnn_r16_gcb_c3-c5_r50_fpn_syncbn_1x.py
@@ -1,6 +1,4 @@
 # model settings
-norm_cfg = dict(type='SyncBN', requires_grad=True)
-
 model = dict(
     type='MaskRCNN',
     pretrained='torchvision://resnet50',
@@ -10,11 +8,11 @@ model = dict(
         num_stages=4,
         out_indices=(0, 1, 2, 3),
         frozen_stages=1,
+        norm_cfg=dict(type='SyncBN', requires_grad=True),
+        norm_eval=False,
         style='pytorch',
         gcb=dict(ratio=1. / 16., ),
-        stage_with_gcb=(False, True, True, True),
-        norm_eval=False,
-        norm_cfg=norm_cfg),
+        stage_with_gcb=(False, True, True, True)),
     neck=dict(
         type='FPN',
         in_channels=[256, 512, 1024, 2048],
diff --git a/configs/gcnet/mask_rcnn_r4_gcb_c3-c5_r50_fpn_1x.py b/configs/gcnet/mask_rcnn_r4_gcb_c3-c5_r50_fpn_1x.py
index a43f0011830d984a0c7056eacf80b61e17e9865c..41058e74a71eb510a4acc78773789f5f06d04427 100644
--- a/configs/gcnet/mask_rcnn_r4_gcb_c3-c5_r50_fpn_1x.py
+++ b/configs/gcnet/mask_rcnn_r4_gcb_c3-c5_r50_fpn_1x.py
@@ -8,6 +8,7 @@ model = dict(
         num_stages=4,
         out_indices=(0, 1, 2, 3),
         frozen_stages=1,
+        norm_cfg=dict(type='BN', requires_grad=True),
         style='pytorch',
         gcb=dict(ratio=1. / 4., ),
         stage_with_gcb=(False, True, True, True)),
diff --git a/configs/gcnet/mask_rcnn_r4_gcb_c3-c5_r50_fpn_syncbn_1x.py b/configs/gcnet/mask_rcnn_r4_gcb_c3-c5_r50_fpn_syncbn_1x.py
index 810b8063a2b90b9e54cb9885e8d535c50d368482..9e047758cf37dfeaba98713077e33e2714379c65 100644
--- a/configs/gcnet/mask_rcnn_r4_gcb_c3-c5_r50_fpn_syncbn_1x.py
+++ b/configs/gcnet/mask_rcnn_r4_gcb_c3-c5_r50_fpn_syncbn_1x.py
@@ -1,6 +1,4 @@
 # model settings
-norm_cfg = dict(type='SyncBN', requires_grad=True)
-
 model = dict(
     type='MaskRCNN',
     pretrained='torchvision://resnet50',
@@ -10,11 +8,11 @@ model = dict(
         num_stages=4,
         out_indices=(0, 1, 2, 3),
         frozen_stages=1,
+        norm_cfg=dict(type='SyncBN', requires_grad=True),
+        norm_eval=False,
         style='pytorch',
         gcb=dict(ratio=1. / 4., ),
-        stage_with_gcb=(False, True, True, True),
-        norm_eval=False,
-        norm_cfg=norm_cfg),
+        stage_with_gcb=(False, True, True, True)),
     neck=dict(
         type='FPN',
         in_channels=[256, 512, 1024, 2048],
diff --git a/configs/gcnet/mask_rcnn_r50_fpn_sbn_1x.py b/configs/gcnet/mask_rcnn_r50_fpn_sbn_1x.py
index cbde867210a06ce16e8e531071b742b25e46f79e..f273a3a62c9f44f8b23a38ace06b278ac7f009fe 100644
--- a/configs/gcnet/mask_rcnn_r50_fpn_sbn_1x.py
+++ b/configs/gcnet/mask_rcnn_r50_fpn_sbn_1x.py
@@ -1,6 +1,4 @@
 # model settings
-norm_cfg = dict(type='SyncBN', requires_grad=True)
-
 model = dict(
     type='MaskRCNN',
     pretrained='torchvision://resnet50',
@@ -10,9 +8,9 @@ model = dict(
         num_stages=4,
         out_indices=(0, 1, 2, 3),
         frozen_stages=1,
-        style='pytorch',
+        norm_cfg=dict(type='SyncBN', requires_grad=True),
         norm_eval=False,
-        norm_cfg=norm_cfg),
+        style='pytorch'),
     neck=dict(
         type='FPN',
         in_channels=[256, 512, 1024, 2048],
diff --git a/configs/ghm/retinanet_ghm_r50_fpn_1x.py b/configs/ghm/retinanet_ghm_r50_fpn_1x.py
index 4eb43bca0195cd97c9f4401f7f2afd9921b7c22a..3126a430c7fdff1a818abe12b402b978acf02c16 100644
--- a/configs/ghm/retinanet_ghm_r50_fpn_1x.py
+++ b/configs/ghm/retinanet_ghm_r50_fpn_1x.py
@@ -8,6 +8,7 @@ model = dict(
         num_stages=4,
         out_indices=(0, 1, 2, 3),
         frozen_stages=1,
+        norm_cfg=dict(type='BN', requires_grad=True),
         style='pytorch'),
     neck=dict(
         type='FPN',
diff --git a/configs/grid_rcnn/grid_rcnn_gn_head_r50_fpn_2x.py b/configs/grid_rcnn/grid_rcnn_gn_head_r50_fpn_2x.py
index 48ba454e9873305c074c3dddb25d94fbd4e3b8bd..810e6ddecd6ede880b4ee2cc92284799d921d3d3 100644
--- a/configs/grid_rcnn/grid_rcnn_gn_head_r50_fpn_2x.py
+++ b/configs/grid_rcnn/grid_rcnn_gn_head_r50_fpn_2x.py
@@ -8,6 +8,7 @@ model = dict(
         num_stages=4,
         out_indices=(0, 1, 2, 3),
         frozen_stages=1,
+        norm_cfg=dict(type='BN', requires_grad=True),
         style='pytorch'),
     neck=dict(
         type='FPN',
diff --git a/configs/grid_rcnn/grid_rcnn_gn_head_x101_32x4d_fpn_2x.py b/configs/grid_rcnn/grid_rcnn_gn_head_x101_32x4d_fpn_2x.py
index 84ac88bc2aa441312e4171166ece427a60ef0ac5..fd459b0cb3832036e700fd93479e7b136058a389 100644
--- a/configs/grid_rcnn/grid_rcnn_gn_head_x101_32x4d_fpn_2x.py
+++ b/configs/grid_rcnn/grid_rcnn_gn_head_x101_32x4d_fpn_2x.py
@@ -10,6 +10,7 @@ model = dict(
         num_stages=4,
         out_indices=(0, 1, 2, 3),
         frozen_stages=1,
+        norm_cfg=dict(type='BN', requires_grad=True),
         style='pytorch'),
     neck=dict(
         type='FPN',
diff --git a/configs/guided_anchoring/ga_faster_x101_32x4d_fpn_1x.py b/configs/guided_anchoring/ga_faster_x101_32x4d_fpn_1x.py
index 8a77ae938e408c5f47f666a0295d7cecc11bcb79..57a39228b5ce84c91367e102826875eb8fec4f44 100644
--- a/configs/guided_anchoring/ga_faster_x101_32x4d_fpn_1x.py
+++ b/configs/guided_anchoring/ga_faster_x101_32x4d_fpn_1x.py
@@ -10,6 +10,7 @@ model = dict(
         num_stages=4,
         out_indices=(0, 1, 2, 3),
         frozen_stages=1,
+        norm_cfg=dict(type='BN', requires_grad=True),
         style='pytorch'),
     neck=dict(
         type='FPN',
diff --git a/configs/guided_anchoring/ga_retinanet_x101_32x4d_fpn_1x.py b/configs/guided_anchoring/ga_retinanet_x101_32x4d_fpn_1x.py
index d37cf062f2535371b0f66e76f87618fb6d9d1386..a47fad03aa128d24ad07522cd9a88f5698df3893 100644
--- a/configs/guided_anchoring/ga_retinanet_x101_32x4d_fpn_1x.py
+++ b/configs/guided_anchoring/ga_retinanet_x101_32x4d_fpn_1x.py
@@ -10,6 +10,7 @@ model = dict(
         num_stages=4,
         out_indices=(0, 1, 2, 3),
         frozen_stages=1,
+        norm_cfg=dict(type='BN', requires_grad=True),
         style='pytorch'),
     neck=dict(
         type='FPN',
diff --git a/configs/guided_anchoring/ga_rpn_x101_32x4d_fpn_1x.py b/configs/guided_anchoring/ga_rpn_x101_32x4d_fpn_1x.py
index 97f684cec1cc2e3605dd22efaa0c596f6ad93fb7..a8b00113abcef48c7c54b7f9101febacdf7a2dd0 100644
--- a/configs/guided_anchoring/ga_rpn_x101_32x4d_fpn_1x.py
+++ b/configs/guided_anchoring/ga_rpn_x101_32x4d_fpn_1x.py
@@ -10,6 +10,7 @@ model = dict(
         num_stages=4,
         out_indices=(0, 1, 2, 3),
         frozen_stages=1,
+        norm_cfg=dict(type='BN', requires_grad=True),
         style='pytorch'),
     neck=dict(
         type='FPN',
diff --git a/configs/hrnet/mask_rcnn_hrnetv2p_w32_1x.py b/configs/hrnet/mask_rcnn_hrnetv2p_w32_1x.py
index 54eccb902452e130ff0eba165ae31de6512c0dcf..4511426f437866a0d92c4513c8da5dc334196e1d 100644
--- a/configs/hrnet/mask_rcnn_hrnetv2p_w32_1x.py
+++ b/configs/hrnet/mask_rcnn_hrnetv2p_w32_1x.py
@@ -56,6 +56,7 @@ model = dict(
         num_classes=81,
         target_means=[0., 0., 0., 0.],
         target_stds=[0.1, 0.1, 0.2, 0.2],
+        reg_class_agnostic=False,
         loss_cls=dict(
             type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0),
         loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0)),
diff --git a/configs/htc/htc_dconv_c3-c5_mstrain_400_1400_x101_64x4d_fpn_20e.py b/configs/htc/htc_dconv_c3-c5_mstrain_400_1400_x101_64x4d_fpn_20e.py
index c54586a411287678409b1c5d100cf3799a40b75b..f75deb697c3c4461eb2fd54ef183dfa7d575e827 100644
--- a/configs/htc/htc_dconv_c3-c5_mstrain_400_1400_x101_64x4d_fpn_20e.py
+++ b/configs/htc/htc_dconv_c3-c5_mstrain_400_1400_x101_64x4d_fpn_20e.py
@@ -13,6 +13,7 @@ model = dict(
         num_stages=4,
         out_indices=(0, 1, 2, 3),
         frozen_stages=1,
+        norm_cfg=dict(type='BN', requires_grad=True),
         style='pytorch',
         dcn=dict(
             type='DCN',
diff --git a/configs/htc/htc_r101_fpn_20e.py b/configs/htc/htc_r101_fpn_20e.py
index 23d069591cf3555eede5100060d4414eccd14ee8..5b46ab53b1ffc8a1bcf039f0b3527ee514f59c9a 100644
--- a/configs/htc/htc_r101_fpn_20e.py
+++ b/configs/htc/htc_r101_fpn_20e.py
@@ -11,6 +11,7 @@ model = dict(
         num_stages=4,
         out_indices=(0, 1, 2, 3),
         frozen_stages=1,
+        norm_cfg=dict(type='BN', requires_grad=True),
         style='pytorch'),
     neck=dict(
         type='FPN',
diff --git a/configs/htc/htc_r50_fpn_1x.py b/configs/htc/htc_r50_fpn_1x.py
index f5e38345edd76e450061251098198c21202b6755..f28d7df24f8d009a1c5b56e2988a545cff917ce1 100644
--- a/configs/htc/htc_r50_fpn_1x.py
+++ b/configs/htc/htc_r50_fpn_1x.py
@@ -11,6 +11,7 @@ model = dict(
         num_stages=4,
         out_indices=(0, 1, 2, 3),
         frozen_stages=1,
+        norm_cfg=dict(type='BN', requires_grad=True),
         style='pytorch'),
     neck=dict(
         type='FPN',
diff --git a/configs/htc/htc_r50_fpn_20e.py b/configs/htc/htc_r50_fpn_20e.py
index af1fca5f79df916978c3cbb2583cd22b64ecaadc..ec596b9ac089002ed5c85614102bf0dd1e6de24c 100644
--- a/configs/htc/htc_r50_fpn_20e.py
+++ b/configs/htc/htc_r50_fpn_20e.py
@@ -11,6 +11,7 @@ model = dict(
         num_stages=4,
         out_indices=(0, 1, 2, 3),
         frozen_stages=1,
+        norm_cfg=dict(type='BN', requires_grad=True),
         style='pytorch'),
     neck=dict(
         type='FPN',
diff --git a/configs/htc/htc_without_semantic_r50_fpn_1x.py b/configs/htc/htc_without_semantic_r50_fpn_1x.py
index d0bb99ef1c4eecc108ba41d4bfc0b2324fc476db..6616e8e202f07c9135eab0c780f8642fe700ccea 100644
--- a/configs/htc/htc_without_semantic_r50_fpn_1x.py
+++ b/configs/htc/htc_without_semantic_r50_fpn_1x.py
@@ -11,6 +11,7 @@ model = dict(
         num_stages=4,
         out_indices=(0, 1, 2, 3),
         frozen_stages=1,
+        norm_cfg=dict(type='BN', requires_grad=True),
         style='pytorch'),
     neck=dict(
         type='FPN',
diff --git a/configs/htc/htc_x101_32x4d_fpn_20e_16gpu.py b/configs/htc/htc_x101_32x4d_fpn_20e_16gpu.py
index 1d7a369c0bf0de0db4a890efcb54e977308bf5a7..65c94a661bb2d676a3494150b26a4d7d2dfdeadc 100644
--- a/configs/htc/htc_x101_32x4d_fpn_20e_16gpu.py
+++ b/configs/htc/htc_x101_32x4d_fpn_20e_16gpu.py
@@ -13,6 +13,7 @@ model = dict(
         num_stages=4,
         out_indices=(0, 1, 2, 3),
         frozen_stages=1,
+        norm_cfg=dict(type='BN', requires_grad=True),
         style='pytorch'),
     neck=dict(
         type='FPN',
diff --git a/configs/htc/htc_x101_64x4d_fpn_20e_16gpu.py b/configs/htc/htc_x101_64x4d_fpn_20e_16gpu.py
index 11728e8293e5924f1828632f8e7c76f461f6b39e..574736b5741f9ba18e53e8709ec471b764f8acea 100644
--- a/configs/htc/htc_x101_64x4d_fpn_20e_16gpu.py
+++ b/configs/htc/htc_x101_64x4d_fpn_20e_16gpu.py
@@ -13,6 +13,7 @@ model = dict(
         num_stages=4,
         out_indices=(0, 1, 2, 3),
         frozen_stages=1,
+        norm_cfg=dict(type='BN', requires_grad=True),
         style='pytorch'),
     neck=dict(
         type='FPN',
diff --git a/configs/instaboost/cascade_mask_rcnn_r50_fpn_instaboost_4x.py b/configs/instaboost/cascade_mask_rcnn_r50_fpn_instaboost_4x.py
index 64ac47d1d7f39699b02dbff546d076dbf8c6e0ed..4ce744df3f375a288085ab4f48e13039df5a24b5 100644
--- a/configs/instaboost/cascade_mask_rcnn_r50_fpn_instaboost_4x.py
+++ b/configs/instaboost/cascade_mask_rcnn_r50_fpn_instaboost_4x.py
@@ -9,6 +9,7 @@ model = dict(
         num_stages=4,
         out_indices=(0, 1, 2, 3),
         frozen_stages=1,
+        norm_cfg=dict(type='BN', requires_grad=True),
         style='pytorch'),
     neck=dict(
         type='FPN',
diff --git a/configs/instaboost/mask_rcnn_r50_fpn_instaboost_4x.py b/configs/instaboost/mask_rcnn_r50_fpn_instaboost_4x.py
index fe143a6e6d2c84dac516b0fc0d9aa5619d232e11..169652dc7cc5e0c37010a234a669293306fcd078 100644
--- a/configs/instaboost/mask_rcnn_r50_fpn_instaboost_4x.py
+++ b/configs/instaboost/mask_rcnn_r50_fpn_instaboost_4x.py
@@ -8,6 +8,7 @@ model = dict(
         num_stages=4,
         out_indices=(0, 1, 2, 3),
         frozen_stages=1,
+        norm_cfg=dict(type='BN', requires_grad=True),
         style='pytorch'),
     neck=dict(
         type='FPN',
diff --git a/configs/libra_rcnn/libra_fast_rcnn_r50_fpn_1x.py b/configs/libra_rcnn/libra_fast_rcnn_r50_fpn_1x.py
index aea0ed96c374fd59544ef62ea66afae7046d2bd9..839225268c8a591eb337c65686b84a6f319b062d 100644
--- a/configs/libra_rcnn/libra_fast_rcnn_r50_fpn_1x.py
+++ b/configs/libra_rcnn/libra_fast_rcnn_r50_fpn_1x.py
@@ -8,6 +8,7 @@ model = dict(
         num_stages=4,
         out_indices=(0, 1, 2, 3),
         frozen_stages=1,
+        norm_cfg=dict(type='BN', requires_grad=True),
         style='pytorch'),
     neck=[
         dict(
diff --git a/configs/libra_rcnn/libra_faster_rcnn_r101_fpn_1x.py b/configs/libra_rcnn/libra_faster_rcnn_r101_fpn_1x.py
index 3edd7dffbdd5e78852f3b94f2910fe640efec2c4..af3a0b6749bfbf2d77ed0459b3a49c8f4aefb2fe 100644
--- a/configs/libra_rcnn/libra_faster_rcnn_r101_fpn_1x.py
+++ b/configs/libra_rcnn/libra_faster_rcnn_r101_fpn_1x.py
@@ -8,6 +8,7 @@ model = dict(
         num_stages=4,
         out_indices=(0, 1, 2, 3),
         frozen_stages=1,
+        norm_cfg=dict(type='BN', requires_grad=True),
         style='pytorch'),
     neck=[
         dict(
diff --git a/configs/libra_rcnn/libra_faster_rcnn_r50_fpn_1x.py b/configs/libra_rcnn/libra_faster_rcnn_r50_fpn_1x.py
index 0e97617f5ab088d2d3d9dc61ffa74f727315e81c..41de0c62259ffc79584d3ef1e443c5e4e945c70d 100644
--- a/configs/libra_rcnn/libra_faster_rcnn_r50_fpn_1x.py
+++ b/configs/libra_rcnn/libra_faster_rcnn_r50_fpn_1x.py
@@ -8,6 +8,7 @@ model = dict(
         num_stages=4,
         out_indices=(0, 1, 2, 3),
         frozen_stages=1,
+        norm_cfg=dict(type='BN', requires_grad=True),
         style='pytorch'),
     neck=[
         dict(
diff --git a/configs/libra_rcnn/libra_faster_rcnn_x101_64x4d_fpn_1x.py b/configs/libra_rcnn/libra_faster_rcnn_x101_64x4d_fpn_1x.py
index 6a06465c32e5bc723fc1d079a99e800a5c631665..50ec1df825ac0d853c02a08a7636ceb589e5c556 100644
--- a/configs/libra_rcnn/libra_faster_rcnn_x101_64x4d_fpn_1x.py
+++ b/configs/libra_rcnn/libra_faster_rcnn_x101_64x4d_fpn_1x.py
@@ -10,6 +10,7 @@ model = dict(
         num_stages=4,
         out_indices=(0, 1, 2, 3),
         frozen_stages=1,
+        norm_cfg=dict(type='BN', requires_grad=True),
         style='pytorch'),
     neck=[
         dict(
diff --git a/configs/libra_rcnn/libra_retinanet_r50_fpn_1x.py b/configs/libra_rcnn/libra_retinanet_r50_fpn_1x.py
index acb929f345fed5df9100187484d4e5a7747835f8..543ec4a99d6868a9f1041b80641bc758252388fd 100644
--- a/configs/libra_rcnn/libra_retinanet_r50_fpn_1x.py
+++ b/configs/libra_rcnn/libra_retinanet_r50_fpn_1x.py
@@ -8,6 +8,7 @@ model = dict(
         num_stages=4,
         out_indices=(0, 1, 2, 3),
         frozen_stages=1,
+        norm_cfg=dict(type='BN', requires_grad=True),
         style='pytorch'),
     neck=[
         dict(
diff --git a/configs/mask_rcnn_r101_fpn_1x.py b/configs/mask_rcnn_r101_fpn_1x.py
index 30807e6a4a4401a7edc23d6e8f5b715160cd6326..47451f09cf664ef786bd70225606f2a2d18c25ae 100644
--- a/configs/mask_rcnn_r101_fpn_1x.py
+++ b/configs/mask_rcnn_r101_fpn_1x.py
@@ -8,6 +8,7 @@ model = dict(
         num_stages=4,
         out_indices=(0, 1, 2, 3),
         frozen_stages=1,
+        norm_cfg=dict(type='BN', requires_grad=True),
         style='pytorch'),
     neck=dict(
         type='FPN',
diff --git a/configs/mask_rcnn_r50_fpn_1x.py b/configs/mask_rcnn_r50_fpn_1x.py
index 73857c62003d4f7f10e5445d44e79e0f616576ff..b3a0e66a091f4c9a8f9f15504f15260c11cdb13f 100644
--- a/configs/mask_rcnn_r50_fpn_1x.py
+++ b/configs/mask_rcnn_r50_fpn_1x.py
@@ -8,6 +8,7 @@ model = dict(
         num_stages=4,
         out_indices=(0, 1, 2, 3),
         frozen_stages=1,
+        norm_cfg=dict(type='BN', requires_grad=True),
         style='pytorch'),
     neck=dict(
         type='FPN',
diff --git a/configs/mask_rcnn_x101_32x4d_fpn_1x.py b/configs/mask_rcnn_x101_32x4d_fpn_1x.py
index 24c7fd4efd252827f39137cc04974ba629a912be..0e33ef4ea1b14ab7b3589a7bfe03d03205122901 100644
--- a/configs/mask_rcnn_x101_32x4d_fpn_1x.py
+++ b/configs/mask_rcnn_x101_32x4d_fpn_1x.py
@@ -10,6 +10,7 @@ model = dict(
         num_stages=4,
         out_indices=(0, 1, 2, 3),
         frozen_stages=1,
+        norm_cfg=dict(type='BN', requires_grad=True),
         style='pytorch'),
     neck=dict(
         type='FPN',
diff --git a/configs/mask_rcnn_x101_64x4d_fpn_1x.py b/configs/mask_rcnn_x101_64x4d_fpn_1x.py
index 89b4499b3185bcbf0736bc5ec1c37feb7519f35d..02ef2cee3cb14dbadcae00063f383142623c47d9 100644
--- a/configs/mask_rcnn_x101_64x4d_fpn_1x.py
+++ b/configs/mask_rcnn_x101_64x4d_fpn_1x.py
@@ -10,6 +10,7 @@ model = dict(
         num_stages=4,
         out_indices=(0, 1, 2, 3),
         frozen_stages=1,
+        norm_cfg=dict(type='BN', requires_grad=True),
         style='pytorch'),
     neck=dict(
         type='FPN',
diff --git a/configs/ms_rcnn/ms_rcnn_x101_64x4d_fpn_1x.py b/configs/ms_rcnn/ms_rcnn_x101_64x4d_fpn_1x.py
index 4533919cd59b5961a0f0a4deda0628f6ab46130d..856a9307015baada7bb6c4a0576ae71d9eea6462 100644
--- a/configs/ms_rcnn/ms_rcnn_x101_64x4d_fpn_1x.py
+++ b/configs/ms_rcnn/ms_rcnn_x101_64x4d_fpn_1x.py
@@ -10,6 +10,7 @@ model = dict(
         num_stages=4,
         out_indices=(0, 1, 2, 3),
         frozen_stages=1,
+        norm_cfg=dict(type='BN', requires_grad=True),
         style='pytorch'),
     neck=dict(
         type='FPN',
diff --git a/configs/nas_fpn/retinanet_crop640_r50_fpn_50e.py b/configs/nas_fpn/retinanet_crop640_r50_fpn_50e.py
index 7c05c18cd4a2272fc6a6109ba38c61b7eb198c9b..a921c5b8771e90b747f25f07c776a3421a850e12 100644
--- a/configs/nas_fpn/retinanet_crop640_r50_fpn_50e.py
+++ b/configs/nas_fpn/retinanet_crop640_r50_fpn_50e.py
@@ -141,7 +141,6 @@ log_config = dict(
 # yapf:enable
 # runtime settings
 total_epochs = 50
-device_ids = range(8)
 dist_params = dict(backend='nccl')
 log_level = 'INFO'
 work_dir = './work_dirs/retinanet_crop640_r50_fpn_50e'
diff --git a/configs/nas_fpn/retinanet_crop640_r50_nasfpn_50e.py b/configs/nas_fpn/retinanet_crop640_r50_nasfpn_50e.py
index 30b1cbe76330b6ed894bdd9b4179dcd3cffafcd8..8797430428ec394a1ee5e0d6c2100a128da15fd8 100644
--- a/configs/nas_fpn/retinanet_crop640_r50_nasfpn_50e.py
+++ b/configs/nas_fpn/retinanet_crop640_r50_nasfpn_50e.py
@@ -140,7 +140,6 @@ log_config = dict(
 # yapf:enable
 # runtime settings
 total_epochs = 50
-device_ids = range(8)
 dist_params = dict(backend='nccl')
 log_level = 'INFO'
 work_dir = './work_dirs/retinanet_crop640_r50_nasfpn_50e'
diff --git a/configs/pascal_voc/faster_rcnn_r50_fpn_1x_voc0712.py b/configs/pascal_voc/faster_rcnn_r50_fpn_1x_voc0712.py
index 5614029bd4be4a90d9e3379b3eaf24d1b46bb9e0..041cd583462f23bbc0fdaa6428799751d950e70e 100644
--- a/configs/pascal_voc/faster_rcnn_r50_fpn_1x_voc0712.py
+++ b/configs/pascal_voc/faster_rcnn_r50_fpn_1x_voc0712.py
@@ -8,6 +8,7 @@ model = dict(
         num_stages=4,
         out_indices=(0, 1, 2, 3),
         frozen_stages=1,
+        norm_cfg=dict(type='BN', requires_grad=True),
         style='pytorch'),
     neck=dict(
         type='FPN',
diff --git a/configs/reppoints/bbox_r50_grid_center_fpn_1x.py b/configs/reppoints/bbox_r50_grid_center_fpn_1x.py
index 7719cd5a32f05ed6af5de6d62d7b7131eba665e0..8a94bb9f1b0c302005a38f5f6c5f440b54bf5c30 100644
--- a/configs/reppoints/bbox_r50_grid_center_fpn_1x.py
+++ b/configs/reppoints/bbox_r50_grid_center_fpn_1x.py
@@ -10,6 +10,7 @@ model = dict(
         num_stages=4,
         out_indices=(0, 1, 2, 3),
         frozen_stages=1,
+        norm_cfg=dict(type='BN', requires_grad=True),
         style='pytorch'),
     neck=dict(
         type='FPN',
@@ -139,5 +140,4 @@ log_level = 'INFO'
 work_dir = './work_dirs/bbox_r50_grid_center_fpn_1x'
 load_from = None
 resume_from = None
-auto_resume = True
 workflow = [('train', 1)]
diff --git a/configs/reppoints/bbox_r50_grid_fpn_1x.py b/configs/reppoints/bbox_r50_grid_fpn_1x.py
index 7420611a2ec957694eb1dc32072179bdd29d115c..5fe186a32fb14f101ce9d8a000c0d87e5cd4166e 100644
--- a/configs/reppoints/bbox_r50_grid_fpn_1x.py
+++ b/configs/reppoints/bbox_r50_grid_fpn_1x.py
@@ -10,6 +10,7 @@ model = dict(
         num_stages=4,
         out_indices=(0, 1, 2, 3),
         frozen_stages=1,
+        norm_cfg=dict(type='BN', requires_grad=True),
         style='pytorch'),
     neck=dict(
         type='FPN',
@@ -144,5 +145,4 @@ log_level = 'INFO'
 work_dir = './work_dirs/bbox_r50_grid_fpn_1x'
 load_from = None
 resume_from = None
-auto_resume = True
 workflow = [('train', 1)]
diff --git a/configs/reppoints/reppoints_minmax_r50_fpn_1x.py b/configs/reppoints/reppoints_minmax_r50_fpn_1x.py
index 76cbd4bfb644b1f7020697044e207e8005b91b1a..974b749ed7ea537393edfc07598d92de263e04d2 100644
--- a/configs/reppoints/reppoints_minmax_r50_fpn_1x.py
+++ b/configs/reppoints/reppoints_minmax_r50_fpn_1x.py
@@ -10,6 +10,7 @@ model = dict(
         num_stages=4,
         out_indices=(0, 1, 2, 3),
         frozen_stages=1,
+        norm_cfg=dict(type='BN', requires_grad=True),
         style='pytorch'),
     neck=dict(
         type='FPN',
@@ -138,5 +139,4 @@ log_level = 'INFO'
 work_dir = './work_dirs/reppoints_minmax_r50_fpn_1x'
 load_from = None
 resume_from = None
-auto_resume = True
 workflow = [('train', 1)]
diff --git a/configs/reppoints/reppoints_moment_r101_dcn_fpn_2x.py b/configs/reppoints/reppoints_moment_r101_dcn_fpn_2x.py
index c6891a7626d0c2ff365eb235fb2cbb8479f0e52f..6557461f163235d32a37b11c3ee2ecc9142046ff 100644
--- a/configs/reppoints/reppoints_moment_r101_dcn_fpn_2x.py
+++ b/configs/reppoints/reppoints_moment_r101_dcn_fpn_2x.py
@@ -10,6 +10,7 @@ model = dict(
         num_stages=4,
         out_indices=(0, 1, 2, 3),
         frozen_stages=1,
+        norm_cfg=dict(type='BN', requires_grad=True),
         style='pytorch',
         dcn=dict(
             modulated=False, deformable_groups=1, fallback_on_stride=False),
@@ -141,5 +142,4 @@ log_level = 'INFO'
 work_dir = './work_dirs/reppoints_moment_r101_dcn_fpn_2x'
 load_from = None
 resume_from = None
-auto_resume = True
 workflow = [('train', 1)]
diff --git a/configs/reppoints/reppoints_moment_r101_dcn_fpn_2x_mt.py b/configs/reppoints/reppoints_moment_r101_dcn_fpn_2x_mt.py
index 7a97d4914465134b81990b66b15d92e99ceb55fa..f8f1cc62710f27d3b6a27b2f70b1b8c3c3af9692 100644
--- a/configs/reppoints/reppoints_moment_r101_dcn_fpn_2x_mt.py
+++ b/configs/reppoints/reppoints_moment_r101_dcn_fpn_2x_mt.py
@@ -10,6 +10,7 @@ model = dict(
         num_stages=4,
         out_indices=(0, 1, 2, 3),
         frozen_stages=1,
+        norm_cfg=dict(type='BN', requires_grad=True),
         style='pytorch',
         dcn=dict(
             modulated=False, deformable_groups=1, fallback_on_stride=False),
@@ -145,5 +146,4 @@ log_level = 'INFO'
 work_dir = './work_dirs/reppoints_moment_r101_dcn_fpn_2x_mt'
 load_from = None
 resume_from = None
-auto_resume = True
 workflow = [('train', 1)]
diff --git a/configs/reppoints/reppoints_moment_r101_fpn_2x.py b/configs/reppoints/reppoints_moment_r101_fpn_2x.py
index 9bf2631c6020498be716e5fea3bd34d9e21a2214..b610c3f21fb375061b4e2de54a0d099c53a7d3be 100644
--- a/configs/reppoints/reppoints_moment_r101_fpn_2x.py
+++ b/configs/reppoints/reppoints_moment_r101_fpn_2x.py
@@ -10,6 +10,7 @@ model = dict(
         num_stages=4,
         out_indices=(0, 1, 2, 3),
         frozen_stages=1,
+        norm_cfg=dict(type='BN', requires_grad=True),
         style='pytorch'),
     neck=dict(
         type='FPN',
@@ -138,5 +139,4 @@ log_level = 'INFO'
 work_dir = './work_dirs/reppoints_moment_r101_fpn_2x'
 load_from = None
 resume_from = None
-auto_resume = True
 workflow = [('train', 1)]
diff --git a/configs/reppoints/reppoints_moment_r101_fpn_2x_mt.py b/configs/reppoints/reppoints_moment_r101_fpn_2x_mt.py
index a96549acd3b74d9a7661fb249b366aa2c4c79f33..75dcc4606019d8909723b90c1df2ffd15e12f57a 100644
--- a/configs/reppoints/reppoints_moment_r101_fpn_2x_mt.py
+++ b/configs/reppoints/reppoints_moment_r101_fpn_2x_mt.py
@@ -10,6 +10,7 @@ model = dict(
         num_stages=4,
         out_indices=(0, 1, 2, 3),
         frozen_stages=1,
+        norm_cfg=dict(type='BN', requires_grad=True),
         style='pytorch'),
     neck=dict(
         type='FPN',
@@ -142,5 +143,4 @@ log_level = 'INFO'
 work_dir = './work_dirs/reppoints_moment_r101_fpn_2x_mt'
 load_from = None
 resume_from = None
-auto_resume = True
 workflow = [('train', 1)]
diff --git a/configs/reppoints/reppoints_moment_r50_fpn_1x.py b/configs/reppoints/reppoints_moment_r50_fpn_1x.py
index bfec877ae379a0a4e598a4d6ea54cf569f8ca119..a2ce0284fc5c7ad141bc9f57fb706ea53628df64 100644
--- a/configs/reppoints/reppoints_moment_r50_fpn_1x.py
+++ b/configs/reppoints/reppoints_moment_r50_fpn_1x.py
@@ -10,6 +10,7 @@ model = dict(
         num_stages=4,
         out_indices=(0, 1, 2, 3),
         frozen_stages=1,
+        norm_cfg=dict(type='BN', requires_grad=True),
         style='pytorch'),
     neck=dict(
         type='FPN',
@@ -138,5 +139,4 @@ log_level = 'INFO'
 work_dir = './work_dirs/reppoints_moment_r50_fpn_1x'
 load_from = None
 resume_from = None
-auto_resume = True
 workflow = [('train', 1)]
diff --git a/configs/reppoints/reppoints_moment_r50_fpn_2x.py b/configs/reppoints/reppoints_moment_r50_fpn_2x.py
index 76cf73a49e9fb5b65fee7e1ab9263cb93adfde3b..cc51f3afe2b8fe66fc0878f6ab741b4fdfe6ba5d 100644
--- a/configs/reppoints/reppoints_moment_r50_fpn_2x.py
+++ b/configs/reppoints/reppoints_moment_r50_fpn_2x.py
@@ -10,6 +10,7 @@ model = dict(
         num_stages=4,
         out_indices=(0, 1, 2, 3),
         frozen_stages=1,
+        norm_cfg=dict(type='BN', requires_grad=True),
         style='pytorch'),
     neck=dict(
         type='FPN',
@@ -138,5 +139,4 @@ log_level = 'INFO'
 work_dir = './work_dirs/reppoints_moment_r50_fpn_2x'
 load_from = None
 resume_from = None
-auto_resume = True
 workflow = [('train', 1)]
diff --git a/configs/reppoints/reppoints_moment_r50_fpn_2x_mt.py b/configs/reppoints/reppoints_moment_r50_fpn_2x_mt.py
index 962a6a97fbd866dc1e0a59bec9261e92d88f6a17..62ad7b0d7566a4f9098f26cf56f2f3ea1abce346 100644
--- a/configs/reppoints/reppoints_moment_r50_fpn_2x_mt.py
+++ b/configs/reppoints/reppoints_moment_r50_fpn_2x_mt.py
@@ -10,6 +10,7 @@ model = dict(
         num_stages=4,
         out_indices=(0, 1, 2, 3),
         frozen_stages=1,
+        norm_cfg=dict(type='BN', requires_grad=True),
         style='pytorch'),
     neck=dict(
         type='FPN',
@@ -142,5 +143,4 @@ log_level = 'INFO'
 work_dir = './work_dirs/reppoints_moment_r50_fpn_2x_mt'
 load_from = None
 resume_from = None
-auto_resume = True
 workflow = [('train', 1)]
diff --git a/configs/reppoints/reppoints_moment_r50_no_gn_fpn_1x.py b/configs/reppoints/reppoints_moment_r50_no_gn_fpn_1x.py
index 053a0e7209825ef05ec0a1948379a70eda5d4b96..bdaf07eefcb40aa762a98ed8aa49a49729b48ea1 100644
--- a/configs/reppoints/reppoints_moment_r50_no_gn_fpn_1x.py
+++ b/configs/reppoints/reppoints_moment_r50_no_gn_fpn_1x.py
@@ -8,6 +8,7 @@ model = dict(
         num_stages=4,
         out_indices=(0, 1, 2, 3),
         frozen_stages=1,
+        norm_cfg=dict(type='BN', requires_grad=True),
         style='pytorch'),
     neck=dict(
         type='FPN',
@@ -107,6 +108,7 @@ data = dict(
         ann_file=data_root + 'annotations/instances_val2017.json',
         img_prefix=data_root + 'val2017/',
         pipeline=test_pipeline))
+evaluation = dict(interval=1, metric='bbox')
 # optimizer
 optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001)
 optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2))
@@ -133,5 +135,4 @@ log_level = 'INFO'
 work_dir = './work_dirs/reppoints_moment_r50_no_gn_fpn_1x'
 load_from = None
 resume_from = None
-auto_resume = True
 workflow = [('train', 1)]
diff --git a/configs/reppoints/reppoints_moment_x101_dcn_fpn_2x.py b/configs/reppoints/reppoints_moment_x101_dcn_fpn_2x.py
index 901b7ef7cad1a95ecbec7c3aa52331a3909354fc..9ed54df2fd3527939d6ddab46a91da46238c5301 100644
--- a/configs/reppoints/reppoints_moment_x101_dcn_fpn_2x.py
+++ b/configs/reppoints/reppoints_moment_x101_dcn_fpn_2x.py
@@ -12,6 +12,7 @@ model = dict(
         num_stages=4,
         out_indices=(0, 1, 2, 3),
         frozen_stages=1,
+        norm_cfg=dict(type='BN', requires_grad=True),
         style='pytorch',
         dcn=dict(
             modulated=False,
@@ -146,5 +147,4 @@ log_level = 'INFO'
 work_dir = './work_dirs/reppoints_moment_x101_dcn_fpn_2x'
 load_from = None
 resume_from = None
-auto_resume = True
 workflow = [('train', 1)]
diff --git a/configs/reppoints/reppoints_moment_x101_dcn_fpn_2x_mt.py b/configs/reppoints/reppoints_moment_x101_dcn_fpn_2x_mt.py
index 4cabf9822a4f46555d6380d8f093a446be53b606..17cce67b94f6032b173dbcd85e72591911b32578 100644
--- a/configs/reppoints/reppoints_moment_x101_dcn_fpn_2x_mt.py
+++ b/configs/reppoints/reppoints_moment_x101_dcn_fpn_2x_mt.py
@@ -12,6 +12,7 @@ model = dict(
         num_stages=4,
         out_indices=(0, 1, 2, 3),
         frozen_stages=1,
+        norm_cfg=dict(type='BN', requires_grad=True),
         style='pytorch',
         dcn=dict(
             modulated=False,
@@ -150,5 +151,4 @@ log_level = 'INFO'
 work_dir = './work_dirs/reppoints_moment_x101_dcn_fpn_2x_mt'
 load_from = None
 resume_from = None
-auto_resume = True
 workflow = [('train', 1)]
diff --git a/configs/reppoints/reppoints_partial_minmax_r50_fpn_1x.py b/configs/reppoints/reppoints_partial_minmax_r50_fpn_1x.py
index 1409b4e2c3bfa827a8607c66d70717b0d613a0ed..659706133873ad9f17cfcbd8cde8d94ee91ec3d1 100644
--- a/configs/reppoints/reppoints_partial_minmax_r50_fpn_1x.py
+++ b/configs/reppoints/reppoints_partial_minmax_r50_fpn_1x.py
@@ -10,6 +10,7 @@ model = dict(
         num_stages=4,
         out_indices=(0, 1, 2, 3),
         frozen_stages=1,
+        norm_cfg=dict(type='BN', requires_grad=True),
         style='pytorch'),
     neck=dict(
         type='FPN',
@@ -138,5 +139,4 @@ log_level = 'INFO'
 work_dir = './work_dirs/reppoints_partial_minmax_r50_fpn_1x'
 load_from = None
 resume_from = None
-auto_resume = True
 workflow = [('train', 1)]
diff --git a/configs/retinanet_r101_fpn_1x.py b/configs/retinanet_r101_fpn_1x.py
index 3316ec4dd989d067296a3402ba1453911ec100d1..7e4e9919ab975d308266b6ccfbf1aa03d68c40c5 100644
--- a/configs/retinanet_r101_fpn_1x.py
+++ b/configs/retinanet_r101_fpn_1x.py
@@ -8,6 +8,7 @@ model = dict(
         num_stages=4,
         out_indices=(0, 1, 2, 3),
         frozen_stages=1,
+        norm_cfg=dict(type='BN', requires_grad=True),
         style='pytorch'),
     neck=dict(
         type='FPN',
diff --git a/configs/retinanet_r50_fpn_1x.py b/configs/retinanet_r50_fpn_1x.py
index 15e1611b04efb726a60c1ec3f929b21f98dd9977..37f851ca0a7115cad5561272587274f0ca618e6d 100644
--- a/configs/retinanet_r50_fpn_1x.py
+++ b/configs/retinanet_r50_fpn_1x.py
@@ -8,6 +8,7 @@ model = dict(
         num_stages=4,
         out_indices=(0, 1, 2, 3),
         frozen_stages=1,
+        norm_cfg=dict(type='BN', requires_grad=True),
         style='pytorch'),
     neck=dict(
         type='FPN',
diff --git a/configs/retinanet_x101_32x4d_fpn_1x.py b/configs/retinanet_x101_32x4d_fpn_1x.py
index 653e54116371a03d8dd4644137bad78b0cff5029..3120f1c124da0e6a16c7931270d7e191e4f0e73e 100644
--- a/configs/retinanet_x101_32x4d_fpn_1x.py
+++ b/configs/retinanet_x101_32x4d_fpn_1x.py
@@ -10,6 +10,7 @@ model = dict(
         num_stages=4,
         out_indices=(0, 1, 2, 3),
         frozen_stages=1,
+        norm_cfg=dict(type='BN', requires_grad=True),
         style='pytorch'),
     neck=dict(
         type='FPN',
diff --git a/configs/retinanet_x101_64x4d_fpn_1x.py b/configs/retinanet_x101_64x4d_fpn_1x.py
index c8be724f92d2a09198980ad017f4851b0be09359..8c7e73da88ebc5e850d091ff1646c0bdab57f9d2 100644
--- a/configs/retinanet_x101_64x4d_fpn_1x.py
+++ b/configs/retinanet_x101_64x4d_fpn_1x.py
@@ -10,6 +10,7 @@ model = dict(
         num_stages=4,
         out_indices=(0, 1, 2, 3),
         frozen_stages=1,
+        norm_cfg=dict(type='BN', requires_grad=True),
         style='pytorch'),
     neck=dict(
         type='FPN',
diff --git a/configs/rpn_r101_fpn_1x.py b/configs/rpn_r101_fpn_1x.py
index ed90c4529b45c686ff354178ac4f56046665b064..498dff4eecfa578be458bb457cd4aa49bd77b2cb 100644
--- a/configs/rpn_r101_fpn_1x.py
+++ b/configs/rpn_r101_fpn_1x.py
@@ -8,6 +8,7 @@ model = dict(
         num_stages=4,
         out_indices=(0, 1, 2, 3),
         frozen_stages=1,
+        norm_cfg=dict(type='BN', requires_grad=True),
         style='pytorch'),
     neck=dict(
         type='FPN',
diff --git a/configs/rpn_r50_fpn_1x.py b/configs/rpn_r50_fpn_1x.py
index 1ace04bca920dddef3fe6f3bc287542dc1257ee0..d0bedd6bc07f0fbc7732adf9ddff44fcfd0fcc9f 100644
--- a/configs/rpn_r50_fpn_1x.py
+++ b/configs/rpn_r50_fpn_1x.py
@@ -8,6 +8,7 @@ model = dict(
         num_stages=4,
         out_indices=(0, 1, 2, 3),
         frozen_stages=1,
+        norm_cfg=dict(type='BN', requires_grad=True),
         style='pytorch'),
     neck=dict(
         type='FPN',
diff --git a/configs/rpn_x101_32x4d_fpn_1x.py b/configs/rpn_x101_32x4d_fpn_1x.py
index ce7a978b63ff36223e91e80f64eba9d26dd043a6..ab24b69f3fc33f1a466e3846f62ac256c0f3862d 100644
--- a/configs/rpn_x101_32x4d_fpn_1x.py
+++ b/configs/rpn_x101_32x4d_fpn_1x.py
@@ -10,6 +10,7 @@ model = dict(
         num_stages=4,
         out_indices=(0, 1, 2, 3),
         frozen_stages=1,
+        norm_cfg=dict(type='BN', requires_grad=True),
         style='pytorch'),
     neck=dict(
         type='FPN',
diff --git a/configs/rpn_x101_64x4d_fpn_1x.py b/configs/rpn_x101_64x4d_fpn_1x.py
index 6cc50f4c46d946545e8669926af8047a06bb41c2..98d61531063aff5a8e20cdabb22853e08bb15f57 100644
--- a/configs/rpn_x101_64x4d_fpn_1x.py
+++ b/configs/rpn_x101_64x4d_fpn_1x.py
@@ -10,6 +10,7 @@ model = dict(
         num_stages=4,
         out_indices=(0, 1, 2, 3),
         frozen_stages=1,
+        norm_cfg=dict(type='BN', requires_grad=True),
         style='pytorch'),
     neck=dict(
         type='FPN',