Merge branch 'master' into gpu3

27c917ab · wanggh · f9e77805 · 29efbf63 · 27c917ab · 27c917ab
Commit 27c917ab authored 3 years ago by wanggh
--- a/configs/distillers/DeFeat/DeFeat_deneck_decls_faster_rcnn_r152_fpn_1x_distill_faster_rcnn_r50_fpn_1x_coco.py
+++ b/configs/distillers/DeFeat/DeFeat_deneck_decls_faster_rcnn_r152_fpn_1x_distill_faster_rcnn_r50_fpn_1x_coco.py
+_base_ = [  # base config files, written relative to this file's directory
+    '../../_base_/models/faster_rcnn_r50_fpn.py',
+    '../../_base_/datasets/coco_detection.py',
+    '../../_base_/schedules/schedule_1x.py', '../../_base_/default_runtime.py'
+]
+
+# Distillation settings: a 'DeFeat' distiller with two MSELoss terms on each of
+# feature levels 0-3, plus two KLwithT terms hooked on roi_head.bbox_head.fc_cls.
+find_unused_parameters=True
+pos_w=0.5  # weight of every loss_p_<i> MSELoss term ('p' presumably = positive regions; confirm)
+neg_w=2  # weight of every loss_n_<i> MSELoss term ('n' presumably = negative regions; confirm)
+roi_pos_w=0.05  # weight of the loss_roi_p KLwithT term
+roi_pos_T=3  # T passed to the loss_roi_p KLwithT term
+roi_neg_w=2  # weight of the loss_roi_n KLwithT term
+roi_neg_T=1  # T passed to the loss_roi_n KLwithT term
+distiller = dict(
+    type='DeFeat',
+    teacher_pretrained = '/mnt/data3/wangguohua/model/mmdet/faster_rcnn/faster_rcnn_r152_fpn_1x_coco.pth',  # NOTE(review): absolute local path; point it at your own checkpoint
+    load_teacher_part = 'neck_head',
+    distill_cfg = [ dict(feature_level = 0,  # levels 0-3 each carry the same loss_p_<i>/loss_n_<i> MSELoss pair
+                         methods=[dict(type='MSELoss',
+                                       name='loss_p_0',
+                                       student_channels = 256,
+                                       teacher_channels = 256,
+                                       weight = pos_w,
+                                       ),
+                                  dict(type='MSELoss',
+                                       name='loss_n_0',
+                                       student_channels = 256,
+                                       teacher_channels = 256,
+                                       weight = neg_w,
+                                       )
+                                ]
+                        ),
+                    dict(feature_level = 1,
+                         methods=[dict(type='MSELoss',
+                                       name='loss_p_1',
+                                       student_channels = 256,
+                                       teacher_channels = 256,
+                                       weight = pos_w,
+                                       ),
+                                  dict(type='MSELoss',
+                                       name='loss_n_1',
+                                       student_channels = 256,
+                                       teacher_channels = 256,
+                                       weight = neg_w,
+                                       )
+                                ]
+                        ),
+                    dict(feature_level = 2,
+                         methods=[dict(type='MSELoss',
+                                       name='loss_p_2',
+                                       student_channels = 256,
+                                       teacher_channels = 256,
+                                       weight = pos_w,
+                                       ),
+                                  dict(type='MSELoss',
+                                       name='loss_n_2',
+                                       student_channels = 256,
+                                       teacher_channels = 256,
+                                       weight = neg_w,
+                                       )
+                                ]
+                        ),
+                    dict(feature_level = 3,
+                         methods=[dict(type='MSELoss',
+                                       name='loss_p_3',
+                                       student_channels = 256,
+                                       teacher_channels = 256,
+                                       weight = pos_w,
+                                       ),
+                                  dict(type='MSELoss',
+                                       name='loss_n_3',
+                                       student_channels = 256,
+                                       teacher_channels = 256,
+                                       weight = neg_w,
+                                       )
+                                ]
+                        ),
+                    dict(student_module = 'roi_head.bbox_head.fc_cls',  # same module name is hooked on student and teacher
+                         teacher_module = 'roi_head.bbox_head.fc_cls',
+                         register_hook = True,
+                         methods=[dict(type='KLwithT',
+                                       name='loss_roi_p',
+                                       T=roi_pos_T,
+                                       weight = roi_pos_w,
+                                       ),
+                                   dict(type='KLwithT',
+                                       name='loss_roi_n',
+                                       T=roi_neg_T,
+                                       weight = roi_neg_w,
+                                       ),
+                                ]
+                        ),
+                   ]
+    )
+
+student_cfg = 'configs/faster_rcnn/faster_rcnn_r50_fpn_1x_coco.py'  # student config; no ../ prefix, presumably resolved from the repo root (confirm)
+teacher_cfg = 'configs/faster_rcnn/faster_rcnn_r152_fpn_1x_coco.py'  # teacher config, same path convention as student_cfg
--- a/configs/distillers/DeFeat/DeFeat_deneck_faster_rcnn_r152_fpn_1x_distill_faster_rcnn_r50_fpn_1x_coco.py
+++ b/configs/distillers/DeFeat/DeFeat_deneck_faster_rcnn_r152_fpn_1x_distill_faster_rcnn_r50_fpn_1x_coco.py
+_base_ = [  # base config files, written relative to this file's directory
+    '../../_base_/models/faster_rcnn_r50_fpn.py',
+    '../../_base_/datasets/coco_detection.py',
+    '../../_base_/schedules/schedule_1x.py', '../../_base_/default_runtime.py'
+]
+
+# Distillation settings: a 'DeFeat' distiller with two MSELoss terms per feature
+# level 0-3 (loss_p_<i> weighted by pos_w, loss_n_<i> weighted by neg_w).
+find_unused_parameters=True
+pos_w=0.5  # weight of every loss_p_<i> MSELoss term ('p' presumably = positive regions; confirm)
+neg_w=2  # weight of every loss_n_<i> MSELoss term ('n' presumably = negative regions; confirm)
+distiller = dict(
+    type='DeFeat',
+    teacher_pretrained = '/mnt/data3/wangguohua/model/mmdet/faster_rcnn/faster_rcnn_r152_fpn_1x_coco.pth',  # NOTE(review): absolute local path; point it at your own checkpoint
+    load_teacher_part = '',  # NOTE(review): empty here, while the sibling DeFeat/FGFI configs use 'neck_head'; confirm this is intended
+    distill_cfg = [ dict(feature_level = 0,  # levels 0-3 each carry the same loss_p_<i>/loss_n_<i> MSELoss pair
+                         methods=[dict(type='MSELoss',
+                                       name='loss_p_0',
+                                       student_channels = 256,
+                                       teacher_channels = 256,
+                                       weight = pos_w,
+                                       ),
+                                  dict(type='MSELoss',
+                                       name='loss_n_0',
+                                       student_channels = 256,
+                                       teacher_channels = 256,
+                                       weight = neg_w,
+                                       )
+                                ]
+                        ),
+                    dict(feature_level = 1,
+                         methods=[dict(type='MSELoss',
+                                       name='loss_p_1',
+                                       student_channels = 256,
+                                       teacher_channels = 256,
+                                       weight = pos_w,
+                                       ),
+                                  dict(type='MSELoss',
+                                       name='loss_n_1',
+                                       student_channels = 256,
+                                       teacher_channels = 256,
+                                       weight = neg_w,
+                                       )
+                                ]
+                        ),
+                    dict(feature_level = 2,
+                         methods=[dict(type='MSELoss',
+                                       name='loss_p_2',
+                                       student_channels = 256,
+                                       teacher_channels = 256,
+                                       weight = pos_w,
+                                       ),
+                                  dict(type='MSELoss',
+                                       name='loss_n_2',
+                                       student_channels = 256,
+                                       teacher_channels = 256,
+                                       weight = neg_w,
+                                       )
+                                ]
+                        ),
+                    dict(feature_level = 3,
+                         methods=[dict(type='MSELoss',
+                                       name='loss_p_3',
+                                       student_channels = 256,
+                                       teacher_channels = 256,
+                                       weight = pos_w,
+                                       ),
+                                  dict(type='MSELoss',
+                                       name='loss_n_3',
+                                       student_channels = 256,
+                                       teacher_channels = 256,
+                                       weight = neg_w,
+                                       )
+                                ]
+                        ),
+                   ]
+    )
+
+student_cfg = 'configs/faster_rcnn/faster_rcnn_r50_fpn_1x_coco.py'  # student config; no ../ prefix, presumably resolved from the repo root (confirm)
+teacher_cfg = 'configs/faster_rcnn/faster_rcnn_r152_fpn_1x_coco.py'  # teacher config, same path convention as student_cfg
--- a/configs/distillers/DeFeat/DeFeat_deneck_mask_rcnn_swinT_fpn_3x_distill_faster_rcnn_r50_fpn_1x_coco.py
+++ b/configs/distillers/DeFeat/DeFeat_deneck_mask_rcnn_swinT_fpn_3x_distill_faster_rcnn_r50_fpn_1x_coco.py
+_base_ = [  # base config files, written relative to this file's directory
+    '../../_base_/models/faster_rcnn_r50_fpn.py',
+    '../../_base_/datasets/coco_detection.py',
+    '../../_base_/schedules/schedule_1x.py', '../../_base_/default_runtime.py'
+]
+
+# Distillation settings: 'DeFeat' distiller, one loss_p_<i>/loss_n_<i> MSELoss pair per feature level 0-3.
+find_unused_parameters=True
+pos_w=0.5  # weight of every loss_p_<i> MSELoss term ('p' presumably = positive regions; confirm)
+neg_w=2  # weight of every loss_n_<i> MSELoss term ('n' presumably = negative regions; confirm)
+distiller = dict(
+    type='DeFeat',
+    teacher_pretrained = '/mnt/data3/wangguohua/model/mmdet/swin/mask_rcnn_swin_tiny_patch4_window7.pth',  # NOTE(review): absolute local path; point it at your own checkpoint
+    load_teacher_part = 'neck_head',
+    distill_cfg = [ dict(feature_level = 0,  # levels 0-3 each carry the same loss_p_<i>/loss_n_<i> MSELoss pair
+                         methods=[dict(type='MSELoss',
+                                       name='loss_p_0',
+                                       student_channels = 256,
+                                       teacher_channels = 256,
+                                       weight = pos_w,
+                                       ),
+                                  dict(type='MSELoss',
+                                       name='loss_n_0',
+                                       student_channels = 256,
+                                       teacher_channels = 256,
+                                       weight = neg_w,
+                                       )
+                                ]
+                        ),
+                    dict(feature_level = 1,
+                         methods=[dict(type='MSELoss',
+                                       name='loss_p_1',
+                                       student_channels = 256,
+                                       teacher_channels = 256,
+                                       weight = pos_w,
+                                       ),
+                                  dict(type='MSELoss',
+                                       name='loss_n_1',
+                                       student_channels = 256,
+                                       teacher_channels = 256,
+                                       weight = neg_w,
+                                       )
+                                ]
+                        ),
+                    dict(feature_level = 2,
+                         methods=[dict(type='MSELoss',
+                                       name='loss_p_2',
+                                       student_channels = 256,
+                                       teacher_channels = 256,
+                                       weight = pos_w,
+                                       ),
+                                  dict(type='MSELoss',
+                                       name='loss_n_2',
+                                       student_channels = 256,
+                                       teacher_channels = 256,
+                                       weight = neg_w,
+                                       )
+                                ]
+                        ),
+                    dict(feature_level = 3,
+                         methods=[dict(type='MSELoss',
+                                       name='loss_p_3',
+                                       student_channels = 256,
+                                       teacher_channels = 256,
+                                       weight = pos_w,
+                                       ),
+                                  dict(type='MSELoss',
+                                       name='loss_n_3',
+                                       student_channels = 256,
+                                       teacher_channels = 256,
+                                       weight = neg_w,
+                                       )
+                                ]
+                        ),
+                   ]
+    )
+
+student_cfg = 'configs/faster_rcnn/faster_rcnn_r50_fpn_1x_coco.py'  # student config; no ../ prefix, presumably resolved from the repo root (confirm)
+teacher_cfg = 'configs/swin/mask_rcnn_swin_tiny_patch4_window7_mstrain_480-800_adamw_3x_coco.py'  # teacher config, same path convention as student_cfg
--- a/configs/distillers/DeFeat/DeFeat_faster_rcnn_r152_fpn_1x_distill_faster_rcnn_r50_fpn_1x_coco.py
+++ b/configs/distillers/DeFeat/DeFeat_faster_rcnn_r152_fpn_1x_distill_faster_rcnn_r50_fpn_1x_coco.py
+_base_ = [  # base config files, written relative to this file's directory
+    '../../_base_/models/faster_rcnn_r50_fpn.py',
+    '../../_base_/datasets/coco_detection.py',
+    '../../_base_/schedules/schedule_1x.py', '../../_base_/default_runtime.py'
+]
+
+# Distillation settings: a 'DeFeat' distiller with two MSELoss terms per feature
+# level 0-3 (loss_p_<i> weighted by pos_w, loss_n_<i> weighted by neg_w).
+find_unused_parameters=True
+pos_w=1  # weight of every loss_p_<i> MSELoss term ('p' presumably = positive regions; confirm)
+neg_w=2  # weight of every loss_n_<i> MSELoss term ('n' presumably = negative regions; confirm)
+distiller = dict(
+    type='DeFeat',
+    teacher_pretrained = '/mnt/data3/wangguohua/model/mmdet/faster_rcnn/faster_rcnn_r152_fpn_1x_coco.pth',  # NOTE(review): absolute local path; point it at your own checkpoint
+    load_teacher_part = 'neck_head',
+    distill_cfg = [ dict(feature_level = 0,  # levels 0-3 each carry the same loss_p_<i>/loss_n_<i> MSELoss pair
+                         methods=[dict(type='MSELoss',
+                                       name='loss_p_0',
+                                       student_channels = 256,
+                                       teacher_channels = 256,
+                                       weight = pos_w,
+                                       ),
+                                  dict(type='MSELoss',
+                                       name='loss_n_0',
+                                       student_channels = 256,
+                                       teacher_channels = 256,
+                                       weight = neg_w,
+                                       )
+                                ]
+                        ),
+                    dict(feature_level = 1,
+                         methods=[dict(type='MSELoss',
+                                       name='loss_p_1',
+                                       student_channels = 256,
+                                       teacher_channels = 256,
+                                       weight = pos_w,
+                                       ),
+                                  dict(type='MSELoss',
+                                       name='loss_n_1',
+                                       student_channels = 256,
+                                       teacher_channels = 256,
+                                       weight = neg_w,
+                                       )
+                                ]
+                        ),
+                    dict(feature_level = 2,
+                         methods=[dict(type='MSELoss',
+                                       name='loss_p_2',
+                                       student_channels = 256,
+                                       teacher_channels = 256,
+                                       weight = pos_w,
+                                       ),
+                                  dict(type='MSELoss',
+                                       name='loss_n_2',
+                                       student_channels = 256,
+                                       teacher_channels = 256,
+                                       weight = neg_w,
+                                       )
+                                ]
+                        ),
+                    dict(feature_level = 3,
+                         methods=[dict(type='MSELoss',
+                                       name='loss_p_3',
+                                       student_channels = 256,
+                                       teacher_channels = 256,
+                                       weight = pos_w,
+                                       ),
+                                  dict(type='MSELoss',
+                                       name='loss_n_3',
+                                       student_channels = 256,
+                                       teacher_channels = 256,
+                                       weight = neg_w,
+                                       )
+                                ]
+                        ),
+                   ]
+    )
+
+student_cfg = 'configs/faster_rcnn/faster_rcnn_r50_fpn_1x_coco.py'  # student config; no ../ prefix, presumably resolved from the repo root (confirm)
+teacher_cfg = 'configs/faster_rcnn/faster_rcnn_r152_fpn_1x_coco.py'  # teacher config, same path convention as student_cfg
--- a/configs/distillers/FGFI/FGFI_faster_rcnn_r152_fpn_1x_distill_faster_rcnn_r50_fpn_1x_coco.py
+++ b/configs/distillers/FGFI/FGFI_faster_rcnn_r152_fpn_1x_distill_faster_rcnn_r50_fpn_1x_coco.py
+_base_ = [  # base config files, written relative to this file's directory
+    '../../_base_/models/faster_rcnn_r50_fpn.py',
+    '../../_base_/datasets/coco_detection.py',
+    '../../_base_/schedules/schedule_1x.py', '../../_base_/default_runtime.py'
+]
+
+# Distillation settings: an 'FGFI' distiller with two MSELoss terms per feature
+# level 0-3 (loss_p_<i> weighted by pos_w, loss_n_<i> weighted by neg_w).
+find_unused_parameters=True
+pos_w=1  # weight of every loss_p_<i> MSELoss term ('p' presumably = positive regions; confirm)
+neg_w=2  # weight of every loss_n_<i> MSELoss term ('n' presumably = negative regions; confirm)
+distiller = dict(
+    type='FGFI',
+    teacher_pretrained = '/mnt/data3/wangguohua/model/mmdet/faster_rcnn/faster_rcnn_r152_fpn_1x_coco.pth',  # NOTE(review): absolute local path; point it at your own checkpoint
+    load_teacher_part = 'neck_head',
+    distill_cfg = [ dict(feature_level = 0,  # levels 0-3 each carry the same loss_p_<i>/loss_n_<i> MSELoss pair
+                         methods=[dict(type='MSELoss',
+                                       name='loss_p_0',
+                                       student_channels = 256,
+                                       teacher_channels = 256,
+                                       weight = pos_w,
+                                       ),
+                                  dict(type='MSELoss',
+                                       name='loss_n_0',
+                                       student_channels = 256,
+                                       teacher_channels = 256,
+                                       weight = neg_w,
+                                       )
+                                ]
+                        ),
+                    dict(feature_level = 1,
+                         methods=[dict(type='MSELoss',
+                                       name='loss_p_1',
+                                       student_channels = 256,
+                                       teacher_channels = 256,
+                                       weight = pos_w,
+                                       ),
+                                  dict(type='MSELoss',
+                                       name='loss_n_1',
+                                       student_channels = 256,
+                                       teacher_channels = 256,
+                                       weight = neg_w,
+                                       )
+                                ]
+                        ),
+                    dict(feature_level = 2,
+                         methods=[dict(type='MSELoss',
+                                       name='loss_p_2',
+                                       student_channels = 256,
+                                       teacher_channels = 256,
+                                       weight = pos_w,
+                                       ),
+                                  dict(type='MSELoss',
+                                       name='loss_n_2',
+                                       student_channels = 256,
+                                       teacher_channels = 256,
+                                       weight = neg_w,
+                                       )
+                                ]
+                        ),
+                    dict(feature_level = 3,
+                         methods=[dict(type='MSELoss',
+                                       name='loss_p_3',
+                                       student_channels = 256,
+                                       teacher_channels = 256,
+                                       weight = pos_w,
+                                       ),
+                                  dict(type='MSELoss',
+                                       name='loss_n_3',
+                                       student_channels = 256,
+                                       teacher_channels = 256,
+                                       weight = neg_w,
+                                       )
+                                ]
+                        ),
+                   ]
+    )
+
+student_cfg = 'configs/faster_rcnn/faster_rcnn_r50_fpn_1x_coco.py'  # student config; no ../ prefix, presumably resolved from the repo root (confirm)
+teacher_cfg = 'configs/faster_rcnn/faster_rcnn_r152_fpn_1x_coco.py'  # teacher config, same path convention as student_cfg
--- a/configs/distillers/FGFI/FGFI_mask_rcnn_swinT_fpn_3x_distill_faster_rcnn_r50_fpn_1x_coco.py
+++ b/configs/distillers/FGFI/FGFI_mask_rcnn_swinT_fpn_3x_distill_faster_rcnn_r50_fpn_1x_coco.py
+_base_ = [  # base config files, written relative to this file's directory
+    '../../_base_/models/faster_rcnn_r50_fpn.py',
+    '../../_base_/datasets/coco_detection.py',
+    '../../_base_/schedules/schedule_1x.py', '../../_base_/default_runtime.py'
+]
+
+# Distillation settings: 'FGFI' distiller, one loss_p_<i>/loss_n_<i> MSELoss pair per feature level 0-3.
+find_unused_parameters=True
+pos_w=1  # weight of every loss_p_<i> MSELoss term ('p' presumably = positive regions; confirm)
+neg_w=2  # weight of every loss_n_<i> MSELoss term ('n' presumably = negative regions; confirm)
+distiller = dict(
+    type='FGFI',
+    teacher_pretrained = '/mnt/data3/wangguohua/model/mmdet/swin/mask_rcnn_swin_tiny_patch4_window7.pth',  # NOTE(review): absolute local path; point it at your own checkpoint
+    load_teacher_part = 'neck_head',
+    distill_cfg = [ dict(feature_level = 0,  # levels 0-3 each carry the same loss_p_<i>/loss_n_<i> MSELoss pair
+                         methods=[dict(type='MSELoss',
+                                       name='loss_p_0',
+                                       student_channels = 256,
+                                       teacher_channels = 256,
+                                       weight = pos_w,
+                                       ),
+                                  dict(type='MSELoss',
+                                       name='loss_n_0',
+                                       student_channels = 256,
+                                       teacher_channels = 256,
+                                       weight = neg_w,
+                                       )
+                                ]
+                        ),
+                    dict(feature_level = 1,
+                         methods=[dict(type='MSELoss',
+                                       name='loss_p_1',
+                                       student_channels = 256,
+                                       teacher_channels = 256,
+                                       weight = pos_w,
+                                       ),
+                                  dict(type='MSELoss',
+                                       name='loss_n_1',
+                                       student_channels = 256,
+                                       teacher_channels = 256,
+                                       weight = neg_w,
+                                       )
+                                ]
+                        ),
+                    dict(feature_level = 2,
+                         methods=[dict(type='MSELoss',
+                                       name='loss_p_2',
+                                       student_channels = 256,
+                                       teacher_channels = 256,
+                                       weight = pos_w,
+                                       ),
+                                  dict(type='MSELoss',
+                                       name='loss_n_2',
+                                       student_channels = 256,
+                                       teacher_channels = 256,
+                                       weight = neg_w,
+                                       )
+                                ]
+                        ),
+                    dict(feature_level = 3,
+                         methods=[dict(type='MSELoss',
+                                       name='loss_p_3',
+                                       student_channels = 256,
+                                       teacher_channels = 256,
+                                       weight = pos_w,
+                                       ),
+                                  dict(type='MSELoss',
+                                       name='loss_n_3',
+                                       student_channels = 256,
+                                       teacher_channels = 256,
+                                       weight = neg_w,
+                                       )
+                                ]
+                        ),
+                   ]
+    )
+
+student_cfg = 'configs/faster_rcnn/faster_rcnn_r50_fpn_1x_coco.py'  # student config; no ../ prefix, presumably resolved from the repo root (confirm)
+teacher_cfg = 'configs/swin/mask_rcnn_swin_tiny_patch4_window7_mstrain_480-800_adamw_3x_coco.py'  # teacher config, same path convention as student_cfg
--- a/configs/distillers/ROIfeature_mimicking/roi_fm_faster_rcnn_r152_fpn_1x_distill_faster_rcnn_r50_fpn_1x_coco.py
+++ b/configs/distillers/ROIfeature_mimicking/roi_fm_faster_rcnn_r152_fpn_1x_distill_faster_rcnn_r50_fpn_1x_coco.py
@@ -6,7 +6,7 @@ _base_ = [

 # model settings
 find_unused_parameters=True
-weight=1
+weight=10
 distiller = dict(
    type='ROIMimicking',
    teacher_pretrained = '/mnt/data3/wangguohua/model/mmdet/faster_rcnn/faster_rcnn_r152_fpn_1x_coco.pth',

--- a/configs/distillers/ROIfeature_mimicking/roi_fm_mask_rcnn_swinT_fpn_3x_distill_faster_rcnn_r50_fpn_1x_coco.py
+++ b/configs/distillers/ROIfeature_mimicking/roi_fm_mask_rcnn_swinT_fpn_3x_distill_faster_rcnn_r50_fpn_1x_coco.py
+_base_ = [  # base config files, written relative to this file's directory
+    '../../_base_/models/faster_rcnn_r50_fpn.py',
+    '../../_base_/datasets/coco_detection.py',
+    '../../_base_/schedules/schedule_1x.py', '../../_base_/default_runtime.py'
+]
+
+# Distillation settings: 'ROIMimicking' distiller with a single MSELoss hooked on roi_head.bbox_head.shared_fcs.1.
+find_unused_parameters=True
+weight=10  # weight of the single loss_fm MSELoss term
+distiller = dict(
+    type='ROIMimicking',
+    teacher_pretrained = '/mnt/data3/wangguohua/model/mmdet/swin/mask_rcnn_swin_tiny_patch4_window7.pth',  # NOTE(review): absolute local path; point it at your own checkpoint
+    load_teacher_part = 'neck_head',
+    distill_cfg = [ dict(student_module = 'roi_head.bbox_head.shared_fcs.1',  # same module name is hooked on student and teacher
+                         teacher_module = 'roi_head.bbox_head.shared_fcs.1',
+                         register_hook = True,
+                         methods=[dict(type='MSELoss',
+                                       name='loss_fm',
+                                       student_channels = 1024,
+                                       teacher_channels = 1024,
+                                       weight = weight,  # keyword 'weight' takes the module-level value defined above
+                                       )
+                                ]
+                        ),
+                   ]
+    )
+
+
+student_cfg = 'configs/faster_rcnn/faster_rcnn_r50_fpn_1x_coco.py'  # student config; no ../ prefix, presumably resolved from the repo root (confirm)
+teacher_cfg = 'configs/swin/mask_rcnn_swin_tiny_patch4_window7_mstrain_480-800_adamw_3x_coco.py'  # teacher config, same path convention as student_cfg
--- a/configs/distillers/fgd/fgd_faster_rcnn_r101_fpn_1x_distill_faster_rcnn_r50_fpn_1x_voc0712.py
+++ b/configs/distillers/fgd/fgd_faster_rcnn_r101_fpn_1x_distill_faster_rcnn_r50_fpn_1x_voc0712.py
@@ -10,7 +10,7 @@ beta_fgd=0.000025
 gamma_fgd=0.00005
 lambda_fgd=0.0000005
 distiller = dict(
-    type='DetectionDistiller',
+    type='FGD',
    teacher_pretrained = '/mnt/data3/wangguohua/model/mmdet/faster_rcnn_r101_fpn_1x_voc0712.pth',
    init_student = True,
    distill_cfg = [ dict(student_module = 'neck.fpn_convs.3.conv',

--- a/configs/distillers/fgd/fgd_faster_rcnn_r101_fpn_2x_distill_faster_rcnn_r50_fpn_2x_coco.py
+++ b/configs/distillers/fgd/fgd_faster_rcnn_r101_fpn_2x_distill_faster_rcnn_r50_fpn_2x_coco.py
@@ -10,7 +10,7 @@ beta_fgd=0.000025
 gamma_fgd=0.00005
 lambda_fgd=0.0000005
 distiller = dict(
-    type='DetectionDistiller',
+    type='FGD',
    teacher_pretrained = 'https://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_r101_fpn_2x_coco/faster_rcnn_r101_fpn_2x_coco_bbox_mAP-0.398_20200504_210455-1d2dac9c.pth',
    init_student = True,
    distill_cfg = [ dict(student_module = 'neck.fpn_convs.3.conv',

--- a/configs/distillers/fgd/fgd_faster_rcnn_r152_fpn_1x_distill_faster_rcnn_r50_fpn_1x_coco.py
+++ b/configs/distillers/fgd/fgd_faster_rcnn_r152_fpn_1x_distill_faster_rcnn_r50_fpn_1x_coco.py
+_base_ = [  # base config files, written relative to this file's directory
+    '../../_base_/models/faster_rcnn_r50_fpn.py',
+    '../../_base_/datasets/coco_detection.py',
+    '../../_base_/schedules/schedule_1x.py', '../../_base_/default_runtime.py'
+]
+
+# Distillation settings: 'FGD' distiller with one FeatureLoss per neck.fpn_convs.<i>.conv module (i = 3, 2, 1, 0).
+find_unused_parameters=True
+temp=0.5  # temp and the four *_fgd values below are passed unchanged to every FeatureLoss entry
+alpha_fgd=0.00005
+beta_fgd=0.000025
+gamma_fgd=0.00005
+lambda_fgd=0.0000005
+distiller = dict(
+    type='FGD',
+    teacher_pretrained = '/mnt/data3/wangguohua/model/mmdet/faster_rcnn/faster_rcnn_r152_fpn_1x_coco.pth',  # NOTE(review): absolute local path; point it at your own checkpoint
+    init_student = True,  # NOTE(review): presumably initialises student weights from the teacher; confirm in the distiller code
+    distill_cfg = [ dict(student_module = 'neck.fpn_convs.3.conv',  # each entry pairs the same-named student and teacher module
+                         teacher_module = 'neck.fpn_convs.3.conv',
+                         output_hook = True,
+                         methods=[dict(type='FeatureLoss',
+                                       name='loss_fgd_fpn_3',
+                                       student_channels = 256,
+                                       teacher_channels = 256,
+                                       temp = temp,
+                                       alpha_fgd=alpha_fgd,
+                                       beta_fgd=beta_fgd,
+                                       gamma_fgd=gamma_fgd,
+                                       lambda_fgd=lambda_fgd,
+                                       )
+                                ]
+                        ),
+                    dict(student_module = 'neck.fpn_convs.2.conv',
+                         teacher_module = 'neck.fpn_convs.2.conv',
+                         output_hook = True,
+                         methods=[dict(type='FeatureLoss',
+                                       name='loss_fgd_fpn_2',
+                                       student_channels = 256,
+                                       teacher_channels = 256,
+                                       temp = temp,
+                                       alpha_fgd=alpha_fgd,
+                                       beta_fgd=beta_fgd,
+                                       gamma_fgd=gamma_fgd,
+                                       lambda_fgd=lambda_fgd,
+                                       )
+                                ]
+                        ),
+                    dict(student_module = 'neck.fpn_convs.1.conv',
+                         teacher_module = 'neck.fpn_convs.1.conv',
+                         output_hook = True,
+                         methods=[dict(type='FeatureLoss',
+                                       name='loss_fgd_fpn_1',
+                                       student_channels = 256,
+                                       teacher_channels = 256,
+                                       temp = temp,
+                                       alpha_fgd=alpha_fgd,
+                                       beta_fgd=beta_fgd,
+                                       gamma_fgd=gamma_fgd,
+                                       lambda_fgd=lambda_fgd,
+                                       )
+                                ]
+                        ),
+                    dict(student_module = 'neck.fpn_convs.0.conv',
+                         teacher_module = 'neck.fpn_convs.0.conv',
+                         output_hook = True,
+                         methods=[dict(type='FeatureLoss',
+                                       name='loss_fgd_fpn_0',
+                                       student_channels = 256,
+                                       teacher_channels = 256,
+                                       temp = temp,
+                                       alpha_fgd=alpha_fgd,
+                                       beta_fgd=beta_fgd,
+                                       gamma_fgd=gamma_fgd,
+                                       lambda_fgd=lambda_fgd,
+                                       )
+                                ]
+                        ),
+                   ]
+    )
+
+student_cfg = 'configs/faster_rcnn/faster_rcnn_r50_fpn_1x_coco.py'  # student config; no ../ prefix, presumably resolved from the repo root (confirm)
+teacher_cfg = 'configs/faster_rcnn/faster_rcnn_r152_fpn_1x_coco.py'  # teacher config, same path convention as student_cfg
+# optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001)
+# optimizer_config = dict(_delete_=True, grad_clip=dict(max_norm=35, norm_type=2))
+# data = dict(
+#     samples_per_gpu=2,
+#     workers_per_gpu=2,)
\ No newline at end of file
--- a/configs/distillers/fgd/fgd_mask_rcnn_swinS_fpn_3x_distill_mask_rcnn_swinT_fpn_1x_coco.py
+++ b/configs/distillers/fgd/fgd_mask_rcnn_swinS_fpn_3x_distill_mask_rcnn_swinT_fpn_1x_coco.py
@@ -12,7 +12,7 @@ beta_fgd=0.000025
 gamma_fgd=0.00005
 lambda_fgd=0.0000005
 distiller = dict(
-    type='DetectionDistiller',
+    type='FGD',
    teacher_pretrained = '/mnt/data3/wangguohua/project/swin/checkpoint/mask_rcnn_swin_small_patch4_window7.pth',
    init_student = True,
    distill_cfg = [ dict(student_module = 'neck.fpn_convs.3.conv',

--- a/configs/distillers/fgd/fgd_mask_rcnn_swinT_fpn_3x_distill_faster_rcnn_r50_fpn_1x_coco.py
+++ b/configs/distillers/fgd/fgd_mask_rcnn_swinT_fpn_3x_distill_faster_rcnn_r50_fpn_1x_coco.py
+_base_ = [  # base config files this config builds on: model, dataset, schedule, runtime
+    '../../_base_/models/faster_rcnn_r50_fpn.py',
+    '../../_base_/datasets/coco_detection.py',
+    '../../_base_/schedules/schedule_1x.py', '../../_base_/default_runtime.py'
+]
+
+# model settings
+find_unused_parameters=True  # presumably read by the training script for the distributed wrapper - TODO confirm
+temp=0.5  # passed as `temp` to every FeatureLoss entry in distill_cfg below
+alpha_fgd=0.00005  # the four *_fgd values are FeatureLoss keyword arguments,
+beta_fgd=0.000025  # reused unchanged for all four entries in distill_cfg
+gamma_fgd=0.00005
+lambda_fgd=0.0000005
+distiller = dict(  # distiller definition: one FeatureLoss per hooked FPN conv (fpn_convs.3 down to fpn_convs.0)
+    type='FGD',  # name of the FGD class registered with DISTILLER in detection_distiller.py
+    teacher_pretrained = '/mnt/data3/wangguohua/model/mmdet/swin/mask_rcnn_swin_tiny_patch4_window7.pth',  # NOTE(review): absolute, machine-specific checkpoint path - not portable
+    init_student = True,  # truthy makes FGD.__init__ load the teacher checkpoint; presumably to seed student weights - TODO confirm
+    distill_cfg = [ dict(student_module = 'neck.fpn_convs.3.conv',  # dotted module name on the student side
+                         teacher_module = 'neck.fpn_convs.3.conv',  # same dotted module name on the teacher side
+                         output_hook = True,  # presumably hooks the module output rather than its input - TODO confirm
+                         methods=[dict(type='FeatureLoss',
+                                       name='loss_fgd_fpn_3',  # key under which this loss is reported; suffix matches the fpn_convs index
+                                       student_channels = 256,
+                                       teacher_channels = 256,  # equal to student_channels for every entry in this list
+                                       temp = temp,
+                                       alpha_fgd=alpha_fgd,
+                                       beta_fgd=beta_fgd,
+                                       gamma_fgd=gamma_fgd,
+                                       lambda_fgd=lambda_fgd,
+                                       )
+                                ]
+                        ),
+                    dict(student_module = 'neck.fpn_convs.2.conv',  # same settings, applied to fpn_convs.2
+                         teacher_module = 'neck.fpn_convs.2.conv',
+                         output_hook = True,
+                         methods=[dict(type='FeatureLoss',
+                                       name='loss_fgd_fpn_2',
+                                       student_channels = 256,
+                                       teacher_channels = 256,
+                                       temp = temp,
+                                       alpha_fgd=alpha_fgd,
+                                       beta_fgd=beta_fgd,
+                                       gamma_fgd=gamma_fgd,
+                                       lambda_fgd=lambda_fgd,
+                                       )
+                                ]
+                        ),
+                    dict(student_module = 'neck.fpn_convs.1.conv',  # same settings, applied to fpn_convs.1
+                         teacher_module = 'neck.fpn_convs.1.conv',
+                         output_hook = True,
+                         methods=[dict(type='FeatureLoss',
+                                       name='loss_fgd_fpn_1',
+                                       student_channels = 256,
+                                       teacher_channels = 256,
+                                       temp = temp,
+                                       alpha_fgd=alpha_fgd,
+                                       beta_fgd=beta_fgd,
+                                       gamma_fgd=gamma_fgd,
+                                       lambda_fgd=lambda_fgd,
+                                       )
+                                ]
+                        ),
+                    dict(student_module = 'neck.fpn_convs.0.conv',  # same settings, applied to fpn_convs.0
+                         teacher_module = 'neck.fpn_convs.0.conv',
+                         output_hook = True,
+                         methods=[dict(type='FeatureLoss',
+                                       name='loss_fgd_fpn_0',
+                                       student_channels = 256,
+                                       teacher_channels = 256,
+                                       temp = temp,
+                                       alpha_fgd=alpha_fgd,
+                                       beta_fgd=beta_fgd,
+                                       gamma_fgd=gamma_fgd,
+                                       lambda_fgd=lambda_fgd,
+                                       )
+                                ]
+                        ),
+                   ]
+    )
+
+student_cfg = 'configs/faster_rcnn/faster_rcnn_r50_fpn_1x_coco.py'  # student detector config (Faster R-CNN R50-FPN per the filename)
+teacher_cfg = 'configs/swin/mask_rcnn_swin_tiny_patch4_window7_mstrain_480-800_adamw_3x_coco.py'  # teacher detector config (Mask R-CNN Swin-T per the filename)
+
+optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001)  # NOTE(review): explicit SGD override; check whether schedule_1x.py already sets the same values
+optimizer_config = dict(_delete_=True, grad_clip=dict(max_norm=35, norm_type=2))  # `_delete_=True` is the mmcv Config marker for replacing the inherited dict instead of merging into it
+data = dict(
+    samples_per_gpu=2,  # NOTE(review): overrides whatever coco_detection.py provides for these two keys
+    workers_per_gpu=2,)
\ No newline at end of file
--- a/configs/distillers/fgd/fgd_mask_rcnn_swinT_fpn_3x_distill_mask_rcnn_swinT_fpn_1x_coco.py
+++ b/configs/distillers/fgd/fgd_mask_rcnn_swinT_fpn_3x_distill_mask_rcnn_swinT_fpn_1x_coco.py
@@ -12,7 +12,7 @@ beta_fgd=0.000025
 gamma_fgd=0.00005
 lambda_fgd=0.0000005
 distiller = dict(
-    type='DetectionDistiller',
+    type='FGD',
    teacher_pretrained = '/mnt/data3/wangguohua/model/mmdet/swin/mask_rcnn_swin_tiny_patch4_window7.pth',
    init_student = True,
    distill_cfg = [ dict(student_module = 'neck.fpn_convs.3.conv',

--- a/configs/distillers/mimic_fpn/mfpn_faster_rcnn_r152_fpn_1x_distill_faster_rcnn_r50_fpn_1x_coco.py
+++ b/configs/distillers/mimic_fpn/mfpn_faster_rcnn_r152_fpn_1x_distill_faster_rcnn_r50_fpn_1x_coco.py
+_base_ = [  # base config files this config builds on: model, dataset, schedule, runtime
+    '../../_base_/models/faster_rcnn_r50_fpn.py',
+    '../../_base_/datasets/coco_detection.py',
+    '../../_base_/schedules/schedule_1x.py', '../../_base_/default_runtime.py'
+]
+
+# model settings
+find_unused_parameters=True  # presumably read by the training script for the distributed wrapper - TODO confirm
+weight=2  # passed as `weight` to each of the four MSELoss entries in distill_cfg below
+distiller = dict(  # distiller definition: one MSELoss per feature level 0..3
+    type='FPNDistiller',  # class exported from fpn_distiller.py via distillers/__init__.py
+    teacher_pretrained = '/mnt/data3/wangguohua/model/mmdet/faster_rcnn/faster_rcnn_r152_fpn_1x_coco.pth',  # NOTE(review): absolute, machine-specific checkpoint path - not portable
+    init_student = 'neck_head',  # NOTE(review): string selector here, while the FGD configs pass a bool - confirm the values FPNDistiller accepts
+    train_head = False,  # presumably keeps the student head out of training - TODO confirm against FPNDistiller
+    distill_cfg = [ dict(feature_level = 0,  # presumably an index into the neck's output feature maps - TODO confirm
+                         methods=[dict(type='MSELoss',
+                                       name='loss_mfpn_0',  # key under which this loss is reported; suffix matches feature_level
+                                       student_channels = 256,
+                                       teacher_channels = 256,  # equal to student_channels for every entry in this list
+                                       weight = weight,
+                                       )
+                                ]
+                        ),
+                    dict(feature_level = 1,  # same loss settings for level 1
+                         methods=[dict(type='MSELoss',
+                                       name='loss_mfpn_1',
+                                       student_channels = 256,
+                                       teacher_channels = 256,
+                                       weight = weight,
+                                       )
+                                ]
+                        ),
+                    dict(feature_level = 2,  # same loss settings for level 2
+                         methods=[dict(type='MSELoss',
+                                       name='loss_mfpn_2',
+                                       student_channels = 256,
+                                       teacher_channels = 256,
+                                       weight = weight,
+                                       )
+                                ]
+                        ),
+                    dict(feature_level = 3,  # same loss settings for level 3
+                         methods=[dict(type='MSELoss',
+                                       name='loss_mfpn_3',
+                                       student_channels = 256,
+                                       teacher_channels = 256,
+                                       weight = weight,
+                                       )
+                                ]
+                        ),
+                   ]
+    )
+
+student_cfg = 'configs/faster_rcnn/faster_rcnn_r50_fpn_1x_coco.py'  # student detector config (Faster R-CNN R50-FPN per the filename)
+teacher_cfg = 'configs/faster_rcnn/faster_rcnn_r152_fpn_1x_coco.py'  # teacher detector config (Faster R-CNN R152-FPN per the filename)
--- a/configs/distillers/mimic_fpn/mfpn_mask_rcnn_swinT_fpn_3x_distill_faster_rcnn_r50_fpn_1x_coco.py
+++ b/configs/distillers/mimic_fpn/mfpn_mask_rcnn_swinT_fpn_3x_distill_faster_rcnn_r50_fpn_1x_coco.py
+_base_ = [  # base config files this config builds on: model, dataset, schedule, runtime
+    '../../_base_/models/faster_rcnn_r50_fpn.py',
+    '../../_base_/datasets/coco_detection.py',
+    '../../_base_/schedules/schedule_1x.py', '../../_base_/default_runtime.py'
+]
+
+# model settings
+find_unused_parameters=True  # presumably read by the training script for the distributed wrapper - TODO confirm
+weight=2  # passed as `weight` to each of the four MSELoss entries in distill_cfg below
+distiller = dict(  # distiller definition: one MSELoss per feature level 0..3
+    type='FPNDistiller',  # class exported from fpn_distiller.py via distillers/__init__.py
+    teacher_pretrained = '/mnt/data3/wangguohua/model/mmdet/swin/mask_rcnn_swin_tiny_patch4_window7.pth',  # NOTE(review): absolute, machine-specific checkpoint path - not portable
+    init_student = 'neck_head',  # NOTE(review): string selector here, while the FGD configs pass a bool - confirm the values FPNDistiller accepts
+    train_head = False,  # presumably keeps the student head out of training - TODO confirm against FPNDistiller
+    distill_cfg = [ dict(feature_level = 0,  # presumably an index into the neck's output feature maps - TODO confirm
+                         methods=[dict(type='MSELoss',
+                                       name='loss_mfpn_0',  # key under which this loss is reported; suffix matches feature_level
+                                       student_channels = 256,
+                                       teacher_channels = 256,  # equal to student_channels for every entry in this list
+                                       weight = weight,
+                                       )
+                                ]
+                        ),
+                    dict(feature_level = 1,  # same loss settings for level 1
+                         methods=[dict(type='MSELoss',
+                                       name='loss_mfpn_1',
+                                       student_channels = 256,
+                                       teacher_channels = 256,
+                                       weight = weight,
+                                       )
+                                ]
+                        ),
+                    dict(feature_level = 2,  # same loss settings for level 2
+                         methods=[dict(type='MSELoss',
+                                       name='loss_mfpn_2',
+                                       student_channels = 256,
+                                       teacher_channels = 256,
+                                       weight = weight,
+                                       )
+                                ]
+                        ),
+                    dict(feature_level = 3,  # same loss settings for level 3
+                         methods=[dict(type='MSELoss',
+                                       name='loss_mfpn_3',
+                                       student_channels = 256,
+                                       teacher_channels = 256,
+                                       weight = weight,
+                                       )
+                                ]
+                        ),
+                   ]
+    )
+
+student_cfg = 'configs/faster_rcnn/faster_rcnn_r50_fpn_1x_coco.py'  # student detector config (Faster R-CNN R50-FPN per the filename)
+teacher_cfg = 'configs/swin/mask_rcnn_swin_tiny_patch4_window7_mstrain_480-800_adamw_3x_coco.py'  # teacher detector config (Mask R-CNN Swin-T per the filename)
--- a/mmdet/distillation/distillers/detection_distiller.py
+++ b/mmdet/distillation/distillers/detection_distiller.py
@@ -11,8 +11,8 @@ from collections import OrderedDict


 @DISTILLER.register_module()
-class DetectionDistiller(BaseDetector):
-    """Base distiller for detectors.
+class FGD(BaseDetector):
+    """FGD distiller for detectors.
    It typically consists of teacher_model and student_model.
    """
    def __init__(self,
@@ -22,7 +22,7 @@ class DetectionDistiller(BaseDetector):
                 teacher_pretrained=None,
                 init_student=False):

-        super(DetectionDistiller, self).__init__()
+        super(FGD, self).__init__()
        
        self.teacher = build_detector(teacher_cfg.model,
                                        train_cfg=teacher_cfg.get('train_cfg'),
@@ -34,6 +34,7 @@ class DetectionDistiller(BaseDetector):
        self.student= build_detector(student_cfg.model,
                                        train_cfg=student_cfg.get('train_cfg'),
                                        test_cfg=student_cfg.get('test_cfg'))
+        self.student.init_weights()
        if init_student:
            t_checkpoint = _load_checkpoint(teacher_pretrained, map_location='cpu')
            all_name = []

--- a/mmdet/distillation/distillers/FGFI.py
+++ b/mmdet/distillation/distillers/FGFI.py
@@ -122,7 +122,7 @@ class FGFI(BaseDetector):
                for level in range(len(anchor_list[batch])):
                    gt_level = gt_bboxes[batch][target_lvls==level]
                    h, w = featmap_sizes[level][0], featmap_sizes[level][1]
-                    mask_per_img = torch.zeros([h, w], dtype=torch.double).cuda()
+                    mask_per_img = torch.zeros([h, w], dtype=torch.float).cuda()
                    if gt_level.shape[0] > 0:
                        IoU_map = bbox_overlaps(anchor_list[batch][level], gt_level)
                        max_iou, _ = torch.max(IoU_map, dim=0)
@@ -131,7 +131,7 @@ class FGFI(BaseDetector):
                            max_iou_per_gt = max_iou[ins] * phi
                            mask_per_gt = torch.sum(IoU_map[:,:,:,ins] > max_iou_per_gt, dim = 2)
                            mask_per_img += mask_per_gt
-                        mask_per_img = (mask_per_img > 0).double()
+                        mask_per_img = (mask_per_img > 0).float()
                    mask_level.append(mask_per_img)
                mask_batch.append(mask_level)
            mask_batch_level = []
@@ -184,7 +184,8 @@ class FGFI(BaseDetector):

        losses.update(rpn_losses)

-        neck_mask_batch = self.get_roi_mask(rpn_outs[0], img_metas, gt_bboxes, phi=0.5)
+        with torch.no_grad():
+            neck_mask_batch = self.get_roi_mask(rpn_outs[0], img_metas, gt_bboxes, phi=0.5)

        
        for item_loc in self.distill_cfg:
@@ -193,6 +194,7 @@ class FGFI(BaseDetector):
            f_t_l = f_t[feature_level]
            mask = neck_mask_batch[feature_level]
            mask = mask.unsqueeze(1).repeat(1, f_s_l.size(1), 1, 1)
+            losses['{}_ratio'.format(feature_level)] = mask.sum() / mask.numel()
            for item_loss in item_loc.methods:
                loss_name = item_loss.name
                if 'n' in loss_name:
@@ -200,6 +202,7 @@ class FGFI(BaseDetector):
                losses[loss_name] = self.distill_losses[loss_name](f_s_l, f_t_l, mask)


+
        roi_losses = self.student.roi_head.forward_train(f_s, img_metas, proposal_list,
                                                 gt_bboxes, gt_labels,
                                                 gt_bboxes_ignore, gt_masks,

--- a/mmdet/distillation/distillers/__init__.py
+++ b/mmdet/distillation/distillers/__init__.py
-from .detection_distiller import DetectionDistiller
+from .FGD import FGD
 from .backbone_distiller import BackboneDistiller
 from .fpn_distiller import FPNDistiller
 from .head_distiller import HeadDistiller
@@ -8,7 +8,7 @@ from .DeFeat import DeFeat
 from .RPN_FM import RPN_FM

 __all__ = [
-    'DetectionDistiller',
+    'FGD',
    'BackboneDistiller',
    'FPNDistiller',
    'RPN_FM',

--- a/submit_work.sh
+++ b/submit_work.sh
@@ -13,4 +13,6 @@ sleep 2
 # when $pid finished, run these 
 #PORT=29504 CUDA_VISIBLE_DEVICES=0,1,2,3 tools/dist_train.sh configs/distillers/mimic_fpn/mfpn_trainH3_mask_rcnn_swinS_fpn_3x_distill_mask_rcnn_swinT_fpn_1x_coco.py 4
 #PORT=29502 CUDA_VISIBLE_DEVICES=4,5,6,7 tools/dist_train.sh configs/pascal_voc/faster_rcnn_r101_fpn_1x_voc0712.py 4
-PORT=29505 tools/dist_train.sh configs/distillers/feature_mimicking/roi_fm_faster_rcnn_r152_fpn_1x_distill_faster_rcnn_r50_fpn_1x_coco.py 8
\ No newline at end of file
+#PORT=29505 tools/dist_train.sh configs/distillers/feature_mimicking/roi_fm_faster_rcnn_r152_fpn_1x_distill_faster_rcnn_r50_fpn_1x_coco.py 8
+#PORT=29505 tools/dist_train.sh configs/distillers/mimic_fpn/mfpn_tH_mask_rcnn_swinT_fpn_3x_distill_faster_rcnn_r50_fpn_1x_coco.py 8
+PORT=29505 tools/dist_train.sh configs/distillers/ROIfeature_mimicking/roi_fm_mask_rcnn_swinT_fpn_3x_distill_faster_rcnn_r50_fpn_1x_coco.py 8
\ No newline at end of file