diff --git a/.gitignore b/.gitignore
index 77ca0d7c808c77d27777041e64cd8a01054433fc..a5d7e1f95306a5f04087418485f85cbb7b0531b4 100644
--- a/.gitignore
+++ b/.gitignore
@@ -114,6 +114,7 @@ data
 # Pytorch
diff --git a/configs/_base_/datasets/voc0712_analyze.py b/configs/_base_/datasets/voc0712_analyze.py
new file mode 100644
index 0000000000000000000000000000000000000000..873ecf5857ff7d038d76cf1f70c73f884eb27deb
--- /dev/null
+++ b/configs/_base_/datasets/voc0712_analyze.py
@@ -0,0 +1,53 @@
+# dataset settings
+# Pascal VOC 07+12 detection data: training reads the VOC2007 and VOC2012
+# trainval lists; validation and testing both read the VOC2007 test list.
+dataset_type = 'VOCDataset'
+#data_root = 'data/VOCdevkit/'
+# NOTE(review): absolute, machine-specific dataset location; the
+# repo-relative path is the commented-out line above.
+data_root = '/opt/Dataset/VOCdevkit/'
+# Normalization statistics forwarded to every `Normalize` step below.
+img_norm_cfg = dict(
+    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
+# Training pipeline: load image and boxes, resize to (1000, 600) keeping the
+# aspect ratio, random flip with ratio 0.5, normalize, pad to a multiple of
+# 32, then collect the image, boxes and labels.
+train_pipeline = [
+    dict(type='LoadImageFromFile'),
+    dict(type='LoadAnnotations', with_bbox=True),
+    dict(type='Resize', img_scale=(1000, 600), keep_ratio=True),
+    dict(type='RandomFlip', flip_ratio=0.5),
+    dict(type='Normalize', **img_norm_cfg),
+    dict(type='Pad', size_divisor=32),
+    dict(type='DefaultFormatBundle'),
+    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']),
+# Test pipeline: a single (1000, 600) scale with flipping disabled; only the
+# image is collected.
+test_pipeline = [
+    dict(type='LoadImageFromFile'),
+    dict(
+        type='MultiScaleFlipAug',
+        img_scale=(1000, 600),
+        flip=False,
+        transforms=[
+            dict(type='Resize', keep_ratio=True),
+            dict(type='RandomFlip'),
+            dict(type='Normalize', **img_norm_cfg),
+            dict(type='Pad', size_divisor=32),
+            dict(type='ImageToTensor', keys=['img']),
+            dict(type='Collect', keys=['img']),
+        ])
+# Loader settings and the train/val/test splits.
+data = dict(
+    samples_per_gpu=2,
+    workers_per_gpu=2,
+    train=dict(
+        type=dataset_type,
+        # The two annotation lists pair up, in order, with the two image
+        # prefixes below.
+        ann_file=[
+            data_root + 'VOC2007/ImageSets/Main/trainval.txt',
+            data_root + 'VOC2012/ImageSets/Main/trainval.txt'
+        ],
+        img_prefix=[data_root + 'VOC2007/', data_root + 'VOC2012/'],
+        pipeline=train_pipeline),
+    val=dict(
+        type=dataset_type,
+        ann_file=data_root + 'VOC2007/ImageSets/Main/test.txt',
+        img_prefix=data_root + 'VOC2007/',
+        pipeline=test_pipeline),
+    test=dict(
+        type=dataset_type,
+        ann_file=data_root + 'VOC2007/ImageSets/Main/test.txt',
+        img_prefix=data_root + 'VOC2007/',
+        pipeline=test_pipeline))
+# Evaluation uses the mAP metric with interval 1.
+evaluation = dict(interval=1, metric='mAP')
diff --git a/configs/distillers/feature_mimicking/fm_faster_rcnn_r152_fpn_1x_distill_faster_rcnn_r50_fpn_1x_voc0712.py b/configs/distillers/feature_mimicking/fm_faster_rcnn_r152_fpn_1x_distill_faster_rcnn_r50_fpn_1x_voc0712.py
new file mode 100644
index 0000000000000000000000000000000000000000..dbab6bf72302f596287442db686754d04c99bb21
--- /dev/null
+++ b/configs/distillers/feature_mimicking/fm_faster_rcnn_r152_fpn_1x_distill_faster_rcnn_r50_fpn_1x_voc0712.py
@@ -0,0 +1,44 @@
+# Feature-mimicking distillation config: a Faster R-CNN R-152 teacher
+# distilled into a Faster R-CNN R-50 student on VOC 07+12.
+_base_ = [
+    '../../_base_/models/faster_rcnn_r50_fpn.py',
+    '../../_base_/datasets/voc0712.py',
+    '../../_base_/schedules/schedule_1x.py', '../../_base_/default_runtime.py'
+# model settings
+# NOTE(review): `teacher_pretrained` is an absolute, user-specific path and
+# has to be adapted on any other machine.
+# NOTE(review): `weight = weight` below reads a name `weight` that is not
+# assigned anywhere in this file as shown; evaluating it is expected to
+# raise NameError. Replace the right-hand side with an explicit number.
+distiller = dict(
+    type='FeatureMimicking',
+    teacher_pretrained = '/mnt/data3/wangguohua/model/mmdet/faster_rcnn/faster_rcnn_r152_fpn_1x_voc0712.pth',
+    init_student = 'neck_head',
+    # One mimicking location: the same module name is hooked in the student
+    # and in the teacher, and an MSE loss named `fm_loss` is built for it.
+    distill_cfg = [ dict(student_module = 'roi_head.bbox_head.shared_fcs.0',
+                         teacher_module = 'roi_head.bbox_head.shared_fcs.0',
+                         output_hook = True,
+                         methods=[dict(type='MSELoss',
+                                       name='fm_loss',
+                                       student_channels = 1024,
+                                       teacher_channels = 1024,
+                                       weight = weight,
+                                       )
+                                ]
+                        ),
+                   ]
+    )
+# Detector configs for the student and the teacher.
+student_cfg = 'configs/pascal_voc/faster_rcnn_r50_fpn_1x_voc0712.py'
+teacher_cfg = 'configs/pascal_voc/faster_rcnn_r152_fpn_1x_voc0712.py'
+# optimizer
+optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+# actual epoch = 3 * 3 = 9
+# Step schedule with a 200-iteration linear warmup starting at ratio 0.001.
+lr_config = dict(
+    policy='step',
+    warmup='linear',
+    warmup_iters=200,
+    warmup_ratio=0.001,
+    step=[3])
+# runtime settings
+runner = dict(
+    type='EpochBasedRunner', max_epochs=4)  # actual epoch = 4 * 3 = 12
\ No newline at end of file
diff --git a/configs/distillers/mimic_backbone/mb_faster_rcnn_r152_fpn_1x_distill_faster_rcnn_r50_fpn_1x_voc0712.py b/configs/distillers/mimic_backbone/mb_faster_rcnn_r152_fpn_1x_distill_faster_rcnn_r50_fpn_1x_voc0712.py
new file mode 100644
index 0000000000000000000000000000000000000000..7a798f1050f2ab3cd61e598005fc073ac59c1f01
--- /dev/null
+++ b/configs/distillers/mimic_backbone/mb_faster_rcnn_r152_fpn_1x_distill_faster_rcnn_r50_fpn_1x_voc0712.py
@@ -0,0 +1,70 @@
+# Backbone-mimicking distillation config: a Faster R-CNN R-152 teacher
+# distilled into a Faster R-CNN R-50 student on VOC 07+12.
+_base_ = [
+    '../../_base_/models/faster_rcnn_r50_fpn.py',
+    '../../_base_/datasets/voc0712.py',
+    '../../_base_/schedules/schedule_1x.py', '../../_base_/default_runtime.py'
+# model settings
+# NOTE(review): `teacher_pretrained` is an absolute, user-specific path and
+# has to be adapted on any other machine.
+# NOTE(review): every `weight = weight` below reads a name `weight` that is
+# not assigned anywhere in this file as shown; evaluating it is expected to
+# raise NameError. Replace each right-hand side with an explicit number.
+distiller = dict(
+    type='BackboneDistiller',
+    teacher_pretrained = '/mnt/data3/wangguohua/model/mmdet/faster_rcnn/faster_rcnn_r152_fpn_1x_voc0712.pth',
+    init_student = 'neck_head',
+    train_head = False,
+    # One MSE loss per feature level 0-3, with student/teacher channel
+    # counts of 256, 512, 1024 and 2048 respectively.
+    distill_cfg = [ dict(feature_level = 0,
+                         methods=[dict(type='MSELoss',
+                                       name='loss_mb_0',
+                                       student_channels = 256,
+                                       teacher_channels = 256,
+                                       weight = weight,
+                                       )
+                                ]
+                        ),
+                    dict(feature_level = 1,
+                         methods=[dict(type='MSELoss',
+                                       name='loss_mb_1',
+                                       student_channels = 512,
+                                       teacher_channels = 512,
+                                       weight = weight,
+                                       )
+                                ]
+                        ),
+                    dict(feature_level = 2,
+                         methods=[dict(type='MSELoss',
+                                       name='loss_mb_2',
+                                       student_channels = 1024,
+                                       teacher_channels = 1024,
+                                       weight = weight,
+                                       )
+                                ]
+                        ),
+                    dict(feature_level = 3,
+                         methods=[dict(type='MSELoss',
+                                       name='loss_mb_3',
+                                       student_channels = 2048,
+                                       teacher_channels = 2048,
+                                       weight = weight,
+                                       )
+                                ]
+                        ),
+                   ]
+    )
+# Detector configs for the student and the teacher.
+student_cfg = 'configs/pascal_voc/faster_rcnn_r50_fpn_1x_voc0712.py'
+teacher_cfg = 'configs/pascal_voc/faster_rcnn_r152_fpn_1x_voc0712.py'
+# optimizer
+optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+# actual epoch = 3 * 3 = 9
+# Step schedule with a 200-iteration linear warmup starting at ratio 0.001.
+lr_config = dict(
+    policy='step',
+    warmup='linear',
+    warmup_iters=200,
+    warmup_ratio=0.001,
+    step=[3])
+# runtime settings
+runner = dict(
+    type='EpochBasedRunner', max_epochs=4)  # actual epoch = 4 * 3 = 12
\ No newline at end of file
diff --git a/configs/pascal_voc/cascade_rcnn_r152_fpn_1x_voc0712.py b/configs/pascal_voc/cascade_rcnn_r152_fpn_1x_voc0712.py
new file mode 100644
index 0000000000000000000000000000000000000000..0576486bb66bbc9cfd61c8e5d46f0b78a19bbca1
--- /dev/null
+++ b/configs/pascal_voc/cascade_rcnn_r152_fpn_1x_voc0712.py
@@ -0,0 +1,7 @@
+# Cascade R-CNN on VOC 07+12 with a ResNet-152 backbone: inherits the R-50
+# config and overrides the backbone depth, its pretrained checkpoint and the
+# optimizer.
+_base_ = './cascade_rcnn_r50_fpn_1x_voc0712.py'
+model = dict(
+    backbone=dict(
+        depth=152,
+        init_cfg=dict(type='Pretrained',
+                      checkpoint='torchvision://resnet152')))
+# SGD optimizer settings for this variant (lr=0.01).
+optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001)
\ No newline at end of file
diff --git a/configs/pascal_voc/cascade_rcnn_r50_fpn_1x_voc0712.py b/configs/pascal_voc/cascade_rcnn_r50_fpn_1x_voc0712.py
index c516ea7a3e6b812b1d204dd90c01c5517ddbb6b4..7a582d484aac6a113f74e36a1180a705979389c0 100644
--- a/configs/pascal_voc/cascade_rcnn_r50_fpn_1x_voc0712.py
+++ b/configs/pascal_voc/cascade_rcnn_r50_fpn_1x_voc0712.py
@@ -58,11 +58,16 @@ model = dict(roi_head=dict(bbox_head=[
 # optimizer
-optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001)
+optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001)
 optimizer_config = dict(grad_clip=None)
 # learning policy
 # actual epoch = 3 * 3 = 9
-lr_config = dict(policy='step', step=[3])
+lr_config = dict(
+    policy='step',
+    warmup='linear',
+    warmup_iters=200,
+    warmup_ratio=0.001,
+    step=[3])
 # runtime settings
 runner = dict(
     type='EpochBasedRunner', max_epochs=4)  # actual epoch = 4 * 3 = 12
diff --git a/configs/pascal_voc/faster_rcnn_r152_fpn_1x_voc0712.py b/configs/pascal_voc/faster_rcnn_r152_fpn_1x_voc0712.py
index d50b6594a7495615a98566adbe7097929bbe01da..2d6a6016c173c88972fcf4d4ff286f5d9f030cc0 100644
--- a/configs/pascal_voc/faster_rcnn_r152_fpn_1x_voc0712.py
+++ b/configs/pascal_voc/faster_rcnn_r152_fpn_1x_voc0712.py
@@ -6,4 +6,10 @@ model = dict(
 optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001)
+lr_config = dict(
+    policy='step',
+    warmup='linear',
+    warmup_iters=200,
+    warmup_ratio=0.001,
+    step=[3])
diff --git a/configs/pascal_voc/faster_rcnn_r50_fpn_1x_voc0712.py b/configs/pascal_voc/faster_rcnn_r50_fpn_1x_voc0712.py
index 7866acebea689e7a863a836c326b1407de733fe8..7ab15a6469056f8cc909f193b71e4772636802ce 100644
--- a/configs/pascal_voc/faster_rcnn_r50_fpn_1x_voc0712.py
+++ b/configs/pascal_voc/faster_rcnn_r50_fpn_1x_voc0712.py
@@ -8,7 +8,12 @@ optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001)
 optimizer_config = dict(grad_clip=None)
 # learning policy
 # actual epoch = 3 * 3 = 9
-lr_config = dict(policy='step', step=[3])
+lr_config = dict(
+    policy='step',
+    warmup='linear',
+    warmup_iters=200,
+    warmup_ratio=0.001,
+    step=[3])
 # runtime settings
 runner = dict(
     type='EpochBasedRunner', max_epochs=4)  # actual epoch = 4 * 3 = 12
diff --git a/configs/rpn/rpn_r152_fpn_1x_coco.py b/configs/rpn/rpn_r152_fpn_1x_coco.py
new file mode 100644
index 0000000000000000000000000000000000000000..bb1a9e608c467c633c1d1945389f8c68d96d8d47
--- /dev/null
+++ b/configs/rpn/rpn_r152_fpn_1x_coco.py
@@ -0,0 +1,6 @@
+# RPN on COCO with a ResNet-152 backbone: inherits the R-50 config and
+# overrides only the backbone depth and its pretrained checkpoint.
+_base_ = './rpn_r50_fpn_1x_coco.py'
+model = dict(
+    backbone=dict(
+        depth=152,
+        init_cfg=dict(type='Pretrained',
+                      checkpoint='torchvision://resnet152')))
diff --git a/mmdet/distillation/distillers/__init__.py b/mmdet/distillation/distillers/__init__.py
index 4dc0dd0dc8f8fef9e029c5aef3dc80f038ad9b0e..ee7a3ecda1cef71d79eb5c1f738c29be898e811e 100644
--- a/mmdet/distillation/distillers/__init__.py
+++ b/mmdet/distillation/distillers/__init__.py
@@ -2,10 +2,12 @@ from .detection_distiller import DetectionDistiller
 from .backbone_distiller import BackboneDistiller
 from .fpn_distiller import FPNDistiller
 from .head_distiller import HeadDistiller
+from .feature_mimicking import FeatureMimicking
 __all__ = [
-    'HeadDistiller'
+    'HeadDistiller',
+    'FeatureMimicking',
\ No newline at end of file
diff --git a/mmdet/distillation/distillers/feature_mimicking.py b/mmdet/distillation/distillers/feature_mimicking.py
new file mode 100644
index 0000000000000000000000000000000000000000..32bc9dac388eaf2c993a2a814c53069166b79ba0
--- /dev/null
+++ b/mmdet/distillation/distillers/feature_mimicking.py
@@ -0,0 +1,201 @@
+import torch.nn as nn
+import torch.nn.functional as F
+import torch
+from mmdet.models.detectors.base import BaseDetector
+from mmdet.models import build_detector
+from mmcv.runner import  load_checkpoint, _load_checkpoint, load_state_dict
+from ..builder import DISTILLER,build_distill_loss
+from collections import OrderedDict
+class FeatureMimicking(BaseDetector):
+    """Feature mimicking for detectors.
+    It typically consists of teacher_model and student_model.
+    """
+    def __init__(self,
+                 teacher_cfg,
+                 student_cfg,
+                 distill_cfg=None,
+                 teacher_pretrained=None,
+                 init_student=''):
+        """Build the teacher/student detectors and wire up feature mimicking.
+        Args:
+            teacher_cfg: Config object of the teacher; its ``model``,
+                ``train_cfg`` and ``test_cfg`` entries are passed to
+                ``build_detector``.
+            student_cfg: Config object of the student, used the same way.
+            distill_cfg (list, optional): One item per mimicking location.
+                Each item provides ``student_module`` and ``teacher_module``
+                (dotted names looked up in ``named_modules()``) and
+                ``methods`` (loss configs carrying a ``name``, built with
+                ``build_distill_loss``). ``None`` means no mimicking loss.
+            teacher_pretrained (str, optional): Teacher checkpoint path.
+            init_student (str): ``''`` (default), ``'neck'``, ``'head'`` or
+                ``'neck_head'``. When set, teacher checkpoint entries whose
+                key contains the selected word(s) are loaded into the
+                student.
+        """
+        super(FeatureMimicking, self).__init__()
+        self.teacher = build_detector(teacher_cfg.model,
+                                      train_cfg=teacher_cfg.get('train_cfg'),
+                                      test_cfg=teacher_cfg.get('test_cfg'))
+        self.init_weights_teacher(teacher_pretrained)
+        self.teacher.eval()
+        self.student = build_detector(student_cfg.model,
+                                      train_cfg=student_cfg.get('train_cfg'),
+                                      test_cfg=student_cfg.get('test_cfg'))
+        self.student.init_weights()
+        if init_student:
+            assert init_student in ['neck', 'head', 'neck_head']
+            # Copy only the teacher's neck and/or head weights; entries are
+            # selected by substring match against the checkpoint key.
+            parts = [part for part in ('neck', 'head') if part in init_student]
+            t_checkpoint = _load_checkpoint(teacher_pretrained, map_location='cpu')
+            state_dict = OrderedDict(
+                (name, v) for name, v in t_checkpoint["state_dict"].items()
+                if any(part in name for part in parts))
+            load_state_dict(self.student, state_dict)
+        self.distill_losses = nn.ModuleDict()
+        # The signature defaults ``distill_cfg`` to None; normalise it to an
+        # empty list so the loops here and in ``forward_train`` do not fail.
+        self.distill_cfg = [] if distill_cfg is None else distill_cfg
+        student_modules = dict(self.student.named_modules())
+        teacher_modules = dict(self.teacher.named_modules())
+        def make_capture_hook(buffer_name):
+            # Forward hook that stores the hooked module's latest output on
+            # this distiller under ``buffer_name``.
+            def capture(module, inputs, output):
+                self.register_buffer(buffer_name, output)
+            return capture
+        for item_loc in self.distill_cfg:
+            student_module = 'student_' + item_loc.student_module.replace('.','_')
+            teacher_module = 'teacher_' + item_loc.teacher_module.replace('.','_')
+            # Placeholders; the hooks overwrite them on every forward pass.
+            self.register_buffer(student_module, None)
+            self.register_buffer(teacher_module, None)
+            teacher_modules[item_loc.teacher_module].register_forward_hook(
+                make_capture_hook(teacher_module))
+            student_modules[item_loc.student_module].register_forward_hook(
+                make_capture_hook(student_module))
+            for item_loss in item_loc.methods:
+                self.distill_losses[item_loss.name] = build_distill_loss(item_loss)
+    def base_parameters(self):
+        """Return the student and the distillation losses as one ModuleList.
+        A new ``nn.ModuleList`` is created on every call.
+        """
+        trainable = [self.student, self.distill_losses]
+        return nn.ModuleList(trainable)
+    def discriminator_parameters(self):
+        """Return ``self.discriminator``.
+        NOTE(review): no ``discriminator`` attribute is assigned anywhere in
+        this class as shown, so this access is expected to raise
+        ``AttributeError`` unless it is set elsewhere -- confirm or remove.
+        """
+        return self.discriminator
+    @property
+    def with_neck(self):
+        """bool: True when the student exposes a ``neck`` that is not None."""
+        student = self.student
+        if not hasattr(student, 'neck'):
+            return False
+        return student.neck is not None
+    # TODO: these properties need to be carefully handled
+    # for both single stage & two stage detectors
+    @property
+    def with_shared_head(self):
+        """The student RoI head's ``with_shared_head``; False without a RoI head."""
+        student = self.student
+        if not hasattr(student, 'roi_head'):
+            return False
+        return student.roi_head.with_shared_head
+    @property
+    def with_bbox(self):
+        """Whether the student has a bbox head.
+        Truthy when the student's ``roi_head`` reports ``with_bbox``, or when
+        the student has a ``bbox_head`` attribute that is not None.
+        """
+        student = self.student
+        roi_has_bbox = hasattr(student, 'roi_head') and student.roi_head.with_bbox
+        if roi_has_bbox:
+            return roi_has_bbox
+        return hasattr(student, 'bbox_head') and student.bbox_head is not None
+    @property
+    def with_mask(self):
+        """Whether the student has a mask head.
+        Truthy when the student's ``roi_head`` reports ``with_mask``, or when
+        the student has a ``mask_head`` attribute that is not None.
+        """
+        student = self.student
+        roi_has_mask = hasattr(student, 'roi_head') and student.roi_head.with_mask
+        if roi_has_mask:
+            return roi_has_mask
+        return hasattr(student, 'mask_head') and student.mask_head is not None
+    def init_weights_teacher(self, path=None):
+        """Load pretrained weights into the teacher detector.
+        Args:
+            path (str, optional): Checkpoint path handed to
+                ``load_checkpoint`` (loaded with ``map_location='cpu'``).
+                When None, nothing is loaded, a warning is emitted and the
+                teacher keeps its current weights. Defaults to None.
+        """
+        if path is None:
+            # The documented default must not crash inside the loader, but
+            # an unloaded teacher is almost certainly unintended: say so.
+            import warnings
+            warnings.warn('No teacher checkpoint given; '
+                          'teacher weights are left unchanged.')
+            return
+        load_checkpoint(self.teacher, path, map_location='cpu')
+    def forward_train(self,
+                      img,
+                      img_metas,
+                      gt_bboxes,
+                      gt_labels,
+                      gt_bboxes_ignore=None,
+                      gt_masks=None,
+                      proposals=None,
+                      **kwargs):
+        """Compute the student's detection losses plus the mimicking losses.
+        The student's RPN and RoI heads are trained as usual. The teacher's
+        RoI head is then run without gradients on the student's proposal
+        list so that the forward hooks installed in ``__init__`` capture the
+        teacher output next to the student output.
+        NOTE(review): whether both RoI heads end up processing the same RoIs
+        depends on the RoI head's sampler -- confirm.
+        Args:
+            img (Tensor): Input images of shape (N, C, H, W).
+                Typically these should be mean centered and std scaled.
+            img_metas (list[dict]): A List of image info dict where each dict
+                has: 'img_shape', 'scale_factor', 'flip', and may also contain
+                'filename', 'ori_shape', 'pad_shape', and 'img_norm_cfg'.
+                For details on the values of these keys see
+                :class:`mmdet.datasets.pipelines.Collect`.
+            gt_bboxes: Ground-truth boxes, forwarded to the RPN and RoI heads.
+            gt_labels: Ground-truth labels, forwarded to the RoI heads only
+                (the RPN head receives ``gt_labels=None``).
+            gt_bboxes_ignore: Forwarded to the RPN and RoI heads.
+            gt_masks: Forwarded to the RoI heads.
+            proposals: Accepted but not used by this method.
+        Returns:
+            dict[str, Tensor]: A dictionary of loss components(student's losses and distiller's losses).
+        Raises:
+            KeyError: If a hooked module produced no output during this call.
+        """
+        with torch.no_grad():
+            self.teacher.eval()
+            f_t = self.teacher.extract_feat(img)
+        f_s = self.student.extract_feat(img)
+        losses = dict()
+        proposal_cfg = self.student.train_cfg.get('rpn_proposal',
+                                            self.student.test_cfg.rpn)
+        s_rpn_losses, s_proposal_list = self.student.rpn_head.forward_train(
+            f_s,
+            img_metas,
+            gt_bboxes,
+            gt_labels=None,
+            gt_bboxes_ignore=gt_bboxes_ignore,
+            proposal_cfg=proposal_cfg)
+        losses.update(s_rpn_losses)
+        s_roi_losses = self.student.roi_head.forward_train(f_s, img_metas, s_proposal_list,
+                                                 gt_bboxes, gt_labels,
+                                                 gt_bboxes_ignore, gt_masks,
+                                                 **kwargs)
+        losses.update(s_roi_losses)
+        with torch.no_grad():
+            # The returned teacher losses are discarded; the call is made
+            # only so the teacher-side forward hook fires.
+            self.teacher.roi_head.forward_train(f_t, img_metas, s_proposal_list,
+                                                 gt_bboxes, gt_labels,
+                                                 gt_bboxes_ignore, gt_masks,
+                                                 **kwargs)
+        for item_loc in self.distill_cfg:
+            student_module = 'student_' + item_loc.student_module.replace('.','_')
+            teacher_module = 'teacher_' + item_loc.teacher_module.replace('.','_')
+            # Read the two captured tensors directly instead of snapshotting
+            # every buffer of both detectors on each training step.
+            student_feat = getattr(self, student_module, None)
+            teacher_feat = getattr(self, teacher_module, None)
+            if student_feat is None:
+                raise KeyError(student_module)
+            if teacher_feat is None:
+                raise KeyError(teacher_module)
+            for item_loss in item_loc.methods:
+                loss_name = item_loss.name
+                losses[loss_name] = self.distill_losses[loss_name](student_feat, teacher_feat)
+        return losses
+    def simple_test(self, img, img_metas, **kwargs):
+        """Delegate ``simple_test`` to the student; the teacher is not used."""
+        student = self.student
+        return student.simple_test(img, img_metas, **kwargs)
+    def aug_test(self, imgs, img_metas, **kwargs):
+        """Delegate ``aug_test`` to the student; the teacher is not used.
+        Args:
+            imgs: Forwarded unchanged to ``self.student.aug_test``.
+            img_metas: Forwarded unchanged to ``self.student.aug_test``.
+            **kwargs: Forwarded unchanged to ``self.student.aug_test``.
+        """
+        # Pass the ``imgs`` parameter itself: ``img`` is not defined here.
+        return self.student.aug_test(imgs, img_metas, **kwargs)
+    def extract_feat(self, imgs):
+        """Return the student's ``extract_feat`` output for ``imgs``."""
+        student = self.student
+        return student.extract_feat(imgs)
\ No newline at end of file