diff --git a/configs/distillers/mimic_backbone/mb_cascade_rcnn_swinS_fpn_3x_distill_cascade_rcnn_swinT_fpn_1x_coco.py b/configs/distillers/mimic_backbone/mb_cascade_rcnn_swinS_fpn_3x_distill_cascade_rcnn_swinT_fpn_1x_coco.py
new file mode 100644
index 0000000000000000000000000000000000000000..5f999497c5642c487c7ae301de24a621c36b2014
--- /dev/null
+++ b/configs/distillers/mimic_backbone/mb_cascade_rcnn_swinS_fpn_3x_distill_cascade_rcnn_swinT_fpn_1x_coco.py
@@ -0,0 +1,77 @@
+_base_ = [
+    '../../_base_/models/mask_rcnn_swin_fpn.py',
+    '../../_base_/datasets/coco_instance.py',
+    '../../_base_/schedules/schedule_1x.py', '../../_base_/default_runtime.py'
+]
+
+# model settings
+find_unused_parameters=True
+weight=1
+distiller = dict(
+    type='BackboneDistiller',
+    teacher_pretrained = '/mnt/data3/wangguohua/model/mmdet/swin/cascade_mask_rcnn_swin_small_patch4_window7.pth',
+    init_student = True,
+    distill_cfg = [ dict(feature_level = 0,
+                         methods=[dict(type='MSELoss',
+                                       name='loss_mb_0',
+                                       student_channels = 96,
+                                       teacher_channels = 96,
+                                       weight = weight,
+                                       )
+                                  ]
+                        ),
+                    dict(feature_level = 1,
+                         methods=[dict(type='MSELoss',
+                                       name='loss_mb_1',
+                                       student_channels = 192,
+                                       teacher_channels = 192,
+                                       weight = weight,
+                                       )
+                                  ]
+                        ),
+                    dict(feature_level = 2,
+                         methods=[dict(type='MSELoss',
+                                       name='loss_mb_2',
+                                       student_channels = 384,
+                                       teacher_channels = 384,
+                                       weight = weight,
+                                       )
+                                  ]
+                        ),
+                    dict(feature_level = 3,
+                         methods=[dict(type='MSELoss',
+                                       name='loss_mb_3',
+                                       student_channels = 768,
+                                       teacher_channels = 768,
+                                       weight = weight,
+                                       )
+                                  ]
+                        ),
+                   ]
+    )
+
+student_cfg = 'configs/swin/cascade_mask_rcnn_swin_tiny_patch4_window7_mstrain_480-800_giou_4conv1f_adamw_1x_coco.py'
+teacher_cfg = 'configs/swin/cascade_mask_rcnn_swin_small_patch4_window7_mstrain_480-800_giou_4conv1f_adamw_3x_coco.py'
+
+data = dict(
+    samples_per_gpu=2,
+    workers_per_gpu=2,)
+#data = dict(train=dict(pipeline=train_pipeline))
+
+optimizer = dict(_delete_=True, type='AdamW', lr=0.0001, betas=(0.9, 0.999), weight_decay=0.05,
+                 paramwise_cfg=dict(custom_keys={'absolute_pos_embed': dict(decay_mult=0.),
+                                                 'relative_position_bias_table': dict(decay_mult=0.),
+                                                 'norm': dict(decay_mult=0.)}))
+lr_config = dict(step=[8, 11])
+#runner = dict(type='EpochBasedRunnerAmp', max_epochs=12)
+runner = dict(type='EpochBasedRunner', max_epochs=12)
+# do not use mmdet version fp16
+# fp16 = None
+# optimizer_config = dict(
+#     type="DistOptimizerHook",
+#     update_interval=1,
+#     grad_clip=None,
+#     coalesce=True,
+#     bucket_size_mb=-1,
+#     use_fp16=True,
+#     )
\ No newline at end of file
diff --git a/configs/distillers/mimic_backbone/mb_cascade_rcnn_swinT_fpn_3x_distill_cascade_rcnn_swinT_fpn_1x_coco.py b/configs/distillers/mimic_backbone/mb_cascade_rcnn_swinT_fpn_3x_distill_cascade_rcnn_swinT_fpn_1x_coco.py
new file mode 100644
index 0000000000000000000000000000000000000000..391d9e0a95a6b7e9c954531b9acf5e3b8bb989c0
--- /dev/null
+++ b/configs/distillers/mimic_backbone/mb_cascade_rcnn_swinT_fpn_3x_distill_cascade_rcnn_swinT_fpn_1x_coco.py
@@ -0,0 +1,77 @@
+_base_ = [
+    '../../_base_/models/mask_rcnn_swin_fpn.py',
+    '../../_base_/datasets/coco_instance.py',
+    '../../_base_/schedules/schedule_1x.py', '../../_base_/default_runtime.py'
+]
+
+# model settings
+find_unused_parameters=True
+weight=1
+distiller = dict(
+    type='BackboneDistiller',
+    teacher_pretrained = '/mnt/data3/wangguohua/model/mmdet/swin/cascade_mask_rcnn_swin_tiny_patch4_window7.pth',
+    init_student = True,
+    distill_cfg = [ dict(feature_level = 0,
+                         methods=[dict(type='MSELoss',
+                                       name='loss_mb_0',
+                                       student_channels = 96,
+                                       teacher_channels = 96,
+                                       weight = weight,
+                                       )
+                                  ]
+                        ),
+                    dict(feature_level = 1,
+                         methods=[dict(type='MSELoss',
+                                       name='loss_mb_1',
+                                       student_channels = 192,
+                                       teacher_channels = 192,
+                                       weight = weight,
+                                       )
+                                  ]
+                        ),
+                    dict(feature_level = 2,
+                         methods=[dict(type='MSELoss',
+                                       name='loss_mb_2',
+                                       student_channels = 384,
+                                       teacher_channels = 384,
+                                       weight = weight,
+                                       )
+                                  ]
+                        ),
+                    dict(feature_level = 3,
+                         methods=[dict(type='MSELoss',
+                                       name='loss_mb_3',
+                                       student_channels = 768,
+                                       teacher_channels = 768,
+                                       weight = weight,
+                                       )
+                                  ]
+                        ),
+                   ]
+    )
+
+student_cfg = 'configs/swin/cascade_mask_rcnn_swin_tiny_patch4_window7_mstrain_480-800_giou_4conv1f_adamw_1x_coco.py'
+teacher_cfg = 'configs/swin/cascade_mask_rcnn_swin_tiny_patch4_window7_mstrain_480-800_giou_4conv1f_adamw_3x_coco.py'
+
+data = dict(
+    samples_per_gpu=2,
+    workers_per_gpu=2,)
+#data = dict(train=dict(pipeline=train_pipeline))
+
+optimizer = dict(_delete_=True, type='AdamW', lr=0.0001, betas=(0.9, 0.999), weight_decay=0.05,
+                 paramwise_cfg=dict(custom_keys={'absolute_pos_embed': dict(decay_mult=0.),
+                                                 'relative_position_bias_table': dict(decay_mult=0.),
+                                                 'norm': dict(decay_mult=0.)}))
+lr_config = dict(step=[8, 11])
+#runner = dict(type='EpochBasedRunnerAmp', max_epochs=12)
+runner = dict(type='EpochBasedRunner', max_epochs=12)
+# do not use mmdet version fp16
+# fp16 = None
+# optimizer_config = dict(
+#     type="DistOptimizerHook",
+#     update_interval=1,
+#     grad_clip=None,
+#     coalesce=True,
+#     bucket_size_mb=-1,
+#     use_fp16=True,
+#     )
\ No newline at end of file
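For reference, each `distill_cfg` entry above pairs one backbone stage of the student with the same stage of the teacher (channel widths 96/192/384/768 for Swin-T and Swin-S) and penalizes the difference with an MSE term scaled by `weight`. The sketch below is a minimal, hypothetical illustration of such a per-level mimicking loss; `FeatureMSELoss` is not the repository's actual `MSELoss` implementation, which may additionally normalize, mask, or resize the feature maps.

```python
import torch
import torch.nn as nn
import torch.nn.functional as F


class FeatureMSELoss(nn.Module):
    """Hypothetical per-level backbone-mimicking loss (sketch, not the repo's MSELoss)."""

    def __init__(self, student_channels, teacher_channels, weight=1.0, name='loss_mb'):
        super().__init__()
        self.name = name
        self.weight = weight
        # Align student channels to the teacher's when they differ; for the
        # Swin-T/Swin-S pairing above the widths match, so this stays identity.
        self.align = (nn.Conv2d(student_channels, teacher_channels, kernel_size=1)
                      if student_channels != teacher_channels else None)

    def forward(self, feat_s, feat_t):
        if self.align is not None:
            feat_s = self.align(feat_s)
        # The teacher feature map is treated as a fixed regression target.
        return self.weight * F.mse_loss(feat_s, feat_t.detach())


# Toy usage: one loss per feature_level, mirroring the four distill_cfg entries.
losses = [FeatureMSELoss(c, c, weight=1, name=f'loss_mb_{i}')
          for i, c in enumerate([96, 192, 384, 768])]
```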