diff --git a/ROBUSTNESS_BENCHMARKING.md b/ROBUSTNESS_BENCHMARKING.md
new file mode 100644
index 0000000000000000000000000000000000000000..826c13eaa2bd45f9a52319c5134ec9acdc702f78
--- /dev/null
+++ b/ROBUSTNESS_BENCHMARKING.md
@@ -0,0 +1,110 @@
+# Corruption Benchmarking
+
+## Introduction
+
+We provide tools to test object detection and instance segmentation models on the image corruption benchmark defined in [Benchmarking Robustness in Object Detection: Autonomous Driving when Winter is Coming](https://arxiv.org/abs/1907.07484).
+This page provides basic tutorials on how to use the benchmark.
+
+```
+@article{michaelis2019winter,
+  title={Benchmarking Robustness in Object Detection:
+    Autonomous Driving when Winter is Coming},
+  author={Michaelis, Claudio and Mitzkus, Benjamin and
+    Geirhos, Robert and Rusak, Evgenia and
+    Bringmann, Oliver and Ecker, Alexander S. and
+    Bethge, Matthias and Brendel, Wieland},
+  journal={arXiv:1907.07484},
+  year={2019}
+}
+```
+
+## About the benchmark
+
+To submit results to the benchmark, please visit the [benchmark homepage](https://github.com/bethgelab/robust-detection-benchmark).
+
+The benchmark is modelled after the [imagenet-c benchmark](https://github.com/hendrycks/robustness), which was originally
+published in [Benchmarking Neural Network Robustness to Common Corruptions and Perturbations](https://arxiv.org/abs/1903.12261) (ICLR 2019) by Dan Hendrycks and Thomas Dietterich.
+
+The image corruption functions are included in this library, but they can also be installed separately using:
+
+```shell
+pip install imagecorruptions
+```
+
+Compared to imagenet-c, a few changes had to be made to handle images of arbitrary size and greyscale images.
+We also modified the 'motion blur' and 'snow' corruptions to remove the dependency on a Linux-specific library,
+which would otherwise have to be installed separately. For details please refer to the [imagecorruptions repository](https://github.com/bethgelab/imagecorruptions).
+
+## Inference with pretrained models
+
+We provide a testing script to evaluate a model's performance on any combination of the corruptions provided in the benchmark.
+
+### Test a dataset
+
+- [x] single GPU testing
+- [ ] multiple GPU testing
+- [ ] visualize detection results
+
+You can use the following commands to test a model's performance under the 15 corruptions used in the benchmark.
+
+```shell
+# single-gpu testing
+python tools/test_robustness.py ${CONFIG_FILE} ${CHECKPOINT_FILE} [--out ${RESULT_FILE}] [--eval ${EVAL_METRICS}]
+```
+
+Alternatively, different groups of corruptions can be selected.
+
+```shell
+# noise
+python tools/test_robustness.py ${CONFIG_FILE} ${CHECKPOINT_FILE} [--out ${RESULT_FILE}] [--eval ${EVAL_METRICS}] --corruptions noise
+
+# blur
+python tools/test_robustness.py ${CONFIG_FILE} ${CHECKPOINT_FILE} [--out ${RESULT_FILE}] [--eval ${EVAL_METRICS}] --corruptions blur
+
+# weather
+python tools/test_robustness.py ${CONFIG_FILE} ${CHECKPOINT_FILE} [--out ${RESULT_FILE}] [--eval ${EVAL_METRICS}] --corruptions weather
+
+# digital
+python tools/test_robustness.py ${CONFIG_FILE} ${CHECKPOINT_FILE} [--out ${RESULT_FILE}] [--eval ${EVAL_METRICS}] --corruptions digital
+```
+
+Or a custom set of corruptions can be chosen, e.g.:
+
+```shell
+# gaussian noise, zoom blur and snow
+python tools/test_robustness.py ${CONFIG_FILE} ${CHECKPOINT_FILE} [--out ${RESULT_FILE}] [--eval ${EVAL_METRICS}] --corruptions gaussian_noise zoom_blur snow
+```
+
+Finally, the corruption severities to evaluate can be chosen.
+Severity 0 corresponds to clean data, and the corruption effect increases from severity 1 to 5.
+
+```shell
+# severity 1
+python tools/test_robustness.py ${CONFIG_FILE} ${CHECKPOINT_FILE} [--out ${RESULT_FILE}] [--eval ${EVAL_METRICS}] --severities 1
+
+# severities 0,2,4
+python tools/test_robustness.py ${CONFIG_FILE} ${CHECKPOINT_FILE} [--out ${RESULT_FILE}] [--eval ${EVAL_METRICS}] --severities 0 2 4
+```
+
+## Results for model zoo models
+
+The results on COCO 2017 val are shown in the table below. The "AP corr." columns report the mean AP over the 15 benchmark corruptions at severities 1-5 (mPC), and the "%" columns report the performance relative to clean data (rPC).
+
+Model | Backbone | Style | Lr schd | box AP clean | box AP corr. | box % | mask AP clean | mask AP corr. | mask % |
+:-----:|:---------:|:-------:|:-------:|:------------:|:------------:|:-----:|:-------------:|:-------------:|:------:|
+Faster R-CNN | R-50-FPN | pytorch | 1x | 36.3 | 18.2 | 50.2 | - | - | - |
+Faster R-CNN | R-101-FPN | pytorch | 1x | 38.5 | 20.9 | 54.2 | - | - | - |
+Faster R-CNN | X-101-32x4d-FPN | pytorch | 1x | 40.1 | 22.3 | 55.5 | - | - | - |
+Faster R-CNN | X-101-64x4d-FPN | pytorch | 1x | 41.3 | 23.4 | 56.6 | - | - | - |
+Faster R-CNN | R-50-FPN-DCN | pytorch | 1x | 40.0 | 22.4 | 56.1 | - | - | - |
+Faster R-CNN | X-101-32x4d-FPN-DCN | pytorch | 1x | 43.4 | 26.7 | 61.6 | - | - | - |
+Mask R-CNN | R-50-FPN | pytorch | 1x | 37.3 | 18.7 | 50.1 | 34.2 | 16.8 | 49.1 |
+Mask R-CNN | R-50-FPN-DCN | pytorch | 1x | 41.1 | 23.3 | 56.7 | 37.2 | 20.7 | 55.7 |
+Cascade R-CNN | R-50-FPN | pytorch | 1x | 40.4 | 20.1 | 49.7 | - | - | - |
+Cascade Mask R-CNN | R-50-FPN | pytorch | 1x | 41.2 | 20.7 | 50.2 | 35.7 | 17.6 | 49.3 |
+RetinaNet | R-50-FPN | pytorch | 1x | 35.6 | 17.8 | 50.1 | - | - | - |
+Hybrid Task Cascade | X-101-64x4d-FPN-DCN | pytorch | 1x | 50.6 | 32.7 | 64.7 | 43.8 | 28.1 | 64.0 |
+
+Results may vary slightly due to the stochastic application of the corruptions.
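+
+## Evaluating the saved results
+
+When `--out` and `--eval` are given, `tools/test_robustness.py` additionally writes the aggregated per-corruption evaluation to a file with a `_results` suffix (e.g. `result.pkl` produces `result_results.pkl`). That file can be summarized into the clean performance P, the mean performance under corruption mPC and the relative performance under corruption rPC with `tools/robustness_eval.py`. A typical invocation looks like the following, where `${RESULTS_FILE}` is a placeholder for the `_results` file:
+
+```shell
+# summarize bbox results on COCO into P, mPC and rPC
+python tools/robustness_eval.py ${RESULTS_FILE} --dataset coco --task bbox --prints P mPC rPC
+```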
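+
+## Hold-out corruptions and clean data
+
+Besides the corruption groups listed above, the argument parser of `tools/test_robustness.py` also accepts `--corruptions holdout` for the four corruptions that are not part of the 15-corruption benchmark (speckle noise, gaussian blur, spatter and saturate) and `--corruptions None` to evaluate the clean data only (the severity is then forced to 0):
+
+```shell
+# hold-out corruptions
+python tools/test_robustness.py ${CONFIG_FILE} ${CHECKPOINT_FILE} [--out ${RESULT_FILE}] [--eval ${EVAL_METRICS}] --corruptions holdout
+
+# clean data only, no corruption applied
+python tools/test_robustness.py ${CONFIG_FILE} ${CHECKPOINT_FILE} [--out ${RESULT_FILE}] [--eval ${EVAL_METRICS}] --corruptions None
+```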
+ diff --git a/demo/corruptions_sev_3.png b/demo/corruptions_sev_3.png new file mode 100644 index 0000000000000000000000000000000000000000..bbbd19a8d4c87677cb0cf64833a5eb1ce4b95e40 Binary files /dev/null and b/demo/corruptions_sev_3.png differ diff --git a/mmdet/datasets/custom.py b/mmdet/datasets/custom.py index 18d7195da8dcf539827921dd7e2323d630c91d20..25944f31e4d7219b078571b56edabca2c26bd960 100644 --- a/mmdet/datasets/custom.py +++ b/mmdet/datasets/custom.py @@ -3,6 +3,7 @@ import warnings import mmcv import numpy as np +from imagecorruptions import corrupt from mmcv.parallel import DataContainer as DC from torch.utils.data import Dataset @@ -56,6 +57,8 @@ class CustomDataset(Dataset): seg_scale_factor=1, extra_aug=None, resize_keep_ratio=True, + corruption=None, + corruption_severity=1, skip_img_without_anno=True, test_mode=False): # prefix of images path @@ -131,6 +134,10 @@ class CustomDataset(Dataset): self.resize_keep_ratio = resize_keep_ratio self.skip_img_without_anno = skip_img_without_anno + # corruptions + self.corruption = corruption + self.corruption_severity = corruption_severity + def __len__(self): return len(self.img_infos) @@ -181,6 +188,12 @@ class CustomDataset(Dataset): img_info = self.img_infos[idx] # load image img = mmcv.imread(osp.join(self.img_prefix, img_info['filename'])) + # corruption + if self.corruption is not None: + img = corrupt( + img, + severity=self.corruption_severity, + corruption_name=self.corruption) # load proposals if necessary if self.proposals is not None: proposals = self.proposals[idx][:self.num_max_proposals] @@ -274,6 +287,13 @@ class CustomDataset(Dataset): """Prepare an image for testing (multi-scale and flipping)""" img_info = self.img_infos[idx] img = mmcv.imread(osp.join(self.img_prefix, img_info['filename'])) + # corruption + if self.corruption is not None: + img = corrupt( + img, + severity=self.corruption_severity, + corruption_name=self.corruption) + # load proposals if necessary if self.proposals is not None: proposal = self.proposals[idx][:self.num_max_proposals] if not (proposal.shape[1] == 4 or proposal.shape[1] == 5): diff --git a/setup.py b/setup.py index 4c3c9a31273baf556fc774f5e96f7ce8ca7c17ff..b94a430c36286180452728706d6436237dc49574 100644 --- a/setup.py +++ b/setup.py @@ -144,7 +144,7 @@ if __name__ == '__main__': tests_require=['pytest'], install_requires=[ 'mmcv>=0.2.10', 'numpy', 'matplotlib', 'six', 'terminaltables', - 'pycocotools', 'torch>=1.1' + 'pycocotools', 'torch>=1.1', 'imagecorruptions' ], ext_modules=[ make_cython_ext( diff --git a/tools/robustness_eval.py b/tools/robustness_eval.py new file mode 100644 index 0000000000000000000000000000000000000000..a07aa015945e5edca7f7a15fa0a778e02b561e7d --- /dev/null +++ b/tools/robustness_eval.py @@ -0,0 +1,256 @@ +import os.path as osp +from argparse import ArgumentParser + +import mmcv +import numpy as np + + +def print_coco_results(results): + + def _print(result, ap=1, iouThr=None, areaRng='all', maxDets=100): + iStr = ' {:<18} {} @[ IoU={:<9} | \ + area={:>6s} | maxDets={:>3d} ] = {:0.3f}' + + titleStr = 'Average Precision' if ap == 1 else 'Average Recall' + typeStr = '(AP)' if ap == 1 else '(AR)' + iouStr = '{:0.2f}:{:0.2f}'.format(.5, .95) \ + if iouThr is None else '{:0.2f}'.format(iouThr) + print(iStr.format(titleStr, typeStr, iouStr, areaRng, maxDets, result)) + + stats = np.zeros((12, )) + stats[0] = _print(results[0], 1) + stats[1] = _print(results[1], 1, iouThr=.5) + stats[2] = _print(results[2], 1, iouThr=.75) + stats[3] = _print(results[3], 1, 
areaRng='small') + stats[4] = _print(results[4], 1, areaRng='medium') + stats[5] = _print(results[5], 1, areaRng='large') + stats[6] = _print(results[6], 0, maxDets=1) + stats[7] = _print(results[7], 0, maxDets=10) + stats[8] = _print(results[8], 0) + stats[9] = _print(results[9], 0, areaRng='small') + stats[10] = _print(results[10], 0, areaRng='medium') + stats[11] = _print(results[11], 0, areaRng='large') + + +def get_coco_style_results(filename, + task='bbox', + metric=None, + prints='mPC', + aggregate='benchmark'): + + assert aggregate in ['benchmark', 'all'] + + if prints == 'all': + prints = ['P', 'mPC', 'rPC'] + elif isinstance(prints, str): + prints = [prints] + for p in prints: + assert p in ['P', 'mPC', 'rPC'] + + if metric is None: + metrics = [ + 'AP', 'AP50', 'AP75', 'APs', 'APm', 'APl', 'AR1', 'AR10', 'AR100', + 'ARs', 'ARm', 'ARl' + ] + elif isinstance(metric, list): + metrics = metric + else: + metrics = [metric] + + for metric_name in metrics: + assert metric_name in [ + 'AP', 'AP50', 'AP75', 'APs', 'APm', 'APl', 'AR1', 'AR10', 'AR100', + 'ARs', 'ARm', 'ARl' + ] + + eval_output = mmcv.load(filename) + + num_distortions = len(list(eval_output.keys())) + results = np.zeros((num_distortions, 6, len(metrics)), dtype='float32') + + for corr_i, distortion in enumerate(eval_output): + for severity in eval_output[distortion]: + for metric_j, metric_name in enumerate(metrics): + mAP = eval_output[distortion][severity][task][metric_name] + results[corr_i, severity, metric_j] = mAP + + P = results[0, 0, :] + if aggregate == 'benchmark': + mPC = np.mean(results[:15, 1:, :], axis=(0, 1)) + else: + mPC = np.mean(results[:, 1:, :], axis=(0, 1)) + rPC = mPC / P + + print('\nmodel: {}'.format(osp.basename(filename))) + if metric is None: + if 'P' in prints: + print('Performance on Clean Data [P] ({})'.format(task)) + print_coco_results(P) + if 'mPC' in prints: + print('Mean Performance under Corruption [mPC] ({})'.format(task)) + print_coco_results(mPC) + if 'rPC' in prints: + print('Realtive Performance under Corruption [rPC] ({})'.format( + task)) + print_coco_results(rPC) + else: + if 'P' in prints: + print('Performance on Clean Data [P] ({})'.format(task)) + for metric_i, metric_name in enumerate(metrics): + print('{:5} = {:0.3f}'.format(metric_name, P[metric_i])) + if 'mPC' in prints: + print('Mean Performance under Corruption [mPC] ({})'.format(task)) + for metric_i, metric_name in enumerate(metrics): + print('{:5} = {:0.3f}'.format(metric_name, mPC[metric_i])) + if 'rPC' in prints: + print('Relative Performance under Corruption [rPC] ({})'.format( + task)) + for metric_i, metric_name in enumerate(metrics): + print('{:5} => {:0.1f} %'.format(metric_name, + rPC[metric_i] * 100)) + + return results + + +def get_voc_style_results(filename, prints='mPC', aggregate='benchmark'): + + assert aggregate in ['benchmark', 'all'] + + if prints == 'all': + prints = ['P', 'mPC', 'rPC'] + elif isinstance(prints, str): + prints = [prints] + for p in prints: + assert p in ['P', 'mPC', 'rPC'] + + eval_output = mmcv.load(filename) + + num_distortions = len(list(eval_output.keys())) + results = np.zeros((num_distortions, 6, 20), dtype='float32') + + for i, distortion in enumerate(eval_output): + for severity in eval_output[distortion]: + mAP = [ + eval_output[distortion][severity][j]['ap'] + for j in range(len(eval_output[distortion][severity])) + ] + results[i, severity, :] = mAP + + P = results[0, 0, :] + if aggregate == 'benchmark': + mPC = np.mean(results[:15, 1:, :], axis=(0, 1)) + else: + mPC = 
np.mean(results[:, 1:, :], axis=(0, 1)) + rPC = mPC / P + + print('\nmodel: {}'.format(osp.basename(filename))) + if 'P' in prints: + print('{:48} = {:0.3f}'.format('Performance on Clean Data [P] in AP50', + np.mean(P))) + if 'mPC' in prints: + print('{:48} = {:0.3f}'.format( + 'Mean Performance under Corruption [mPC] in AP50', np.mean(mPC))) + if 'rPC' in prints: + print('{:48} = {:0.1f}'.format( + 'Realtive Performance under Corruption [rPC] in %', + np.mean(rPC) * 100)) + + return np.mean(results, axis=2, keepdims=True) + + +def get_results(filename, + dataset='coco', + task='bbox', + metric=None, + prints='mPC', + aggregate='benchmark'): + assert dataset in ['coco', 'voc', 'cityscapes'] + + if dataset in ['coco', 'cityscapes']: + results = get_coco_style_results( + filename, + task=task, + metric=metric, + prints=prints, + aggregate=aggregate) + elif dataset == 'voc': + if task != 'bbox': + print('Only bbox analysis is supported for Pascal VOC') + print('Will report bbox results\n') + if metric not in [None, ['AP'], ['AP50']]: + print('Only the AP50 metric is supported for Pascal VOC') + print('Will report AP50 metric\n') + results = get_voc_style_results( + filename, prints=prints, aggregate=aggregate) + + return results + + +def get_distortions_from_file(filename): + + eval_output = mmcv.load(filename) + + return get_distortions_from_results(eval_output) + + +def get_distortions_from_results(eval_output): + distortions = [] + for i, distortion in enumerate(eval_output): + distortions.append(distortion.replace("_", " ")) + return distortions + + +def main(): + parser = ArgumentParser(description='Corruption Result Analysis') + parser.add_argument('filename', help='result file path') + parser.add_argument( + '--dataset', + type=str, + choices=['coco', 'voc', 'cityscapes'], + default='coco', + help='dataset type') + parser.add_argument( + '--task', + type=str, + nargs='+', + choices=['bbox', 'segm'], + default=['bbox'], + help='task to report') + parser.add_argument( + '--metric', + nargs='+', + choices=[ + None, 'AP', 'AP50', 'AP75', 'APs', 'APm', 'APl', 'AR1', 'AR10', + 'AR100', 'ARs', 'ARm', 'ARl' + ], + default=None, + help='metric to report') + parser.add_argument( + '--prints', + type=str, + nargs='+', + choices=['P', 'mPC', 'rPC'], + default='mPC', + help='corruption benchmark metric to print') + parser.add_argument( + '--aggregate', + type=str, + choices=['all', 'benchmark'], + default='benchmark', + help='aggregate all results or only those \ + for benchmark corruptions') + + args = parser.parse_args() + + for task in args.task: + get_results( + args.filename, + dataset=args.dataset, + task=task, + metric=args.metric, + prints=args.prints, + aggregate=args.aggregate) + + +if __name__ == '__main__': + main() diff --git a/tools/test_robustness.py b/tools/test_robustness.py new file mode 100644 index 0000000000000000000000000000000000000000..584654b618bef768dfe15204caedaecb68b2c9ea --- /dev/null +++ b/tools/test_robustness.py @@ -0,0 +1,469 @@ +import argparse +import os +import os.path as osp +import shutil +import tempfile + +import mmcv +import numpy as np +import torch +import torch.distributed as dist +from mmcv.parallel import MMDataParallel, MMDistributedDataParallel +from mmcv.runner import get_dist_info, load_checkpoint +from pycocotools.coco import COCO +from pycocotools.cocoeval import COCOeval +from robustness_eval import get_results + +from mmdet import datasets +from mmdet.apis import init_dist, set_random_seed +from mmdet.core import (eval_map, fast_eval_recall, 
results2json, + wrap_fp16_model) +from mmdet.datasets import build_dataloader, build_dataset +from mmdet.models import build_detector + + +def coco_eval_with_return(result_files, + result_types, + coco, + max_dets=(100, 300, 1000)): + for res_type in result_types: + assert res_type in [ + 'proposal', 'proposal_fast', 'bbox', 'segm', 'keypoints' + ] + + if mmcv.is_str(coco): + coco = COCO(coco) + assert isinstance(coco, COCO) + + if result_types == ['proposal_fast']: + ar = fast_eval_recall(result_files, coco, np.array(max_dets)) + for i, num in enumerate(max_dets): + print('AR@{}\t= {:.4f}'.format(num, ar[i])) + return + + eval_results = {} + for res_type in result_types: + result_file = result_files[res_type] + assert result_file.endswith('.json') + + coco_dets = coco.loadRes(result_file) + img_ids = coco.getImgIds() + iou_type = 'bbox' if res_type == 'proposal' else res_type + cocoEval = COCOeval(coco, coco_dets, iou_type) + cocoEval.params.imgIds = img_ids + if res_type == 'proposal': + cocoEval.params.useCats = 0 + cocoEval.params.maxDets = list(max_dets) + cocoEval.evaluate() + cocoEval.accumulate() + cocoEval.summarize() + if res_type == 'segm' or res_type == 'bbox': + metric_names = [ + 'AP', 'AP50', 'AP75', 'APs', 'APm', 'APl', 'AR1', 'AR10', + 'AR100', 'ARs', 'ARm', 'ARl' + ] + eval_results[res_type] = { + metric_names[i]: cocoEval.stats[i] + for i in range(len(metric_names)) + } + else: + eval_results[res_type] = cocoEval.stats + + return eval_results + + +def voc_eval_with_return(result_file, + dataset, + iou_thr=0.5, + print_summary=True, + only_ap=True): + det_results = mmcv.load(result_file) + gt_bboxes = [] + gt_labels = [] + gt_ignore = [] + for i in range(len(dataset)): + ann = dataset.get_ann_info(i) + bboxes = ann['bboxes'] + labels = ann['labels'] + if 'bboxes_ignore' in ann: + ignore = np.concatenate([ + np.zeros(bboxes.shape[0], dtype=np.bool), + np.ones(ann['bboxes_ignore'].shape[0], dtype=np.bool) + ]) + gt_ignore.append(ignore) + bboxes = np.vstack([bboxes, ann['bboxes_ignore']]) + labels = np.concatenate([labels, ann['labels_ignore']]) + gt_bboxes.append(bboxes) + gt_labels.append(labels) + if not gt_ignore: + gt_ignore = gt_ignore + if hasattr(dataset, 'year') and dataset.year == 2007: + dataset_name = 'voc07' + else: + dataset_name = dataset.CLASSES + mean_ap, eval_results = eval_map( + det_results, + gt_bboxes, + gt_labels, + gt_ignore=gt_ignore, + scale_ranges=None, + iou_thr=iou_thr, + dataset=dataset_name, + print_summary=print_summary) + + if only_ap: + eval_results = [{ + 'ap': eval_results[i]['ap'] + } for i in range(len(eval_results))] + + return mean_ap, eval_results + + +def single_gpu_test(model, data_loader, show=False): + model.eval() + results = [] + dataset = data_loader.dataset + prog_bar = mmcv.ProgressBar(len(dataset)) + for i, data in enumerate(data_loader): + with torch.no_grad(): + result = model(return_loss=False, rescale=not show, **data) + results.append(result) + + if show: + model.module.show_result(data, result, dataset.img_norm_cfg) + + batch_size = data['img'][0].size(0) + for _ in range(batch_size): + prog_bar.update() + return results + + +def multi_gpu_test(model, data_loader, tmpdir=None): + model.eval() + results = [] + dataset = data_loader.dataset + rank, world_size = get_dist_info() + if rank == 0: + prog_bar = mmcv.ProgressBar(len(dataset)) + for i, data in enumerate(data_loader): + with torch.no_grad(): + result = model(return_loss=False, rescale=True, **data) + results.append(result) + + if rank == 0: + batch_size = 
data['img'][0].size(0) + for _ in range(batch_size * world_size): + prog_bar.update() + + # collect results from all ranks + results = collect_results(results, len(dataset), tmpdir) + + return results + + +def collect_results(result_part, size, tmpdir=None): + rank, world_size = get_dist_info() + # create a tmp dir if it is not specified + if tmpdir is None: + MAX_LEN = 512 + # 32 is whitespace + dir_tensor = torch.full((MAX_LEN, ), + 32, + dtype=torch.uint8, + device='cuda') + if rank == 0: + tmpdir = tempfile.mkdtemp() + tmpdir = torch.tensor( + bytearray(tmpdir.encode()), dtype=torch.uint8, device='cuda') + dir_tensor[:len(tmpdir)] = tmpdir + dist.broadcast(dir_tensor, 0) + tmpdir = dir_tensor.cpu().numpy().tobytes().decode().rstrip() + else: + mmcv.mkdir_or_exist(tmpdir) + # dump the part result to the dir + mmcv.dump(result_part, osp.join(tmpdir, 'part_{}.pkl'.format(rank))) + dist.barrier() + # collect all parts + if rank != 0: + return None + else: + # load results of all parts from tmp dir + part_list = [] + for i in range(world_size): + part_file = osp.join(tmpdir, 'part_{}.pkl'.format(i)) + part_list.append(mmcv.load(part_file)) + # sort the results + ordered_results = [] + for res in zip(*part_list): + ordered_results.extend(list(res)) + # the dataloader may pad some samples + ordered_results = ordered_results[:size] + # remove tmp dir + shutil.rmtree(tmpdir) + return ordered_results + + +def parse_args(): + parser = argparse.ArgumentParser(description='MMDet test detector') + parser.add_argument('config', help='test config file path') + parser.add_argument('checkpoint', help='checkpoint file') + parser.add_argument('--out', help='output result file') + parser.add_argument( + '--corruptions', + type=str, + nargs='+', + default='benchmark', + choices=[ + 'all', 'benchmark', 'noise', 'blur', 'weather', 'digital', + 'holdout', 'None', 'gaussian_noise', 'shot_noise', 'impulse_noise', + 'defocus_blur', 'glass_blur', 'motion_blur', 'zoom_blur', 'snow', + 'frost', 'fog', 'brightness', 'contrast', 'elastic_transform', + 'pixelate', 'jpeg_compression', 'speckle_noise', 'gaussian_blur', + 'spatter', 'saturate' + ], + help='corruptions') + parser.add_argument( + '--severities', + type=int, + nargs='+', + default=[0, 1, 2, 3, 4, 5], + help='corruption severity levels') + parser.add_argument( + '--eval', + type=str, + nargs='+', + choices=['proposal', 'proposal_fast', 'bbox', 'segm', 'keypoints'], + help='eval types') + parser.add_argument( + '--iou-thr', + type=float, + default=0.5, + help='IoU threshold for pascal voc evaluation') + parser.add_argument( + '--summaries', + type=bool, + default=False, + help='Print summaries for every corruption and severity') + parser.add_argument( + '--workers', type=int, default=32, help='workers per gpu') + parser.add_argument('--show', action='store_true', help='show results') + parser.add_argument('--tmpdir', help='tmp dir for writing some results') + parser.add_argument('--seed', type=int, default=None, help='random seed') + parser.add_argument( + '--launcher', + choices=['none', 'pytorch', 'slurm', 'mpi'], + default='none', + help='job launcher') + parser.add_argument('--local_rank', type=int, default=0) + parser.add_argument( + '--final-prints', + type=str, + nargs='+', + choices=['P', 'mPC', 'rPC'], + default='mPC', + help='corruption benchmark metric to print at the end') + parser.add_argument( + '--final-prints-aggregate', + type=str, + choices=['all', 'benchmark'], + default='benchmark', + help='aggregate all results or only those for benchmark 
corruptions') + args = parser.parse_args() + if 'LOCAL_RANK' not in os.environ: + os.environ['LOCAL_RANK'] = str(args.local_rank) + return args + + +def main(): + args = parse_args() + + assert args.out or args.show, \ + ('Please specify at least one operation (save or show the results) ' + 'with the argument "--out" or "--show"') + + if args.out is not None and not args.out.endswith(('.pkl', '.pickle')): + raise ValueError('The output file must be a pkl file.') + + cfg = mmcv.Config.fromfile(args.config) + # set cudnn_benchmark + if cfg.get('cudnn_benchmark', False): + torch.backends.cudnn.benchmark = True + cfg.model.pretrained = None + cfg.data.test.test_mode = True + if args.workers == 0: + args.workers = cfg.data.workers_per_gpu + + # init distributed env first, since logger depends on the dist info. + if args.launcher == 'none': + distributed = False + else: + distributed = True + init_dist(args.launcher, **cfg.dist_params) + + # set random seeds + if args.seed is not None: + set_random_seed(args.seed) + + if 'all' in args.corruptions: + corruptions = [ + 'gaussian_noise', 'shot_noise', 'impulse_noise', 'defocus_blur', + 'glass_blur', 'motion_blur', 'zoom_blur', 'snow', 'frost', 'fog', + 'brightness', 'contrast', 'elastic_transform', 'pixelate', + 'jpeg_compression', 'speckle_noise', 'gaussian_blur', 'spatter', + 'saturate' + ] + elif 'benchmark' in args.corruptions: + corruptions = [ + 'gaussian_noise', 'shot_noise', 'impulse_noise', 'defocus_blur', + 'glass_blur', 'motion_blur', 'zoom_blur', 'snow', 'frost', 'fog', + 'brightness', 'contrast', 'elastic_transform', 'pixelate', + 'jpeg_compression' + ] + elif 'noise' in args.corruptions: + corruptions = ['gaussian_noise', 'shot_noise', 'impulse_noise'] + elif 'blur' in args.corruptions: + corruptions = [ + 'defocus_blur', 'glass_blur', 'motion_blur', 'zoom_blur' + ] + elif 'weather' in args.corruptions: + corruptions = ['snow', 'frost', 'fog', 'brightness'] + elif 'digital' in args.corruptions: + corruptions = [ + 'contrast', 'elastic_transform', 'pixelate', 'jpeg_compression' + ] + elif 'holdout' in args.corruptions: + corruptions = ['speckle_noise', 'gaussian_blur', 'spatter', 'saturate'] + elif 'None' in args.corruptions: + corruptions = ['None'] + args.severities = [0] + else: + corruptions = args.corruptions + + aggregated_results = {} + for corr_i, corruption in enumerate(corruptions): + aggregated_results[corruption] = {} + for sev_i, corruption_severity in enumerate(args.severities): + # evaluate severity 0 (= no corruption) only once + if corr_i > 0 and corruption_severity == 0: + aggregated_results[corruption][0] = \ + aggregated_results[corruptions[0]][0] + continue + + # assign corruption and severity + if corruption_severity == 0: + # evaluate without corruptions for severity = 0 + cfg.data.test['corruption'] = None + cfg.data.test['corruption_severity'] = 0 + else: + cfg.data.test['corruption'] = corruption + cfg.data.test['corruption_severity'] = corruption_severity + + # print info + print('\nTesting {} at severity {}'.format(corruption, + corruption_severity)) + + # build the dataloader + # TODO: support multiple images per gpu + # (only minor changes are needed) + dataset = build_dataset(cfg.data.test) + data_loader = build_dataloader( + dataset, + imgs_per_gpu=1, + workers_per_gpu=args.workers, + dist=distributed, + shuffle=False) + + # build the model and load checkpoint + model = build_detector( + cfg.model, train_cfg=None, test_cfg=cfg.test_cfg) + fp16_cfg = cfg.get('fp16', None) + if fp16_cfg is not None: + 
wrap_fp16_model(model) + checkpoint = load_checkpoint( + model, args.checkpoint, map_location='cpu') + # old versions did not save class info in checkpoints, + # this walkaround is for backward compatibility + if 'CLASSES' in checkpoint['meta']: + model.CLASSES = checkpoint['meta']['CLASSES'] + else: + model.CLASSES = dataset.CLASSES + + if not distributed: + model = MMDataParallel(model, device_ids=[0]) + outputs = single_gpu_test(model, data_loader, args.show) + else: + model = MMDistributedDataParallel(model.cuda()) + outputs = multi_gpu_test(model, data_loader, args.tmpdir) + + rank, _ = get_dist_info() + if args.out and rank == 0: + eval_results_filename = ( + osp.splitext(args.out)[0] + '_results' + + osp.splitext(args.out)[1]) + mmcv.dump(outputs, args.out) + eval_types = args.eval + if cfg.dataset_type == 'VOCDataset': + if eval_types: + for eval_type in eval_types: + if eval_type == 'bbox': + test_dataset = mmcv.runner.obj_from_dict( + cfg.data.test, datasets) + mean_ap, eval_results = \ + voc_eval_with_return( + args.out, test_dataset, + args.iou_thr, args.summaries) + aggregated_results[corruption][ + corruption_severity] = eval_results + else: + print('\nOnly "bbox" evaluation \ + is supported for pascal voc') + else: + if eval_types: + print('Starting evaluate {}'.format( + ' and '.join(eval_types))) + if eval_types == ['proposal_fast']: + result_file = args.out + else: + if not isinstance(outputs[0], dict): + result_files = results2json( + dataset, outputs, args.out) + else: + for name in outputs[0]: + print('\nEvaluating {}'.format(name)) + outputs_ = [out[name] for out in outputs] + result_file = args.out + + '.{}'.format(name) + result_files = results2json( + dataset, outputs_, result_file) + eval_results = coco_eval_with_return( + result_files, eval_types, dataset.coco) + aggregated_results[corruption][ + corruption_severity] = eval_results + else: + print('\nNo task was selected for evaluation;' + '\nUse --eval to select a task') + + # save results after each evaluation + mmcv.dump(aggregated_results, eval_results_filename) + + # print filan results + print('\nAggregated results:') + prints = args.final_prints + aggregate = args.final_prints_aggregate + + if cfg.dataset_type == 'VOCDataset': + get_results( + eval_results_filename, + dataset='voc', + prints=prints, + aggregate=aggregate) + else: + get_results( + eval_results_filename, + dataset='coco', + prints=prints, + aggregate=aggregate) + + +if __name__ == '__main__': + main()