Commit d44664a2 authored by suilin0432

update

Parent bde5a41c
......@@ -27,7 +27,7 @@ from .cityscapes_panoptic import register_all_cityscapes_panoptic
from .coco import load_sem_seg, register_coco_instances, register_coco_instances_wsl
from .coco_panoptic import register_coco_panoptic, register_coco_panoptic_separated
from .lvis import get_lvis_instances_meta, register_lvis_instances
from .pascal_voc import register_pascal_voc, register_pascal_voc_wsl, register_pascal_voc_wsl_top1
from .pascal_voc import register_pascal_voc, register_pascal_voc_wsl, register_pascal_voc_wsl_top1, register_pascal_voc_wsl_thres, register_pascal_voc_wsl_contain
# ==== Predefined datasets and splits for COCO ==========
......@@ -119,6 +119,14 @@ COCO_WSL = {
"coco_2014_valminusminival_casd_wsl_ssod_filter": ("coco/val2014", "coco/annotations/casd_valminusminival2014_wsl_ssod5w_filter_05.json"),
"coco_2014_train_casd_wsl_ssod_filter_all": ("coco/train2014", "coco/annotations/casd_train2014_wsl_ssod5w_filter_05_all.json"),
"coco_2014_valminusminival_casd_wsl_ssod_filter_all": ("coco/val2014", "coco/annotations/casd_valminusminival2014_wsl_ssod5w_filter_05_all.json"),
"coco_2014_train_casd_wsl_thres2_top0": ("coco/train2014", "coco/annotations/casd_train2014_wsl_thres_2_top_0.json"),
"coco_2014_valminusminival_casd_wsl_thres2_top0": ("coco/val2014", "coco/annotations/casd_valminusminival2014_wsl_thres_2_top_0.json"),
"coco_2014_train_casd_wsl_top05": ("coco/train2014", "coco/annotations/casd_train2014_wsl_top_05.json"),
"coco_2014_valminusminival_casd_wsl_top05": ("coco/val2014", "coco/annotations/casd_valminusminival2014_wsl_top_05.json"),
"coco_2014_train_casd_wsl_top15": ("coco/train2014", "coco/annotations/casd_train2014_wsl_top_15.json"),
"coco_2014_valminusminival_casd_wsl_top15": ("coco/val2014", "coco/annotations/casd_valminusminival2014_wsl_top_15.json"),
"coco_2014_train_casd_wsl_w2f": ("coco/train2014", "coco/annotations/casd_train2014_wsl_w2f.json"),
"coco_2014_valminusminival_casd_wsl_w2f": ("coco/val2014", "coco/annotations/casd_valminusminival2014_wsl_w2f.json"),
}
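# Reviewer sketch (not part of this commit): each COCO_WSL entry maps a dataset name to an
# (image_root, json_file) pair relative to the dataset root. Registration is assumed to follow
# the usual detectron2 pattern below; the exact arguments of this repo's register_coco_instances_wsl
# helper may differ, so the stock register_coco_instances is used here for illustration only.
def register_all_coco_wsl_sketch(root):
    for name, (image_root, json_file) in COCO_WSL.items():
        register_coco_instances(
            name,
            {},  # metadata; the real helper presumably fills in the COCO thing_classes
            os.path.join(root, json_file),
            os.path.join(root, image_root),
        )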
# Register the weakly supervised datasets labelled with PGT (pseudo ground truth)
......@@ -283,6 +291,56 @@ def register_all_pascal_voc_wsl_top1(root):
register_pascal_voc_wsl_top1(name, os.path.join(root, dirname), split, year)
MetadataCatalog.get(name).evaluator_type = "pascal_voc"
def register_all_pascal_voc_wsl_thres(root):
SPLITS = [
("voc_2007_train_wsl_thres", "VOC2007", "train"),
("voc_2007_val_wsl_thres", "VOC2007", "val")
]
for name, dirname, split in SPLITS:
year = 2007 if "2007" in name else 2012
register_pascal_voc_wsl_thres(name, os.path.join(root, dirname), split, year)
MetadataCatalog.get(name).evaluator_type = "pascal_voc"
def register_all_pascal_voc_wsl_contain_075(root):
SPLITS = [
("voc_2007_train_wsl_contain_075", "VOC2007", "train"),
("voc_2007_val_wsl_contain_075", "VOC2007", "val")
]
for name, dirname, split in SPLITS:
year = 2007 if "2007" in name else 2012
register_pascal_voc_wsl_contain(name, os.path.join(root, dirname), split, year, "075")
MetadataCatalog.get(name).evaluator_type = "pascal_voc"
def register_all_pascal_voc_wsl_contain_08(root):
SPLITS = [
("voc_2007_train_wsl_contain_08", "VOC2007", "train"),
("voc_2007_val_wsl_contain_08", "VOC2007", "val")
]
for name, dirname, split in SPLITS:
year = 2007 if "2007" in name else 2012
register_pascal_voc_wsl_contain(name, os.path.join(root, dirname), split, year, "08")
MetadataCatalog.get(name).evaluator_type = "pascal_voc"
def register_all_pascal_voc_wsl_contain_09(root):
SPLITS = [
("voc_2007_train_wsl_contain_09", "VOC2007", "train"),
("voc_2007_val_wsl_contain_09", "VOC2007", "val")
]
for name, dirname, split in SPLITS:
year = 2007 if "2007" in name else 2012
register_pascal_voc_wsl_contain(name, os.path.join(root, dirname), split, year, "09")
MetadataCatalog.get(name).evaluator_type = "pascal_voc"
def register_all_pascal_voc_wsl_contain_095(root):
SPLITS = [
("voc_2007_train_wsl_contain_095", "VOC2007", "train"),
("voc_2007_val_wsl_contain_095", "VOC2007", "val")
]
for name, dirname, split in SPLITS:
year = 2007 if "2007" in name else 2012
register_pascal_voc_wsl_contain(name, os.path.join(root, dirname), split, year, "095")
MetadataCatalog.get(name).evaluator_type = "pascal_voc"
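# Reviewer sketch (not part of this commit): the four register_all_pascal_voc_wsl_contain_*
# helpers above differ only in the threshold suffix ("075", "08", "09", "095"), so they could
# be collapsed into a single parameterized helper along these lines:
def register_all_pascal_voc_wsl_contain_sketch(root, thres):
    splits = [
        ("voc_2007_train_wsl_contain_{}".format(thres), "VOC2007", "train"),
        ("voc_2007_val_wsl_contain_{}".format(thres), "VOC2007", "val"),
    ]
    for name, dirname, split in splits:
        year = 2007 if "2007" in name else 2012
        register_pascal_voc_wsl_contain(name, os.path.join(root, dirname), split, year, thres)
        MetadataCatalog.get(name).evaluator_type = "pascal_voc"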
def register_all_ade20k(root):
root = os.path.join(root, "ADEChallengeData2016")
for name, dirname in [("train", "training"), ("val", "validation")]:
......@@ -314,4 +372,9 @@ if __name__.endswith(".builtin"):
register_all_pascal_voc(_root)
register_all_pascal_voc_wsl(_root)
register_all_pascal_voc_wsl_top1(_root)
register_all_pascal_voc_wsl_thres(_root)
register_all_pascal_voc_wsl_contain_075(_root)
register_all_pascal_voc_wsl_contain_08(_root)
register_all_pascal_voc_wsl_contain_09(_root)
register_all_pascal_voc_wsl_contain_095(_root)
register_all_ade20k(_root)
......@@ -12,7 +12,7 @@ from detectron2.data import DatasetCatalog, MetadataCatalog
from detectron2.structures import BoxMode
from detectron2.utils.file_io import PathManager
__all__ = ["load_voc_instances", "register_pascal_voc", "register_pascal_voc_wsl", "register_pascal_voc_wsl_top1"]
__all__ = ["load_voc_instances", "register_pascal_voc", "register_pascal_voc_wsl", "register_pascal_voc_wsl_top1", "register_pascal_voc_wsl_thres"]
# fmt: off
......@@ -162,6 +162,83 @@ def load_voc_instances_wsl(dirname: str, split: str, class_names: Union[List[str
dicts.append(r)
return dicts
def load_voc_instances_wsl_contain(dirname: str, split: str, thres, class_names: Union[List[str], Tuple[str, ...]]):
# Get the image ids of the corresponding dataset split (train, val, test)
with PathManager.open(os.path.join(dirname, "ImageSets", "Main", split + ".txt")) as f:
fileids = np.loadtxt(f, dtype=str)  # np.str is deprecated in recent NumPy; the builtin str is equivalent
# For the single-input file variant (kept commented out)
# print("load from {}/single_voc07_wsl_{}_contain.json".format(dirname, split))
# annotation_wsl = json.load(open(
# "{}/single_voc07_wsl_{}_contain.json".format(dirname, split), "r"
# ))
# Load the annotations; the WSL prediction results are saved in JSON format
if "07" in dirname:
annotation_wsl = json.load(open(
"{}/voc07_wsl_{}_contain_{}.json".format(dirname, split, thres), "r"
))
elif "12" in dirname:
annotation_wsl = json.load(open(
"{}/casd_voc12_wsl_{}_contain_{}.json".format(dirname, split, thres), "r"
))
else:
assert False, "Wrong dirname: {}".format(dirname)
multi_class_labels = None
if "multi_label" in annotation_wsl:
multi_class_labels = annotation_wsl.pop("multi_label")
annotation_dirname = PathManager.get_local_path(os.path.join(dirname, "Annotations/"))
dicts = []
for fileid in fileids:
anno = annotation_wsl[str(int(fileid))]
jpeg_file = os.path.join(dirname, "JPEGImages", fileid + ".jpg")
anno_file = os.path.join(annotation_dirname, fileid + ".xml")
if not os.path.isfile(anno_file):
with Image.open(jpeg_file) as img:
width, height = img.size
r = {"file_name": jpeg_file, "image_id": fileid, "height": height, "width": width}
instances = []
for obj in anno:
bbox = obj["bbox"]
bbox = [int(i) for i in bbox]  # predicted bboxes are floats, convert them to an int list
category_id = obj["category_id"] - 1  # indices were shifted by +1 when saved (for TIDE statistics), so shift back here
instances.append(
{
"category_id": category_id, "bbox": bbox, "bbox_mode": BoxMode.XYXY_ABS
}
)
r["annotations"] = instances
if multi_class_labels is not None:
r["multi_label"] = multi_class_labels[str(int(fileid))]
dicts.append(r)
continue
with PathManager.open(anno_file) as f:
tree = ET.parse(f)
r = {
"file_name": jpeg_file,
"image_id": fileid,
"height": int(tree.findall("./size/height")[0].text),
"width": int(tree.findall("./size/width")[0].text),
}
instances = []
# Extract the (pseudo) GT from annotation_wsl here, not the real GT from the XML annotation file
for obj in anno:
bbox = obj["bbox"]
bbox = [int(i) for i in bbox]
category_id = obj["category_id"] - 1
instances.append(
{
"category_id": category_id, "bbox": bbox, "bbox_mode": BoxMode.XYXY_ABS
}
)
r["annotations"] = instances
if multi_class_labels is not None:
r["multi_label"] = multi_class_labels[str(int(fileid))]
dicts.append(r)
return dicts
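# Reviewer sketch (not part of this commit): judging from how the loader above indexes the file,
# voc07_wsl_{split}_contain_{thres}.json is assumed to be keyed by image id as a string with
# leading zeros stripped, each value being a list of predicted instances in XYXY pixel
# coordinates with 1-based category ids, plus an optional top-level "multi_label" entry:
_EXAMPLE_WSL_CONTAIN_JSON = {
    "9": [
        # bboxes are floats from the WSL predictor; the loader casts them to int and
        # shifts category_id back to 0-based
        {"bbox": [48.2, 240.7, 195.1, 371.0], "category_id": 12},
        {"bbox": [8.0, 12.5, 352.3, 498.0], "category_id": 15},
    ],
    # "multi_label": {"9": ...},  # per-image multi-class labels; exact format not shown in this diff
}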
def load_voc_instances_wsl_top1(dirname: str, split: str, class_names: Union[List[str], Tuple[str, ...]]):
# Get the image ids of the corresponding dataset split (train, val, test)
with PathManager.open(os.path.join(dirname, "ImageSets", "Main", split + ".txt")) as f:
......@@ -220,6 +297,64 @@ def load_voc_instances_wsl_top1(dirname: str, split: str, class_names: Union[Lis
dicts.append(r)
return dicts
def load_voc_instances_wsl_thres(dirname: str, split: str, class_names: Union[List[str], Tuple[str, ...]]):
# Get the image ids of the corresponding dataset split (train, val, test)
with PathManager.open(os.path.join(dirname, "ImageSets", "Main", split + ".txt")) as f:
fileids = np.loadtxt(f, dtype=str)  # np.str is deprecated in recent NumPy; the builtin str is equivalent
# Load the annotations; the WSL prediction results are saved in JSON format
annotation_wsl = json.load(open(
"{}/voc07_wsl_{}_only_thres.json".format(dirname, split), "r"
))
annotation_dirname = PathManager.get_local_path(os.path.join(dirname, "Annotations/"))
dicts = []
for fileid in fileids:
anno = annotation_wsl[str(int(fileid))]
jpeg_file = os.path.join(dirname, "JPEGImages", fileid + ".jpg")
anno_file = os.path.join(annotation_dirname, fileid + ".xml")
if not os.path.isfile(anno_file):
with Image.open(jpeg_file) as img:
width, height = img.size
r = {"file_name": jpeg_file, "image_id": fileid, "height": height, "width": width}
instances = []
for obj in anno:
bbox = obj["bbox"]
bbox = [int(i) for i in bbox]  # predicted bboxes are floats, convert them to an int list
category_id = obj["category_id"] - 1  # indices were shifted by +1 when saved (for TIDE statistics), so shift back here
instances.append(
{
"category_id": category_id, "bbox": bbox, "bbox_mode": BoxMode.XYXY_ABS
}
)
r["annotations"] = instances
dicts.append(r)
continue
with PathManager.open(anno_file) as f:
tree = ET.parse(f)
r = {
"file_name": jpeg_file,
"image_id": fileid,
"height": int(tree.findall("./size/height")[0].text),
"width": int(tree.findall("./size/width")[0].text),
}
instances = []
# Extract the (pseudo) GT from annotation_wsl here, not the real GT from the XML annotation file
for obj in anno:
bbox = obj["bbox"]
bbox = [int(i) for i in bbox]
category_id = obj["category_id"] - 1
instances.append(
{
"category_id": category_id, "bbox": bbox, "bbox_mode": BoxMode.XYXY_ABS
}
)
r["annotations"] = instances
dicts.append(r)
return dicts
def register_pascal_voc(name, dirname, split, year, class_names=CLASS_NAMES):
DatasetCatalog.register(name, lambda: load_voc_instances(dirname, split, class_names))
MetadataCatalog.get(name).set(
......@@ -233,9 +368,22 @@ def register_pascal_voc_wsl(name, dirname, split, year, class_names=CLASS_NAMES)
thing_classes=list(class_names), dirname=dirname, year=year, split=split
)
def register_pascal_voc_wsl_contain(name, dirname, split, year, thres, class_names=CLASS_NAMES):
DatasetCatalog.register(name, lambda: load_voc_instances_wsl_contain(dirname, split, thres, class_names))
MetadataCatalog.get(name).set(
thing_classes=list(class_names), dirname=dirname, year=year, split=split
)
# Registration for the top-1 PGT version
def register_pascal_voc_wsl_top1(name, dirname, split, year, class_names=CLASS_NAMES):
DatasetCatalog.register(name, lambda: load_voc_instances_wsl_top1(dirname, split, class_names))
MetadataCatalog.get(name).set(
thing_classes=list(class_names), dirname=dirname, year=year, split=split
)
# Registration for the threshold PGT version
def register_pascal_voc_wsl_thres(name, dirname, split, year, class_names=CLASS_NAMES):
DatasetCatalog.register(name, lambda: load_voc_instances_wsl_thres(dirname, split, class_names))
MetadataCatalog.get(name).set(
thing_classes=list(class_names), dirname=dirname, year=year, split=split
)
\ No newline at end of file
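# Reviewer sketch (not part of this commit): the new helpers are wired up in builtin.py, but they
# can also be called directly. A minimal, hypothetical usage example; the module path and the
# dataset root "datasets/VOC2007" are assumptions based on the standard detectron2 layout.
from detectron2.data import DatasetCatalog
from detectron2.data.datasets.pascal_voc import register_pascal_voc_wsl_thres

register_pascal_voc_wsl_thres("voc_2007_train_wsl_thres", "datasets/VOC2007", "train", 2007)
dataset_dicts = DatasetCatalog.get("voc_2007_train_wsl_thres")
print(len(dataset_dicts), dataset_dicts[0]["annotations"][:2])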
......@@ -300,10 +300,30 @@ class FastRCNNOutputs:
self.gt_boxes.tensor[fg_inds],
reduction="sum",
)
elif self.box_reg_loss_type == "smooth_l1_mean":
gt_proposal_deltas = self.box2box_transform.get_deltas(
self.proposals.tensor, self.gt_boxes.tensor
)
loss_box_reg = smooth_l1_loss(
self.pred_proposal_deltas[fg_inds[:, None], gt_class_cols],
gt_proposal_deltas[fg_inds],
self.smooth_l1_beta,
reduction="mean",
)
elif self.box_reg_loss_type == "none":
gt_proposal_deltas = self.box2box_transform.get_deltas(
self.proposals.tensor, self.gt_boxes.tensor
)
loss_box_reg = smooth_l1_loss(
self.pred_proposal_deltas[fg_inds[:, None], gt_class_cols],
gt_proposal_deltas[fg_inds],
self.smooth_l1_beta,
reduction="none",
).sum(1)
else:
raise ValueError(f"Invalid bbox reg loss type '{self.box_reg_loss_type}'")
loss_box_reg = loss_box_reg / self.gt_classes.numel()
if not (self.box_reg_loss_type in ["smooth_l1_mean"]):
loss_box_reg = loss_box_reg / self.gt_classes.numel()
return loss_box_reg
def losses(self):
......@@ -511,6 +531,14 @@ class FastRCNNOutputLayers(nn.Module):
loss_box_reg = smooth_l1_loss(
fg_pred_deltas, gt_pred_deltas, self.smooth_l1_beta, reduction="mean"
)
elif self.box_reg_loss_type == "none":
gt_pred_deltas = self.box2box_transform.get_deltas(
proposal_boxes[fg_inds],
gt_boxes[fg_inds],
)
loss_box_reg = smooth_l1_loss(
fg_pred_deltas, gt_pred_deltas, self.smooth_l1_beta, reduction="none"
).sum(1)
else:
raise ValueError(f"Invalid bbox reg loss type '{self.box_reg_loss_type}'")
# The reg loss is normalized using the total number of regions (R), not the number
......@@ -524,7 +552,7 @@ class FastRCNNOutputLayers(nn.Module):
# example in minibatch (2). Normalizing by the total number of regions, R,
# means that the single example in minibatch (1) and each of the 100 examples
# in minibatch (2) are given equal influence.
if not (self.box_reg_loss_type == "smooth_l1_mean"):
if not (self.box_reg_loss_type in ["smooth_l1_mean"]):
loss_box_reg = loss_box_reg / max(gt_classes.numel(), 1.0)
# return loss_box_reg / max(gt_classes.numel(), 1.0) # return 0 if empty
return loss_box_reg
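# Reviewer sketch (not part of this commit): how the reduction modes relate, assuming N_fg
# foreground proposals (4 deltas each) and R = gt_classes.numel() total sampled proposals:
#   "smooth_l1"       -> elementwise sum inside smooth_l1_loss, divided by R afterwards
#   "smooth_l1_mean"  -> averaged over the N_fg * 4 elements inside smooth_l1_loss, so the
#                        later division by R is skipped (hence the new guard above)
#   "none"            -> .sum(1) yields a per-proposal loss vector, presumably so callers can
#                        weight proposals individually (an assumption, not stated in this diff)
import torch
from fvcore.nn import smooth_l1_loss

pred = torch.randn(5, 4)   # hypothetical foreground box deltas (N_fg = 5)
gt = torch.randn(5, 4)
beta = 0.0
R = 8                      # hypothetical total number of sampled proposals

loss_sum = smooth_l1_loss(pred, gt, beta, reduction="sum") / R
loss_mean = smooth_l1_loss(pred, gt, beta, reduction="mean")            # internally divides by 5 * 4
per_proposal = smooth_l1_loss(pred, gt, beta, reduction="none").sum(1)  # shape (5,)
assert torch.allclose(loss_sum, per_proposal.sum() / R)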
......