# Copyright (c) Facebook, Inc. and its affiliates. import functools import json import logging import multiprocessing as mp import numpy as np import os from itertools import chain import custom_pycocotools.mask as mask_util from PIL import Image from custom_detectron2.structures import BoxMode from custom_detectron2.utils.comm import get_world_size from custom_detectron2.utils.file_io import PathManager from custom_detectron2.utils.logger import setup_logger try: import cv2 # noqa except ImportError: # OpenCV is an optional dependency at the moment pass logger = logging.getLogger(__name__) def _get_cityscapes_files(image_dir, gt_dir): files = [] # scan through the directory cities = PathManager.ls(image_dir) logger.info(f"{len(cities)} cities found in '{image_dir}'.") for city in cities: city_img_dir = os.path.join(image_dir, city) city_gt_dir = os.path.join(gt_dir, city) for basename in PathManager.ls(city_img_dir): image_file = os.path.join(city_img_dir, basename) suffix = "leftImg8bit.png" assert basename.endswith(suffix), basename basename = basename[: -len(suffix)] instance_file = os.path.join(city_gt_dir, basename + "gtFine_instanceIds.png") label_file = os.path.join(city_gt_dir, basename + "gtFine_labelIds.png") json_file = os.path.join(city_gt_dir, basename + "gtFine_polygons.json") files.append((image_file, instance_file, label_file, json_file)) assert len(files), "No images found in {}".format(image_dir) for f in files[0]: assert PathManager.isfile(f), f return files def load_cityscapes_instances(image_dir, gt_dir, from_json=True, to_polygons=True): """ Args: image_dir (str): path to the raw dataset. e.g., "~/cityscapes/leftImg8bit/train". gt_dir (str): path to the raw annotations. e.g., "~/cityscapes/gtFine/train". from_json (bool): whether to read annotations from the raw json file or the png files. to_polygons (bool): whether to represent the segmentation as polygons (COCO's format) instead of masks (cityscapes's format). Returns: list[dict]: a list of dicts in Detectron2 standard format. (See `Using Custom Datasets `_ ) """ if from_json: assert to_polygons, ( "Cityscapes's json annotations are in polygon format. " "Converting to mask format is not supported now." ) files = _get_cityscapes_files(image_dir, gt_dir) logger.info("Preprocessing cityscapes annotations ...") # This is still not fast: all workers will execute duplicate works and will # take up to 10m on a 8GPU server. pool = mp.Pool(processes=max(mp.cpu_count() // get_world_size() // 2, 4)) ret = pool.map( functools.partial(_cityscapes_files_to_dict, from_json=from_json, to_polygons=to_polygons), files, ) logger.info("Loaded {} images from {}".format(len(ret), image_dir)) # Map cityscape ids to contiguous ids from cityscapesscripts.helpers.labels import labels labels = [l for l in labels if l.hasInstances and not l.ignoreInEval] dataset_id_to_contiguous_id = {l.id: idx for idx, l in enumerate(labels)} for dict_per_image in ret: for anno in dict_per_image["annotations"]: anno["category_id"] = dataset_id_to_contiguous_id[anno["category_id"]] return ret def load_cityscapes_semantic(image_dir, gt_dir): """ Args: image_dir (str): path to the raw dataset. e.g., "~/cityscapes/leftImg8bit/train". gt_dir (str): path to the raw annotations. e.g., "~/cityscapes/gtFine/train". Returns: list[dict]: a list of dict, each has "file_name" and "sem_seg_file_name". """ ret = [] # gt_dir is small and contain many small files. make sense to fetch to local first gt_dir = PathManager.get_local_path(gt_dir) for image_file, _, label_file, json_file in _get_cityscapes_files(image_dir, gt_dir): label_file = label_file.replace("labelIds", "labelTrainIds") with PathManager.open(json_file, "r") as f: jsonobj = json.load(f) ret.append( { "file_name": image_file, "sem_seg_file_name": label_file, "height": jsonobj["imgHeight"], "width": jsonobj["imgWidth"], } ) assert len(ret), f"No images found in {image_dir}!" assert PathManager.isfile( ret[0]["sem_seg_file_name"] ), "Please generate labelTrainIds.png with cityscapesscripts/preparation/createTrainIdLabelImgs.py" # noqa return ret def _cityscapes_files_to_dict(files, from_json, to_polygons): """ Parse cityscapes annotation files to a instance segmentation dataset dict. Args: files (tuple): consists of (image_file, instance_id_file, label_id_file, json_file) from_json (bool): whether to read annotations from the raw json file or the png files. to_polygons (bool): whether to represent the segmentation as polygons (COCO's format) instead of masks (cityscapes's format). Returns: A dict in Detectron2 Dataset format. """ from cityscapesscripts.helpers.labels import id2label, name2label image_file, instance_id_file, _, json_file = files annos = [] if from_json: from shapely.geometry import MultiPolygon, Polygon with PathManager.open(json_file, "r") as f: jsonobj = json.load(f) ret = { "file_name": image_file, "image_id": os.path.basename(image_file), "height": jsonobj["imgHeight"], "width": jsonobj["imgWidth"], } # `polygons_union` contains the union of all valid polygons. polygons_union = Polygon() # CityscapesScripts draw the polygons in sequential order # and each polygon *overwrites* existing ones. See # (https://github.com/mcordts/cityscapesScripts/blob/master/cityscapesscripts/preparation/json2instanceImg.py) # noqa # We use reverse order, and each polygon *avoids* early ones. # This will resolve the ploygon overlaps in the same way as CityscapesScripts. for obj in jsonobj["objects"][::-1]: if "deleted" in obj: # cityscapes data format specific continue label_name = obj["label"] try: label = name2label[label_name] except KeyError: if label_name.endswith("group"): # crowd area label = name2label[label_name[: -len("group")]] else: raise if label.id < 0: # cityscapes data format continue # Cityscapes's raw annotations uses integer coordinates # Therefore +0.5 here poly_coord = np.asarray(obj["polygon"], dtype="f4") + 0.5 # CityscapesScript uses PIL.ImageDraw.polygon to rasterize # polygons for evaluation. This function operates in integer space # and draws each pixel whose center falls into the polygon. # Therefore it draws a polygon which is 0.5 "fatter" in expectation. # We therefore dilate the input polygon by 0.5 as our input. poly = Polygon(poly_coord).buffer(0.5, resolution=4) if not label.hasInstances or label.ignoreInEval: # even if we won't store the polygon it still contributes to overlaps resolution polygons_union = polygons_union.union(poly) continue # Take non-overlapping part of the polygon poly_wo_overlaps = poly.difference(polygons_union) if poly_wo_overlaps.is_empty: continue polygons_union = polygons_union.union(poly) anno = {} anno["iscrowd"] = label_name.endswith("group") anno["category_id"] = label.id if isinstance(poly_wo_overlaps, Polygon): poly_list = [poly_wo_overlaps] elif isinstance(poly_wo_overlaps, MultiPolygon): poly_list = poly_wo_overlaps.geoms else: raise NotImplementedError("Unknown geometric structure {}".format(poly_wo_overlaps)) poly_coord = [] for poly_el in poly_list: # COCO API can work only with exterior boundaries now, hence we store only them. # TODO: store both exterior and interior boundaries once other parts of the # codebase support holes in polygons. poly_coord.append(list(chain(*poly_el.exterior.coords))) anno["segmentation"] = poly_coord (xmin, ymin, xmax, ymax) = poly_wo_overlaps.bounds anno["bbox"] = (xmin, ymin, xmax, ymax) anno["bbox_mode"] = BoxMode.XYXY_ABS annos.append(anno) else: # See also the official annotation parsing scripts at # https://github.com/mcordts/cityscapesScripts/blob/master/cityscapesscripts/evaluation/instances2dict.py # noqa with PathManager.open(instance_id_file, "rb") as f: inst_image = np.asarray(Image.open(f), order="F") # ids < 24 are stuff labels (filtering them first is about 5% faster) flattened_ids = np.unique(inst_image[inst_image >= 24]) ret = { "file_name": image_file, "image_id": os.path.basename(image_file), "height": inst_image.shape[0], "width": inst_image.shape[1], } for instance_id in flattened_ids: # For non-crowd annotations, instance_id // 1000 is the label_id # Crowd annotations have <1000 instance ids label_id = instance_id // 1000 if instance_id >= 1000 else instance_id label = id2label[label_id] if not label.hasInstances or label.ignoreInEval: continue anno = {} anno["iscrowd"] = instance_id < 1000 anno["category_id"] = label.id mask = np.asarray(inst_image == instance_id, dtype=np.uint8, order="F") inds = np.nonzero(mask) ymin, ymax = inds[0].min(), inds[0].max() xmin, xmax = inds[1].min(), inds[1].max() anno["bbox"] = (xmin, ymin, xmax, ymax) if xmax <= xmin or ymax <= ymin: continue anno["bbox_mode"] = BoxMode.XYXY_ABS if to_polygons: # This conversion comes from D4809743 and D5171122, # when Mask-RCNN was first developed. contours = cv2.findContours(mask.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)[ -2 ] polygons = [c.reshape(-1).tolist() for c in contours if len(c) >= 3] # opencv's can produce invalid polygons if len(polygons) == 0: continue anno["segmentation"] = polygons else: anno["segmentation"] = mask_util.encode(mask[:, :, None])[0] annos.append(anno) ret["annotations"] = annos return ret if __name__ == "__main__": """ Test the cityscapes dataset loader. Usage: python -m detectron2.data.datasets.cityscapes \ cityscapes/leftImg8bit/train cityscapes/gtFine/train """ import argparse parser = argparse.ArgumentParser() parser.add_argument("image_dir") parser.add_argument("gt_dir") parser.add_argument("--type", choices=["instance", "semantic"], default="instance") args = parser.parse_args() from custom_detectron2.data.catalog import Metadata from custom_detectron2.utils.visualizer import Visualizer from cityscapesscripts.helpers.labels import labels logger = setup_logger(name=__name__) dirname = "cityscapes-data-vis" os.makedirs(dirname, exist_ok=True) if args.type == "instance": dicts = load_cityscapes_instances( args.image_dir, args.gt_dir, from_json=True, to_polygons=True ) logger.info("Done loading {} samples.".format(len(dicts))) thing_classes = [k.name for k in labels if k.hasInstances and not k.ignoreInEval] meta = Metadata().set(thing_classes=thing_classes) else: dicts = load_cityscapes_semantic(args.image_dir, args.gt_dir) logger.info("Done loading {} samples.".format(len(dicts))) stuff_classes = [k.name for k in labels if k.trainId != 255] stuff_colors = [k.color for k in labels if k.trainId != 255] meta = Metadata().set(stuff_classes=stuff_classes, stuff_colors=stuff_colors) for d in dicts: img = np.array(Image.open(PathManager.open(d["file_name"], "rb"))) visualizer = Visualizer(img, metadata=meta) vis = visualizer.draw_dataset_dict(d) # cv2.imshow("a", vis.get_image()[:, :, ::-1]) # cv2.waitKey() fpath = os.path.join(dirname, os.path.basename(d["file_name"])) vis.save(fpath)