a NbMg62@sddlZddlmZmZmZmZmZddlm Z ddl m Z ddl Z ddl Z ddlZddlZddlZddlZddlmZddlmZGdd d e ZGd d d eZGd d d eZGdddeZddZedkr ddlmZddlZeededddZeddS)N)AnyCallableListOptionalTuple)Image) VisionDataset)tqdm)MyPathcseZdZdZdeeeeeeeeddfdd Zee j ddd Z ee e dd d Z eee e fd d dZdedddZddZedddZZS) CocoDetectiona`MS Coco Detection `_ Dataset. It requires the `COCO API to be installed `_. Args: root (string): Root directory where images are downloaded to. annFile (string): Path to json annotation file. transform (callable, optional): A function/transform that takes in an PIL image and returns a transformed version. E.g, ``transforms.PILToTensor`` target_transform (callable, optional): A function/transform that takes in the target and transforms it. transforms (callable, optional): A function/transform that takes input sample and its target as entry and returns a transformed version. NT)rootannFile transformtarget_transform transformsreturnc sXt||||ddlm}|||_tt|jj|_ ddg|_ ||_ ||_ dS)Nr)COCOimagetext) super__init__pycocotools.cocorcocolistsortedimgskeysids column_namesget_imgget_cap) selfr rrrrr r!r __class__v/data/vision/torralba/selfmanaged/torralba/projects/jomat/hui/stable_diffusion/release/hf_demo/custom_datasets/coco.pyr"s    zCocoDetection.__init__idrcCs^|j|dd}ttj|j|d }t|d}Wdn1sP0Y|S)Nr file_namerbRGB) rloadImgsopenospathjoinr rconvert)r"r(r/fimgr%r%r& _load_image5s.zCocoDetection._load_imagecCs|j|j|SN)rloadAnns getAnnIdsr"r(r%r%r& _load_target<szCocoDetection._load_targetindexrcCs\|j|}d|i}|jr*||}||d<|jrD||}|g|d<|jdurX||}|S)Nr(rcaption)rr r4r!r9r)r"r;r(retrtargetr%r%r& __getitem__?s      zCocoDetection.__getitem__')ncCsf|dus|dkr|St|}||ks(J|jdd||d|}||_td|dt||S)NzCOCO dataset subsampled from z to )lenrprint)r"rAZori_lenrr%r%r& subsampleNs zCocoDetection.subsamplecCs ||_|Sr5)r)r"rr%r%r&with_transformZszCocoDetection.with_transformrcCs t|jSr5)rCrr"r%r%r&__len__^szCocoDetection.__len__)NNNTT)r@)__name__ __module__ __qualname____doc__strrrrintrr4rrr9rr?rErFrI __classcell__r%r%r#r&r s( r cs,eZdZdZeeedfdd ZZS) CocoCaptionsa`MS Coco Captions `_ Dataset. It requires the `COCO API to be installed `_. Args: root (string): Root directory where images are downloaded to. annFile (string): Path to json annotation file. transform (callable, optional): A function/transform that takes in an PIL image and returns a transformed version. E.g, ``transforms.PILToTensor`` target_transform (callable, optional): A function/transform that takes in the target and transforms it. transforms (callable, optional): A function/transform that takes input sample and its target as entry and returns a transformed version. Example: .. code:: python import torchvision.datasets as dset import torchvision.transforms as transforms cap = dset.CocoCaptions(root = 'dir where images are', annFile = 'json annotation file', transform=transforms.PILToTensor()) print('Number of samples: ', len(cap)) img, target = cap[3] # load 4th sample print("Image Size: ", img.size()) print(target) Output: :: Number of samples: 82783 Image Size: (3L, 427L, 640L) [u'A plane emitting smoke stream flying over a mountain.', u'A plane darts across a bright blue sky behind a mountain covered in snow', u'A plane leaves a contrail above the snowy mountain top.', u'A mountain that has a plane flying overheard in the distance.', u'A mountain view with a plume of smoke in the background'] r'csddt|DS)NcSsg|] }|dqS)r<r%).0annr%r%r& z-CocoCaptions._load_target..)rr9r8r#r%r&r9szCocoCaptions._load_target) rJrKrLrMrOrrNr9rPr%r%r#r&rQcs*rQc sTeZdZgdZd eeeeeeeeeeeddfdd Zd d d Z Z S) CocoCaptions_clip_filtered)paintingdrawingZgraffitiNFe/afs/csail.mit.edu/u/h/huiren/code/diffusion/stable_diffusion/data/coco/coco_clip_filtered_ids.pickle)r rrrr regenerateid_filerc s t|||||tjtj|ddtj|rr|srt|d}t ||_ Wdq1sf0Yn| \|_ } | d\|_ } t d| dt d| dt|d,}t|j |t d |Wdn1s0Yt d t|dS) NT)exist_okr*gffffff?znaive Filtered z imageszClip Filtered wbzFiltered ids saved to zCOCO filtered dataset size: )rrr.makedirsr/dirnameexistsr-pickleloadr naive_filterZ clip_filterrDdumprC) r"r rrrrrZr[r2naive_filtered_numZclip_filtered_numr#r%r&rs  , ,z#CocoCaptions_clip_filtered.__init__rWcCs^g}d}|jD]F}||}d}|D] }||vr$d}|d7}qFq$|s||q||fS)NrFTr )rr9lowerappend)r"Z filter_promptnew_idsrer(r>filteredpromptr%r%r&rcs    z'CocoCaptions_clip_filtered.naive_filter)NNNFrY)rW) rJrKrLZpositive_promptrNrrboolrrcrPr%r%r#r&rVs"rVc seZdZededddddfeeeeeeeeeddfdd Zdd Z e d d d Z e e e e fd ddZZS)CustomCocoCaptionscoco_valcoco_caption_valr/afs/csail.mit.edu/u/h/huiren/code/diffusion/stable_diffusion/jomat-code/filtering/ms_coco_captions_testset100.txtN)r r custom_filerrrrcs8t|||||ddg|_||_||||_dS)Nrr)rrrrpload_custom_datar)r"r rrprrrr#r%r&rs   zCustomCocoCaptions.__init__cCsg|_t|d}|}Wdn1s.0Y|dd}||_|ddD]v}|d}t|t|kr|dg}|d|ddg7}||d|}t|t|ksJ|j|q\t j |j|d|_dS)Nrr,r rB)columns) custom_datar- readlinesstripsplitheadrCr0rgpd DataFrame)r"rpr2datarylineZsub_dataZ sub_data_newr%r%r&rqs & z#CustomCocoCaptions.load_custom_datarGcCs t|jSr5)rCrurHr%r%r&rIszCustomCocoCaptions.__len__r:cCsx|jj|}t|d}d|i}|jr8||}||d<|jrP|d}|g|d<t|d|d<|jdurt||}|S)Nimage_idr(rr< random_seedseed)ruilocrOr r4r!r)r"r;r|r(r=rr<r%r%r&r?s      zCustomCocoCaptions.__getitem__)rJrKrLr db_root_dirrNrrrrqrOrIrrr?rPr%r%r#r&rlsDrlcCstddd}|jjddgd}g}|D]}||jj|7}q&ttdtdd d }|j}t|t|}t |}t |d }t d d }t ||Wdn1s0YdS)Nz\/afs/csail.mit.edu/u/h/huiren/code/diffusion/stable_diffusion/.datasets/coco_2017/train2017/zv/afs/csail.mit.edu/u/h/huiren/code/diffusion/stable_diffusion/.datasets/coco_2017/annotations/instances_train2017.json)r rpersonZanimal)ZsupNmsZ coco_trainZcoco_caption_trainF)r rrZdzk/afs/csail.mit.edu/u/h/huiren/code/diffusion/stable_diffusion/data/coco/coco_clip_filtered_subset100.pickler])r rZ getCatIdsZ catToImgsrVr rrsetrrandomsampler-rard)Z coco_instanceZdiscard_cat_idZdiscard_img_idcat_idZcoco_clip_filteredZcoco_clip_filtered_idsrhr2r%r%r&get_validation_sets   r__main__rmrnro)r rrp)os.pathr.typingrrrrrPILrZtorchvision.datasets.visionrracsvpandasrztorch torchvisionrer Zmypathr r rQrVrlrrJrrdatasetr%r%r%r&s.    Q/Q1