U S du @s8ddlZddlZddlZddlZddlZddlZddlZddlZddl Z ddl Z ddl Z ddl Z ddlmZddlmZddlmZmZddlmZddlmZddlmZddl mZddlmZdd lm Z m!Z!m"Z"dd l#m$Z$dd l%m&Z&dd l'm(Z(ej)*d ddl+m,Z,ddl-m.Z.ddZ/ddZ0GdddeZ1ddZ2Gddde j3Z4Gddde!Z5ddZ6ddZ7Gd d!d!e!Z8Gd"d#d#e!Z9e:d$kr4ej;e?e>Z>e>@\ZAZBeAjCstDej)EeAjFdd&ZGej)HeGdZIeId'eAjCZJej)KeAjLeJZLej)KeLd(d)ZMd*ZNej)OeMreMeA_PeQeej)KeLd+ZReReAjFeA_FeLEd,ZSeSd&ZJd-ZNej)KeLd(ZTej)KeLd.ZUejVeLd-d/ejVeTd-d/ejVeUd-d/zؐzpd1d2eAjFDZ_e`eBZaejbe_eafZceNrd-ecjdje_fecgd3ehZieijd4ehZkd5ekd6<e0eAD]ZlemeAelekel<qLd7ekkrxekd6=d-Znnekd7Zoe\d8eod*ZnejpfekZqekei_Xe.ecjdZderZsd9eJeLeJd:d;dd;d?Ztetd@ZudAeikreijvZwnehZwebeuewZwe.ewesdA<dBeTdCd-d-dDd;ZxdEeikr8eijyZznehZzebexezZze\dFeze{e j|e{dGkr~e.ezesdH<dIeAjNe;eLeTeUeceidJd;dKdLdMd-dNd;dOdPdQid;dRdSidTZ}e{e j|e{dGkre}~dHezidUeikreijZnehZe\dVdWdBej)KeTdXdYd-d&dZd-d[d;iZe}~eebe}eZd\ekrjeeqd]rjeqjPejjed^<nd\ekrzed\=d_d2eDesdU<ejeqfd`e(d*daiesZXeLeX_Le.ecjZeee\dbejD]2Zle\eldcejeljj:dceejelqecjjejecjdjZZens>eeijXjddEddZndeZdfeijXkrXeijXjZndeZe\dgeeeijX_eAjreeeeed_e\dhedjeeeen eed_e\die\djedjdkdldmZdndoZddlZeejeeejeeAjr2zeXedeWnek r0eYnXeAjsNeXjsNeXedeWn`ek reAjWreXjYdkrz ddlZWnek rddlZYnXeYnXW5eAjWreAjNseXjYdkrej)EeL\ZZZCej)KeZd0eCZZejVej)EeZdd-d/e[eLeZeXjYdkr2e\eXj]^XdS)pN)version) OmegaConf) DataLoaderDataset)partial)Image)seed_everything)Trainer)ModelCheckpointCallbackLearningRateMonitor)rank_zero_only)rank_zero_info) DDPPluginz./stable_diffusion)Txt2ImgIterableBaseDataset)instantiate_from_configc Ksdd}tjf|}|jddtddddd |jd d tdddd d |jd ddddtd|jdd|ddddd |jd|ddddd |jdddd|jdd|ddddd |jd!d"td#d$d%|jd&d'tdd(d%|jd)d*td+d,d%|jd-d.dd/d0|S)1NcSs<t|tr|S|dkrdS|dkr.dStddS)N)yestruety1T)nofalsefn0FzBoolean value expected.) isinstanceboollowerargparseArgumentTypeError)vr"O/home/ugrad/diffusion-model-based-task-driven-training/instruct-pix2pix/main.pystr2bools   zget_parser..str2boolz-nz--nameT?zpostfix for logdir)typeconstdefaultnargshelpz-rz--resumez*resume from logdir or checkpoint in logdirz-bz--base*zbase_config.yamlzpaths to base configs. Loaded from left-to-right. Parameters can be overwritten or added with command-line options of the form `--key value`.)r*metavarr+r)z-tz--trainFtrainz --no-testz disable testz-pz --projectz'name of new or path to existing project)r+z-dz--debugzenable post-mortem debugging)r'r*r(r)r+z-sz--seedzseed for seed_everything)r'r)r+z-fz --postfixzpost-postfix for default namez-lz--logdirlogszdirectory for logging dat shitz --scale_lr store_truez1scale base-lr by ngpu * batch_size * n_accumulate)actionr)r+)rArgumentParser add_argumentstrlistint) parser_kwargsr$parserr"r"r# get_parsers      r:cs8t}t|}|gtfddtDS)Nc3s&|]}t|t|kr|VqdSN)getattr.0kargsoptr"r# sz*nondefault_trainer_args..)rr3r add_argparse_args parse_argssortedvars)rBr9r"r@r#nondefault_trainer_argss  rHc@s(eZdZdZddZddZddZdS) WrappedDatasetzMWraps an arbitrary object with __len__ and __getitem__ into a pytorch datasetcCs ||_dSr;data)selfdatasetr"r"r#__init__szWrappedDataset.__init__cCs t|jSr;)lenrKrLr"r"r#__len__szWrappedDataset.__len__cCs |j|Sr;rJ)rLidxr"r"r# __getitem__szWrappedDataset.__getitem__N)__name__ __module__ __qualname____doc__rNrQrSr"r"r"r#rIsrIcCstjj}|j}|j}t|tr|j|j }|j |||d||_ t j tt j dd}t j t j d||St j t j dd|SdS)Nr)torchutilsrKget_worker_inforMidrr num_records num_workers valid_ids sample_idsnprandomchoicerO get_stateseed)_ worker_inforM worker_id split_size current_idr"r"r#worker_init_fns   rkc sVeZdZdfdd ZddZdddZd d Zdd d Zdd dZdddZ Z S)DataModuleFromConfigNFc st||_t|_|dk r$|n|d|_| |_|dk rN||jd<|j|_|dk rp||jd<t |j | d|_ |dk r||jd<t |j |d|_ |dk r||jd<|j|_||_dS)Nr. validation)shuffletestpredict)superrN batch_sizedictdataset_configsr^use_worker_init_fn_train_dataloadertrain_dataloaderr_val_dataloaderval_dataloader_test_dataloadertest_dataloader_predict_dataloaderpredict_dataloaderwrap) rLrsr.rnrprqrr^shuffle_test_loaderrvshuffle_val_dataloader __class__r"r#rNs$     zDataModuleFromConfig.__init__cCs|jD] }t|q dSr;)ruvaluesr)rLdata_cfgr"r"r# prepare_datasz!DataModuleFromConfig.prepare_datacsDtfddjD_jr@jD]}tj|j|<q&dS)Nc3s |]}|tj|fVqdSr;)rrur=rPr"r#rCsz-DataModuleFromConfig.setup..)rtrudatasetsrrI)rLstager?r"rPr#setups    zDataModuleFromConfig.setupcCsJt|jdt}|s|jr t}nd}t|jd|j|j|r>dnd|ddS)Nr.FT)rsr^rorkpersistent_workersrrrrvrkrrsr^)rLis_iterable_datasetinit_fnr"r"r#rws  z&DataModuleFromConfig._train_dataloadercCs>t|jdts|jrt}nd}t|jd|j|j||ddS)NrnTrsr^rkrorrrLrorr"r"r#rys z$DataModuleFromConfig._val_dataloadercCsLt|jdt}|s|jr t}nd}|o,| }t|jd|j|j||ddS)Nr.rpTrr)rLrorrr"r"r#r{s  z%DataModuleFromConfig._test_dataloadercCs<t|jdts|jrt}nd}t|jd|j|j|ddS)NrqT)rsr^rkrrrr"r"r#r}sz(DataModuleFromConfig._predict_dataloader) NNNNFNFFF)N)F)F)F) rTrUrVrNrrrwryr{r} __classcell__r"r"rr#rls  rlcs,eZdZfddZddZddZZS) SetupCallbackcs8t||_||_||_||_||_||_||_dSr;) rrrNresumenowlogdirckptdircfgdirconfiglightning_config)rLrrrrrrrrr"r#rNs zSetupCallback.__init__cCs0|jdkr,tdtj|jd}||dSNrzSummoning checkpoint. last.ckpt) global_rankprintospathjoinrsave_checkpoint)rLtrainer pl_module ckpt_pathr"r"r#on_keyboard_interrupts z#SetupCallback.on_keyboard_interruptc Cs|jdkrd|jkrrfr"r"r# :szall_gather..cSsg|]}t|qSr")r7item)r>sizer"r"r#r<s)rr)dim)rrrYTensorpickledumps ByteStorage from_buffer ByteTensorrrreshapedtypernumelranger all_gathermaxappend FloatTensorrcatzipcpunumpytobytesloadsr6)rK world_size origin_sizebufferstoragetensor tensor_type local_size size_listmax_size tensor_listrfpadding data_listr new_shape resized_listr"r"r#r sL            rcsXeZdZdfdd ZeddZedd Zdd d Zd dZddZ ddZ Z S) ImageLoggerTFNc st||_||_||_tjj|ji|_ ddt dt t |jdD|_|s`|jg|_||_||_||_| rz| ni|_||_dS)NcSsg|] }d|qS)rmr")r>rr"r"r#rjsz(ImageLogger.__init__..rX)rrrNrescale batch_freq max_imagesplloggersTestTubeLogger _testtubelogger_log_imagesrr7ralog2 log_stepsclampdisabledlog_on_batch_idxlog_images_kwargslog_first_step) rLbatch_frequencyrrincrease_log_stepsrrrrrrr"r#rN`s & zImageLogger.__init__cCsN|D]D}tj||}|dd}|d|}|jjj|||jdqdS)N?@/) global_step) torchvisionrZ make_gridlogger experiment add_imager)rLrimages batch_idxsplitr?gridtagr"r"r#rss zImageLogger._testtubec Cs"tj|d|}ddddd} |D]} tjj|| dd} |jrL| d d } | d d d d d} | } | d t j } d |||| | } tj|| } tjtj| d ddt| | q"d |||} tj|| } t| d&}|D]}|t|dqW5QRXdS)Nrbeforeafterz before-vqz after-gen)realsinputsreconstructionsamples)nrowrrrrXrmrzgs-{:06}_e-{:06}_b-{:06}_{}.pngTrz$gs-{:06}_e-{:06}_b-{:06}_prompt.jsonw )rrrrrZrr transposesqueezerastyperauint8rrrr fromarrayropenwritejsonr)rLsave_dirrrpromptsr current_epochrrootnamesr?rfilenamerrpr"r"r# log_local~s6  zImageLogger.log_localr.c Cs|jr |n|j}||r8t|dr8t|jr8|jdksL|dkr|dkrt|j}|j }|rh| t |j|fd|i|j }W5QRX|ddd|j} ddt| D} |D]} t|| jd|j} || d| || <t t|| d| || <t|| t jr|| || <|jrt || d d || <q||jj||| |j|j||j|d d } | |||j||r|dS) N log_imagesrvalredit c_crossattncSsg|]}|D]}|q qSr"r")r>psrr"r"r#rsz'ImageLogger.log_img..grc_sdSr;r")rAkwargsr"r"r#z%ImageLogger.log_img..)rrcheck_frequencyhasattrcallablerrr'rtrainingevalrYno_gradrrminshaperrrdetachrrrrrrgetr.) rLrbatchrr check_idxris_trainrrr?Nrr"r"r#log_imgsJ   "zImageLogger.log_imgcCsH||jdks||jkrD|dks&|jrDt|jdkr@|jddSdS)NrTF)rrrrOpop)rLr/r"r"r#r$s zImageLogger.check_frequencycCs,|js(|jdks|jr(|j|||dddS)Nrr.r)rrrr2rLrroutputsr.rdataloader_idxr"r"r#on_train_batch_endszImageLogger.on_train_batch_endcCsZ|js"|jdkr"|j|||ddt|drV|jrV|ddkrV|dkrV|j|||ddS)Nrrr4calibrate_grad_norm)r)rrr2r%r9 log_gradientsr5r"r"r#on_validation_batch_ends  z#ImageLogger.on_validation_batch_end)TTTFFFN)r.) rTrUrVrNr rrr2r$r8r<rr"r"rr#r_s   $rc@seZdZddZddZdS) CUDACallbackcCs*tj|jtj|jt|_dSr;)rYrreset_peak_memory_statsroot_gpu synchronizetime start_timerr"r"r#on_train_epoch_startsz!CUDACallback.on_train_epoch_startcCstj|jtj|jd}t|j}z@|j|}|j|}t d|ddt d|ddWnt k rYnXdS)NizAverage Epoch time: z.2fz secondszAverage Peak memory MiB) rYrr@r?max_memory_allocatedrArBtraining_type_pluginreducerAttributeError)rLrrr6 max_memory epoch_timer"r"r#on_train_epoch_ends  zCUDACallback.on_train_epoch_endN)rTrUrVrCrKr"r"r"r#r=sr=__main__z%Y-%m-%dT%H-%M-%Srrf checkpointsrFzconfigs/*.yamlrTconfigsr debug_runscCsg|]}t|qSr")rload)r>cfgr"r"r#r<srrrddp acceleratorgpuszRunning on GPUs z%pytorch_lightning.loggers.WandbLogger)namerr\)targetparamsz(pytorch_lightning.loggers.TestTubeLoggertesttube)rUr)wandbrXrYrz+pytorch_lightning.callbacks.ModelCheckpointz {epoch:06})dirpathrverbose save_lastmodelcheckpointzMerged modelckpt-cfg: z1.4.0checkpoint_callbackzmain.SetupCallback)rrrrrrrzmain.ImageLoggeri)rrrzmain.LearningRateMonitorlogging_intervalsteprVzmain.CUDACallback)setup_callback image_loggerlearning_rate_logger cuda_callbackrzeCaution: Saving checkpoints every n train steps without deleting. This might require some free space.rrz{epoch:06}-{step:09}i)rZrr[ save_top_kevery_n_train_stepssave_weights_onlyignore_keys_callbackresume_from_checkpointrcCsg|]}tt|qSr")r callbacks_cfgr=r"r"r#rsplugins)find_unused_parametersz#### Data #####z, ,rXaccumulate_grad_batcheszaccumulate_grad_batches = zrSetting learning rate to {:.2e} = {} (accumulate_grad_batches) * {} (num_gpus) * {} (batchsize) * {:.2e} (base_lr)z++++ NOT USING LR SCALING ++++zSetting learning rate to z.2ecOs.tjdkr*tdtjtd}t|dSr)rrrrrrrr)rAr!rr"r"r#melks rpcOstjdkrddl}|dS)Nr)rrpudb set_trace)rAr!rqr"r"r#diveins rs)rrsysdatetimeglobrrarArYrpytorch_lightningrrr packagingr omegaconfrtorch.utils.datarr functoolsrPILrtorch.distributed distributedrrpytorch_lightning.trainerr pytorch_lightning.callbacksr r r 'pytorch_lightning.utilities.distributedr pytorch_lightning.utilitiesrpytorch_lightning.pluginsrrr ldm.data.baserldm.utilrr:rHrIrkLightningDataModulerlrrrrr=rTrstrftimegetcwdr9rDparse_known_argsrBunknownrUAssertionErrorrbase cfg_fnamesplitextcfg_namenownamerrckptrisfilerjrF base_configs_tmprrrdebugrrdstrenamerprofilersummaryrN from_dotlistclimergermodelrWload_emar3rrr-trainer_configr?r<rgpuinfo Namespace trainer_optrttrainer_kwargsdefault_logger_cfgsdefault_logger_cfgr logger_cfgdefault_modelckpt_cfgr] modelckpt_cfgparse __version__default_callbacks_cfgupdaterrk)default_metrics_over_trainsteps_ckpt_dictr%rifrom_argparse_argsrKrrrrrOrsbase_learning_ratebsbase_lrrTstripngpuroscale_lr learning_raterrprssignalSIGUSR1SIGUSR2r.fit Exceptionno_test interruptedrprqdebugger ImportErrorpdb post_mortemr"r"r"r#s(             d N%?w *                            0