"""
 Copyright (c) 2022, salesforce.com, inc.
 All rights reserved.
 SPDX-License-Identifier: BSD-3-Clause
 For full license text, see the LICENSE file in the repo root or https://opensource.org/licenses/BSD-3-Clause
"""

import logging
import os

import numpy as np
import torch
import torch.nn as nn
from unimernet.common.dist_utils import download_cached_file, is_dist_avail_and_initialized
from unimernet.common.utils import get_abs_path, is_url
from omegaconf import OmegaConf


class BaseModel(nn.Module):
    """Base class for models."""

    def __init__(self):
        super().__init__()

    @property
    def device(self):
        return list(self.parameters())[0].device

    def load_checkpoint(self, url_or_filename):
        """
        Load from a finetuned checkpoint.

        This should expect no mismatch in the model keys and the checkpoint keys.
        """
        if is_url(url_or_filename):
            cached_file = download_cached_file(
                url_or_filename, check_hash=False, progress=True
            )
            checkpoint = torch.load(cached_file, map_location="cpu")
        elif os.path.isfile(url_or_filename):
            checkpoint = torch.load(url_or_filename, map_location="cpu")
        else:
            raise RuntimeError("checkpoint url or path is invalid")

        if "model" in checkpoint.keys():
            state_dict = checkpoint["model"]
        else:
            state_dict = checkpoint

        msg = self.load_state_dict(state_dict, strict=False)

        logging.info(f"Missing keys exist when loading '{url_or_filename}'.")
        logging.info("load checkpoint from %s" % url_or_filename)

        return msg

    @classmethod
    def from_pretrained(cls, model_type):
        """
        Build a pretrained model from default configuration file, specified by model_type.

        Args:
            - model_type (str): model type, specifying architecture and checkpoints.

        Returns:
            - model (nn.Module): pretrained or finetuned model, depending on the configuration.
        """
        model_cfg = OmegaConf.load(cls.default_config_path(model_type)).model
        model = cls.from_config(model_cfg)

        return model

    @classmethod
    def default_config_path(cls, model_type):
        assert (
            model_type in cls.PRETRAINED_MODEL_CONFIG_DICT
        ), "Unknown model type {}".format(model_type)
        return get_abs_path(cls.PRETRAINED_MODEL_CONFIG_DICT[model_type])

    def load_checkpoint_from_config(self, cfg, **kwargs):
        """
        Load checkpoint as specified in the config file.

        If load_finetuned is True, load the finetuned model; otherwise, load the pretrained model.
        When loading the pretrained model, each task-specific architecture may define their
        own load_from_pretrained() method.
        """
        load_pretrained = cfg.get("load_pretrained", True)
        load_finetuned = cfg.get("load_finetuned", False)

        if load_pretrained:
            # load pre-trained weights
            pretrain_path = cfg.get("pretrained", None)
            assert pretrain_path is not None, (
                "Found load_finetuned is False, but pretrain_path is None."
            )
            self.load_from_pretrained(url_or_filename=pretrain_path, **kwargs)
            logging.info(f"Loaded pretrained model '{pretrain_path}'.")

        if load_finetuned:
            finetune_path = cfg.get("finetuned", None)
            assert finetune_path is not None, (
                "Found load_finetuned is True, but finetune_path is None."
            )
            self.load_checkpoint(url_or_filename=finetune_path)
            logging.info(f"Loaded finetuned model '{finetune_path}'.")

    def before_evaluation(self, **kwargs):
        pass

    def show_n_params(self, return_str=True):
        tot = 0
        for p in self.parameters():
            w = 1
            for x in p.shape:
                w *= x
            tot += w
        if return_str:
            if tot >= 1e6:
                return "{:.1f}M".format(tot / 1e6)
            else:
                return "{:.1f}K".format(tot / 1e3)
        else:
            return tot


class BaseEncoder(nn.Module):
    """
    Base class for primitive encoders, such as ViT, TimeSformer, etc.
    """

    def __init__(self):
        super().__init__()

    def forward_features(self, samples, **kwargs):
        raise NotImplementedError

    @property
    def device(self):
        return list(self.parameters())[0].device


class SharedQueueMixin:
    @torch.no_grad()
    def _dequeue_and_enqueue(self, image_feat, text_feat, idxs=None):
        # gather keys before updating the queue
        image_feats = concat_all_gather(image_feat)
        text_feats = concat_all_gather(text_feat)

        batch_size = image_feats.shape[0]

        ptr = int(self.queue_ptr)
        assert self.queue_size % batch_size == 0  # for simplicity

        # replace the keys at ptr (dequeue and enqueue)
        self.image_queue[:, ptr : ptr + batch_size] = image_feats.T
        self.text_queue[:, ptr : ptr + batch_size] = text_feats.T

        if idxs is not None:
            idxs = concat_all_gather(idxs)
            self.idx_queue[:, ptr : ptr + batch_size] = idxs.T

        ptr = (ptr + batch_size) % self.queue_size  # move pointer
        self.queue_ptr[0] = ptr


class MomentumDistilationMixin:
    @torch.no_grad()
    def copy_params(self):
        for model_pair in self.model_pairs:
            for param, param_m in zip(
                model_pair[0].parameters(), model_pair[1].parameters()
            ):
                param_m.data.copy_(param.data)  # initialize
                param_m.requires_grad = False  # not updated by gradient

    @torch.no_grad()
    def _momentum_update(self):
        for model_pair in self.model_pairs:
            for param, param_m in zip(
                model_pair[0].parameters(), model_pair[1].parameters()
            ):
                param_m.data = param_m.data * self.momentum + param.data * (
                    1.0 - self.momentum
                )


class GatherLayer(torch.autograd.Function):
    """
    Gather tensors from all workers with support for backward propagation:
    This implementation does not cut the gradients as torch.distributed.all_gather does.
    """

    @staticmethod
    def forward(ctx, x):
        output = [
            torch.zeros_like(x) for _ in range(torch.distributed.get_world_size())
        ]
        torch.distributed.all_gather(output, x)
        return tuple(output)

    @staticmethod
    def backward(ctx, *grads):
        all_gradients = torch.stack(grads)
        torch.distributed.all_reduce(all_gradients)
        return all_gradients[torch.distributed.get_rank()]


def all_gather_with_grad(tensors):
    """
    Performs all_gather operation on the provided tensors.
    Graph remains connected for backward grad computation.
    """
    world_size = torch.distributed.get_world_size()
    # there is no need for reduction in the single-process case
    if world_size == 1:
        return tensors

    tensor_all = GatherLayer.apply(tensors)
    return torch.cat(tensor_all, dim=0)


@torch.no_grad()
def concat_all_gather(tensor):
    """
    Performs all_gather operation on the provided tensors.
    *** Warning ***: torch.distributed.all_gather has no gradient.
    """
    if not is_dist_avail_and_initialized():
        return tensor

    tensors_gather = [
        torch.ones_like(tensor) for _ in range(torch.distributed.get_world_size())
    ]
    torch.distributed.all_gather(tensors_gather, tensor, async_op=False)

    output = torch.cat(tensors_gather, dim=0)
    return output


def tile(x, dim, n_tile):
    init_dim = x.size(dim)
    repeat_idx = [1] * x.dim()
    repeat_idx[dim] = n_tile
    x = x.repeat(*repeat_idx)
    order_index = torch.LongTensor(
        np.concatenate([init_dim * np.arange(n_tile) + i for i in range(init_dim)])
    )
    return torch.index_select(x, dim, order_index.to(x.device))
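
# Hedged sketch of the expected subclass contract (the class name and config
# path below are hypothetical, not from this repo): concrete models supply a
# PRETRAINED_MODEL_CONFIG_DICT mapping each model_type string to a config
# file, plus a from_config() constructor, which is all that
# BaseModel.from_pretrained() and default_config_path() rely on.
#
#   class MyModel(BaseModel):
#       PRETRAINED_MODEL_CONFIG_DICT = {
#           "my_type": "configs/models/my_model.yaml",
#       }
#
#       @classmethod
#       def from_config(cls, cfg):
#           return cls(...)  # build the model from the OmegaConf node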
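
# A minimal sketch, assuming the script is launched under a distributed
# launcher such as torchrun with a process group already initialized; it is
# not part of the original module. It illustrates the design split between
# the two gather helpers: GatherLayer.backward all-reduces the incoming
# gradients and hands each rank its own slice, so all_gather_with_grad()
# keeps the autograd graph connected across ranks, while concat_all_gather()
# runs under @torch.no_grad() and returns detached tensors (what the
# shared-queue mixin above wants for its negative-sample queues).
def _gather_demo():
    if not is_dist_avail_and_initialized():
        return  # single-process run: nothing to demonstrate
    feat = torch.randn(4, 8, requires_grad=True)
    feat_all = all_gather_with_grad(feat)  # differentiable gather
    feat_all_ng = concat_all_gather(feat)  # gradient-free gather
    assert feat_all.requires_grad and not feat_all_ng.requires_grad
    feat_all.sum().backward()  # gradients flow back to the local feat
    assert feat.grad is not None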
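
# Illustrative usage sketch, assuming nothing beyond this module: tile()
# repeats every element n_tile times along dim (the behaviour of
# torch.repeat_interleave), which BLIP-style retrieval code uses to pair
# each image feature with several candidate captions.
if __name__ == "__main__":
    x = torch.tensor([[1, 2], [3, 4]])
    y = tile(x, dim=0, n_tile=3)
    # rows come out grouped, not cycled: [1, 2] three times, then [3, 4] three times
    assert torch.equal(y, torch.repeat_interleave(x, repeats=3, dim=0))

    if not is_dist_avail_and_initialized():
        # concat_all_gather degrades gracefully outside distributed training:
        # with no process group it returns its input unchanged
        assert torch.equal(concat_all_gather(x), x)

    _gather_demo()  # no-op unless a process group is initialized
    print("base_model sanity checks passed")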