U b%c:@sdZddlmZddlZddlmZddlmZddlm Z m Z ddl m Z m Z ddlmZmZmZmZdd lmZd d d d dgZd?ddZeedddeddedddeddeddedddeddeddeddedddd ZGd!d d ejZGd"d d ejZGd#ddejZGd$d d ejZGd%d d ejZd@d&d'ZdAd)d*Z edBd+d,Z!edCd-d.Z"edDd/d0Z#edEd1d2Z$edFd3d4Z%edGd5d6Z&edHd7d8Z'edId9d:Z(edJd;d<Z)edKd=d>Z*dS)Lz Class-Attention in Image Transformers (CaiT) Paper: 'Going deeper with Image Transformers' - https://arxiv.org/abs/2103.17239 Original code and weights from https://github.com/facebookresearch/deit, copyright below )deepcopyN)partialIMAGENET_DEFAULT_MEANIMAGENET_DEFAULT_STD)build_model_with_cfgoverlay_external_default_cfg) PatchEmbedMlpDropPath trunc_normal_)register_modelCait ClassAttnLayerScaleBlockClassAttnLayerScaleBlockTalkingHeadAttnc Ks |ddddddttddd |S) N)r?bicubicTzpatch_embed.projhead) url num_classes input_size pool_sizecrop_pct interpolationfixed_input_sizemeanstd first_conv classifierr)rkwargsr'=/home/chou/Projects/FGVC/FGVC-PIM-master2/timm/models/cait.py_cfgsr)z1https://dl.fbaipublicfiles.com/deit/XXS24_224.pth)rr*)rrz1https://dl.fbaipublicfiles.com/deit/XXS24_384.pth)rz1https://dl.fbaipublicfiles.com/deit/XXS36_224.pthz1https://dl.fbaipublicfiles.com/deit/XXS36_384.pthz0https://dl.fbaipublicfiles.com/deit/XS24_384.pthz/https://dl.fbaipublicfiles.com/deit/S24_224.pthz/https://dl.fbaipublicfiles.com/deit/S24_384.pthz/https://dl.fbaipublicfiles.com/deit/S36_384.pthz/https://dl.fbaipublicfiles.com/deit/M36_384.pthz/https://dl.fbaipublicfiles.com/deit/M48_448.pth)rr+) cait_xxs24_224cait_xxs24_384cait_xxs36_224cait_xxs36_384 cait_xs24_384 cait_s24_224 cait_s24_384 cait_s36_384 cait_m36_384 cait_m48_448cs&eZdZdfdd ZddZZS) rFcst||_||}|d|_tj|||d|_tj|||d|_tj|||d|_t ||_ t|||_ t ||_ dS)N࿩bias) super__init__ num_headsscalennLinearqkvDropout attn_dropproj proj_dropselfdimr=qkv_biasrErGhead_dim __class__r'r(r<Ms   zClassAttn.__init__c Cs|j\}}}||dddfd|d|j||jdddd}|||||j||jdddd}||j}|||||j||jdddd}|| dd}|j dd}| |}|| dd|d|} | | } | | } | S)NrrrrJ)shaperA unsqueezereshaper=permuterBr>rC transposesoftmaxrErFrG) rIxBNCrArBrCattnx_clsr'r'r(forwardZs <* *    zClassAttn.forward)r6Fr7r7__name__ __module__ __qualname__r<r_ __classcell__r'r'rMr(rJs c s>eZdZdddddejejeedf fdd ZddZ Z S) r@Fr7-C6?c st| ||_| |||||d|_|dkr8t|nt|_| ||_t ||} | || ||d|_ tj | t |dd|_tj | t |dd|_dSN)r=rKrErGr7) in_featureshidden_features act_layerdropT) requires_gradr;r<norm1r]r r?Identity drop_pathnorm2intmlp Parametertorchonesgamma_1gamma_2rIrJr= mlp_ratiorKrkrErprj norm_layer attn_block mlp_block init_valuesZmlp_hidden_dimrMr'r(r<ps    z!LayerScaleBlockClassAttn.__init__c CsVtj||fdd}|||j|||}|||j|||}|S)NrrR) rucatrprwr]rnrxrsrq)rIrYr^ur'r'r(r_s  z LayerScaleBlockClassAttn.forward) rarbrcr?GELU LayerNormrr r<r_rdr'r'rMr(rmscs&eZdZdfdd ZddZZS) rr6Fr7cs~t||_||}|d|_tj||d|d|_t||_t|||_ t|||_ t|||_ t||_ dS)Nr8rr9) r;r<r=r>r?r@qkvrDrErFproj_lproj_wrGrHrMr'r(r<s   zTalkingHeadAttn.__init__c Cs|j\}}}||||d|j||jddddd}|d|j|d|d}}}||dd} || dddddddd} | jdd} | | dddddddd} | | } | |dd|||}| |}| |}|S) NrrOrrrPrQrR) rSrrUr=rVr>rWrrXrrErFrG) rIrYrZr[r\rrArBrCr]r'r'r(r_s ."" "   zTalkingHeadAttn.forward)r6Fr7r7r`r'r'rMr(rsc s>eZdZdddddejejeedf fdd ZddZ Z S) rreFr7rfc st| ||_| |||||d|_|dkr8t|nt|_| ||_t ||} | || ||d|_ tj | t |dd|_tj | t |dd|_dSrgrmryrMr'r(r<s    zLayerScaleBlock.__init__c CsD|||j|||}|||j|||}|SN)rprwr]rnrxrsrqrIrYr'r'r(r_s  zLayerScaleBlock.forward) rarbrcr?rrrr r<r_rdr'r'rMr(rscseZdZdddddddddd d d eejd d d eeeej e e d e e ddffdd Z ddZejjddZddZdddZddZddZZS)rr*rr reTr7ư>)epsNrfrOcsTt||_ |_|_|||| d|_|jj}tt dd |_ tt d| |_ tj d|_fddt|Dt f ddt|D|_t f ddt|D|_ |_t dd d g|_|dkrt |nt|_t|j d d t|j d d ||jdS) N)img_size patch_sizein_chans embed_dimr)pcsg|]}qSr'r'.0i)drop_path_rater'r( sz!Cait.__init__..cs0g|](} | d qS) rJr=rzrKrkrErpr{rjr|r}r~r'r) rjr|attn_drop_rate block_layersdpr drop_rater init_scaler}rzr{r=rKr'r(rscs,g|]$} dddd qS)r7rr'r) rjattn_block_token_onlyblock_layers_tokenrrmlp_block_token_onlymlp_ratio_clstkr{r=rKr'r(rsrr)num_chs reductionmodule{Gz?r#)r;r<r num_featuresr patch_embed num_patchesr?rtruzeros cls_token pos_embedrDpos_droprange ModuleListblocksblocks_token_onlynormdict feature_infor@rorr apply _init_weights)rIrrrrrdepthr=rzrKrrrr{ global_poolrrZ patch_layerrjr|r}rrrZdepth_token_onlyrrrM)rjr|rrrrrrrrrr}rrzrr{r=rKr(r<s4  &     z Cait.__init__cCsrt|tjrBt|jddt|tjrn|jdk rntj|jdn,t|tjrntj|jdtj|jddS)Nrrrr) isinstancer?r@r weightr:init constant_r)rImr'r'r(rs  zCait._init_weightscCsddhS)Nrrr'rIr'r'r(no_weight_decay szCait.no_weight_decaycCs|jSr)rrr'r'r(get_classifierszCait.get_classifierrcCs*||_|dkrt|j|nt|_dS)Nr)rr?r@rror)rIrrr'r'r(reset_classifierszCait.reset_classifiercCs|jd}||}|j|dd}||j}||}t|jD]\}}||}qBt|jD]\}}|||}q^t j ||fdd}| |}|dddfS)NrrQrrR) rSrrexpandrr enumeraterrrurr)rIrYrZZ cls_tokensrblkr'r'r(forward_featuress       zCait.forward_featurescCs||}||}|Sr)rrrr'r'r(r_,s  z Cait.forward)r)rarbrcrr?rrrr rrr rr<rrujitignorerrrrr_rdr'r'rMr(rsB 9   cCs:d|kr|d}i}|D]\}}|||dd<q|S)Nmodelzmodule.r)itemsreplace) state_dictrZcheckpoint_no_modulerBrCr'r'r(checkpoint_filter_fn2s rFcKs6|ddrtdtt||ft|td|}|S)N features_onlyzrrrr=rr,r)r,rrrr&Z model_argsrr'r'r(r,Gsr,cKs0tfdddddd|}td d|i|}|S) Nrrrrrrr-r)r-rrr'r'r(r-Nsr-cKs0tfdddddd|}td d|i|}|S) Nrr$rrrr.r)r.rrr'r'r(r.Usr.cKs0tfdddddd|}td d|i|}|S) Nrrrrrrr/r)r/rrr'r'r(r/\sr/cKs0tfdddddd|}td d|i|}|S) Nri rrrr0r)r0rrr'r'r(r0csr0cKs0tfdddddd|}td d|i|}|S) Nrrrr6rrr1r)r1rrr'r'r(r1jsr1cKs0tfdddddd|}td d|i|}|S) Nrrrr6rrr2r)r2rrr'r'r(r2qsr2cKs0tfdddddd|}td d|i|}|S) Nrrrr6rrr3r)r3rrr'r'r(r3xsr3cKs0tfdddddd|}tdd|i|}|S) Nrrrrrr4r)r4rrr'r'r(r4sr4cKs0tfdddddd|}tdd|i|}|S) Nrr0rrr5r)r5rrr'r'r(r5sr5)r)N)F)F)F)F)F)F)F)F)F)F)F)+__doc__copyrrutorch.nnr? functoolsr timm.datarrhelpersrr layersr r r r registryr__all__r)rrModulerrrrrrrr,r-r.r/r0r1r2r3r4r5r'r'r'r(s     &#*j