""" Bring-Your-Own-Blocks Network

A flexible network w/ dataclass-based config for stacking NN blocks.

This model is currently used to implement the following networks:

GPU Efficient (ResNets) - gernet_l/m/s (original versions called genet, but that name was already
used by the SENet author).
Paper: `Neural Architecture Design for GPU-Efficient Networks` - https://arxiv.org/abs/2006.14090
Code and weights: https://github.com/idstcv/GPU-Efficient-Networks, licensed Apache 2.0

RepVGG - repvgg_*
Paper: `Making VGG-style ConvNets Great Again` - https://arxiv.org/abs/2101.03697
Code and weights: https://github.com/DingXiaoH/RepVGG, licensed MIT

In all cases the models have been modified to fit within the design of ByobNet. I've remapped
the original weights and verified accuracies.

For the GPU Efficient nets, I used the original names for the blocks since they were for the most part
the same as the original residual blocks in ResNe(X)t, DarkNet, and other existing models. Note that some
changes introduced in RegNet are also present in the stem and bottleneck blocks of this model.

A significant number of different network archs can be implemented here, including variants of the
above nets that include attention.

Hacked together by / copyright Ross Wightman, 2021.
"""
import math
from dataclasses import dataclass, field, replace
from typing import Tuple, List, Dict, Optional, Union, Any, Callable, Sequence
from functools import partial

import torch
import torch.nn as nn

from timm.data import IMAGENET_DEFAULT_MEAN, IMAGENET_DEFAULT_STD
from .helpers import build_model_with_cfg
from .layers import ClassifierHead, ConvBnAct, BatchNormAct2d, DropPath, AvgPool2dSame, \
    create_conv2d, get_act_layer, convert_norm_act, get_attn, make_divisible, to_2tuple
from .registry import register_model

__all__ = ['ByobNet', 'ByoModelCfg', 'ByoBlockCfg', 'create_byob_stem', 'create_block']


def _cfg(url='', **kwargs):
    return {
        'url': url, 'num_classes': 1000, 'input_size': (3, 224, 224), 'pool_size': (7, 7),
        'crop_pct': 0.875, 'interpolation': 'bilinear',
        'mean': IMAGENET_DEFAULT_MEAN, 'std': IMAGENET_DEFAULT_STD,
        'first_conv': 'stem.conv', 'classifier': 'head.fc',
        **kwargs
    }


default_cfgs = {
    # GPU-Efficient (ResNet) weights
    'gernet_s': _cfg(
        url='https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-ger-weights/gernet_s-756b4751.pth'),
    'gernet_m': _cfg(
        url='https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-ger-weights/gernet_m-0873c53a.pth'),
    'gernet_l': _cfg(
        url='https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-ger-weights/gernet_l-f31e2e8d.pth'),

    # RepVGG weights (the RepVGG stem has two parallel first convs)
    'repvgg_a2': _cfg(
        url='https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-repvgg-weights/repvgg_a2-c1ee6d2b.pth',
        first_conv=('stem.conv_kxk.conv', 'stem.conv_1x1.conv')),
    'repvgg_b0': _cfg(
        url='https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-repvgg-weights/repvgg_b0-80ac3f1b.pth',
        first_conv=('stem.conv_kxk.conv', 'stem.conv_1x1.conv')),
    'repvgg_b1': _cfg(
        url='https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-repvgg-weights/repvgg_b1-77ca2989.pth',
        first_conv=('stem.conv_kxk.conv', 'stem.conv_1x1.conv')),
    'repvgg_b1g4': _cfg(
        url='https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-repvgg-weights/repvgg_b1g4-abde5d92.pth',
        first_conv=('stem.conv_kxk.conv', 'stem.conv_1x1.conv')),
    'repvgg_b2': _cfg(
        url='https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-repvgg-weights/repvgg_b2-25b7494e.pth',
        first_conv=('stem.conv_kxk.conv', 'stem.conv_1x1.conv')),
    'repvgg_b2g4': _cfg(
        url='https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-repvgg-weights/repvgg_b2g4-165a85f2.pth',
        first_conv=('stem.conv_kxk.conv', 'stem.conv_1x1.conv')),
    'repvgg_b3': _cfg(
        url='https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-repvgg-weights/repvgg_b3-199bc50d.pth',
        first_conv=('stem.conv_kxk.conv', 'stem.conv_1x1.conv')),
    'repvgg_b3g4': _cfg(
        url='https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-repvgg-weights/repvgg_b3g4-73c370bf.pth',
        first_conv=('stem.conv_kxk.conv', 'stem.conv_1x1.conv')),

    # experimental configs
    'resnet51q': _cfg(
        url='https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/resnet51q_ra2-d47dcc76.pth',
        first_conv='stem.conv1'),
    'resnet61q': _cfg(first_conv='stem.conv1.conv', interpolation='bicubic'),

    'geresnet50t': _cfg(first_conv='stem.conv1.conv', interpolation='bicubic'),
    'gcresnet50t': _cfg(first_conv='stem.conv1.conv', interpolation='bicubic'),

    'gcresnext26ts': _cfg(first_conv='stem.conv1.conv', interpolation='bicubic'),
    'bat_resnext26ts': _cfg(first_conv='stem.conv1.conv', interpolation='bicubic'),
}


@dataclass
class ByoBlockCfg:
    type: Union[str, nn.Module]
    d: int  # block depth (number of block repeats in stage)
    c: int  # number of output channels for each block in stage
    s: int = 2  # stride of stage (first block)
    gs: Optional[Union[int, Callable]] = None  # group-size of blocks in stage, conv is depthwise if gs == 1
    br: float = 1.  # bottleneck-ratio of blocks in stage

    # attn / self-attn layer names and kwargs, overriding any model-level defaults in ByoModelCfg
    attn_layer: Optional[str] = None
    attn_kwargs: Optional[Dict[str, Any]] = None
    self_attn_layer: Optional[str] = None
    self_attn_kwargs: Optional[Dict[str, Any]] = None

    # kwargs passed straight through to the block module
    block_kwargs: Optional[Dict[str, Any]] = None


@dataclass
class ByoModelCfg:
    blocks: Tuple[Union[ByoBlockCfg, Tuple[ByoBlockCfg, ...]], ...]
    downsample: str = 'conv1x1'
    stem_type: str = '3x3'
    stem_pool: Optional[str] = 'maxpool'
    stem_chs: int = 32
    width_factor: float = 1.0
    num_features: int = 0  # num out_channels for final conv, no final 1x1 conv if 0
    zero_init_last_bn: bool = True
    fixed_input_size: bool = False  # model constrained to a fixed input size, img_size required on creation

    act_layer: str = 'relu'
    norm_layer: str = 'batchnorm'

    # model-level attn / self-attn layer names and default kwargs (per-block ByoBlockCfg values override these)
    attn_layer: Optional[str] = None
    attn_kwargs: dict = field(default_factory=lambda: dict())
    self_attn_layer: Optional[str] = None
    self_attn_kwargs: dict = field(default_factory=lambda: dict())
    block_kwargs: Dict[str, Any] = field(default_factory=lambda: dict())


def _rep_vgg_bcfg(d=(4, 6, 16, 1), wf=(1., 1., 1., 1.), groups=0):
    # Build the 4-stage 'rep' block config used by the RepVGG models: base widths (64, 128, 256, 512)
    # scaled by the per-stage width factors `wf`, with grouped convs every other block when `groups` > 0.
    c = (64, 128, 256, 512)
    group_size = 0
    if groups > 0:
        group_size = lambda chs, idx: groups if (idx % 2) else 0
    return tuple([ByoBlockCfg(type='rep', d=d, c=c * wf, gs=group_size) for d, c, wf in zip(d, c, wf)])


def interleave_blocks(
        types: Tuple[str, str], every: Union[int, List[int]], d, first: bool = False, **kwargs) -> Tuple[ByoBlockCfg]:
    """ interleave 2 block types in stack
    """
    assert len(types) == 2
    if isinstance(every, int):
        every = list(range(0 if first else every, d, every))
        if not every:
            every = [d - 1]
    if isinstance(every, (list, tuple)):
        every = set(every)
    blocks = []
    for i in range(d):
        block_type = types[1] if i in every else types[0]
        blocks += [ByoBlockCfg(type=block_type, d=1, **kwargs)]
    return tuple(blocks)

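# Example (illustrative sketch only, not one of the registered configs): because ByoBlockCfg and
# ByoModelCfg are plain dataclasses, a custom architecture can be declared by stacking per-stage block
# configs. The depths/widths below are made up for the example; 'basic', 'bottle' and 'dark' are block
# type names resolved by create_block() further down in this module, and `_example_custom_cfg` itself
# is not part of the original module.
def _example_custom_cfg() -> ByoModelCfg:
    return ByoModelCfg(
        blocks=(
            ByoBlockCfg(type='basic', d=2, c=64, s=1),
            ByoBlockCfg(type='bottle', d=3, c=128, s=2, gs=0, br=0.25),
            ByoBlockCfg(type='bottle', d=4, c=256, s=2, gs=0, br=0.25),
            ByoBlockCfg(type='dark', d=2, c=512, s=2),
        ),
        stem_chs=32,
        stem_type='3x3',
        stem_pool='maxpool',
        act_layer='relu',
    )
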
# ByoModelCfg definitions for each registered model below. The 'gernet_*' configs stack 'basic' and
# 'bottle' blocks, the 'repvgg_*' configs are generated with _rep_vgg_bcfg() ('rep' blocks, stem_type='rep'),
# and the remaining configs combine 'edge' / 'dark' / 'bottle' / 'self_attn' blocks (via interleave_blocks())
# with tiered/quad stems, 'silu' activations and attention layers such as 'gc' and 'bat'.
# Stage-level parameters are abbreviated as `...`.
model_cfgs = dict(
    gernet_l=...,
    gernet_m=...,
    gernet_s=...,
    repvgg_a2=...,
    repvgg_b0=...,
    repvgg_b1=...,
    repvgg_b1g4=...,
    repvgg_b2=...,
    repvgg_b2g4=...,
    repvgg_b3=...,
    repvgg_b3g4=...,
    resnet51q=...,
    resnet61q=...,
    geresnet50t=...,
    gcresnet50t=...,
    gcresnext26ts=...,
    bat_resnext26ts=...,
)


@register_model
def gernet_l(pretrained=False, **kwargs):
    """ GEResNet-Large (GENet-Large from official impl)
    `Neural Architecture Design for GPU-Efficient Networks` - https://arxiv.org/abs/2006.14090
    """
    return _create_byobnet('gernet_l', pretrained=pretrained, **kwargs)


@register_model
def gernet_m(pretrained=False, **kwargs):
    """ GEResNet-Medium (GENet-Normal from official impl)
    `Neural Architecture Design for GPU-Efficient Networks` - https://arxiv.org/abs/2006.14090
    """
    return _create_byobnet('gernet_m', pretrained=pretrained, **kwargs)


@register_model
def gernet_s(pretrained=False, **kwargs):
    """ GEResNet-Small (GENet-Small from official impl)
    `Neural Architecture Design for GPU-Efficient Networks` - https://arxiv.org/abs/2006.14090
    """
    return _create_byobnet('gernet_s', pretrained=pretrained, **kwargs)


@register_model
def repvgg_a2(pretrained=False, **kwargs):
    """ RepVGG-A2
    `Making VGG-style ConvNets Great Again` - https://arxiv.org/abs/2101.03697
    """
    return _create_byobnet('repvgg_a2', pretrained=pretrained, **kwargs)


@register_model
def repvgg_b0(pretrained=False, **kwargs):
    """ RepVGG-B0
    `Making VGG-style ConvNets Great Again` - https://arxiv.org/abs/2101.03697
    """
    return _create_byobnet('repvgg_b0', pretrained=pretrained, **kwargs)


@register_model
def repvgg_b1(pretrained=False, **kwargs):
    """ RepVGG-B1
    `Making VGG-style ConvNets Great Again` - https://arxiv.org/abs/2101.03697
    """
    return _create_byobnet('repvgg_b1', pretrained=pretrained, **kwargs)


@register_model
def repvgg_b1g4(pretrained=False, **kwargs):
    """ RepVGG-B1g4
    `Making VGG-style ConvNets Great Again` - https://arxiv.org/abs/2101.03697
    """
    return _create_byobnet('repvgg_b1g4', pretrained=pretrained, **kwargs)


@register_model
def repvgg_b2(pretrained=False, **kwargs):
    """ RepVGG-B2
    `Making VGG-style ConvNets Great Again` - https://arxiv.org/abs/2101.03697
    """
    return _create_byobnet('repvgg_b2', pretrained=pretrained, **kwargs)


@register_model
def repvgg_b2g4(pretrained=False, **kwargs):
    """ RepVGG-B2g4
    `Making VGG-style ConvNets Great Again` - https://arxiv.org/abs/2101.03697
    """
    return _create_byobnet('repvgg_b2g4', pretrained=pretrained, **kwargs)


@register_model
def repvgg_b3(pretrained=False, **kwargs):
    """ RepVGG-B3
    `Making VGG-style ConvNets Great Again` - https://arxiv.org/abs/2101.03697
    """
    return _create_byobnet('repvgg_b3', pretrained=pretrained, **kwargs)


@register_model
def repvgg_b3g4(pretrained=False, **kwargs):
    """ RepVGG-B3g4
    `Making VGG-style ConvNets Great Again` - https://arxiv.org/abs/2101.03697
    """
    return _create_byobnet('repvgg_b3g4', pretrained=pretrained, **kwargs)


@register_model
def resnet51q(pretrained=False, **kwargs):
    return _create_byobnet('resnet51q', pretrained=pretrained, **kwargs)


@register_model
def resnet61q(pretrained=False, **kwargs):
    return _create_byobnet('resnet61q', pretrained=pretrained, **kwargs)


@register_model
def geresnet50t(pretrained=False, **kwargs):
    return _create_byobnet('geresnet50t', pretrained=pretrained, **kwargs)


@register_model
def gcresnet50t(pretrained=False, **kwargs):
    return _create_byobnet('gcresnet50t', pretrained=pretrained, **kwargs)


@register_model
def gcresnext26ts(pretrained=False, **kwargs):
    return _create_byobnet('gcresnext26ts', pretrained=pretrained, **kwargs)


@register_model
def bat_resnext26ts(pretrained=False, **kwargs):
    return _create_byobnet('bat_resnext26ts', pretrained=pretrained, **kwargs)

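# Example (sketch, assuming a normal timm install where the factories above are picked up by
# @register_model): any of these models can be built through timm's standard create_model entry point
# and used like any other classifier. `_example_create_model` is illustrative, not part of the module.
def _example_create_model():
    import timm
    model = timm.create_model('repvgg_b2', pretrained=False, num_classes=10)
    model.eval()
    with torch.no_grad():
        logits = model(torch.randn(1, 3, 224, 224))  # -> shape (1, 10)
    return logits
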
def expand_blocks_cfg(stage_blocks_cfg: Union[ByoBlockCfg, Sequence[ByoBlockCfg]]) -> List[ByoBlockCfg]:
    # Expand a per-stage cfg (or tuple of cfgs) into one ByoBlockCfg per block repeat (d=1 each).
    if not isinstance(stage_blocks_cfg, Sequence):
        stage_blocks_cfg = (stage_blocks_cfg,)
    block_cfgs = []
    for i, cfg in enumerate(stage_blocks_cfg):
        block_cfgs += [replace(cfg, d=1) for _ in range(cfg.d)]
    return block_cfgs


def num_groups(group_size, channels):
    if not group_size:  # 0 or None
        return 1  # normal conv with 1 group
    else:
        # NOTE group_size == 1 -> depthwise conv
        assert channels % group_size == 0
        return channels // group_size


@dataclass
class LayerFn:
    conv_norm_act: Callable = ConvBnAct
    norm_act: Callable = BatchNormAct2d
    act: Callable = nn.ReLU
    attn: Optional[Callable] = None
    self_attn: Optional[Callable] = None


class DownsampleAvg(nn.Module):
    def __init__(self, in_chs, out_chs, stride=1, dilation=1, apply_act=False, layers: LayerFn = None):
        """ AvgPool Downsampling as in 'D' ResNet variants."""
        super(DownsampleAvg, self).__init__()
        layers = layers or LayerFn()
        avg_stride = stride if dilation == 1 else 1
        if stride > 1 or dilation > 1:
            avg_pool_fn = AvgPool2dSame if avg_stride == 1 and dilation > 1 else nn.AvgPool2d
            self.pool = avg_pool_fn(2, avg_stride, ceil_mode=True, count_include_pad=False)
        else:
            self.pool = nn.Identity()
        self.conv = layers.conv_norm_act(in_chs, out_chs, 1, apply_act=apply_act)

    def forward(self, x):
        return self.conv(self.pool(x))


def create_downsample(downsample_type, layers: LayerFn, **kwargs):
    if downsample_type == 'avg':
        return DownsampleAvg(**kwargs)
    else:
        return layers.conv_norm_act(kwargs.pop('in_chs'), kwargs.pop('out_chs'), kernel_size=1, **kwargs)


class BasicBlock(nn.Module):
    """ ResNet Basic Block - kxk + kxk
    """
    # Residual: shortcut (identity or avg-pool / 1x1-conv downsample) plus conv1_kxk -> conv2_kxk
    # -> optional attn -> drop_path, summed with the shortcut and passed through the final act.
    ...


class BottleneckBlock(nn.Module):
    """ ResNet-like Bottleneck Block - 1x1 - kxk - 1x1
    """
    # Residual: shortcut plus conv1_1x1 -> conv2_kxk (-> optional conv2b_kxk when extra_conv is set)
    # -> optional attn -> conv3_1x1 -> optional attn_last -> drop_path, summed with the shortcut, final act.
    ...


class DarkBlock(nn.Module):
    """ DarkNet-like (1x1 + 3x3 w/ stride) block

    The GE-Net impl included a 1x1 + 3x3 block in their search space. It was not used in the feature models.
    This block is pretty much a DarkNet block (also DenseNet), hence the name. Neither DarkNet nor DenseNet
    uses strides within the block (external 3x3 or maxpool downsampling is done in front of the block repeats).

    If one does want to use a lot of these blocks w/ stride, I'd recommend using the EdgeBlock (3x3 w/ stride + 1x1)
    for more optimal compute.
    """
    # Residual: shortcut plus conv1_1x1 -> optional attn -> conv2_kxk -> optional attn_last -> drop_path,
    # summed with the shortcut, final act.
    ...


class EdgeBlock(nn.Module):
    """ EdgeResidual-like (3x3 + 1x1) block

    A two layer block like DarkBlock, but with the order of the 3x3 and 1x1 convs reversed.
    Very similar to the EfficientNet Edge-Residual block, but this block ends with activations, is
    intended to be used with either expansion or bottleneck contraction, and can use DW/group/non-grouped convs.

    FIXME is there a more common 3x3 + 1x1 conv block to name this after?
    """
    # Residual: shortcut plus conv1_kxk -> optional attn -> conv2_1x1 -> optional attn_last -> drop_path,
    # summed with the shortcut, final act.
    ...


class RepVggBlock(nn.Module):
    """ RepVGG Block.

    Adapted from impl at https://github.com/DingXiaoH/RepVGG

    This version does not currently support the deploy optimization. It is currently fixed in 'train' mode.
    """
    # Parallel branches: identity BatchNorm (only when the block is non-downsampling and in_chs == out_chs),
    # conv_kxk (3x3 conv + BN) and conv_1x1 (1x1 conv + BN). Branch outputs are summed, then optional attn and act.
    ...

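# The deploy-time optimization mentioned in the RepVggBlock docstring (not implemented here) folds the
# parallel 3x3+BN, 1x1+BN and identity-BN branches into a single 3x3 conv for inference. A minimal sketch
# of the underlying math, assuming ungrouped convs with no conv bias (as in the BN-paired convs above);
# these helper names are not part of this module:
def _fuse_conv_bn(conv: nn.Conv2d, bn: nn.BatchNorm2d):
    # Fold BatchNorm statistics into the preceding (bias-free) conv:
    #   w' = w * gamma / sqrt(var + eps),  b' = beta - mean * gamma / sqrt(var + eps)
    scale = bn.weight / (bn.running_var + bn.eps).sqrt()
    kernel = conv.weight * scale.reshape(-1, 1, 1, 1)
    bias = bn.bias - bn.running_mean * scale
    return kernel, bias


def _repvgg_equivalent_3x3(k3, b3, k1, b1):
    # Pad the fused 1x1 kernel to 3x3 and sum the branches; an identity branch would contribute a further
    # 3x3 kernel with a centered 1 per channel, fused with its BN in the same way.
    import torch.nn.functional as F
    return k3 + F.pad(k1, [1, 1, 1, 1]), b3 + b1
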
class SelfAttnBlock(nn.Module):
    """ ResNet-like Bottleneck Block - 1x1 - optional kxk - self attn - 1x1
    """
    # Residual: shortcut plus conv1_1x1 -> optional conv2_kxk -> self_attn -> post_attn norm -> conv3_1x1
    # -> drop_path, summed with the shortcut, final act. A feat_size may be required by the self-attn layer.
    ...


_block_registry = dict(
    basic=BasicBlock,
    bottle=BottleneckBlock,
    dark=DarkBlock,
    edge=EdgeBlock,
    rep=RepVggBlock,
    self_attn=SelfAttnBlock,
)


def register_block(block_type: str, block_fn: nn.Module):
    _block_registry[block_type] = block_fn


def create_block(block: Union[str, nn.Module], **kwargs):
    if isinstance(block, (nn.Module, partial)):
        return block(**kwargs)
    assert block in _block_registry, f'Unknown block type ({block})'
    return _block_registry[block](**kwargs)

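# Example (sketch): because create_block() resolves string block types through _block_registry, a
# user-defined nn.Module can be made available to ByoBlockCfg by registering it first. `MyCustomBlock`
# and `_example_register_block` are hypothetical; a real block should accept the keyword arguments that
# create_byob_stages() passes to blocks (in_chs, out_chs, stride, dilation, group_size, bottle_ratio,
# downsample, layers, drop_path_rate, ...).
def _example_register_block():
    class MyCustomBlock(nn.Module):
        def __init__(self, in_chs, out_chs, stride=1, layers: LayerFn = None, **kwargs):
            super().__init__()
            layers = layers or LayerFn()
            # a single conv-norm-act layer stands in for a real residual block here
            self.conv = layers.conv_norm_act(in_chs, out_chs, 3, stride=stride)

        def forward(self, x):
            return self.conv(x)

    register_block('my_block', MyCustomBlock)
    # the new type can now be referenced by name in a config
    return ByoBlockCfg(type='my_block', d=2, c=256, s=2)
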
class Stem(nn.Sequential):
    # Multi-conv stem used for the 'deep', 'tiered' and 'quad' stem types. Builds a short sequence of
    # conv_norm_act layers (optionally followed by pooling) and records `feature_info` entries
    # (num_chs / reduction / module) for feature extraction.
    ...


def create_byob_stem(in_chs, out_chs, stem_type='', pool_type='', feat_prefix='stem', layers: LayerFn = None):
    # Supported stem_type values include '', '3x3', '7x7', 'deep', 'tiered', 'rep' and the 'quad'/'quad2'
    # variants; returns the stem module plus its feature_info entries.
    ...


def reduce_feat_size(feat_size, stride=2):
    return None if feat_size is None else tuple([s // stride for s in feat_size])


def override_kwargs(block_kwargs, model_kwargs):
    """ Override model level attn/self-attn/block kwargs w/ block level

    NOTE: kwargs are NOT merged across levels, block_kwargs will fully replace model_kwargs
    for the block if set to anything that isn't None.
    i.e. an empty block_kwargs dict will remove kwargs set at model level for that block
    """
    out_kwargs = block_kwargs if block_kwargs is not None else model_kwargs
    return out_kwargs or {}  # make sure None isn't returned


def update_block_kwargs(block_kwargs: Dict[str, Any], block_cfg: ByoBlockCfg, model_cfg: ByoModelCfg):
    # Resolve per-block attn_layer / self_attn_layer (and their kwargs) against the model-level settings
    # via override_kwargs(), then fold block_cfg.block_kwargs into the kwargs passed to the block module.
    ...


def create_byob_stages(
        cfg: ByoModelCfg, drop_path_rate: float, output_stride: int, stem_feat: Dict[str, Any],
        feat_size=None, layers: Optional[LayerFn] = None, block_kwargs_fn: Optional[Callable] = update_block_kwargs):
    # Expand the per-stage ByoBlockCfg stacks into nn.Sequential stages, tracking stride/dilation to honor
    # `output_stride`, assigning a linearly spaced drop-path rate per block, and accumulating feature_info.
    ...


def get_layer_fns(cfg: ByoModelCfg):
    # Resolve the string layer names in the cfg (act_layer, norm_layer, attn_layer, self_attn_layer)
    # into a LayerFn bundle of constructor partials used by the stem and blocks.
    ...


class ByobNet(nn.Module):
    """ 'Bring-your-own-blocks' Net

    A flexible network backbone that allows building model stem + blocks via
    dataclass cfg definition w/ factory functions for module instantiation.

    Current assumption is that both stem and blocks are in conv-bn-act order (w/ block ending in act).
    """
    def __init__(self, cfg: ByoModelCfg, num_classes=1000, in_chans=3, global_pool='avg', output_stride=32,
                 zero_init_last_bn=True, img_size=None, drop_rate=0., drop_path_rate=0.):
        super().__init__()
        self.num_classes = num_classes
        self.drop_rate = drop_rate
        # cfg.fixed_input_size requires img_size ('img_size argument is required for fixed input size model').
        # Layout: create_byob_stem() -> create_byob_stages() -> optional final 1x1 conv (cfg.num_features)
        # -> ClassifierHead; feature_info is collected along the way and _init_weights() is applied to all modules.
        ...

    def get_classifier(self):
        return self.head.fc

    def reset_classifier(self, num_classes, global_pool='avg'):
        self.head = ClassifierHead(self.num_features, num_classes, pool_type=global_pool, drop_rate=self.drop_rate)

    def forward_features(self, x):
        x = self.stem(x)
        x = self.stages(x)
        x = self.final_conv(x)
        return x

    def forward(self, x):
        x = self.forward_features(x)
        x = self.head(x)
        return x


def _init_weights(m, n=''):
    if isinstance(m, nn.Conv2d):
        fan_out = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
        fan_out //= m.groups
        m.weight.data.normal_(0, math.sqrt(2.0 / fan_out))
        if m.bias is not None:
            m.bias.data.zero_()
    elif isinstance(m, nn.Linear):
        nn.init.normal_(m.weight, mean=0.0, std=0.01)
        if m.bias is not None:
            nn.init.zeros_(m.bias)
    elif isinstance(m, nn.BatchNorm2d):
        nn.init.ones_(m.weight)
        nn.init.zeros_(m.bias)


def _create_byobnet(variant, pretrained=False, **kwargs):
    return build_model_with_cfg(
        ByobNet, variant, pretrained,
        default_cfg=default_cfgs[variant],
        model_cfg=model_cfgs[variant],
        feature_cfg=dict(flatten_sequential=True),
        **kwargs)

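# Example (sketch, assuming a normal timm install): because the stem and stages record feature_info and
# _create_byobnet passes feature_cfg=dict(flatten_sequential=True), intermediate feature maps can be pulled
# out with timm's generic features_only interface. `_example_feature_extraction` is illustrative only.
def _example_feature_extraction():
    import timm
    backbone = timm.create_model('gernet_l', pretrained=False, features_only=True)
    feats = backbone(torch.randn(1, 3, 256, 256))
    return [f.shape for f in feats]  # one feature map per recorded reduction stage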