B ðcóã @søddlZddlmZddlZddlZddlZddlmmZddl m Z m Z m Z m Z mZmZdd„Zdd„Zdd „Zd d d d gd d gddddgd dddfdd„Zd!dd„Zd"dd„Zddd d gd d gddddgd ddgddddf dd„Zd#dd „ZdS)$éNé)Ú activationsÚforward_adapted_unflattenÚget_activationÚget_readout_operÚmake_backbone_defaultÚ TransposecCs t||dƒS)NÚ forward_flex)r)Ú pretrainedÚx©r úP/home/vaishanth/workspace/independent_study_project/MiDaS/midas/backbones/vit.pyÚ forward_vit srcCs |dd…d|j…f|d|jd…f}}tt t|ƒ¡ƒ}| d||d¡ dddd¡}tj|||fdd}| dddd¡ d||d¡}t j ||gdd}|S) NrréÿÿÿÿééÚbilinear)ÚsizeÚmode)Údim) Ú start_indexÚintÚmathÚsqrtÚlenÚreshapeÚpermuteÚFÚ interpolateÚtorchÚcat)ÚselfÚposembZgs_hZgs_wZ posemb_tokÚ posemb_gridÚgs_oldr r r Ú_resize_pos_embedsr%c Cs<|j\}}}}| |j||jd||jd¡}|jd}t|jdƒrj|j |¡}t|tt fƒrj|d}|j  |¡  d¡  dd¡}t |ddƒdk rÊ|j |dd¡}|j |dd¡} tj|| |fdd}n0|jrØ||}|j |dd¡}tj||fdd}|js ||}| |¡}x|jD]} | |ƒ}qW| |¡}|S)NrrÚbackbonerrÚ dist_token)r)Úshaper%Ú pos_embedÚ patch_sizeÚhasattrÚ patch_embedr&Ú isinstanceÚlistÚtupleÚprojÚflattenÚ transposeÚgetattrÚ cls_tokenÚexpandr'rr Úno_embed_classÚpos_dropÚblocksÚnorm) r!r ÚbÚcÚhÚwr)ÚBÚ cls_tokensr'Úblkr r r r !s4         r é`éÀi€irééé Úignorec Cs>t||||||||ƒ}t t|j¡|j_t t|j¡|j_|S)N)rÚtypesÚ MethodTyper Úmodelr%) rIÚfeaturesrÚhooksÚ vit_featuresÚ use_readoutrÚstart_index_readoutr r r r Ú_make_vit_b16_backboneKs rOcCs@tjd|d}|dkr"ddddgn|}t|ddd d g|d |d S) NÚvit_large_patch16_384)r rCrEéééii)rJrKrLrM)ÚtimmÚ create_modelrO)r rMrKrIr r r Ú_make_pretrained_vitl16_384bs rVcCs>tjd|d}|dkr"ddddgn|}t|ddd d g||d S) NÚvit_base_patch16_384)r rrCrDrErArBi€i)rJrKrM)rTrUrO)r rMrKrIr r r Ú_make_pretrained_vitb16_384osrXrSiéFc  Cs t ¡} || _|rdn|} x2t| ƒD]&} | jjjj|  tt | dƒƒ¡q$Wx4t| dƒD]&} | jj ||  tt | dƒƒ¡qZWt | _ t |||| ƒ} x>t| ƒD]2} t  t ¡t ¡t ¡¡}td| d›dƒq¢Wxút| dƒD]ì} | |kr&tj|| || dd| dd| ddddd}n,| |krNtj|d |d d ddd }nd}| | tddƒt dt |dd |dd g¡¡tj||| dddd g}|dk r²| |¡tj |Ž}td| d›dƒqäW| | j_|| j_t t| j¡| j_t t| j¡| j_| S) Nrrézpretrained.act_postprocessz=valuerT)Ú in_channelsÚ out_channelsÚ kernel_sizeÚstrideÚpaddingÚbiasÚdilationÚgroupsr)r[r\r]r^r_rY)ÚnnÚModulerIÚranger,r&ÚstagesÚregister_forward_hookrÚstrr8rrÚ SequentialÚIdentityÚexecÚConvTranspose2dÚConv2drÚ UnflattenrÚSizeÚappendrr*rGrHr r%)rIrJrrKrLr*Z number_stagesÚ use_vit_onlyrMrr Zused_number_stagesÚsÚ readout_operÚvalueÚ final_layerÚlayersr r r Ú_make_vit_b_rn50_backbonexsf  &      $    rwcCsFtjd|d}|dkr"ddddgn|}t|ddd d gd d g|||d S) NÚvit_base_resnet50_384)r rrrDrErSiii€)rJrrKrqrM)rTrUrw)r rMrKrqrIr r r Ú_make_pretrained_vitb_rn50_384Ðs ry)rFN)rFN)rFNF)rÚtorch.nnrcrTrGrZtorch.nn.functionalÚ functionalrÚutilsrrrrrrrr%r rOrVrXrwryr r r r Ús8  ,      O