o h2hB@s@ddlZddlZddlZddlmZddlmmZddlm Z gdZ ddZ ddZ Gd d d ej ZGd d d ej ZGd ddej ZGdddej ZGdddej ZGdddej ZGdddej ZGdddej ZGdddej ZGdddej Zdddiejdfdd Zd!d"ZGd#d$d$ZdS)%NHuggingfaceTokenizer)T5Model T5Encoder T5DecoderT5EncoderModelcCsB|jtjkrt|rt|jjd}tj|| |d}|S)Ni)minmax)dtypetorchfloat16isinfanyfinfor clamp)xrr0/home/ubuntu/wan22/wan2.2-main/wan/modules/t5.py fp16_clampsrcCsVt|trtj|jdSt|trtjj|jjdddSt|t rOtjj|j dj|j ddtjj|j j|j ddtjj|j j|jdddSt|trtjj|jj|j |jddtjj|jj|j ddtjj|jj|j ddtjj|jj|j|jdddSt|trtjj|jjd|j|jdddSdS)N?)stdrg) isinstance T5LayerNormnninitones_weightrnormal_token_embedding T5FeedForwardgatedimfc1fc2dim_ffn T5Attentionqdim_attnkvo num_headsT5RelativeEmbedding embedding num_buckets)mrrr init_weightss$     $  r2c@seZdZddZdS)GELUc Cs6d|dttdtj|dt|dS)Ng?rg@gHm?g@)r tanhmathsqrtpipowselfrrrrforward0s "z GELU.forwardN)__name__ __module__ __qualname__r;rrrrr3.s r3c&eZdZdfdd ZddZZS)rư>cs0tt|||_||_tt||_ dSN) superr__init__r#epsr Parameterr onesr)r:r#rD __class__rrrC7szT5LayerNorm.__init__cCsR|t|djddd|j}|jjtjtj fvr$| |j}|j|S)NrT)r#keepdim) r rsqrtfloatr8meanrDrr r bfloat16type_asr9rrrr;=s   zT5LayerNorm.forward)r@r<r=r>rCr; __classcell__rrrGrr5srcs(eZdZdfdd ZdddZZS) r'皙?cs||dksJtt|||_||_||_|||_tj||dd|_ tj||dd|_ tj||dd|_ tj||dd|_ t ||_dS)NrFbias)rBr'rCr#r)r-head_dimrLinearr(r*r+r,Dropoutdropout)r:r#r)r-rXrGrrrCGs zT5Attention.__init__Nc CsN|dur|n|}|d|j|j}}}|||d||}|||d||} |||d||} ||||d| d} |durL| |7} |durv|jdvsWJ|jdkrd||dddn| d}| |dkt |j jt d|| | } tj| dd| } t d | | }||d||}||}||}|S) z x: [B, L1, C]. context: [B, L2, C] or None. mask: [B, L2] or [B, L1, L2] or None. NrrIr)rrzbinc,bjnc->bnijr#zbnij,bjnc->binc)sizer-rUr(viewr*r+ new_zerosndim unsqueeze masked_fill_r rr r einsumFsoftmaxrLrOreshaper,rX) r:rcontextmaskpos_biasbncr(r*r+ attn_biasattnrrrr;Vs0    zT5Attention.forwardrRNNNrPrrrGrr'Esr'cr?)r!rRcsjtt|||_||_ttj||ddt|_ tj||dd|_ tj||dd|_ t ||_ dSNFrS)rBr!rCr#r&r SequentialrVr3r"r$r%rWrX)r:r#r&rXrGrrrC}szT5FeedForward.__init__cCs6||||}||}||}||}|SrA)r$r"rXr%r9rrrr;s    zT5FeedForward.forwardrmrPrrrGrr!{s r!c,eZdZ  dfdd Zd ddZZS) T5SelfAttentionTrRcstt|||_||_||_||_||_||_t ||_ t |||||_ t ||_ t||||_|r9d|_dSt||dd|_dS)NT bidirectional)rBrrrCr#r)r&r-r0 shared_posrnorm1r'rlnorm2r!ffnr. pos_embeddingr:r#r)r&r-r0rurXrGrrrCs   zT5SelfAttention.__init__NcCs\|jr|n ||d|d}t||j||||d}t||||}|S)Nrrfrg)ruryr[rrlrvrxrw)r:rrfrgerrrr;s zT5SelfAttention.forwardTrR)NNrPrrrGrrrs rrcs4eZdZ  dfdd Z    d ddZZS) T5CrossAttentionTrRcstt|||_||_||_||_||_||_t ||_ t |||||_ t ||_ t |||||_t ||_t||||_|rFd|_dSt||dd|_dS)NFrs)rBr~rCr#r)r&r-r0rurrvr' self_attnrw cross_attnnorm3r!rxr.ryrzrGrrrCs     zT5CrossAttention.__init__NcCsz|jr|n ||d|d}t||j||||d}t||j||||d}t||| |}|S)Nrr{)rerf) ruryr[rrrvrrwrxr)r:rrfencoder_states encoder_maskrgr|rrrr;s  zT5CrossAttention.forwardr})NNNNrPrrrGrr~sr~cs.eZdZdfdd ZddZddZZS) r.cs8tt|||_||_||_||_t|||_ dSrA) rBr.rCr0r-rtmax_distr Embeddingr/)r:r0r-rtrrGrrrCs zT5RelativeEmbedding.__init__cCsb|jjj}tj||ddtj||dd}||}||}|dddd}|S)N)devicerrr) r/rrr aranger__relative_position_bucketpermute contiguous)r:lqlkrrel_posrel_pos_embedsrrrr;s   zT5RelativeEmbedding.forwardcCs|jr|jd}|dk|}t|}n|j}d}t|t| }|d}|t||t |j |||}t|t ||d}|t ||k||7}|S)Nrrr) rtr0longr absr zeros_likelogrLr5r full_likewhere)r:rr0 rel_buckets max_exact rel_pos_largerrrrs(  z-T5RelativeEmbedding._relative_position_bucket)r)r<r=r>rCr;rrQrrrGrr.s  r.crq) rTrRc tt||_|_|_|_||_|_|_ t |t j r$|nt ||_ r4tddnd|_t |_t fddt|D|_t|_|tdS)NTrsc  g|] }tqSr)rr.0_r#r)r&rXr0r-rurr &  z&T5Encoder.__init__..)rBrrCr#r)r&r- num_layersr0rurrrr r.ryrWrX ModuleListrangeblocksrnormapplyr2 r:vocabr#r)r&r-rr0rurXrGrrrC .    zT5Encoder.__init__NcCsh||}||}|jr||d|dnd}|jD] }||||d}q||}||}|S)Nrrg)r rXruryr[rr)r:idsrfrr|blockrrrr;/s     zT5Encoder.forwardr}rArPrrrGrr   "rcrq) rTrRc r)NFrsc rr)r~rrrrrVrz&T5Decoder.__init__..)rBrrCr#r)r&r-rr0rurrrr r.ryrWrXrrrrrrr2rrGrrrC=rzT5Decoder.__init__Nc Cs|\}}|durttd|||j}n|jdkr+t|dd|d}| |}| |}|j rD| |d|dnd}|j D] } | |||||d}qI||}| |}|S)NrrrIr)r[r trilrFtorr^r_expandr rXruryrr) r:rrfrrrhsrr|rrrrr;_s$       zT5Decoder.forwardr}rnrPrrrGrr;rrcs*eZdZ  dfdd ZddZZS)rTrRc stt|||_||_||_||_||_||_||_ ||_ t |||_ t|j ||||||| | |_t|j ||||||| | |_t j||dd|_|tdSro)rBrrC vocab_sizer#r)r&r-encoder_layersdecoder_layersr0rrr rencoderrdecoderrVheadrr2) r:rr#r)r&r-rrr0rurXrGrrrCvs(   zT5Model.__init__cCs*|||}|||||}||}|SrA)rrr)r: encoder_idsr decoder_ids decoder_maskrrrrr;s  zT5Model.forwardr}rPrrrGrrts  "rFcpuc Ks|r|rJ|rt}|d|d<|d|d<|d} n|r6t}|d|d<|d|d<|d} nt}t||d i|} Wdn1sOwY| j||d} |rrddlm} | d |fi|} | | fS| S) Nrrrrr)r rrrzgoogle/r) rpoprrr rr tokenizersr) name encoder_only decoder_onlyreturn_tokenizertokenizer_kwargsr rkwargs model_clsrmodelr tokenizerrrr_t5s*     rc Ks:tddddddddddd }|jd i|td i|S) Niii(@ FrR) rr#r)r&r-rrr0rurXumt5-xxlr)r)dictupdater)rcfgrrrumt5_xxls rc@s0eZdZejejdddfddZddZdS)rNcCs||_||_||_||_||_tdd||dd}t d|| t j |dd||_ |dur>||j dd|_ n|j |jt||dd |_dS) NTF)rrr rzloading r) map_location)sync_module_states whitespace)rseq_lenclean)text_lenr rcheckpoint_pathtokenizer_pathrevalrequires_grad_logginginfoload_state_dictr loadrrrr)r:rr rrrshard_fnrrrrrCs,   zT5EncoderModel.__init__cCs^|j|ddd\}}||}||}|djdd}|||}ddt||DS)NT) return_maskadd_special_tokensrrrZcSsg|] \}}|d|qSrAr)rur+rrrrsz+T5EncoderModel.__call__..)rrgtsumrrzip)r:textsrrrfseq_lensrerrr__call__s   zT5EncoderModel.__call__) r<r=r>r rNcudacurrent_devicerCrrrrrrs r)rr5r torch.nnrtorch.nn.functional functionalrbrr__all__rr2Moduler3rr'r!rrr~r.rrrfloat32rrrrrrrs8  6"+.09, )