o A h(<@sddlmZddlZddlZddlmZddlmZddlmZm Z m Z m Z m Z Gdddej Zd d Zd d ZGd ddejj ZGdddejj ZGdddejj ZGdddejj ZGdddejj ZdS))OptionalN) rearrange)nn)MLPTextProjectionTimestepEmbedder apply_gate attentioncs@eZdZ    d dedeffdd Zdd Zd d ZZS) RMSNormTư>Ndimepscs@||d}t||_|rttj|fi||_dSdS)a Initialize the RMSNorm normalization layer. Args: dim (int): The dimension of the input tensor. eps (float, optional): A small value added to the denominator for numerical stability. Default is 1e-6. Attributes: eps (float): A small value added to the denominator for numerical stability. weight (nn.Parameter): Learnable scaling parameter. devicedtypeN)super__init__rr Parametertorchonesweight)selfr elementwise_affinerrrfactory_kwargs __class__)/data/code/test/modules/connector_edit.pyr s  zRMSNorm.__init__cCs$|t|djddd|jS)z Apply the RMSNorm normalization to the input tensor. Args: x (torch.Tensor): The input tensor. Returns: torch.Tensor: The normalized tensor. T)keepdim)rrsqrtpowmeanr)rxrrr_norm&s$ z RMSNorm._normcCs,|||}t|dr||j}|S)z Forward pass through the RMSNorm layer. Args: x (torch.Tensor): The input tensor. Returns: torch.Tensor: The output tensor after applying RMSNorm. r)r&floattype_ashasattrr)rr%outputrrrforward3s  zRMSNorm.forward)Tr NN) __name__ __module__ __qualname__intr'rr&r+ __classcell__rrrrr s r cCs*|dkrtjS|dkr tStd|d)z Get the normalization layer. Args: norm_layer (str): The type of normalization layer. Returns: norm_layer (nn.Module): The normalization layer. layerrmsz Norm layer z is not implemented)r LayerNormr NotImplementedError) norm_layerrrrget_norm_layerDs  r6cCsJ|dkrddS|dkrddS|dkrtjS|dkrtjStd|) zget activation layer Args: act_type (str): the activation type Returns: torch.nn.functional: the activation layer gelucSstS)NrGELUrrrr`sz&get_activation_layer..Z gelu_tanhcSs tjddS)Ntanh) approximater8rrrrr:bs relusiluzUnknown activation type: )rReLUSiLU ValueError)act_typerrrget_activation_layerVs rCcseZdZ         dded ed ed ed ed ededeejdeej ffdd Z  ddej dej dej dej fddZ Z S)IndividualTokenRefinerBlock@r>Fr1TNmlp_width_ratio mlp_drop_raterBqk_norm qk_norm_typeqkv_biasneed_CArrc  s| | d} t| |_||_||} t||}tj|fddd| |_tj||dfd|i| |_ t |}|rG|| fddd| nt |_ |rY|| fddd| nt |_ tj||fd|i| |_tj|fddd| |_t|}td ||||d| |_t|tj|d|fddi| |_|jrtd ||||||||d | |_tj|jd jtj|jd jdS) NrTr rrbias) in_channelshidden_channels act_layerdropr) hidden_size heads_numrGrHrBrIrJrKrr)rrrLrUr/rr3norm1Linear self_attn_qkvr6Identityself_attn_q_normself_attn_k_normself_attn_projnorm2rCrmlp SequentialadaLN_modulationCrossAttnBlockcross_attnblockinitzeros_rrO)rrTrUrGrHrBrIrJrKrLrrrhead_dimmlp_hidden_dim qk_norm_layerrRrrrrks    z$IndividualTokenRefinerBlock.__init__r%c attn_maskyc Cs||jddd\}}||}||}t|dd|jd\} } } || | } || | } t | | | d|d} |t | | |}|j rP| ||||}|t ||||}|S) Nrrr B L (K H D) -> K B L H DrNKHrmoderi)r`chunkrVrXrrUrZtor[r r r\rLrbr^r]) rr%rhrirjgate_msagate_mlpnorm_xqkvqkvattnrrrr+s  z#IndividualTokenRefinerBlock.forward rErFr>Fr1TFNNNNr,r-r.strr'boolrrrrrTensorr+r0rrrrrDjsP    MrDcseZdZ        dded ed ed ed ed edeejdeej ffdd Z  ddej dej dej dej fddZ Z S)rarErFr>Fr1TNrGrHrBrIrJrKrrc sn| | d} t||_||} tj|fddd| |_tj|fddd| |_tj||fd|i| |_tj||dfd|i| |_ t |} |rX| | fddd| nt |_ |rj| | fddd| nt |_ tj||fd|i| |_tj|fddd| |_t|}t|tj|d|fddi| |_tj|jdjtj|jdjdS)NrTr rMrOrr)rrrUrr3rVnorm1_2rW self_attn_q self_attn_kvr6rYrZr[r\r]rCr_r`rcrdrrO)rrTrUrGrHrBrIrJrKrrrrergrRrrrrs| zCrossAttnBlock.__init__r%rhrirjcCs||jddd\}}||}||}||} t| d|jd} ||} t| dd|jd\} } ||  | } | |  | } t | | | d|d } |t | | |}|S) NrrrkzB L (H D) -> B L H D)rorlrmrrp)r`rrrVrrrrUrrZrsr[r r r\)rr%rhrirjrtrurvZnorm_yrxkvryrzr{rrrr+s    zCrossAttnBlock.forward)rErFr>Fr1TNNr}r~rrrrrasJ   >racseZdZ         dded ed ed ed ed ededeejdeej ffdd Z  ddej dej deej dej fddZ ZS)IndividualTokenRefinerrErFr>Fr1TNrGrHrBrIrJrKrLrrc sN| | dt| _t f ddt|D _dS)Nrcs0g|]}td jd qS)) rTrUrGrHrBrIrJrKrLr)rDrL).0_ rBrrUrTrHrGrIrJrKrrr As   z3IndividualTokenRefiner.__init__..)rrrLr ModuleListrangeblocks) rrTrUdepthrGrHrBrIrJrKrLrrrrrr-s    zIndividualTokenRefiner.__init__r%rhmaskrjc Csd}|dur>|jd}|jd}||j}||dd|dd|d}|dd} || @}d|dddddddf<|jD] } | ||||}qA|S)NrrrrNT)shapersrviewrepeat transposerr) rr%rhrrjZself_attn_mask batch_sizeseq_lenZself_attn_mask_1Zself_attn_mask_2blockrrrr+Ss      zIndividualTokenRefiner.forwardr|r})r,r-r.r'rrrrrrrr LongTensorr+r0rrrrr,sP     *rcseZdZdZ         dd ed ed ed edededededeej deej ffdd Z ddej dej deej dej fddZZS)SingleTokenRefinerzE A single token refiner block for llm text embedding refine. rErFr>Fr1TrNrGrHrBrIrJrKrL attn_moderrc s|| d}t| |_| |_|jdksJdtj||fddi||_|jr6tj||fddi||_t|}t ||fi||_ t |||fi||_ t d|||||||| | | d ||_dS)Nrrz,Only support 'torch' mode for token refiner.rOT) rTrUrrGrHrBrIrJrKrLr)rrrrLrrWinput_embedderinput_embedder_CArCr t_embedderr c_embedderrindividual_token_refiner)rrPrTrUrrGrHrBrIrJrKrLrrrrrRrrrrusR   zSingleTokenRefiner.__init__r%trrjc Cs||}|dur|jdd}n|d}||jdd|jdd}||}||}||}|jrC||}|||||}|S||||}|S)Nrrkr ) rr$ unsqueezesumrrrLrr) rr%rrrjZtimestep_aware_representationsZcontext_aware_representations mask_floatrhrrrr+s$      zSingleTokenRefiner.forward) rErFr>Fr1TFrNNr})r,r-r.__doc__r'rrrrrrrrrr+r0rrrrrqsX      9rcs6eZdZddddddejffdd Zd d ZZS) Qwen2Connectorii rFNc st||d}td|||||d||_t|d|_tt d|_ t |j j d7_ WddS1s@wYdS)Nr)rPrTrUrrLirgQr) rrrSrrWglobal_proj_outrrzeros scale_factorno_graddata) rrPrTrUrrLrrrrrrrs  "zQwen2Connector.__init__cCsP|d}||jdd|jddd|j}||}||||}||fS)Nr rrk)rrrrr)rr%rrrx_meanZ global_outencoder_hidden_statesrrrr+s   zQwen2Connector.forward)r,r-r.rbfloat16rr+r0rrrrrsr)typingrrtorch.nneinopsrrlayersrrrr r Moduler r6rCrDrarrrrrrrs   9jXEW