import os
import math

import torch
import torch.nn as nn
import numpy as np
from einops import repeat

from audioldm.utils import instantiate_from_config


def make_beta_schedule(
    schedule, n_timestep, linear_start=1e-4, linear_end=2e-2, cosine_s=8e-3
):
    if schedule == "linear":
        betas = (
            torch.linspace(
                linear_start**0.5, linear_end**0.5, n_timestep, dtype=torch.float64
            )
            ** 2
        )
    elif schedule == "cosine":
        timesteps = (
            torch.arange(n_timestep + 1, dtype=torch.float64) / n_timestep + cosine_s
        )
        alphas = timesteps / (1 + cosine_s) * np.pi / 2
        alphas = torch.cos(alphas).pow(2)
        alphas = alphas / alphas[0]
        betas = 1 - alphas[1:] / alphas[:-1]
        betas = np.clip(betas, a_min=0, a_max=0.999)
    elif schedule == "sqrt_linear":
        betas = torch.linspace(
            linear_start, linear_end, n_timestep, dtype=torch.float64
        )
    elif schedule == "sqrt":
        betas = (
            torch.linspace(linear_start, linear_end, n_timestep, dtype=torch.float64)
            ** 0.5
        )
    else:
        raise ValueError(f"schedule '{schedule}' unknown.")
    return betas.numpy()


def make_ddim_timesteps(
    ddim_discr_method, num_ddim_timesteps, num_ddpm_timesteps, verbose=True
):
    if ddim_discr_method == "uniform":
        c = num_ddpm_timesteps // num_ddim_timesteps
        ddim_timesteps = np.asarray(list(range(0, num_ddpm_timesteps, c)))
    elif ddim_discr_method == "quad":
        ddim_timesteps = (
            (np.linspace(0, np.sqrt(num_ddpm_timesteps * 0.8), num_ddim_timesteps)) ** 2
        ).astype(int)
    else:
        raise NotImplementedError(
            f'There is no ddim discretization method called "{ddim_discr_method}"'
        )

    # add one to get the final alpha values right (the ones from first scale to data during sampling)
    steps_out = ddim_timesteps + 1
    if verbose:
        print(f"Selected timesteps for ddim sampler: {steps_out}")
    return steps_out


def make_ddim_sampling_parameters(alphacums, ddim_timesteps, eta, verbose=True):
    # select alphas for computing the variance schedule
    alphas = alphacums[ddim_timesteps]
    alphas_prev = np.asarray([alphacums[0]] + alphacums[ddim_timesteps[:-1]].tolist())

    # sigma_t follows the DDIM formulation (https://arxiv.org/abs/2010.02502)
    sigmas = eta * np.sqrt((1 - alphas_prev) / (1 - alphas) * (1 - alphas / alphas_prev))
    if verbose:
        print(f"Selected alphas for ddim sampler: a_t: {alphas}; a_(t-1): {alphas_prev}")
        print(
            f"For the chosen value of eta, which is {eta}, "
            f"this results in the following sigma_t schedule for ddim sampler {sigmas}"
        )
    return sigmas, alphas, alphas_prev


def betas_for_alpha_bar(num_diffusion_timesteps, alpha_bar, max_beta=0.999):
    """
    Create a beta schedule that discretizes the given alpha_t_bar function,
    which defines the cumulative product of (1-beta) over time from t = [0,1].
    :param num_diffusion_timesteps: the number of betas to produce.
    :param alpha_bar: a lambda that takes an argument t from 0 to 1 and
                      produces the cumulative product of (1-beta) up to that
                      part of the diffusion process.
    :param max_beta: the maximum beta to use; use values lower than 1 to
                     prevent singularities.
    """
    betas = []
    for i in range(num_diffusion_timesteps):
        t1 = i / num_diffusion_timesteps
        t2 = (i + 1) / num_diffusion_timesteps
        betas.append(min(1 - alpha_bar(t2) / alpha_bar(t1), max_beta))
    return np.array(betas)


def extract_into_tensor(a, t, x_shape):
    # Gather the per-timestep values in `a` indexed by `t`, then reshape so
    # they broadcast over a batch of shape `x_shape`.
    b, *_ = t.shape
    out = a.gather(-1, t).contiguous()
    return out.reshape(b, *((1,) * (len(x_shape) - 1))).contiguous()


def checkpoint(func, inputs, params, flag):
    """
    Evaluate a function without caching intermediate activations, allowing for
    reduced memory at the expense of extra compute in the backward pass.
    :param func: the function to evaluate.
    :param inputs: the argument sequence to pass to `func`.
    :param params: a sequence of parameters `func` depends on but does not
                   explicitly take as arguments.
    :param flag: if False, disable gradient checkpointing.
    """
    if flag:
        args = tuple(inputs) + tuple(params)
        return CheckpointFunction.apply(func, len(inputs), *args)
    else:
        return func(*inputs)


class CheckpointFunction(torch.autograd.Function):
    @staticmethod
    def forward(ctx, run_function, length, *args):
        ctx.run_function = run_function
        ctx.input_tensors = list(args[:length])
        ctx.input_params = list(args[length:])

        with torch.no_grad():
            output_tensors = ctx.run_function(*ctx.input_tensors)
        return output_tensors

    @staticmethod
    def backward(ctx, *output_grads):
        ctx.input_tensors = [x.detach().requires_grad_(True) for x in ctx.input_tensors]
        with torch.enable_grad():
            # Re-run the forward pass on shallow copies so that in-place ops
            # inside run_function do not touch the detached tensors' storage.
            shallow_copies = [x.view_as(x) for x in ctx.input_tensors]
            output_tensors = ctx.run_function(*shallow_copies)
        input_grads = torch.autograd.grad(
            output_tensors,
            ctx.input_tensors + ctx.input_params,
            output_grads,
            allow_unused=True,
        )
        del ctx.input_tensors
        del ctx.input_params
        del output_tensors
        return (None, None) + input_grads


def timestep_embedding(timesteps, dim, max_period=10000, repeat_only=False):
    """
    Create sinusoidal timestep embeddings.
    :param timesteps: a 1-D Tensor of N indices, one per batch element.
                      These may be fractional.
    :param dim: the dimension of the output.
    :param max_period: controls the minimum frequency of the embeddings.
    :return: an [N x dim] Tensor of positional embeddings.
    """
    if not repeat_only:
        half = dim // 2
        freqs = torch.exp(
            -math.log(max_period)
            * torch.arange(start=0, end=half, dtype=torch.float32)
            / half
        ).to(device=timesteps.device)
        args = timesteps[:, None].float() * freqs[None]
        embedding = torch.cat([torch.cos(args), torch.sin(args)], dim=-1)
        if dim % 2:
            embedding = torch.cat(
                [embedding, torch.zeros_like(embedding[:, :1])], dim=-1
            )
    else:
        embedding = repeat(timesteps, "b -> b d", d=dim)
    return embedding


def zero_module(module):
    """
    Zero out the parameters of a module and return it.
    """
    for p in module.parameters():
        p.detach().zero_()
    return module


def scale_module(module, scale):
    """
    Scale the parameters of a module and return it.
    """
    for p in module.parameters():
        p.detach().mul_(scale)
    return module


def mean_flat(tensor):
    """
    Take the mean over all non-batch dimensions.
    """
    return tensor.mean(dim=list(range(1, len(tensor.shape))))


def normalization(channels):
    """
    Make a standard normalization layer.
    :param channels: number of input channels.
    :return: an nn.Module for normalization.
    """
    return GroupNorm32(32, channels)


class SiLU(nn.Module):
    def forward(self, x):
        return x * torch.sigmoid(x)


class GroupNorm32(nn.GroupNorm):
    def forward(self, x):
        # Run GroupNorm in float32 for numerical stability, then cast back.
        return super().forward(x.float()).type(x.dtype)


def conv_nd(dims, *args, **kwargs):
    """
    Create a 1D, 2D, or 3D convolution module.
    """
    if dims == 1:
        return nn.Conv1d(*args, **kwargs)
    elif dims == 2:
        return nn.Conv2d(*args, **kwargs)
    elif dims == 3:
        return nn.Conv3d(*args, **kwargs)
    raise ValueError(f"unsupported dimensions: {dims}")


def linear(*args, **kwargs):
    """
    Create a linear module.
    """
    return nn.Linear(*args, **kwargs)


def avg_pool_nd(dims, *args, **kwargs):
    """
    Create a 1D, 2D, or 3D average pooling module.
    """
    if dims == 1:
        return nn.AvgPool1d(*args, **kwargs)
    elif dims == 2:
        return nn.AvgPool2d(*args, **kwargs)
    elif dims == 3:
        return nn.AvgPool3d(*args, **kwargs)
    raise ValueError(f"unsupported dimensions: {dims}")


class HybridConditioner(nn.Module):
    def __init__(self, c_concat_config, c_crossattn_config):
        super().__init__()
        self.concat_conditioner = instantiate_from_config(c_concat_config)
        self.crossattn_conditioner = instantiate_from_config(c_crossattn_config)

    def forward(self, c_concat, c_crossattn):
        c_concat = self.concat_conditioner(c_concat)
        c_crossattn = self.crossattn_conditioner(c_crossattn)
        return {"c_concat": [c_concat], "c_crossattn": [c_crossattn]}


def noise_like(shape, device, repeat=False):
    repeat_noise = lambda: torch.randn((1, *shape[1:]), device=device).repeat(
        shape[0], *((1,) * (len(shape) - 1))
    )
    noise = lambda: torch.randn(shape, device=device)
    return repeat_noise() if repeat else noise()
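
# Minimal usage sketch of the schedule and embedding helpers above.  The
# concrete numbers (1000 DDPM steps, batch of 4, 128-dim embedding, an
# 8x16x16 dummy latent) are arbitrary illustration values, not AudioLDM
# configuration defaults.
if __name__ == "__main__":
    betas = make_beta_schedule("linear", 1000)  # numpy array, shape (1000,)
    alphas_cumprod = torch.tensor(np.cumprod(1.0 - betas, axis=0))

    t = torch.randint(0, 1000, (4,))  # one timestep index per batch element
    emb = timestep_embedding(t, 128)  # -> [4, 128] sinusoidal embeddings

    # extract_into_tensor broadcasts per-timestep scalars over a latent batch,
    # e.g. to form q(x_t | x_0) = sqrt(a_bar_t) * x_0 + sqrt(1 - a_bar_t) * eps.
    x0 = torch.randn(4, 8, 16, 16)
    eps = noise_like(x0.shape, x0.device)
    x_t = (
        extract_into_tensor(alphas_cumprod.sqrt(), t, x0.shape) * x0
        + extract_into_tensor((1.0 - alphas_cumprod).sqrt(), t, x0.shape) * eps
    )
    print(emb.shape, x_t.shape)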