zTeddlZddlmcmZddlZddlmZddl m Z m Z ddl m ZddlmZmZmZGddejjZGdd ejjZdS) N) get_window) pad_centertiny)mel)dynamic_range_compressiondynamic_range_decompressionwindow_sumsquarec6eZdZdZdfd ZdZdZdZxZS)STFTzFadapted from Prem Seetharaman's https://github.com/pseeth/pytorch-stfthannc $tt|||_||_||_||_d|_|j|jz }tj tj |j}t|jdz dz}tj tj |d|ddftj|d|ddfg}tj|dddddf}tjtj||zjdddddf} |Z||ksJt)||d} t+| |} tj| } || z}| | z} |d||d| dS)NT)fftbins forward_basis inverse_basis)superr __init__ filter_length hop_length win_lengthwindowforward_transformnpffteyeintvstackrealimagtorch FloatTensorlinalgpinvTrr from_numpyfloatregister_buffer) selfrrrrscale fourier_basiscutoffrr fft_window __class__s //home/deep/mustango_demo/audioldm/audio/stft.pyrz STFT.__init__s dD""$$$*$$ !%"T_4 26$*<#=#=>> d(1,q022 W]7F7AAA:. / /wwPQPQPQz9R1S1S T  )-4 *CDD ) INN5=0 1 1 3AAAtQQQJ ?     J....#FJEEEJ#J >>J)*55;;==J Z 'M Z 'M _m.A.A.C.CDDD _m.A.A.C.CDDDDDcb|jj}||}|d}|d}||_||d|}t j|dt|j dz t|j dz ddfd}| d}t j |tj|jd|jd}t|j dz dz}|ddd|ddf}|dd|dddf}tj|dz|dzz} tjtj|j|j} | | fS) Nrrrreflect)modeF requires_gradstridepadding)rdevicetosize num_samplesviewFpad unsqueezerrsqueezeconv1dr!autogradVariablersqrtatan2data) r) input_datar9 num_batchesr<rr, real_part imag_part magnitudephases r/ transformzSTFT.transform4s#*]]6**  ooa((  ooa(( & __[![AA U   # # #a' ( (#d.@1.D*E*Eq! L    ''** H  N # #D$6e # L L?    d(1,122%aaa&!!!m4 %aaa!!!m4 Jy!|il:;; '' ININ(S(STT%r0c|jj}||||}}tj|tj|z|tj|zgd}tj|tj |j d|j d}|j t|j |d|j |j|jt$j}tjt%j|t-|kd}tj tj|d}|}|dddd|fxx||zcc<|t/|j|j z z}|ddddt1|jd z df}|dddddt1|jd z  f}|S) NrdimFr4rr6)rrn_fftdtyper)rr9r:r!catcossinr>conv_transpose1drCrDrrrr r;rrrfloat32r&whererr'r)r)rLrMr9recombine_magnitude_phaseinverse_transform window_sumapprox_nonzero_indicess r/inversez STFT.inverseVs#*$<<//&1A1A5 $)I 5)) )9uy7G7G+G Ha% % % !. % N # #D$6e # L L?     ; ") r""??(j J&+%5d:&6&6677:&& "00 ,,E1J$J aaa$:: ; ; ;z&@  ; ; ; t'9!:!:T_!L L -aaaC8JQ8N4O4O4Q4Q.QR-aaa4Ts4;MPQ;Q7R7R6R4T.TU  r0c||\|_|_||j|j}|SN)rNrLrMr_)r)rHreconstructions r/forwardz STFT.forwards9%)^^J%?%?" dndjAAr0)r ) __name__ __module__ __qualname____doc__rrNr_rc __classcell__r.s@r/r r syPP E E E E E ED    D*!*!*!Xr0r c>eZdZfdZdZdZejfdZxZ S) TacotronSTFTc<tt|||_||_t ||||_t|||||}tj | }| d|dS)N mel_basis) rrkrn_mel_channels sampling_rater stft_fnlibrosa_mel_fnr!r&r'r() r)rrrrnromel_fminmel_fmaxrmr.s r/rzTacotronSTFT.__init__s lD!!**,,,,*M:zBB " =.(H  $Y//5577  [)44444r0c&t||}|Sra)r)r) magnitudes normalize_funoutputs r/spectral_normalizezTacotronSTFT.spectral_normalizes*:}EE r0c$t|}|Sra)r)r)rurws r/spectral_de_normalizez"TacotronSTFT.spectral_de_normalizes,Z88 r0ctj|jdks!Jtj|jtj|jdks!Jtj|j|j|\}}|j}tj|j|}|||}tj |d}|||}|||fS)aComputes mel-spectrograms from a batch of waves PARAMS ------ y: Variable(torch.FloatTensor) with shape (B, T) in range [-1, 1] RETURNS ------- mel_output: torch.FloatTensor of shape (B, n_mel_channels, T) rRrrP) r!minrGmaxrprNmatmulrmrxnorm)r)yrvruphases mel_outputenergylog_magnitudess r/mel_spectrogramzTacotronSTFT.mel_spectrogramsy  B&&& !&(9(9&&&y  A%%%uy'8'8%%%!\33A66 F_ \$.*== ,,ZGG JA...00]KK>611r0) rdrerfrrxrzr!logrrhris@r/rkrksq55555(05y22222222r0rk)r!torch.nn.functionalnn functionalr>numpyr scipy.signalr librosa.utilrrlibrosa.filtersrrqaudioldm.audio.audio_processingrrr Moduler rkr0r/rs ######))))))))111111vvvvv58?vvvr222222222258?2222222222r0