U Ad#U@slddlZddlZddlZddlZddlmZddlmmZ ddlm Z m Z m Z m Z ddlmZmZmZddlmZddlmZmZdZd%d d ZGd d d ejjZGd ddejjZddZGdddejjZGdddejjZGdddejjZGdddejjZ GdddejjZ!GdddejjZ"GdddejjZ#dd Z$d!d"Z%d#d$Z&dS)&N) AvgPool1dConv1dConv2dConvTranspose1d)remove_weight_norm spectral_norm weight_norm)AttrDict) get_padding init_weights皙?cudac Cstjtj|dd}t|}|}W5QRXt|}t|a t t  |}t |}||d||~|t fS)Nrz config.json generator)ospathjoinsplitopenreadjsonloadsr h Generatortotorchloadload_state_dictevalr) model_pathdevice config_filefdata json_configrcp_dictr&IC:\Green_Program_Files\So-VITS-SVC-Shengshuyan\vdecoder\hifigan\models.py load_models   r(cs.eZdZd fdd ZddZddZZS) ResBlock1r r*cstt|||_ttt|||d|dt||ddtt|||d|dt||ddtt|||d|dt||ddg|_ |j t ttt|||ddt|ddtt|||ddt|ddtt|||ddt|ddg|_ |j t dS)Nr rdilationpadding) superr)__init__rnn ModuleListrrr convs1applyr convs2selfrchannels kernel_sizer. __class__r&r'r2%s4    zResBlock1.__init__cCsLt|j|jD]8\}}t|t}||}t|t}||}||}q|SN)zipr5r7F leaky_relu LRELU_SLOPE)r9xc1c2xtr&r&r'forward<s   zResBlock1.forwardcCs,|jD] }t|q|jD] }t|qdSr>)r5rr7r9lr&r&r'rEs   zResBlock1.remove_weight_norm)r*r+__name__ __module__ __qualname__r2rGr __classcell__r&r&r<r'r)$s r)cs.eZdZd fdd ZddZddZZS) ResBlock2r*r r*csxtt|||_ttt|||d|dt||ddtt|||d|dt||ddg|_ |j t dS)Nr rr-) r1rOr2rr3r4rrr convsr6r r8r<r&r'r2Ms  zResBlock2.__init__cCs,|jD] }t|t}||}||}q|Sr>)rQr@rArB)r9rCcrFr&r&r'rGXs    zResBlock2.forwardcCs|jD] }t|qdSr>)rQrrHr&r&r'r_s zResBlock2.remove_weight_norm)r*rPrJr&r&r<r'rOLs rOcCs tt|ddd|dddS)N)rrr constantr)rrrrS)r@pad)rCr&r&r'padDiffdsrVcs<eZdZdZdfdd Zdd Zd d Zdd dZZS)SineGena Definition of sine generator SineGen(samp_rate, harmonic_num = 0, sine_amp = 0.1, noise_std = 0.003, voiced_threshold = 0, flag_for_pulse=False) samp_rate: sampling rate in Hz harmonic_num: number of harmonic overtones (default 0) sine_amp: amplitude of sine-wavefrom (default 0.1) noise_std: std of Gaussian noise (default 0.003) voiced_thoreshold: F0 threshold for U/V classification (default 0) flag_for_pulse: this SinGen is used inside PulseGen (default False) Note: when flag_for_pulse is True, the first time step of a voiced segment is always sin(np.pi) or cos(0) rr ~jth?FcsHtt|||_||_||_|jd|_||_||_||_ d|_ dS)Nr F) r1rWr2sine_amp noise_std harmonic_numdim sampling_ratevoiced_thresholdflag_for_pulseonnx)r9 samp_rater[rYrZr^r_r<r&r'r2ws zSineGen.__init__cCs||jktj}|Sr>)r^typerfloat32)r9f0uvr&r&r'_f02uvszSineGen._f02uvcCs||jd}tj|jd|jd|jd}d|dddf<|dddddf||dddddf<|jst|dd}t|dk}t|}|d|ddddddf<t tj||dddt j }n| |}tj |ddd } d| dddddf<|dk| dk} tj|dd} t|jdD]} | | | | dddfddf} | ddddf| ddddf| ddddf<d| | ddddf<| | | | | dddfddf<q4tj|| dd}t|dt j }|S) zl f0_values: (batchsize, length, dim) where dim indicates fundamental tone and overtones r rr0r Nr\rS)shiftsdims)r]rrandshaper r_cumsumrV zeros_likesinnppirfrollrangecos)r9Z f0_values rad_valuesrand_ini tmp_over_onetmp_over_one_idx cumsum_shiftZsinesreZuv_1Zu_locZ tmp_cumsumidxZtemp_sumZi_phaser&r&r'_f02sines:,     8$zSineGen._f02sineNc Cs|jrt`|dddfdd}tj|jd|jd|j|jd}|dddddf|dddddf<t |j D]8}|dddddf|d|dddd|df<q||j d}tj |jd|jd|jd}d|dddf<|dddddf||dddddf<t |d}||9}tj|dd|ddddd}tj|dd|d d dd}|d;}|ddddddf|dddd ddfdk}t|} |d | ddddddf<ttj || dd dtj} | |j} ||} tj| dd|d d dd} | |jd| |jd} | t| } | | | } W5QRX| | | fStt|ttd|j dgg|j}|||j} ||} | |jd| |jd} | t| } | | | } W5QRX| | | fSdS)z sine_tensor, uv = forward(f0) input F0: tensor(batchsize=1, length, dim=1) f0 for unvoiced steps should be 0 output sine_tensor: tensor(batchsize=1, length, dim) output uv: tensor(batchsize=1, length, 1) Nr r0rrglinearT) scale_factormode align_cornersnearest)r~rrSrhrir*)r`rno_grad transposezerosrmr\r rqaranger[r]rlrnr@ interpolaterorprrrYrfrZ randn_likemultiply FloatTensorrtrr|)r9rduppZf0_bufr{rvrwrxryrz sine_wavesre noise_ampnoisefnr&r&r'rGs "(,   8      * zSineGen.forward)rr rXrF)N) rKrLrM__doc__r2rfr|rGrNr&r&r<r'rWgs;rWcs,eZdZdZd fdd Zd dd ZZS) SourceModuleHnNSFa SourceModule for hn-nsf SourceModule(sampling_rate, harmonic_num=0, sine_amp=0.1, add_noise_std=0.003, voiced_threshod=0) sampling_rate: sampling_rate in Hz harmonic_num: number of harmonic above F0 (default: 0) sine_amp: amplitude of sine source signal (default: 0.1) add_noise_std: std of additive Gaussian noise (default: 0.003) note that amplitude of noise in unvoiced is decided by sine_amp voiced_threshold: threhold to set U/V given F0 (default: 0) Sine_source, noise_source = SourceModuleHnNSF(F0_sampled) F0_sampled (batchsize, length, 1) Sine_source (batchsize, length, 1) noise_source (batchsize, length 1) uv (batchsize, length, 1) rr rXcsPtt|||_||_t||||||_tj |dd|_ tj |_ dS)Nr ) r1rr2rYrZrW l_sin_genrr3Linearl_linearTanhl_tanh)r9r]r[rY add_noise_stdvoiced_threshodr<r&r'r2$szSourceModuleHnNSF.__init__NcCsL|||\}}}||||jjj}t||jd}|||fS)z Sine_source, noise_source = SourceModuleHnNSF(F0_sampled) F0_sampled (batchsize, length, 1) Sine_source (batchsize, length, 1) noise_source (batchsize, length 1) r*) rrrrweightdtyperrrY)r9rCr sine_wavsre_ sine_mergerr&r&r'rG3szSourceModuleHnNSF.forward)rr rXr)N)rKrLrMrr2rGrNr&r&r<r'rs rcs6eZdZfddZddZd ddZdd ZZS) rc sbtt|||_t|d|_t|d|_tjj t |dd|_ t |ddd|_t|_tt|d|dd d d d |_|d dkrtnt}t|_tt|d|dD]\}\}}|dd|d }|jtt|dd||dd|d ||||d dd |d t|dkrxt |d|d d}|jtd ||d||d ddq|jtd |d dqt|_tt|jD]V}|dd|d }tt|d|dD]$\} \}} |j||||| q֐qtt|d d d d d |_|jt|jtt|d|dd |_ t |d|_!d|_"dS)Nresblock_kernel_sizesupsample_rates)r~r])r]r[inter_channelsupsample_initial_channelr r*r/resblock1upsample_kernel_sizesr0)r;strider/)r;resblock_dilation_sizes gin_channelsF)#r1rr2rlen num_kernels num_upsamplesrr3Upsamplerqprod f0_upsamprm_sourcer4 noise_convsrrconv_prer)rOups enumerater?appendr resblocksrt conv_postr6r condrr`) r9rriukc_cur stride_f0chjdr<r&r'r2DsV   ""  "  zGenerator.__init__cCsd|_d|jj_dS)NT)r`rrr9r&r&r' OnnxExportjszGenerator.OnnxExportNc Cs|js$||dddfdd}|||j\}}}|dd}||}|||}t|jD]}t |t }|j ||}|j ||}||}d} t|jD]B} | dkr|j||j| |} q| |j||j| |7} q| |j}qft |}||}t|}|S)Nr r0)r`rrrrrrrtrr@rArBrrrrrrtanh) r9rCrdg har_sourceZ noi_sourcererx_sourcexsrr&r&r'rGns*       zGenerator.forwardcCsHtd|jD] }t|q|jD] }|q"t|jt|jdS)NzRemoving weight norm...)printrrrrrrHr&r&r'rs     zGenerator.remove_weight_norm)N)rKrLrMr2rrGrrNr&r&r<r'rCs & rcs&eZdZdfdd ZddZZS) DiscriminatorPr,r*Fcstt|||_|dkr tnt}t|tdd|df|dft dddfd|tdd|df|dft dddfd|tdd|df|dft dddfd|tdd |df|dft dddfd|td d |dfdd dg|_ |td dd dd d|_ dS) NFr r,rr)r0r)r*r )r r) r1rr2periodrrr3r4rr rQr)r9rr;ruse_spectral_normnorm_fr<r&r'r2s&&&&zDiscriminatorP.__init__cCsg}|j\}}}||jdkrH|j||j}t|d|fd}||}|||||j|j}|jD]"}||}t|t}||qf| |}||t |dd}||fS)Nrreflectr rS) rmrr@rUviewrQrArBrrrflatten)r9rCfmapbrRtn_padrIr&r&r'rGs      zDiscriminatorP.forward)r,r*FrKrLrMr2rGrNr&r&r<r'rs rcs&eZdZdfdd ZddZZS)MultiPeriodDiscriminatorNcsTtt||dk r|n dddddg|_t|_|jD]}|jt|q:dS)Nr0r*r,r ) r1rr2periodsr3r4discriminatorsrr)r9rrr<r&r'r2s   z!MultiPeriodDiscriminator.__init__c Cspg}g}g}g}t|jD]H\}}||\} } ||\} } || || || || q||||fSr>)rrr r9yy_haty_d_rsy_d_gsfmap_rsfmap_gsrry_d_rfmap_ry_d_gfmap_gr&r&r'rGs      z MultiPeriodDiscriminator.forward)Nrr&r&r<r'rsrcs&eZdZdfdd ZddZZS)DiscriminatorSFcstt||dkrtnt}t|tdddddd|tddddd d d |tdd ddd d d |td ddd d d d |tdddd d d d |tddddd d d |tddddddg|_|tdddddd|_ dS)NFr rrr)r0)groupsr/rrr,r*) r1rr2rrr3r4rrQr)r9rrr<r&r'r2s zDiscriminatorS.__init__cCsXg}|jD]"}||}t|t}||q ||}||t|dd}||fS)Nr rS)rQr@rArBrrrr)r9rCrrIr&r&r'rGs     zDiscriminatorS.forward)Frr&r&r<r'rsrcs$eZdZfddZddZZS)MultiScaleDiscriminatorcsRtt|ttddttg|_ttddddtddddg|_dS)NT)rrr0r) r1rr2r3r4rrr meanpoolsrr<r&r'r2s  z MultiScaleDiscriminator.__init__c Csg}g}g}g}t|jD]t\}}|dkrN|j|d|}|j|d|}||\} } ||\} } || || || || q||||fS)Nrr )rrrrrr&r&r'rGs      zMultiScaleDiscriminator.forwardrr&r&r<r'rs rc CsLd}t||D]4\}}t||D] \}}|tt||7}q q|dS)Nrr0)r?rmeanabs)rrlossdrdgrlglr&r&r' feature_losss rc Csrd}g}g}t||D]P\}}td|d}t|d}|||7}||||q|||fSNrr r0)r?rrritem) disc_real_outputsdisc_generated_outputsrr_lossesg_lossesrrr_lossg_lossr&r&r'discriminator_losss rcCs>d}g}|D](}td|d}||||7}q ||fSr)rrr) disc_outputsr gen_lossesrrIr&r&r'generator_loss%s  r)r)'rrnumpyrqrtorch.nnr3Ztorch.nn.functional functionalr@rrrrZtorch.nn.utilsrrrenvr utilsr r rBr(Moduler)rOrVrWrrrrrrrrrr&r&r&r's2   (,1S$