U l¹xd4]ã@sLddlZddlZddlZddlZddlZddlZddlmZddlm Z ddl Z ddl Z ddl Z ddlZddlZddlZddlZddlZddlmZddlZddlmZddlZe d¡ ej¡dd„Zd d „Zd d „Zd d„Zdd„Z dd„Z!dd„Z"e#dœdd„Z$dd„Z%d"dd„Z&Gdd„de'ƒZ(Gdd„de)ƒZ*Gd d!„d!ƒZ+dS)#éN)ÚPath)Úslicer)ÚSynthesizerTrn)Úload_model_vocoderÚ matplotlibc Cstj |¡s:t|dƒ}| t ddi¡¡W5QRXiSz˜t|dƒ}| ¡}W5QRXt |¡}tj  |¡dkrÐ|  dd¡  d¡d}t d |›ƒt | ¡ƒD]*}tt ¡ƒt||d ƒd kr¤||=q¤WnBtk r}z"t |ƒt |›d ƒddi}W5d}~XYnX|SdS) NÚwÚinfoZ temp_dictÚri ú\ú/éÿÿÿÿzclean Útimeiuz error,auto rebuild file)ÚosÚpathÚexistsÚopenÚwriteÚjsonÚdumpsÚreadÚloadsÚgetsizeÚreplaceÚsplitÚprintÚlistÚkeysÚintr Ú Exception)Ú file_nameÚfÚdataÚ data_dictÚf_nameZwav_hashÚe©r%ú&D:\so-vits-svc\inference\infer_tool.pyÚ read_temps&      r'c Cs*t|dƒ}| t |¡¡W5QRXdS)Nr)rrrr)rr!r r%r%r&Ú write_temp4s r(cs‡fdd„}|S)Ncs0t ¡}ˆ||Ž}tdˆjt ¡|fƒ|S)Nzexecuting '%s' costed %.3fs)r rÚ__name__)ÚargsÚkwargsÚtÚres©Úfuncr%r&Úrun:s ztimeit..runr%)r/r0r%r.r&Útimeit9s r1cCsBt|ƒjdkrdStj|ddd\}}t t|ƒ d¡||¡dS)Nú.wavT)ÚmonoÚsr)rÚsuffixÚlibrosaÚloadÚ soundfilerÚ with_suffix)Ú audio_pathZ raw_audioZraw_sample_rater%r%r&Ú format_wavCsr;cCsrg}t |¡D]^\}}}dd„|Dƒ}dd„|Dƒ|dd…<|D]*}| |¡r@| tj ||¡ dd¡¡q@q|S)NcSsg|]}|ddkr|‘qS©rÚ.r%)Ú.0r r%r%r&Ú Ms z get_end_file..cSsg|]}|ddkr|‘qSr<r%)r>Údr%r%r&r?Ns r r )rÚwalkÚendswithÚappendrÚjoinr)Údir_pathÚendZ file_listsÚrootÚdirsÚfilesZf_filer%r%r&Ú get_end_fileJs  rJcCst d|¡ ¡S)NÚmd5)ÚhashlibÚnewÚ hexdigest)Úcontentr%r%r&Úget_md5UsrPcCs>t|ƒt|ƒkr:tdt|ƒt|ƒƒD]}| |d¡q&dS©Nr)ÚlenÚrangerC)ÚaÚbÚ_r%r%r&Ú fill_a_to_bXsrW)ÚpathscCs$|D]}tj |¡st |¡qdS©N)rrrÚmkdir)rXrr%r%r&rZ]s rZcCsL|jd}||kr|S||}|d}||}tj|||fddd}|SdS)NréÚconstant©rr)Úconstant_values)ÚshapeÚnpÚpad)ÚarrZ target_lengthZcurrent_lengthÚ pad_widthÚpad_leftÚ pad_rightZ padded_arrr%r%r&Ú pad_arraybs rfccs@tdt|ƒ|ƒD]*}|||dkr*||n|||…VqdSrQ)rSrR)Zlist_collectionÚnÚpreÚir%r%r&Úsplit_list_by_nmsrjc@s eZdZdS)ÚF0FilterExceptionN)r)Ú __module__Ú __qualname__r%r%r%r&rkrsrkc @sNeZdZddd„Zddd „Zdd d „Zddd„Zdd„Zdd„Zd dd„Z dS)!ÚSvcNúlogs/44k/kmeans_10000.ptFúlogs/44k/diffusion/model_0.ptúconfigs/diffusion.yamlc  Csš||_| |_||_| |_|dkr©Úorig_srÚ target_srr¢ú0The name you entered is not in the speaker list!zstarting feature retrieval...é)ÚkT)ÚaxisÚkeepdims)r´zend feature retrieval...),rZget_f0_predictorr‡r…rZ compute_f0_uvÚsumrkr}Ú FloatTensorr©Ú unsqueezer6ÚresampleÚ from_numpyr”r’Úrepeat_expand_2dÚsqueezer_r|r‰ÚgetÚ RuntimeErrorÚtyperrRÚ__dict__r—Ú transposersÚnumpyr˜r™Z reconstruct_nZntotalrÚsearchr`ÚsquareÚ expand_dimsršZget_cluster_center_resultÚT)rœÚwavÚtranÚcluster_infer_ratioÚspeakerÚ f0_filterÚ f0_predictorÚ cr_thresholdZf0_predictor_objectÚf0ÚuvZwav16kÚcÚ speaker_idZ feature_indexZfeat_npÚscoreÚixÚweightÚnpyZ cluster_cr%r%r&Ú get_unit_f0ÎsN      "  zSvc.get_unit_f0rçš™™™™™Ù?Úpmédr¢c Cs”tj||jd\}}| rb|j||dd||| d\}}}| d¡}|dd…| | |…f dd¡}n†|j |¡}|dkr~tdƒ‚|s¢t |ƒt kr¢t |jj ƒ|kr¢|}t  t |ƒg¡ |j¡ d¡}|j||||||| d\}}}| d¡}d|jkrt j ¡r| ¡}t  ¡lt ¡}d}|jsÄ|jrj|j t  |¡ |j¡ddd…f¡ddd…f |j¡nd}|jj|||||||d\}}|dj  ¡}|j!r¾|j" |ddd…f|j¡nd}nt  |¡ |j¡}d}|jsê|j!rø|dkr"|j |ddd…f¡ddd…df |j¡n|dd…dd…df}|j!rš|rštj#| $¡ %¡ &¡|jd d }t  '|¡ |j¡}|j( )|¡}t* +| ,d¡|j-d¡}|dd…dd…df}| d d ¡}|j.||||d|d |j/jj0|j/jj1| d }|j" ||¡ ,¡}|j2r@|j3j4|ddd…f|j|dd…dd…df|j5jj6| d\}}|dkr`t* 7||j||j|¡}t ¡|}t8d 9|¡ƒW5QRX||j-d |fS)N)r4r)rÍr¢r±r£)rÎÚgrÏZ predict_f0Ú noice_scaleÚvolr]r­r®r éþÿÿÿT)Zspk_idZ spk_mix_dictZgt_specÚinferZ infer_speedupÚmethodÚk_step)Z adaptive_keyzvits use time:{}):r6r7r…rÖÚsizerÁr‰r½r¾r¿rrRrÀr}Ú LongTensorr©rr¸ryrrr~r£Úno_gradr rzr‹r•Úextractr·r€rÞr!Úfloatr{rr¹ÚdetachrsrÂrºr”r’rr»r¼r_rŽrÚspeeduprßrr›Úenhancerƒr†Z change_rmsrÚformat)rœrÊrÈÚraw_pathrÉÚauto_predict_f0rÛrËrÌÚenhancer_adaptive_keyrÍràÚframeÚspk_mixÚsecond_encodingÚloudness_envelope_adjustmentrÇr4rÐrÎrÏÚn_framesÚsidrÑÚstartrÜÚaudioZ audio_melZaudio16krVÚuse_timer%r%r&rÞþsz "   F*N  ö û   z Svc.infercCstj ¡dSrY)r}rrÚ empty_cache©rœr%r%r&Ú clear_emptyJszSvc.clear_emptycCsD|j d¡|_|`t|dƒr8|jj d¡|j_|j`|`t ¡dS)Nrsr›)r€r©Úhasattrr›ÚgcÚcollectr÷r%r%r&Ú unload_modelNs zSvc.unload_modelçà?çè?c=CsB|r$t|jƒdkr$|j ¡d}d}t|ƒ d¡}tj||d}t ||¡\}}t| |ƒ}t| |ƒ}t|| ƒ}||d}|||}|dkr¢t   dd|¡nd}|rˆt|jƒt|ƒksÂt ‚d}|D]®\}}tt   t|ƒ||j ¡ƒ} |r|| |j7}qÊ|dkrt|||ƒ}!n|g}!t|!ƒD]L\}"}#t||ƒ}$tt   t|#ƒ||j ¡ƒ}%|%d|$}&||&|j7}q*qÊ|t|ƒ7}tjt|ƒ|fd |j¡}'tt|ƒƒD]D}(d})||(D].}*|*dd ksâ|*dd krêtd ƒ‚t||*dƒ}+t||*dƒ},|,|+}-|-dkr$td ƒ‚|*d|*d|-}.|)dk rT|)|+krTtd ƒ‚|,})|.d kr~t |-¡ |j¡|*d}/nt |*d|*d|.¡ |j¡}/t|/ƒ|-krÖ|-t|/ƒ}0tjjj|/d|0gd d |j¡}/|/d|-…|'|(|+|,…<qÀq®tj|'dd d¡ |j¡}1t|1dƒD]8\}(}2|2d krd|1d|(<dt|ƒ|'dd…|(f<q|'|1}'tj|'ddddk ¡s„tdƒ‚|'}d}3g}4|D] \}}tdtt|ƒ|dƒ›dƒtt   t|ƒ||j ¡ƒ}-|rtdƒt  |-¡}5|4  t!t"|5|-ƒƒ¡|3|-|j7}3q”|dkr.t|||ƒ}!n|g}!t|!ƒD]ô\}"}#| dkrltt   t|#ƒ||j ¡ƒn|-}%| dkr˜tdtt|#ƒ|dƒ›dƒt||ƒ}$t  #t  |$g¡|#t  |$g¡g¡}#t$ %¡}6t&j'|6|#|dd|6 (d¡|j)|||6|||| | |||3|||d\}7}8}9|3|97}3|7 *¡ +¡}5t|j |ƒ}$|5|$|$ …}5t"|5|%ƒ}5|dkr"|"dkr"| dkr€|4|| | …n |4| d…}:| dkr¨|5|||…n |5d|…};|:d||;|}<| dkrä|4d|| …n |4d| …}4|4  |<¡| dkr|5||d…n |5|d…}5|4  t!|5ƒ¡q<q”t  ,|4¡S)Nr¢rFr2)Z db_threshr[)ráégzmix value must higer Than zero!zbegin Must lower Than end!z%[i]EndTime Must Equal [i+1]BeginTime!Úreflect)Úmode)Údimgð?g-Cëâ6?zsum(spk_mix_tensor) not equal 1z#=====segment start, zs======zjump empty segmentz###=====segment clip start, rÇ©ré) rÉrërÛrÌrìrÍràrírîrïrð)-rRr‰rrr9rÚcutZ chunks2audiorr`ÚlinspaceÚAssertionErrorÚceilr…r‡rjÚ enumerater}Úzerosr©rrSr¾ÚarangeÚnnÚ functionalrar¶r¸ÚallrÚroundÚextendrrfÚ concatenateÚioÚBytesIOr8rÚseekrÞrsrÂÚarray)=rœÚraw_audio_pathrˆrÈÚslice_dbrÉrërÛÚ pad_secondsZ clip_secondsÚlg_numÚlgr_numrÌrìrÍràÚ use_spk_mixrïrðZwav_pathÚchunksZ audio_dataZaudio_srZper_sizeZlg_sizeZ lg_size_rZ lg_size_c_lZ lg_size_c_rÚlgZ audio_lengthZ slice_tagr!Z aud_lengthÚdatasr³ÚdatÚpad_lenZ per_lengthZa_lengthZspk_mix_tensorriÚlast_endÚmixÚbeginrFÚlengthÚstepZ spk_mix_dataZnum_padZ spk_mix_tenÚxZ global_framerôÚ_audiorêZ out_audioZout_srZ out_frameÚlg1Úlg2Zlg_prer%r%r&Úslice_inferenceXsà                     *     õ   ,&* &zSvc.slice_inference) NroFrprqFFFF)F)rª) rFr×FrØrrªrÙrFFr¢) rýrrrþrØrrªrÙFFr¢) r)rlrmr¡r“rÖrÞrørür)r%r%r%r&rnusN÷ J  1ô Lîrnc@seZdZdd„Zd dd„ZdS) Ú RealTimeVCcCsd|_d|_d|_d|_dS)Nr­i)Ú last_chunkÚlast_oÚ chunk_lenÚpre_lenr÷r%r%r&r¡æszRealTimeVC.__init__rFr×c  Cs*ddl} t |¡\} } |  ¡ ¡d} t ¡} |jdkr’| d¡|j |||||||d\} } |  ¡ ¡} | |j d…|_| |_ | |j d…St  |j| g¡} tj| | | dd|  d¡|j ||| ||||d\} } |  ¡ ¡} | j |j | |j ¡} | |j d…|_| |_ | |j d|j …SdS)Nr)rÉrërÛrËrÇrr[)ÚmaadÚ torchaudior7rsrÂrrr+rrÞr.r,r-r`rr8rÚutilÚ crossfade)rœZ svc_modelrÑZf_pitch_changeZinput_wav_pathrÉrërÛrËr/rôr4Ztemp_wavÚretr%r%r&Úprocessîs<   ü    ü  zRealTimeVC.processN)rFr×F)r)rlrmr¡r4r%r%r%r&r*ås  ür*)r),rLrrÚloggingrr ÚpathlibrÚ inferencerrúr6rÂr`r8r}r0ršrÚmodelsrr–Zdiffusion.unit2melrÚyamlÚ getLoggerÚsetLevelÚWARNINGr'r(r1r;rJrPrWrrZrfrjrrkÚobjectrnr*r%r%r%r&ÚsF       r