U ØA¿dðPã@sddlZddlZddlZddlZddlZddlZddlZddlZddlZddl m Z ddl Z ddl Z ddl ZddlZddlmZddlmZddlmZdaejejejdeadZd Zd Zd e d ed ¡Z d e d ed ¡Z!dRdd„Z"dd„Z#dd„Z$dd„Z%dd„Z&dSdd„Z'dTdd„Z(dd„Z)dUd!d"„Z*iiiid#fd$d%„Z+dVd'd(„Z,d)d*„Z-dWd+d,„Z.d-d.„Z/dXd0d1„Z0dYd2d3„Z1d4d5„Z2dZd6d7„Z3d8d9„Z4d[d;d<„Z5d\d>d?„Z6d@dA„Z7d]dCdD„Z8dEdF„Z9dGdH„Z:d^dJdK„Z;GdLdM„dMƒZdS)_éN)Ú cpu_count)Úread)ÚMiniBatchKMeans)Ú functionalF)ÚstreamÚlevelég0‘@gI@égéé¼TcCs¾tj|ddd}d||dk<tj|dd…ddd…f|ddd|}|rnt |jdd¡ dd¡ |j¡}nt |jdd¡ |j¡}|| d¡| d¡}t  |¡  ¡r¶t dƒ||S) Nr T)ÚdimÚkeepdimi'rgš™™™™™é?g333333ó?éÿÿÿÿ) ÚtorchÚsumÚTensorÚshapeÚuniform_ÚtoÚdeviceÚonesÚ unsqueezeÚisnanÚanyÚexit)Úf0Zx_maskÚuvZ random_scaleZuv_sumÚmeansÚfactorÚf0_norm©r ú7C:\Green_Program_Files\So-VITS-SVC-Shengshuyan\utils.pyÚ normalize_f0s *$r"c Cs¼ts0ddl}| d¡dat d¡}| tj¡ddlm}ddl }|j dd\}}|  |¡|  |¡|  ¡|j  ¡|j|j  ¡|jdd}| |j  ¡ddd …d ¡}| ¡|S) NrÚAggTÚ matplotlib©é é©ÚfigsizeÚ©ÚdtypeÚsepr©é)ÚMATPLOTLIB_FLAGr$ÚuseÚloggingÚ getLoggerÚsetLevelÚWARNINGÚmatplotlib.pylabÚpylabÚnumpyÚsubplotsÚplotÚ tight_layoutÚcanvasÚdrawÚ fromstringÚ tostring_rgbÚuint8ÚreshapeÚget_width_heightÚclose) ÚxÚyr$Ú mpl_loggerÚpltÚnpÚfigÚaxÚdatar r r!Úplot_data_to_numpy.s"       rLcCs˜dd|d ¡}tdtt}t|d}t |dk||||¡}t |¡ ¡}||dk}||dkd}||tk}||tktd}|S)Nr r r r'gð?r)ÚlogÚf0_binÚ f0_mel_maxÚ f0_mel_minrÚwhereÚroundÚlong)rZf0_melÚaÚbZ f0_coarser r r!Ú f0_to_coarseEs   rVc Cs8t ¡| | d¡¡d}W5QRX| dd¡}|S)Nr rr')rÚno_gradZextract_featuresÚsqueezeÚ transpose)ZcmodelrEÚcr r r!Ú get_contentRs  r[c Ks|dkr"ddlm}|||d}nÞ|dkrPddlm}||||d|dd }n°|d krrdd lm}|||d}nŽ|d kr”dd lm}|||d}nl|dkrÆddlm } | ||t j |d|dd}n:|dkrøddl m } | ||t j |d|dd}ntdƒ‚|S)NÚpmr)Ú PMF0Predictor)Ú hop_lengthÚ sampling_rateÚcrepe)ÚCrepeF0PredictorrÚ threshold)r^r_rrbÚharvest)ÚHarvestF0PredictorÚdio)ÚDioF0PredictorÚrmvpe)ÚRMVPEF0Predictor)r^r_r,rrbÚfcpe)ÚFCPEF0PredictorzUnknown f0 predictor)Z!modules.F0Predictor.PMF0Predictorr]Z$modules.F0Predictor.CrepeF0PredictorraZ&modules.F0Predictor.HarvestF0PredictorrdZ"modules.F0Predictor.DioF0PredictorrfZ$modules.F0Predictor.RMVPEF0PredictorrhrÚfloat32Z#modules.F0Predictor.FCPEF0PredictorrjÚ Exception) Ú f0_predictorr^r_Úkargsr]Zf0_predictor_objectrardrfrhrjr r r!Úget_f0_predictorXs(      rocKsÂ|dkr"ddlm}||d}nœ|dkrDddlm}||d}nz|dkrfddlm}||d}nX|d krˆdd lm}||d}n6|d krªdd lm }||d}n|d krÊddl m } | |d}nô|dkrêddl m } | |d}nÔ|dkr ddlm} | |d}n²|dkr.ddlm} | |d}n|dkrPddlm} | |d}nn|dkrrddlm}||d}nL|dkr”ddlm}||d}n*|dkr¶ddlm}||d}ntdƒ‚|S)NÚ vec768l12r)ÚContentVec768L12)rÚvec256l9)ÚContentVec256L9z vec256l9-onnx)ÚContentVec256L9_Onnxzvec256l12-onnx)ÚContentVec256L12_Onnxz vec768l9-onnx)ÚContentVec768L9_Onnxzvec768l12-onnx)ÚContentVec768L12_Onnxzhubertsoft-onnx)ÚHubertSoft_OnnxÚ hubertsoft)Ú HubertSoftz whisper-ppg)Ú WhisperPPGÚ cnhubertlarge)Ú CNHubertLargeÚdphubert)ÚDPHubertzwhisper-ppg-large)ÚWhisperPPGLargez wavlmbase+)Ú WavLMBasePluszUnknown speech encoder)Zvencoder.ContentVec768L12rqZvencoder.ContentVec256L9rsZvencoder.ContentVec256L9_OnnxrtZvencoder.ContentVec256L12_OnnxruZvencoder.ContentVec768L9_OnnxrvZvencoder.ContentVec768L12_OnnxrwZvencoder.HubertSoft_OnnxrxZvencoder.HubertSoftrzZvencoder.WhisperPPGr{Zvencoder.CNHubertLarger}Zvencoder.DPHubertrZvencoder.WhisperPPGLarger€Zvencoder.WavLMBasePlusrrl)Úspeech_encoderrrnrqZspeech_encoder_objectrsrtrurvrwrxrzr{r}rr€rr r r!Úget_speech_encoderosR                           rƒc Csrtj |¡st‚tj|dd}|d}|d}|dk rT|sT|ddk rT| |d¡|d}| t|  ¡ƒdj ¡}t |dƒrŠ|j   ¡}n|  ¡}i} | ¡D]ˆ\} } z4|| | | <|| j| jksØt|| j| jfƒ‚Wqžtk r$d | ksþd | kr td | ƒt d | ¡| | | <YqžXqžt |dƒrB|j  | ¡n | | ¡td ƒt d ||¡¡||||fS)NÚcpu)Ú map_locationÚ iterationÚ learning_rateÚ optimizerÚmodelrÚmoduleÚenc_qZemb_gzq%s is not in the checkpoint,please check your checkpoint.If you're using pretrain model,just ignore this warning.z%s is not in the checkpointzload z%Loaded checkpoint '{}' (iteration {}))ÚosÚpathÚisfileÚAssertionErrorrÚloadÚload_state_dictrÚlistÚvaluesr,ÚhasattrrŠÚ state_dictÚitemsrrlÚprintÚloggerÚinfoÚformat) Úcheckpoint_pathr‰rˆZskip_optimizerZcheckpoint_dictr†r‡Zsaved_state_dictr•Únew_state_dictÚkÚvr r r!Úload_checkpoint›s<   (   ÿrŸcCsNt d ||¡¡t|dƒr(|j ¡}n| ¡}t ||| ¡|dœ|¡dS)Nz6Saving model and optimizer state at iteration {} to {}rŠ)r‰r†rˆr‡)r˜r™ršr”rŠr•rÚsave)r‰rˆr‡r†r›r•r r r!Úsave_checkpoint¾sÿ  ýýr¡ú logs/44k/r'cs¢‡fdd„t ˆ¡Dƒ‰dd„}‡fdd„}|r4|n|‰‡‡fdd„}‡fd d„|d ƒd | …|d ƒd | …Dƒ}d d„‰‡fdd„‰‡fdd„|Dƒd S)a9Freeing up space by deleting saved ckpts Arguments: path_to_models -- Path to the model directory n_ckpts_to_keep -- Number of ckpts to keep, excluding G_0.pth and D_0.pth sort_by_time -- True -> chronologically delete ckpts False -> lexicographically delete ckpts cs&g|]}tj tj ˆ|¡¡r|‘qSr )rŒrrŽÚjoin©Ú.0Úf©Úpath_to_modelsr r!Ú Ósz%clean_checkpoints..cSstt d¡ |¡ d¡ƒS)Nz ._(\d+)\.pthr )ÚintÚreÚcompileÚmatchÚgroup©Ú_fr r r!Úname_keyÔsz#clean_checkpoints..name_keycstj tj ˆ|¡¡S©N)rŒrÚgetmtimer£r¯r§r r!Útime_keyÖsz#clean_checkpoints..time_keycst‡fdd„ˆDƒˆdS)Ncs$g|]}| ˆ¡r| d¡s|‘qS)z_0.pth)Ú startswithÚendswithr¤©Ú_xr r!r©Ús z7clean_checkpoints..x_sorted..©Úkey)Úsortedr·)Ú ckpts_filesÚsort_keyr·r!Úx_sortedÙsz#clean_checkpoints..x_sortedcsg|]}tj ˆ|¡‘qSr )rŒrr£©r¥Úfnr§r r!r©ÛsÚGNÚDcSst d|›¡S)Nz".. Free up space by deleting ckpt )r˜r™)rÀr r r!Údel_infoÝsz#clean_checkpoints..del_infocst |¡ˆ|ƒgSr²)rŒÚremove)rD)rÃr r!Ú del_routineßsz&clean_checkpoints..del_routinecsg|] }ˆ|ƒ‘qSr r r¿)rÅr r!r©ás)rŒÚlistdir)r¨Zn_ckpts_to_keepZ sort_by_timer±r´r¾Úto_delr )r¼rÃrÅr¨r½r!Úclean_checkpointsÊs    "ÿ rÈi"Vc CsŠ| ¡D]\}}| |||¡q| ¡D]\}}| |||¡q(| ¡D]\}}|j|||ddqH| ¡D]\}}| ||||¡qldS)NZHWC)Z dataformats)r–Ú add_scalarZ add_histogramÚ add_imageZ add_audio) ÚwriterZ global_stepÚscalarsÚ histogramsÚimagesZaudiosZaudio_sampling_raterržr r r!Ú summarizeãsrÏúG_*.pthcCs8t tj ||¡¡}|jdd„d|d}t|ƒ|S)NcSstd ttj|ƒ¡ƒS)Nr*)rªr£ÚfilterÚstrÚisdigit)r¦r r r!Úðóz(latest_checkpoint_path..r¹r)ÚglobrŒrr£Úsortr—)Údir_pathÚregexZf_listrDr r r!Úlatest_checkpoint_pathîs rÚc CsÜts0ddl}| d¡dat d¡}| tj¡ddlm}ddl }|j dd\}}|j |ddd d }|j ||d |  d ¡| d ¡| ¡|j ¡|j|j ¡|jdd}| |j ¡ddd…d¡}| ¡|S)Nrr#Tr$r%r(ÚautoÚlowerÚnone©ÚaspectÚoriginÚ interpolation©rJZFramesZChannelsr*r+rr.)r0r$r1r2r3r4r5r6r7r8r9ÚimshowÚcolorbarÚxlabelÚylabelr;r<r=r>r?r@rArBrC) Ú spectrogramr$rFrGrHrIrJÚimrKr r r!Úplot_spectrogram_to_numpyös*     ÿ   réc Csøts0ddl}| d¡dat d¡}| tj¡ddlm}ddl }|j dd\}}|j |  ¡ddd d }|j ||d d } |dk r| d |7} | | ¡| d¡| ¡|j ¡|j|j ¡|jdd} |  |j ¡ddd…d¡} | ¡| S)Nrr#Tr$)éér(rÛrÜrÝrÞrâzDecoder timestepz zEncoder timestepr*r+rr.)r0r$r1r2r3r4r5r6r7r8r9rãrYrärårær;r<r=r>r?r@rArBrC) Ú alignmentr™r$rFrGrHrIrJrèrårKr r r!Úplot_alignment_to_numpys0    ÿ    rícCs"t|ƒ\}}t | tj¡¡|fSr²)rrÚ FloatTensorÚastyperHrk)Ú full_pathr_rKr r r!Úload_wav_to_torch-s rñú|c s.t|dd}‡fdd„|Dƒ}W5QRX|S)Nzutf-8)Úencodingcsg|]}| ¡ ˆ¡‘qSr )ÚstripÚsplit)r¥Úline©rõr r!r©4sz+load_filepaths_and_text..)Úopen)Úfilenamerõr¦Zfilepaths_and_textr r÷r!Úload_filepaths_and_text2srúc Csòt ¡}|jddtddd|jddtdd d | ¡}tj d |j¡}tj  |¡s^t  |¡|j }tj |d ¡}|r¶t |d ƒ}|  ¡}W5QRXt |dƒ}| |¡W5QRXnt |d ƒ}|  ¡}W5QRXt |¡}tf|Ž} || _| S)Nz-cz--configz./configs/config.jsonzJSON file for configuration)ÚtypeÚdefaultÚhelpz-mz--modelTz Model name)rûÚrequiredrýz./logsú config.jsonÚrÚw)ÚargparseÚArgumentParserÚ add_argumentrÒÚ parse_argsrŒrr£r‰ÚexistsÚmakedirsÚconfigrørÚwriteÚjsonÚloadsÚHParamsÚ model_dir) ÚinitÚparserÚargsr Ú config_pathÚconfig_save_pathr¦rKrÚhparamsr r r!Ú get_hparams8s0 ÿ ÿ       rc CsJtj |d¡}t|dƒ}| ¡}W5QRXt |¡}tf|Ž}||_|S)Nrÿr) rŒrr£rørr r r r )r rr¦rKrrr r r!Úget_hparams_from_dirVs   rc CsDt|dƒ}| ¡}W5QRXt |¡}|s6tf|Žntf|Ž}|S)Nr)rørr r r Ú InferHParams)rZ infer_moder¦rKrrr r r!Úget_hparams_from_fileas   rcCs®tj tj t¡¡}tj tj |d¡¡s>t d  |¡¡dSt   d¡}tj |d¡}tj |¡ršt |ƒ  ¡}||krªt d  |dd…|dd…¡¡nt |dƒ |¡dS)Nz.gitzL{} is not a git repository, therefore hash value comparison will be ignored.zgit rev-parse HEADZgithashz7git hash values are different. {}(saved) != {}(current)ér)rŒrÚdirnameÚrealpathÚ__file__rr£r˜ÚwarnršÚ subprocessÚ getoutputrørr )r Z source_dirZcur_hashrZ saved_hashr r r!Úcheck_git_hashis ÿ    ÿrú train.logcCsvt tj |¡¡at tj¡t d¡}tj  |¡s>t  |¡t  tj  ||¡¡}| tj¡|  |¡t |¡tS)Nz.%(asctime)s %(name)s %(levelname)s %(message)s)r2r3rŒrÚbasenamer˜r4ÚDEBUGÚ FormatterrrÚ FileHandlerr£Ú setFormatterÚ addHandler)r rùÚ formatterÚhr r r!Ú get_logger}s       r)ÚleftcCs|dkrt||ƒSt|||ƒS)Nr*)Úrepeat_expand_2d_leftÚrepeat_expand_2d_other)ÚcontentÚ target_lenÚmoder r r!Úrepeat_expand_2dŒsr0cCsª|jd}tj|jd|gtjd |j¡}t |d¡||}d}t|ƒD]V}|||dkr€|dd…|f|dd…|f<qN|d7}|dd…|f|dd…|f<qN|S)Nrr)r,r )rrÚzerosÚfloatrrÚarangeÚrange)r-r.Úsrc_lenÚtargetÚtempÚ current_posÚir r r!r+’s " r+ÚnearestcCs.|ddd…dd…f}tj|||dd}|S)N©Úsizer/r)ÚFÚ interpolate)r-r.r/r6r r r!r,¤sr,cCsÊt |¡d}t |d¡}dd„|Dƒ}|dkr@tj|dd}|d ¡D]P}t |d|¡|d|<t|ƒD](\}}|d|||||7<qrqLt |t j   t j j d¡¡t j   t j j d¡S)NédrcSsg|]}t |¡d‘qS)r‰)rr)r¥rr r r!r©®szmix_model..©r r‰z output.pth) rrîrr=ÚsoftmaxÚkeysÚ zeros_likeÚ enumerater rŒrr£Úcurdir)Z model_pathsÚmix_rater/Z model_temÚmodelsrr9r‰r r r!Ú mix_model«s$rHc Csötjj||dd|dd}tjj| ¡ ¡ ¡|dd|dd}t |¡ |j ¡}t j |  d¡|j ddd ¡}t |¡ |j ¡}t j |  d¡|j ddd ¡}t |t |¡d¡}|t |t d|¡¡t |t |d¡¡9}|S)Nr')rEÚ frame_lengthr^rÚlinearr;gíµ ÷ư>r )ÚlibrosaÚfeatureÚrmsÚdetachr„r8rÚ from_numpyrrr=r>rrrXÚmaxrCÚpowÚtensor)Údata1Zsr1Údata2Zsr2ÚrateZrms1Zrms2r r r!Ú change_rms¸s2 ÿ*ÿ ÿ ÿÿrVú dataset/44k/cCsÎtƒ}tdƒtj ||¡}g}t |¡D] }d|kr*| tj ||¡¡q*t|ƒdkr`tdƒ‚g}t |ƒD](}t   |¡d  dd¡  ¡}| |¡qlt |d¡} t | jd¡} tj | ¡| | } | jddkr6d| jd} t| ƒz"td d d |d d d | ¡j} Wn&tk r4t ¡} t| ƒYnXttdt | jd¡ƒ| jddƒ} t | jdd| ¡} t | ¡}d|_|   | ¡d}t!d| jd|ƒD]}|  "| |||…¡q¤tdƒ| S)Nz"The feature index is constructing.z .wav.soft.ptrz(You need to run preprocess_hubert_f0.py!réþÿÿÿgjAz,Trying doing kmeans %s shape to 10k centers.i'TrFÚrandom)Ú n_clustersÚverboseÚ batch_sizeÚcompute_labelsréé'r z IVF%s,Flati zSuccessfully build index)#rr—rŒrr£rÆÚappendÚlenrlr»rrrYr8rHÚ concatenater3rrYÚshufflerÚfitÚcluster_centers_Ú tracebackÚ format_excÚminrªÚsqrtÚfaissZ index_factoryZextract_index_ivfZnprobeÚtrainr4Úadd)Úspk_nameÚroot_dirZn_cpuZexp_dirZ listdir_resÚfileZnpysÚnameÚphoneZbig_npyZ big_npy_idxr™Zn_ivfÚindexZ index_ivfZbatch_size_addr9r r r!Ú train_indexÍsX     ûùÿ (  rsc@s\eZdZdd„Zdd„Zdd„Zdd„Zd d „Zd d „Zd d„Z dd„Z dd„Z dd„Z dS)r cKs4| ¡D]&\}}t|ƒtkr&tf|Ž}|||<qdSr²)r–rûÚdictr ©ÚselfÚkwargsrržr r r!Ú__init__s  zHParams.__init__cCs |j ¡Sr²)Ú__dict__rB©rvr r r!rB sz HParams.keyscCs |j ¡Sr²)ryr–rzr r r!r– sz HParams.itemscCs |j ¡Sr²)ryr“rzr r r!r“szHParams.valuescCs t|jƒSr²)raryrzr r r!Ú__len__szHParams.__len__cCs t||ƒSr²)Úgetattr©rvrºr r r!Ú __getitem__szHParams.__getitem__cCs t|||ƒSr²)Úsetattr)rvrºÚvaluer r r!Ú __setitem__szHParams.__setitem__cCs ||jkSr²)ryr}r r r!Ú __contains__szHParams.__contains__cCs |j ¡Sr²)ryÚ__repr__rzr r r!rƒszHParams.__repr__cCs |j |¡Sr²)ryÚget©rvrrr r r!r„!sz HParams.getN) Ú__name__Ú __module__Ú __qualname__rxrBr–r“r{r~rr‚rƒr„r r r r!r sr c@seZdZdd„Zdd„ZdS)rcKs4| ¡D]&\}}t|ƒtkr&tf|Ž}|||<qdSr²)r–rûrtrrur r r!rx&s  zInferHParams.__init__cCs | |¡Sr²)r„r…r r r!Ú __getattr__,szInferHParams.__getattr__N)r†r‡rˆrxr‰r r r r!r%src@seZdZddd„Zdd„ZdS)ÚVolume_ExtractorécCs ||_dSr²)Úhop_size)rvrŒr r r!rx1szVolume_Extractor.__init__cCsÂt|tjƒst |¡}t| d¡|jƒ}|d}tjjj|t|jdƒt|jddƒfdd}tjjj |dd…dddd…fd|jf|jddd…dd…d|…fj ddd}t  |¡}|S) Nrr'r Úreflect)r/)Ústrider@r) Ú isinstancerrrªr<rŒÚnnrÚpadÚunfoldÚmeanri)rvÚaudioÚn_framesZaudio2Úvolumer r r!Úextract4s  0R zVolume_Extractor.extractN)r‹)r†r‡rˆrxr—r r r r!rŠ0s rŠ)T)N)NF)r¢r'T)rÐ)N)rò)T)F)r )r*)r:)rW)?rrÖr r2rŒr«rÚsysrfÚmultiprocessingrrjrKr8rHrZscipy.io.wavfilerÚsklearn.clusterrÚtorch.nnrr=r0Ú basicConfigÚstdoutÚWARNr˜rNZf0_maxZf0_minrMrPrOr"rLrVr[rorƒrŸr¡rÈrÏrÚrérírñrúrrrrr)r0r+r,rHrVrsr rrŠr r r r!Úsj       , #           5#