U Bg- edddlZejdedddlmZedddlZedejddejj_ d ejj_ ddl m Z ddlZd ejd <ddlZejddejj_ d ejj_ ddlZejdddlZejddd lmZeZd ZddlZddlZddlZddlmZddlZddlZddlmZddlmcmZddl Z ddl!Z!ddl"m#Z#ddl$Tddl%m&Z&m'Z'ddl(TddlZejdddl"m)Z)ddl*m+Z+e+Z,e j-.ddddZ/d\Z0Z1dZ2dZ3dZ4dZ5ej67rd nd!Z8ej9e:d"Z;e;<d#dZ=e;<d$dZ>e?e>e=Z@eAd%&ZBeCd'&ZDe;<d(dZEeFeEZGdd)lHmIZIe;<d*dZJeIeJZKeLe;d+ZMeNeMe@eGeKeBeDZOd,eODZPd-eODZPejQd.d!/ZReRd0ZSeOD]ZTeTeSvred1eTz eOeTUeSeT7#dd2lVmWZWeSeTZXeWZYeXZD]\Z[Z\e[d3dZ]e\eYe]<eOeTUeYd4YxYwd5eODZPdd6l^m_Z_m`Z`maZae_eOjbjbe`ead7d8d9:d;ZcdKdAZddLdBZedMdFZfdGZgdNdIZhdJZidS)ONLTKNpunktSCIPY)writez TORCH STUFFSTARTFTzX/home/ubuntu/miniconda3/envs/respair/lib/python3.11/site-packages/torch/lib/include/cuda CUDA_HOME) TextCleanerc2tj|d|jdd|}tj|dz|d}|S)Nr)torcharangemax unsqueezeexpandshapetype_asgt)lengthsmasks ?/home/ubuntu/Kanade_Project/gradio/Tsukasa_Speech/importable.pylength_to_maskr$su < & & 0 0 3 3 : :7=;KR P P X XY` a aD 8DFG--a00 1 1D K)Munch)nn) word_tokenize)*)tokenizer_koto_prompttokenizer_koto_text punkt_tab) sent_tokenize)SentenceTokenizerPiii,)n_melsn_fft win_length hop_length)ctj|}t|}tjd|dzt z tz }|S)Ngh㈵>r)r from_numpyfloatto_mellogrmeanstd)wave wave_tensor mel_tensors r preprocessr4Ls]"4((..00K $$J)D:#7#7#:#::;;dBcIJ rc<tj|d\}}tj|d\}}|dkrtj||d}t |t}tj 5t | d}t | d}dddn #1swxYwYtj||gdS)N])sr)top_dbr dim)librosaloadeffectstrimresampler4todevicer no_gradmodel style_encoderrpredictor_encodercat)pathr1r7audioindexr3ref_sref_ps rcompute_style_through_cliprMRs?|DU+++HD"?''R'88LE5 U{{ E22E""%%f--J AA##J$8$8$;$;<<'' (<(rk&(((3U3Z__(((rcXg|]'}t|t(SrS)rDrArBrhs rrkrks(,,,sU3Z]]6,,,rz+Models/Style_Tsukasa_v02/Top_ckpt_24khz.pth) map_locationnetz %s loaded) OrderedDict)strictcLg|]!}t|"SrSrfrhs rrkrkrlr)DiffusionSampler ADPM2SamplerKarrasScheduleg-C6?g@g"@) sigma_min sigma_maxrho)samplersigma_scheduleclamp333333?ffffff?r ?c x t|}|ddtj|t d}tj5tj|jdgt }t|t } t ||| } t ||  } t| dd} t!tjddt | |||d} | ddddf}| ddddf}||zd|z |ddddfzz}||zd|z |ddddfzz}tj | ||| }tj|}tj|}tj|}tj|d |z }tj|d }tj|t|j}d}t;|dD]C}d||||t||jzf<|t||jz }D|dd|dt z}tj||\}}| |dt z}t ||||d}dddn #1swxYwY|!"d dd fS) Nrr attention_maskr r noise embeddingembedding_scalefeatures num_stepsaxismin.i)# textclenaerinsertr LongTensorrArBrrCrrrD text_encoderbertint bert_encoder transposediffusion_samplerrandnsqueeze predictorlstmprepare_projection duration_projsigmoidsumroundr|zerosdatarangesizeF0Ntraindecoderr^numpy)rVrKalphabetadiffusion_stepsrrate_of_speechtokens input_lengths text_maskt_enbert_durd_ens_predsrefdxx_moddurationpred_dur pred_aln_trgc_frameienF0_predN_predasrouts r inferencersk   F MM!Q  f % % ( ( 0 0 : :1 = =F :M:M(&,r*:);<<??GG "=1144V<< !!&-CC::fyj5E5E5G5G:HH!!(++55b"==#5;x+@+@+J+J1+M+M+P+PQW+X+X4<:I5:7F HHHIPPQ  111cdd7OQQQWockQY5DSD>99 1HDU111cdd7^3 3 O ( ()*M9 F F O  # #33A66?0077=**..B.77.H;x//112288Q8??{=#hllnn6I2J2JKK |((++,, - -AGHLGGc(1+2B.C.C$CCC D s8A;+,, ,GGkk"b!!L$:$:1$=$=$@$@$H$HH /222q99l,,Q//226:::mmC '1H1H1K1KMMs:M:M:M:M:M:M:M:M:M:M:M:M:M:M:Mz ;;==     $ $ & &sDSDy 11s8O+Q//Q36Q3c  t|} | ddtj| t d} tj5tj| jdgt } t| t } t | | | } t | |  } t| dd}t!tjddt | |||d}|||zd|z |zz}|ddddf}|ddddf}||zd|z |ddddfzz}||zd|z |ddddfzz}tj||gd }tj ||| | }tj|}tj|}tj|}tj|d |z }tj|d }tj| t|j}d}t=|dD]C}d||||t||jzf<|t||jz }D|dd|dt z}tj ||\}}| |dt z}t!||||d}dddn #1swxYwY|"#d dd f|fS)Nrr rrrr rrr:rr.i)$rrr rrArBrrCrrrDrrrrrrrrrGrrrrrrrr|rrrrrrr^r)rVs_prevrKrrtrrrrrrrrrrrrrrrrrrrrrrrrrs rLongformrs  F MM!Q  f % % ( ( 0 0 : :1 = =F 3M3M(&,r*:);<<??GG "=1144V<< !!&-CC::fyj5E5E5G5G:HH!!(++55b"=="5;x+@+@+J+J1+M+M+P+PQW+X+X4<:I5:7F HHHIPPQ   Z1q5F"22F 111cdd7OQQQWockQY5DSD>99 1HDU111cdd7^3 3C8,,, O ( ()*M9 F F O  # #33A66?0077=**..B.77.H;x//112288Q8??{=#hllnn6I2J2JKK |((++,, - -AGHLGGc(1+2B.C.C$CCC D s8A;+,, ,GGkk"b!!L$:$:1$=$=$@$@$H$HH/222q99l,,Q//226:::mmC '1H1H1K1KMMe3M3M3M3M3M3M3M3M3M3M3M3M3M3M3Ml ;;==     $ $ & &sETEz 2F ::s8PRRRr6{Gz?皙?ct||z}tj|}||k}tj|t}tj|dkddz}tj|dkddz} |drtjdg|f}|dr$tj| t|gf} t|dkst| dkr|Sg} d} t|| D]\} } | || | | | z }||krtj |}td|dz}tj dd|}tj dd|}|d|xx|zcc<|| dxx|zcc<| |n| || | | } | t|kr| || dtj| S)Nr rr ir)) rnpabsdiffastypewhere concatenatelenzipappendrrlinspace)wav_data sample_ratesilence_thresholdmin_silence_durationmin_silence_samplesenvelope silence_masksilence_changessilence_starts silence_endsprocessed_segmentslast_endstartendsilence_durationsilence_segment fade_samplesfade_infade_outs rtrim_long_silencesr0su2[@AAvhH//Lgl11#6677OXo233A6:N8Or122159LA?!n(=>>BG~|c(mm_&EFF  >a3|#4#4#9#9H.,77 s!!(8E>":;;;; 1 1 1 h':;;Ot%8A%=>>Lk!Q 55G{1a66H M\M * * *g 5 * * * \MNN + + +x 7 + + +  % %o 6 6 6 6  % %huSy&9 : : :#h--!!(899"5666 >, - --rcd}|t|kri|dkrKt||dkr2||dz xxd||zz cc<||n|dz }|t|ki|S)Nr r  )rpop)lstrs rmerge_short_elementsrls A c#hh,, q55SQ[[2%% !HHHc!f $HHH GGAJJJJ FA c#hh,, Jrcg}tdt||D]7}d||||z}||8|S)Nrr)rrjoinr) text_listmaxim merged_listr merged_texts r merge_threerwscK 1c)nne , ,((hhy1U7344 ;'''' rc:tt|S)N)rr)rs rmerging_sentencesrs +C00 1 11r)NNr}r~rr r)r}r~r~rr r)r6rr)r)jprintnltkdownloadscipy.io.wavfilerr manual_seedbackendscudnn benchmark deterministicIPython.displaydisplayipdosenvironrandomseedrr text_utilsr rrtimeyamlmunchrrtorch.nn.functional functionalF torchaudior< nltk.tokenizermodelsModules.KotoDama_samplerrrutilsr!konohar"sent_tokenizer transformsMelSpectrogramr-r/r0r4rMrXr\r] is_availablerB safe_loadopenconfiggetr_r`load_ASR_models text_alignerload_KotoDama_PrompterKotoDama_Prompterload_KotoDama_TextSamplerKotoDama_TextSamplerraload_F0_modelspitch_extractorUtils.PLBERT.utilrb BERT_pathplbertrecursive_munchrd build_modelrD_r= params_wholeparamsrjload_state_dict collectionsrp state_dictnew_state_dictitemskvnameModules.diffusion.samplerrtrurv diffusionrrrrrrrrSrrr:s4f  gg""""""m g!!&%)"  t ; !!&%)"  A q#"""""kmm   ''''''OOOOOOOO  k''''''$$$$$$#"$$   - - Tds . < <  c , , , :**,, 7% 899 : :ZZ e , , ::j% ( (x44 +*0WXXX006[\\\ **Y & & .))*))))) JJ|U + + Y  vn566  L,IZ\pqq((%(((,,e,,,uzGV[\\\ e   E EC f}} kC    E #J & &vc{ 3 3 3 3 E / / / / / /J([]]N"((** ) )1u'(t$$ #J & &~e & D D D D D")(%(((UTTTTTTTTT$$ O LNN!>FcsKKK  C2C2C2C2L=;=;=;=;B9.9.9.9.x22222s $!LA M(