o "e@sddlZddlZddlZddlZddlZddlZddlZddlZddl m Z ddl m Z ddl mZGdddeZGdddeZGd d d eZdS) N)text_to_sequence)spectrogram_torch)SynthesizerTrnc@s eZdZ dddZddZdS)OpenVoiceBaseClasscuda:0cCsxd|vr tjs Jt|}ttt|dg|jj ddfd|jj i|j  |}| ||_ ||_||_dS)Ncudasymbols n_speakers)torchr is_availableutilsZget_hparams_from_filerlengetattrdata filter_lengthr modeltoevalhpsdevice)selfZ config_pathrrrr/workspace/OpenVoice/api.py__init__s"  zOpenVoiceBaseClass.__init__cCs@t|}|jj|ddd\}}td|td||dS)NrF)strictzLoaded checkpoint '{}'zmissing/unexpected keys:)r loadrload_state_dictprintformat)rZ ckpt_pathZcheckpoint_dictabrrr load_ckpt#s zOpenVoiceBaseClass.load_ckptN)r)__name__ __module__ __qualname__rr#rrrrrs rc@sFeZdZdddZeddZedddZed d Zdd d ZdS)BaseSpeakerTTSENZH)englishchinesecCs<t||j|rgn|jj}|jjrt|d}t|}|S)Nr) rrrZ text_cleanersZ add_blankcommons intersperser LongTensor)textr is_symbolZ text_normrrrget_text0s   zBaseSpeakerTTS.get_text?cCsPg}|D]}||d7}|dgt|d|7}qt|tj}|S)Nrg?)reshapetolistintnparrayastypefloat32)Zsegment_data_listsrspeedZaudio_segmentsZ segment_datarrraudio_numpy_concat8s z!BaseSpeakerTTS.audio_numpy_concatcCs0tj||d}tdtd|td|S)N) language_strz > Text splitted to sentences. z > ===========================)rZsplit_sentencerjoin)r/r>textsrrrsplit_sentences_into_piecesAs z*BaseSpeakerTTS.split_sentences_into_piecesEnglishc Csd|j|d}|dusJd|d|||}g}|D]u} tdd| } d|d| d|d} || |jd} |j} |jj |} t >| d | } t | dg | }t | g | }|jj| ||d d d |d dd j}Wdn1swY||q|j||jjj|d}|dur|St|||jjjdS)Nz language z is not supportedz([a-z])([A-Z])z\1 \2[]FrgMbX?g333333?r2)sidZ noise_scaleZ noise_scale_wZ length_scalerr)r;r<)language_marksgetlowerrBresubr1rrZspeakersr no_grad unsqueezerr.sizerinferrcpufloatnumpyappendr= sampling_rate soundfilewrite)rr/ output_pathZspeakerlanguager<markrAZ audio_listtZstn_tstrZ speaker_idZx_tstZ x_tst_lengthsrFaudiorrrttsIs:    zBaseSpeakerTTS.ttsN)r2)rCr2) r$r%r&rH staticmethodr1r=rBr]rrrrr'*s   r'cs@eZdZfddZdddZddd Zd d Zd d ZZS)ToneColorConvertercsFtj|i||ddrddl}||j|_dSd|_dS)NZenable_watermarkTr)superrrIwavmark load_modelrrwatermark_model)rargskwargsra __class__rrrfs   zToneColorConverter.__init__Nc Cst|tr|g}|j}|j}g}|D]Y}tj||jjd\}}t |} | |} | d} t | |jj |jj|jj|jjdd |} t|j| dd d} || Wdn1sfwYqt|d}|durtjtj|dd t|||S) Nr;rFcenterr r r3T)exist_ok) isinstancestrrrlibrosarrrUr FloatTensorrrNrr hop_length win_lengthrMrZref_enc transposerTdetachstackmeanosmakedirspathdirnamesaverQ) rZ ref_wav_list se_save_pathrrgsfnameZ audio_refr;ygrrr extract_seqs4      zToneColorConverter.extract_se333333?defaultc Cs|j}tj||jjd\}} t|}tlt | |j } | d} t | |jj|jj|jj|jjdd |j } t| dg |j } |jj| | |||dddj}|||}|durt|WdSt|||jjWddS1swYdS)NrhrFrir3)Zsid_srcZsid_tgttaurG)rrnrrrUr tensorrRrMrorrrNrrrprqr.rOrZvoice_conversionrQrS add_watermarkrVrW) rZaudio_src_pathZsrc_seZtgt_serXrmessagerr\ sample_rater~specZ spec_lengthsrrrconverts0     "zToneColorConverter.convertc Cs*|jdur|S|j}t|d}t|d}d}d}t|D]r}||||||d|} t| |kr?td|S||d|dd} t +t |  |d} t |  |d} |j | | } | }Wdn1s}wY|||||||d|<q |S)Nr3 >r r z&Audio too short, fail to add watermark)rcrrZstring_to_bitsr4rrangerr rMrorencodersrQsqueeze)rr\rrbitsn_repeatKcoeffntrunckZ message_npysignalZmessage_tensorZsignal_wmd_tensorZsignal_wmd_npyrrrrs,       "z ToneColorConverter.add_watermarkc Csg}d}d}t|D]V}||||||d|}t||kr)tddSt&t||jd}|j |dk } Wdn1sVwY|| q t|dd }t|} | S) Nrr r z)Audio too short, fail to detect watermarkZFailrg?r3)rrrr rMrorrrNrcdecoder6rsrQrSrrTr7rtr4rZbits_to_string) rr\rrrrrrrZmessage_decoded_npyrrrrdetect_watermarks     &  z#ToneColorConverter.detect_watermark)N)Nrr) r$r%r&rrrrr __classcell__rrrfrr_es   r_)r rSr7rKrVrr,rvrnr/rZmel_processingrmodelsrobjectrr'r_rrrrs   ;