U lxd)@sddlZddlZddlZddlZddlZddlZddlZddlmZddl m Z ddl m Z d ddZ dd d Z Gd d d e ZdS)N)repeat_expand_2d)tqdm)DatasetFTcsg}d} t|D]\} } } | D]ʉtfdd|Dr tj| } |r`| t|ddn| }|dk r| |kr|r||S|dk r||krq |dk r||krq |s|dd}|dt|d }||| d7} q q|r||S)Nrcsg|]}d|qS).)endswith).0extfile(D:\so-vits-svc\diffusion\data_loaders.py sz traverse_dir..r) oswalkanypathjoinlensortsplitappend)root_dir extensionsamount str_include str_excludeis_pureis_sortis_ext file_listcntroot_filesmix_path pure_pathrr r r traverse_dir s.    r(cCst|jj|jj|jj|jj|jj||jj|j j |j |jj |jj dd }tjjj||s\|jjndd|jj dkrt|jjnd|jj dkr|jjdknd|jj dkrdndd}t|jj|jj|jj|jj|jjd|j |jj|j j d }tjjj|ddddd }||fS) NT) filelists waveform_sechop_size sample_rate load_all_data whole_audiorn_spkspkdevicefp16use_augrcpurF) batch_sizeshuffle num_workerspersistent_workers pin_memory) r)r*r+r,r-r.r0rr/)r5r6r7r9) AudioDatasetdataZtraining_filesduration block_sizeZ sampling_ratetrainZcache_all_datarmodelr/r0Z cache_deviceZ cache_fp16torchutils DataLoaderr5r7Zvalidation_files)argsr.Z data_train loader_trainZ data_valid loader_validr r r get_data_loaders5sR  rFcsFeZdZdddgddddffdd Zdd Zd d Zd d ZZS)r:TFwavrr4c " st||_||_||_||_||_| |_i|_i|_ |rJt d|n t d|t |d} | |_W5QRXt|jt|jdD]H}tj|d}|}tj||jd}|d}tj|dd \}}ttj|td d | }|d }t|}t|d | }|d }t|}t|d | }| dk r| dkr|dd}||kr||nd}|dks|| krtdnd}t t|g| }|r|d}t|}t|| }|d}tj|dd \}}tj|td }t|| }||j |<|d} t| | }!|!d}!t!|!|"d#dd}!| r~|$}|$}|!$}!||||!||||d|j|<q|d}tj|dd \}}||j |<|||||d|j|<qdS)NzLoad all the data filelists:z#Load the f0, volume data filelists:r)totalr)filenamesrz.f0.npyT) allow_pickle)dtyperz.vol.npyz .aug_vol.npyr/zV [x] Muiti-speaker traing error : spk_id must be a positive integer from 0 to n_spk-1 .mel.npyz .aug_mel.npy.soft.pt)r<melaug_melunitsf0volumeaug_volspk_id)r<rUrVrWrX)%super__init__r*r,r+r)r.r3 data_bufferpitch_aug_dictprintopenread splitlinespathsrrrrsplitextlibrosaZ get_durationnploadr@ from_numpyarrayfloat unsqueezetor ValueError LongTensorrsize transposehalf)"selfr)r*r+r,r0r-r.rr/r1r2r3fname_extnameZ path_audior<Zpath_f0rUr$Z path_volumerVZ path_augvolrWZspk_namerXZpath_melrRZ path_augmelrSZkeyshift path_unitsrT __class__r r rZ`s    $       zAudioDataset.__init__cCsJ|j|}|j|}|d|jdkr>||dt|jS|||S)Nr<皙?r)rar[r* __getitem__rget_data)rpZfile_idxrrr[r r r rxs   zAudioDataset.__getitem__c Cstj|d}|j|j}|d}|jr.|n|j}|jr>dntd||d}t ||}t ||} t ddgo||j } | rdnd} | | } | dkr|d} t | } | ||| } t| } n| ||| } | d } d}| r|j|}d |d | ||| }| d }|dkr\|d }t|}|d}t|| ddd}|||| }| rvdnd}| |}|||| }| d}tt |gg}t| |||||||dS)Nrr<rwTFrSrRrPrU rTrQrrWrVrX)rRrUrVrTrX aug_shiftrsrr)rrrbr+r,r.r*randomuniformintchoicer3getrdrer@rfrhr\rrmrnrgdict)rprrr[rsZframe_resolutionr<r*Zidx_fromZ start_frameZunits_frame_lenZaug_flagZmel_keyrRrUr|Z f0_framesrTrtZvol_keyrVZ volume_framesrXr r r rysD             zAudioDataset.get_datacCs t|jS)N)rra)rpr r r __len__szAudioDataset.__len__)__name__ __module__ __qualname__rZrxryr __classcell__r r rur r:_sl Er:)NNNFFT)F)rr}renumpyrdrcr@rArrtorch.utils.datarr(rFr:r r r r s$    ) *