o GgQ8@sddlmZddlZddlZddlmZddlZddlZddlZddlm Z ddl m Z m Z m Z ddlmZddlmZddlmZdd lmZmZdd lmZdd lmZdd lmZmZddlZd ejj _!GdddZ"dS)) annotationsN)tqdm)AdamW)LinearLR SequentialLR ConstantLR) Accelerator)DistributedDataParallelKwargs)LanceDiffusionDataset) DataLoaderDistributedSampler)EMA)CFM)existsdefaultFc@seZdZdddddddddddd ddeed d d d fd.ddZddZd d!Zd"d#Zed$d%Z d/d&d'Z d(d)Z d0d1d,d-Z dS)2Traineri NiN sample?z test_e2-ttsZtest_runFmodelrbatch_size_typestrnoise_scheduler str | Noneduration_predictortorch.nn.Module | Nonewandb_resume_idaccelerate_kwargsdict ema_kwargs bnb_optimizerboolreset_lruse_style_prompt grad_ckptcCs||_tdd}tjjrdnd}td|ddl}td||g| d||_|dkrVt |r9dd||di}ndd|d i}|jj ||||||| | | | |jj | d d |jj j |_|jd d |_td|j||_|jrt|fddi||_|j|jj|jj jdvr|j||_||_||_t||| |_t|d|_| |_| |_| |_ | |_!||_"||_#||_$||_%|rddl&}|j'j(|)|d|_*n t+|)|d|_*|jj jdkr||jj j,j-d<|.|/|j0|j|j*|j1|j2\|_|_*|_1|_2dS)NF)find_unused_parameterswandbzUsing logger: r)log_withkwargs_handlersgradient_accumulation_stepsallow)resumenameid)r,r-) epochs learning_ratenum_warmup_updates batch_sizer max_samplesgrad_accumulation_steps max_grad_normgpusr) project_name init_kwargsconfignofp32z!!!!!!!!!!!!!!!!!Zinclude_online_model) DEEPSPEEDFSDPzckpts/test_e2-tts)lrr<train_micro_batch_size_per_gpu)3argsr r'apiapi_keyprint tbe.commonr acceleratorr init_trackers num_processesstatemixed_precision precisionreplaceris_mainr ema_modeltodevicedistributed_typehalfr/r1save_per_updatesrlast_per_stepscheckpoint_pathr3r4r5rrr#r$r% bitsandbytesoptimZ AdamW8bit parameters optimizerrdeepspeed_plugindeepspeed_configget_dataloader get_schedulerprepare schedulertrain_dataloader)selfrrAr/r0r1rSrUr2rr3r4r5rr wandb_projectwandb_run_namerrTrr r!r#r$r%Z ddp_kwargsloggertber8bnbr@r@x/mnt/sfs/music/hkchen/workspace/F5-TTS-HW/exps/base_model_without_bpm_pure_music_mulan_style_emb/f5_tts/model/trainer.py__init__"s|     .zTrainer.__init__cCsl|j|jj}t|j|j|j}||}t|jdd|d}t|jdd|d}t |j||g|gd|_ dS)Ng:0yE>r) start_factor end_factor total_iters) schedulers milestones) r1rFrHlenr`r/r4rrYrr_)ra warmup_steps total_stepsZ decay_stepsZwarmup_schedulerZdecay_schedulerr@r@rgr]s  zTrainer.get_schedulercCs,t|j|j|j}t|jd|d|_dS)Nr)factorrk)rnr`r/r4rrYr_)rarpr@r@rgget_constant_schedulerszTrainer.get_constant_schedulercCs|jjd}|jjd}|jjd}tt|jj|jj|jj |jj |jj |jj |jj |jj|jj|jj|jj|jt|jjd }t||jjddd|jdd|_dS)N|) max_frames min_frames align_lyrics lyrics_slicer$ parse_lyrics lyrics_shiftdownsample_rateskip_empty_lyricstokenizer_typerK start_time pure_probT)datasetr2shuffle num_workers pin_memory collate_fnpersistent_workers)rA prompt_pathsplitlrc_path latent_pathr Z init_dataZ dataset_pathrtrurvrwr$rxryrzr{r|rKtimer~r r2Zcustom_collate_fnr`)rarrrZlddr@r@rgr\s(       zTrainer.get_dataloadercCs|jjSN)rFis_main_process)rar@r@rgrMszTrainer.is_maincCs|j|jrXt|j|j|j|j|j|j |d}t j |j s2t |j |rH|j||j dtd|dS|j||j d|ddSdS)N)model_state_dictZoptimizer_state_dictema_model_state_dictscheduler_state_dictstepz/model_last.ptzSaved last checkpoint at step z/model_.pt)rFwait_for_everyonerMr unwrap_modelr state_dictrYrNr_ospathrrUmakedirssaverD)rarlast checkpointr@r@rgsave_checkpoints   zTrainer.save_checkpointcst|jrtj|jrt|jsdS|jdt|jvr$d}ntddt|jDdddd}tj |jd |d d }|j rr|j |d }fd d| D}tdt|dt|d|j j|dd|j|j |d}fdd| D}tdt|dt|d|j|jj|ddd|vr|jr|js|j|d|d}nd}~ttd||S)Nrz model_last.ptcSsg|] }|dr|qS)r)endswith).0fr@r@rg sz+Trainer.load_checkpoint..cSstdttj|S)N)intjoinfilterrisdigit)xr@r@rgsz)Trainer.load_checkpoint..)key/cpu) map_locationrc.i|]\}}|vr|j|jkr||qSr@shaperkv)ema_dictr@rg z+Trainer.load_checkpoint..zLoading z / z ema_model paramsF)strictrcrr@rr) model_dictr@rgrrz model paramsrrzCheckpoint loaded at step)rrUrrlistdirrFrsortedtorchloadrMrNritemsrDrnload_state_dictrrr_r#gccollect)raZlatest_checkpointrZema_checkpoint_dictZfiltered_ema_dictZcheckpoint_model_dictZfiltered_model_dictrr@)rrrgload_checkpointsR         zTrainer.load_checkpointresumable_with_seedrc Cs|j}|}|}|dkr$t|}t||}||}|jj||d}nd}t||jD]} |j |dkrT| |krTt |d| dd|jd|jj ||dd} nt |d| dd|jd|jj dd } | D]} |j |jj| d } | d  dd d} | d }| d}| d}| d}|j| | ||j|jr|nd|jr|nd|j|d\}}}|j||jdkr|jjr|j|j|j|j|j|jWdn1swY|jr|j|d7}|jj r |jj||jdd|d| j t!||d||j"|j#dkr$|$|||j%dkr3|j$|ddqkq,|j$|dd|j&dS)Nr) num_batcheszEpoch rrrg333333?)descunitdisableinitialtotal smoothing)rrrrZlrcZlatentZlatent_lengthspromptZprompt_lengthsr})textlensr style_promptstyle_prompt_lensr%r})lossr>)r)rrT)r)'r`rrnrrFskip_first_batchesranger/rtrainris_local_main_process accumulatepermuterr$r%backwardr5sync_gradientsclip_grad_norm_rXrYrr_ zero_gradrMrNupdatelogitem get_last_lr set_postfixrrSr4rrT end_training)rarr` start_step global_stepZorig_epoch_stepZ skipped_epochZ skipped_batchZskipped_dataloaderepoch progress_barbatchZ text_inputsmel_specZ mel_lengthsrrr}rcondpredr@r@rgrs~            $ (z Trainer.train)rrrrrrrrrrrrr rr!r"r#r"r$r"r%r")Fr)rr) __name__ __module__ __qualname__rrhr]rrr\propertyrMrrrr@r@r@rgr!s< z   9r)# __future__rrrrr'r torch_npuZ torch.optimrZtorch.optim.lr_schedulerrrr accelerateraccelerate.utilsr Z&f5_tts.dataset.custom_dataset_align2f5r torch.utils.datar r Z ema_pytorchr f5_tts.modelrf5_tts.model.utilsrrrnpuconv allow_hf32rr@r@r@rgs&