# Python dependencies, in pip requirements-file format (one requirement per line).
# Inline comments run to end of line, so each requirement must sit on its own line
# or everything after the first `#` is ignored by pip.
gradio>=4.0.0
transformers>=4.30.0
torch>=2.0.0
torchaudio>=2.0.0
numpy>=1.21.0
soundfile>=0.12.0
pydub>=0.25.0
librosa>=0.10.0
datasets>=2.10.0  # For loading VCTK for speaker embeddings
accelerate>=0.20.0
speechbrain  # Required for x-vector speaker embeddings