File size: 1,193 Bytes
67d6834
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
import os
from preprocess import process_audio_file
from pause import annotate_pauses
from repetition import annotate_repetitions
from syllable import annotate_syllables
from fillerword import annotate_fillerwords
from mispronunciation import annotate_mispronunciation

from feature_extraction import feature_extraction


from annotation import annotate_transcript

def main(
    input_audio_file: str = "/home/easgrad/shuweiho/workspace/volen/SATE_docker_test/input/454.mp3",
    *,
    num_speakers: int = 2,
    device: str = "cuda",
    pause_threshold: float = 0.3,
):
    """Run the annotation pipeline on one audio file and print the result.

    The audio is first handed to ``process_audio_file``; its return value is
    used as the session id for every later step. Pause, repetition, syllable
    and filler-word annotation then run in that order, and finally
    ``annotate_transcript`` produces the value that is printed and returned.

    Every argument defaults to the value that used to be hard-coded in this
    function, so ``main()`` with no arguments runs the same steps with the
    same settings as before (it now additionally returns the result).

    Args:
        input_audio_file: Path of the audio file to process.
        num_speakers: Forwarded to ``process_audio_file`` as ``num_speakers``.
        device: Forwarded to ``process_audio_file`` as ``device``.
        pause_threshold: Forwarded to ``annotate_pauses`` as its second
            positional argument (presumably seconds -- confirm against the
            ``pause`` module).

    Returns:
        Whatever ``annotate_transcript`` returns for the session.
    """
    print("Start init...")

    session_id = process_audio_file(
        input_audio_file, num_speakers=num_speakers, device=device
    )

    # Annotation steps; each one receives the session id from preprocessing.
    annotate_pauses(session_id, pause_threshold)
    annotate_repetitions(session_id)
    annotate_syllables(session_id)
    annotate_fillerwords(session_id)
    # Mispronunciation annotation is currently disabled:
    # annotate_mispronunciation(session_id, api_url="http://localhost:8080")

    # Feature extraction is currently disabled:
    # feature_extraction(session_id)

    # Transcript generation.
    output_annotation = annotate_transcript(session_id)
    print(f"Done: {output_annotation}")
    return output_annotation

# Run the pipeline only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    main()