|
import os |
|
from preprocess import process_audio_file |
|
from pause import annotate_pauses |
|
from repetition import annotate_repetitions |
|
from syllable import annotate_syllables |
|
from fillerword import annotate_fillerwords |
|
from mispronunciation import annotate_mispronunciation |
|
|
|
from feature_extraction import feature_extraction |
|
|
|
|
|
from annotation import annotate_transcript |
|
|
|
# Default input used when main() is called without arguments; this is the
# path that was previously hard-coded inside the function body.
_DEFAULT_INPUT_AUDIO_FILE = (
    "/home/easgrad/shuweiho/workspace/volen/SATE_docker_test/input/454.mp3"
)


def main(
    input_audio_file=_DEFAULT_INPUT_AUDIO_FILE,
    device="cuda",
    pause_threshold=0.3,
    num_speakers=2,
):
    """Run the annotation pipeline on one audio file and print the result.

    The audio file is first handed to ``process_audio_file``, whose return
    value is used as a session identifier. That identifier is then passed,
    in order, to the pause, repetition, syllable and filler-word annotators,
    and finally to ``annotate_transcript``, whose return value is printed.

    All parameters default to the values that were previously hard-coded,
    so calling ``main()`` with no arguments behaves exactly as before.

    Args:
        input_audio_file: Path of the audio file to process.
        device: Forwarded to ``process_audio_file`` as ``device``.
        pause_threshold: Forwarded to ``annotate_pauses`` as its second
            argument (presumably a duration in seconds -- TODO confirm
            against ``pause.annotate_pauses``).
        num_speakers: Forwarded to ``process_audio_file`` as
            ``num_speakers``.
    """
    print("Start init...")

    session_id = process_audio_file(
        input_audio_file, num_speakers=num_speakers, device=device
    )

    # Each annotator receives only the session id. The call order is kept
    # as in the original script in case later steps depend on earlier ones.
    annotate_pauses(session_id, pause_threshold)
    annotate_repetitions(session_id)
    annotate_syllables(session_id)
    annotate_fillerwords(session_id)

    # NOTE(review): annotate_mispronunciation and feature_extraction are
    # imported at the top of the file but are not invoked here.

    output_annotation = annotate_transcript(session_id)
    print(f"Done: {output_annotation}")
|
|
|
# Script entry point: run the pipeline only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    main()
|
|