# Source: KoalaBrainResearcher — commit 285a14d ("Make description more accurate")
import gradio as gr
import librosa
import os
import spaces
from pathlib import Path
from pytorch.inference import PianoTranscription
from utils import config
# from synthviz import create_video # TODO enable video rendering
from midi2audio import FluidSynth
RESULTS_DIR='results'
# Initialize the transcriptor
transcriptor = PianoTranscription("Note_pedal")
# Soundfont
soundfont_path = "soundfont/MuseScore_General.sf3"
fs = FluidSynth(soundfont_path)
@spaces.GPU
def transcribe_and_visualize(audio_file):
    """Transcribe a piano recording to MIDI and render it back to audio.

    Parameters
    ----------
    audio_file : str
        Filesystem path to the uploaded audio (Gradio ``type="filepath"``).

    Returns
    -------
    tuple[str, str]
        ``(flac_path, midi_path)`` — the FLAC rendering of the transcription
        (playable in the browser) and the transcribed MIDI file.
    """
    # Derive unique output filenames from the upload's basename.
    base_name = Path(audio_file).stem
    midi_filename = f"{base_name}_transcription.mid"
    flac_filename = f"{base_name}_transcription.flac"
    # video_filename = f"{base_name}_output.mp4"  # TODO enable video rendering

    # Load at the model's expected sample rate. `librosa.load` is the
    # supported top-level API; `librosa.core.load` is a deprecated alias.
    audio, _ = librosa.load(audio_file, sr=config.sample_rate)
    transcriptor.transcribe(audio, midi_filename)

    # Create visualization video
    # create_video(input_midi=midi_filename, video_filename=video_filename)  # TODO enable video rendering

    # Render the transcribed MIDI to FLAC with the configured soundfont.
    fs.midi_to_audio(midi_filename, flac_filename)
    return flac_filename, midi_filename
# Assemble the Gradio UI: one audio upload in, the FLAC rendering and the
# MIDI file out.
piano_audio_input = gr.Audio(type="filepath", label="Upload Piano Audio")
transcription_outputs = [
    gr.Audio(label="MIDI transcription"),
    gr.File(label="MIDI file"),
]

iface = gr.Interface(
    fn=transcribe_and_visualize,
    inputs=piano_audio_input,
    outputs=transcription_outputs,
    title="MOZART - AI Piano Transcriber",
    description="Gradio-based piano transcriber, using Bytedance's Piano Transcription AI model. Upload a piano audio file to transcribe it into a MIDI file. Open in a piano roll app like Synthesia to see the magic.",
)

# Start the web server.
iface.launch()