# app.py — English Accent & Proficiency Analyzer (rev a44c2d8)
import os
import requests
import tempfile
import gradio as gr
from moviepy import VideoFileClip
from speechbrain.inference.interfaces import foreign_class
import whisper
from together import Together
# Load the Whisper speech-to-text model once at import time so every
# request reuses it; "base" trades accuracy for speed and memory.
_whisper_model = whisper.load_model("base")
# Load the SpeechBrain accent classifier once at import time.
# foreign_class downloads the model weights plus the custom interface
# module from the Hugging Face Hub repo named below.
_classifier = foreign_class(
    source="warisqr7/accent-id-commonaccent_xlsr-en-english",
    pymodule_file="custom_interface.py",
    classname="CustomEncoderWav2vec2Classifier"
)
# Helper to download a direct-mp4 URL to a temp file
def download_video(url: str) -> str:
    """Download a direct MP4 URL into a temporary file.

    Args:
        url: Direct link to an .mp4 resource.

    Returns:
        Path of the downloaded temporary .mp4 file. The caller is
        responsible for deleting it.

    Raises:
        requests.HTTPError: if the server returns an error status.
        requests.RequestException: on connection failure or timeout.
    """
    # stream=True keeps large videos out of memory; the timeout keeps a
    # dead server from hanging the request forever (the original had none).
    with requests.get(url, stream=True, timeout=(10, 60)) as resp:
        resp.raise_for_status()
        # delete=False: the path must outlive this function; the caller
        # removes it. The with-block guarantees the handle is closed even
        # if a chunk write fails (the original leaked it on error).
        with tempfile.NamedTemporaryFile(delete=False, suffix=".mp4") as tmp:
            for chunk in resp.iter_content(8192):
                tmp.write(chunk)
            return tmp.name
# Helper to extract audio to a temp file
def extract_audio(video_path: str) -> str:
    """Extract a video's audio track to a temporary .mp3 file.

    Args:
        video_path: Path to a local video file readable by moviepy.

    Returns:
        Path of the temporary .mp3 file. The caller is responsible for
        deleting it.

    Raises:
        ValueError: if the video has no audio track (original code raised
            an opaque AttributeError on clip.audio being None).
    """
    # Reserve a temp path; close the handle immediately so ffmpeg can
    # write to it on all platforms.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as tmp:
        audio_path = tmp.name
    clip = VideoFileClip(video_path)
    try:
        if clip.audio is None:
            raise ValueError("Video has no audio track")
        clip.audio.write_audiofile(audio_path, logger=None)
    finally:
        # Release ffmpeg readers even when extraction fails
        # (the original leaked them on error).
        clip.close()
    return audio_path
# Main pipeline
def analyze_url(video_url):
    """Run the full pipeline: download, classify accent, transcribe,
    and request an LLM proficiency analysis.

    Args:
        video_url: Direct URL to an .mp4 video.

    Returns:
        A 4-tuple of strings (accent, confidence, transcript, analysis)
        on success, or ("Error", "", "", message) on any failure —
        Gradio maps these to the four output textboxes.
    """
    vid = aud = None
    try:
        # 1. Download & extract
        vid = download_video(video_url)
        aud = extract_audio(vid)
        # 2. Accent classification
        out_prob, score, idx, lab = _classifier.classify_file(aud)
        accent = lab[0]
        conf_pct = round(float(score) * 100, 2)
        # 3. Transcription
        transcript = _whisper_model.transcribe(aud)["text"]
        # 4. LLM analysis — fail fast with a clear message if the key is
        # missing instead of a confusing auth error from the client.
        api_key = os.getenv('API_KEY')
        if not api_key:
            raise RuntimeError("API_KEY environment variable is not set")
        client = Together(api_key=api_key)
        prompt = f"""
You are an English-speaking coach. Given this transcript of a spoken English audio with an {accent} accent and classification confidence {conf_pct}%:
\"\"\"{transcript}\"\"\"
Evaluate how confident the speaker sounds for a job interview based on fluency, clarity, filler usage, professional English, and pacing.
Provide:
- A proficiency score between 0 and 100
- A brief explanation
- Give Bullet points, but nothing in bold.
"""
        resp = client.chat.completions.create(
            model="meta-llama/Llama-3.3-70B-Instruct-Turbo-Free",
            messages=[{"role": "user", "content": prompt}]
        )
        analysis = resp.choices[0].message.content.strip()
        return accent, f"{conf_pct}%", transcript, analysis
    except Exception as e:
        # UI boundary: surface the error text in the interface rather
        # than crashing the Gradio worker.
        return "Error", "", "", str(e)
    finally:
        # Remove temp files even when a step fails — the original only
        # cleaned up on the success path and leaked both files on error.
        for path in (vid, aud):
            if path:
                try:
                    os.remove(path)
                except OSError:
                    pass
# Assemble the Gradio UI: one URL input, one trigger button, and the
# four result textboxes that mirror analyze_url's 4-tuple return.
with gr.Blocks(title="English Accent & Proficiency Analyzer") as demo:
    gr.Markdown("## 🎙️ English Accent Detection & Proficiency Analysis")
    with gr.Row():
        url_box = gr.Textbox(label="Direct MP4 Video URL", placeholder="https://...")
        analyze_btn = gr.Button("Analyze")
    with gr.Row():
        accent_box = gr.Textbox(label="Detected Accent")
        confidence_box = gr.Textbox(label="Accent Classification Confidence Score")
        transcript_box = gr.Textbox(label="Transcript", lines=5)
        analysis_box = gr.Textbox(label="Proficiency Analysis", lines=10)
    # Wire the button to the pipeline; api_name exposes it via the API too.
    analyze_btn.click(
        fn=analyze_url,
        inputs=url_box,
        outputs=[accent_box, confidence_box, transcript_box, analysis_box],
        api_name="analyze",
    )

if __name__ == "__main__":
    demo.launch()