|
import logging
import os
import tempfile

import gradio as gr
import requests
import whisper
from moviepy import VideoFileClip
from speechbrain.inference.interfaces import foreign_class
from together import Together
|
|
|
|
|
# Both models are loaded once, at import time, and shared by every request.

# Whisper "base" checkpoint; analyze_url uses it to transcribe the audio.
_whisper_model = whisper.load_model("base")

# Accent classifier loaded through SpeechBrain's foreign_class helper;
# analyze_url calls its classify_file() on the extracted audio.
_classifier = foreign_class(
    source="warisqr7/accent-id-commonaccent_xlsr-en-english",
    pymodule_file="custom_interface.py",
    classname="CustomEncoderWav2vec2Classifier",
)
|
|
|
|
|
def download_video(url: str, timeout: float = 30.0) -> str:
    """Download *url* to a temporary ``.mp4`` file and return its path.

    The body is streamed to disk in 8 KiB chunks. The caller owns the
    returned file and is responsible for deleting it.

    Args:
        url: Direct URL of the video to fetch.
        timeout: Seconds passed to ``requests.get`` as its ``timeout``.
            Without one, ``requests`` may wait on a stalled server forever.

    Returns:
        Path of the temporary file holding the downloaded bytes.

    Raises:
        requests.RequestException: On connection errors, timeouts, or a
            non-2xx response (via ``raise_for_status``).
    """
    # NOTE(review): the URL is used as given; this function applies no size
    # cap or host restriction to the download.
    tmp = tempfile.NamedTemporaryFile(delete=False, suffix=".mp4")
    try:
        # Both context managers close on exit, so the file handle and the
        # HTTP connection are released even when the transfer fails.
        with tmp, requests.get(url, stream=True, timeout=timeout) as resp:
            resp.raise_for_status()
            for chunk in resp.iter_content(8192):
                tmp.write(chunk)
    except BaseException:
        # delete=False means nobody else will clean up a partial download.
        os.remove(tmp.name)
        raise
    return tmp.name
|
|
|
|
|
def extract_audio(video_path: str) -> str:
    """Write the audio track of *video_path* to a temporary ``.mp3`` file.

    The caller owns the returned file and is responsible for deleting it.

    Args:
        video_path: Path of a video file readable by ``VideoFileClip``.

    Returns:
        Path of the temporary ``.mp3`` file.

    Raises:
        ValueError: If the clip exposes no audio track (``clip.audio`` is
            ``None``).
    """
    # Only the name is needed; close the handle right away so the writer
    # below can open the path itself.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as handle:
        audio_path = handle.name

    try:
        clip = VideoFileClip(video_path)
        try:
            if clip.audio is None:
                raise ValueError("The video has no audio track to analyze.")
            clip.audio.write_audiofile(audio_path, logger=None)
        finally:
            # Release the clip's readers whether or not the export worked.
            clip.close()
    except BaseException:
        # Do not leave an empty or partial mp3 behind on failure.
        os.remove(audio_path)
        raise
    return audio_path
|
|
|
|
|
_logger = logging.getLogger(__name__)


def _build_coaching_prompt(accent, conf_pct, transcript):
    """Return the coaching prompt for one analysed clip."""
    # The literal is kept flush-left so no source indentation leaks into the
    # text that is sent to the model.
    return f"""
You are an English-speaking coach. Given this transcript of a spoken English audio with an {accent} accent and classification confidence {conf_pct}%:
\"\"\"{transcript}\"\"\"

Evaluate how confident the speaker sounds for a job interview based on fluency, clarity, filler usage, professional English, and pacing.
Provide:
- A proficiency score between 0 and 100
- A brief explanation
- Give Bullet points, but nothing in bold.
"""


def _remove_quietly(path):
    """Best-effort removal of a temporary file; failures are only logged."""
    if path is None:
        return
    try:
        os.remove(path)
    except OSError:
        _logger.warning("Could not remove temporary file %s", path)


def analyze_url(video_url):
    """Run the full accent and proficiency analysis for a video URL.

    Steps: download the video, extract its audio, classify the accent,
    transcribe the speech, then ask the Together chat model for a
    proficiency assessment.

    Args:
        video_url: URL entered in the UI textbox.

    Returns:
        A 4-tuple ``(accent, confidence, transcript, analysis)`` of strings.
        On any failure the tuple is ``("Error", "", "", message)`` so the UI
        always receives four values.
    """
    if not video_url or not video_url.strip():
        return "Error", "", "", "Please provide a video URL."

    vid = aud = None
    try:
        vid = download_video(video_url.strip())
        aud = extract_audio(vid)

        # Only the score and the label are used from the classifier output.
        _, score, _, lab = _classifier.classify_file(aud)
        accent = lab[0]
        conf_pct = round(float(score) * 100, 2)

        transcript = _whisper_model.transcribe(aud)["text"]

        client = Together(api_key=os.getenv("API_KEY"))
        prompt = _build_coaching_prompt(accent, conf_pct, transcript)
        resp = client.chat.completions.create(
            model="meta-llama/Llama-3.3-70B-Instruct-Turbo-Free",
            messages=[{"role": "user", "content": prompt}],
        )
        analysis = resp.choices[0].message.content.strip()

        return accent, f"{conf_pct}%", transcript, analysis
    except Exception as e:
        # UI boundary: report the message to the user, keep the traceback
        # in the log.
        _logger.exception("Analysis failed for %r", video_url)
        return "Error", "", "", str(e)
    finally:
        # Runs on success and failure alike, so temp files never accumulate.
        _remove_quietly(vid)
        _remove_quietly(aud)
|
|
|
|
|
# Gradio front end: one URL input, an "Analyze" button, four result boxes.
# NOTE(review): the original indentation was lost; this assumes only out1 and
# out2 share the second row, with out3 and out4 stacked below it. Confirm.
with gr.Blocks(title="English Accent & Proficiency Analyzer") as demo:
    gr.Markdown("## 🎙️ English Accent Detection & Proficiency Analysis")

    # Input row: URL field next to the trigger button.
    with gr.Row():
        inp = gr.Textbox(label="Direct MP4 Video URL", placeholder="https://...")
        run = gr.Button("Analyze")

    # Short results side by side.
    with gr.Row():
        out1 = gr.Textbox(label="Detected Accent")
        out2 = gr.Textbox(label="Accent Classification Confidence Score")

    # Long-form results.
    out3 = gr.Textbox(label="Transcript", lines=5)
    out4 = gr.Textbox(label="Proficiency Analysis", lines=10)

    # analyze_url returns four values, mapped to the outputs in order.
    run.click(
        fn=analyze_url,
        inputs=inp,
        outputs=[out1, out2, out3, out4],
        api_name="analyze",
    )


# Start the app only when this file is run as a script.
if __name__ == "__main__":
    demo.launch()
|
|