Spaces:

youssefga28
/

English-Accent-Classifier

Sleeping

App Files Files Community

English-Accent-Classifier / app.py

youssefga28

Update app.py

a44c2d8 verified 2 months ago

raw

history blame contribute delete

3.32 kB

	import os
	import requests
	import tempfile
	import gradio as gr
	from moviepy import VideoFileClip
	from speechbrain.inference.interfaces import foreign_class
	import whisper
	from together import Together

	# Load the Whisper "base" checkpoint a single time at import so that every
	# request handled by this process reuses the same model object.
	_whisper_model = whisper.load_model("base")

	# Build the SpeechBrain accent classifier once as well. foreign_class loads
	# the class named by `classname` from `pymodule_file` in the `source` repo.
	_classifier = foreign_class(
	    classname="CustomEncoderWav2vec2Classifier",
	    pymodule_file="custom_interface.py",
	    source="warisqr7/accent-id-commonaccent_xlsr-en-english",
	)

	# Helper to download a video from a direct MP4 URL into a temp file
	def download_video(url: str, *, timeout: float = 30.0, chunk_size: int = 8192) -> str:
	    """Stream the resource at ``url`` into a temporary ``.mp4`` file.

	    Args:
	        url: Direct link to the video file.
	        timeout: Seconds passed to ``requests.get`` as its ``timeout``
	            argument, so a stalled server cannot block the app forever.
	        chunk_size: Number of bytes requested per streamed chunk.

	    Returns:
	        Path of the temporary file. The file is created with
	        ``delete=False``, so the caller is responsible for removing it.

	    Raises:
	        requests.RequestException: If the request fails, times out, or the
	            server answers with an HTTP error status.
	    """
	    # The context manager releases the connection even if streaming fails.
	    with requests.get(url, stream=True, timeout=timeout) as resp:
	        resp.raise_for_status()
	        tmp = tempfile.NamedTemporaryFile(delete=False, suffix=".mp4")
	        try:
	            with tmp:
	                for chunk in resp.iter_content(chunk_size):
	                    if chunk:  # skip empty keep-alive chunks
	                        tmp.write(chunk)
	        except BaseException:
	            # Do not leave a partial download behind; the handle is already
	            # closed by the `with tmp` block at this point.
	            try:
	                os.remove(tmp.name)
	            except OSError:
	                pass
	            raise
	    return tmp.name

	# Helper to extract audio to a temp file
	def extract_audio(video_path: str) -> str:
	    """Write the audio track of ``video_path`` to a temporary ``.mp3`` file.

	    Args:
	        video_path: Path of a video file readable by ``VideoFileClip``.

	    Returns:
	        Path of the temporary audio file. The file is created with
	        ``delete=False``, so the caller is responsible for removing it.

	    Raises:
	        ValueError: If the clip exposes no audio track (``clip.audio`` is
	            ``None``).
	    """
	    # Only the name is needed: close the handle immediately so the audio
	    # writer can open the path itself.
	    with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as handle:
	        tmp_audio = handle.name

	    clip = None
	    try:
	        clip = VideoFileClip(video_path)
	        if clip.audio is None:
	            raise ValueError("The video has no audio track to analyze.")
	        clip.audio.write_audiofile(tmp_audio, logger=None)
	    except BaseException:
	        # Extraction failed: remove the unusable output file before
	        # propagating the error.
	        try:
	            os.remove(tmp_audio)
	        except OSError:
	            pass
	        raise
	    finally:
	        # Close the clip on both the success and the failure path.
	        if clip is not None:
	            clip.close()
	    return tmp_audio

	# Main pipeline
	def analyze_url(video_url):
	    """Run the full pipeline for one video URL.

	    Steps: download the video, extract its audio, classify the accent,
	    transcribe the speech, and ask the LLM for a proficiency assessment.

	    Args:
	        video_url: Direct URL of the video to analyze.

	    Returns:
	        A 4-tuple ``(accent, confidence, transcript, analysis)`` where
	        ``confidence`` is a string such as ``"87.5%"``. On any failure the
	        tuple is ``("Error", "", "", message)`` instead of raising, so the
	        UI always receives four values.
	    """
	    if not isinstance(video_url, str) or not video_url.strip():
	        return "Error", "", "", "Please provide a video URL."
	    video_url = video_url.strip()

	    # Track temp paths so they can be removed on every exit path.
	    vid = None
	    aud = None
	    try:
	        # 1. Download & extract
	        vid = download_video(video_url)
	        aud = extract_audio(vid)

	        # 2. Accent classification (only the score and label are used)
	        out_prob, score, idx, lab = _classifier.classify_file(aud)
	        accent = lab[0]
	        # NOTE(review): assumes `score` is a probability in [0, 1] — confirm.
	        conf_pct = round(float(score) * 100, 2)

	        # 3. Transcription
	        result = _whisper_model.transcribe(aud)
	        transcript = result["text"]

	        # 4. LLM analysis
	        api_key = os.getenv('API_KEY')
	        client = Together(api_key=api_key)
	        prompt = f"""
	You are an English-speaking coach. Given this transcript of a spoken English audio with an {accent} accent and classification confidence {conf_pct}%:
	\"\"\"{transcript}\"\"\"

	Evaluate how confident the speaker sounds for a job interview based on fluency, clarity, filler usage, professional English, and pacing.
	Provide:
	- A proficiency score between 0 and 100
	- A brief explanation
	- Give Bullet points, but nothing in bold.
	"""
	        resp = client.chat.completions.create(
	            model="meta-llama/Llama-3.3-70B-Instruct-Turbo-Free",
	            messages=[{"role": "user", "content": prompt}]
	        )
	        analysis = resp.choices[0].message.content.strip()

	        return accent, f"{conf_pct}%", transcript, analysis

	    except Exception as e:
	        # UI boundary: report the failure in the last output box.
	        return "Error", "", "", str(e)

	    finally:
	        # Clean up temp files whether the pipeline succeeded or failed.
	        for path in (vid, aud):
	            if path is None:
	                continue
	            try:
	                os.remove(path)
	            except OSError:
	                pass

	# Build the Gradio interface: a URL text box, an "Analyze" button, and four
	# output text boxes that receive the four values returned by analyze_url.
	# NOTE(review): the nesting under the `with` statements is not visible in
	# this copy of the file (every line sits at the same indent). Confirm which
	# components belong to each gr.Row() and that run.click(...) is placed
	# inside the gr.Blocks context.
	with gr.Blocks(title="English Accent & Proficiency Analyzer") as demo:
	gr.Markdown("## 🎙️ English Accent Detection & Proficiency Analysis")
	with gr.Row():
	inp = gr.Textbox(label="Direct MP4 Video URL", placeholder="https://...")
	run = gr.Button("Analyze")
	with gr.Row():
	out1 = gr.Textbox(label="Detected Accent")
	out2 = gr.Textbox(label="Accent Classification Confidence Score")
	out3 = gr.Textbox(label="Transcript", lines=5)
	out4 = gr.Textbox(label="Proficiency Analysis", lines=10)

	# Wire the button: the text of `inp` is passed to analyze_url and its
	# return values fill out1..out4 in order. `api_name` names the endpoint
	# for this event.
	run.click(
	fn=analyze_url,
	inputs=inp,
	outputs=[out1, out2, out3, out4],
	api_name="analyze"
	)

	# Launch the app only when this file is executed as a script.
	if __name__ == "__main__":
	demo.launch()