# Imports import gradio as gr import wikipedia import numpy as np import faiss from langdetect import detect from gtts import gTTS from transformers import pipeline from sentence_transformers import SentenceTransformer import tempfile, os import torch import speech_recognition as sr from functools import lru_cache from pydub import AudioSegment # ===== Model Setup ===== models = {} def load_models(): models['encoder'] = SentenceTransformer('paraphrase-multilingual-MiniLM-L12-v2') models['to_en'] = pipeline('translation', model='Helsinki-NLP/opus-mt-mul-en') for lang in ['fr', 'ar', 'zh', 'es']: models[f'en_to_{lang}'] = pipeline('translation_en_to_' + lang, model=f'Helsinki-NLP/opus-mt-en-{lang}') models['answer_gen'] = pipeline('text2text-generation', model='google/flan-t5-base', max_length=1024) # increased length load_models() # ===== Utility Functions ===== def detect_language(text): try: return detect(text) except: return 'en' def translate(text, src, tgt): if src == tgt: return text if src != 'en': text = models['to_en'](text)[0]['translation_text'] if f'en_to_{tgt}' in models: return models[f'en_to_{tgt}'](text)[0]['translation_text'] return text def tts_play(text, lang): tts = gTTS(text=text, lang=lang) path = tempfile.mktemp(suffix=".mp3") tts.save(path) return path def chunk_text(text, max_words=100): # increased chunk size sentences = text.split('. ') chunks, current_chunk, current_len = [], [], 0 for sent in sentences: words = sent.split() if current_len + len(words) > max_words: chunks.append('. '.join(current_chunk)) current_chunk = [sent] current_len = len(words) else: current_chunk.append(sent) current_len += len(words) if current_chunk: chunks.append('. '.join(current_chunk)) return chunks def build_faiss_index(chunks, model): embeddings = model.encode(chunks, convert_to_numpy=True) index = faiss.IndexFlatL2(embeddings.shape[1]) index.add(embeddings) return index @lru_cache(maxsize=20) def prepare_faiss_for_topic(topic): wikipedia.set_lang('en') page = wikipedia.page(topic) content = page.content[:5000] # increase content for better answers chunks = chunk_text(content) index = build_faiss_index(chunks, models['encoder']) return chunks, index def retrieve_context(question, index, chunks, model, top_k=5): # increased top_k q_emb = model.encode([question], convert_to_numpy=True) _, indices = index.search(q_emb, top_k) return ' '.join([chunks[i] for i in indices[0]]) # ===== Main Inference Function ===== def qa_system(audio, text_question, topic, output_lang): question = "" if audio is not None: try: r = sr.Recognizer() audio_wav_path = tempfile.mktemp(suffix=".wav") sound = AudioSegment.from_file(audio) sound.export(audio_wav_path, format="wav") with sr.AudioFile(audio_wav_path) as source: audio_data = r.record(source) question = r.recognize_google(audio_data) except Exception as e: return f"❌ Could not understand the audio: {e}", None, None elif text_question: question = text_question.strip() else: return "❌ Please provide a voice or text question.", None, None input_lang = detect_language(question) try: chunks, faiss_index = prepare_faiss_for_topic(topic) except: return "Error loading topic from Wikipedia", None, None context = retrieve_context(question, faiss_index, chunks, models['encoder'], top_k=5) question_en = translate(question, input_lang, 'en') prompt = f"Answer based on the context:\nContext: {context}\nQuestion: {question_en}" answer_en = models['answer_gen'](prompt)[0]['generated_text'] if output_lang == 'en': answer = answer_en elif output_lang == 'am': answer = "Amharic translation not supported." else: answer = translate(answer_en, 'en', output_lang) audio_path = tts_play(answer, output_lang) return f"You asked: {question}\n\nAnswer: {answer}", audio_path, answer # ===== Gradio UI ===== lang_options = ['en', 'am', 'fr', 'ar', 'es', 'zh'] demo = gr.Interface( fn=qa_system, inputs=[ gr.Audio(type="filepath", label="🎤 Ask your Question by Voice (optional)"), gr.Textbox(label="✍️ Or type your Question here (optional)"), gr.Textbox(value="Artificial intelligence", label="📚 Wikipedia Topic"), gr.Dropdown(choices=lang_options, value='en', label="🌍 Output Language") ], outputs=[ gr.Textbox(label="🤖 Answer Output"), gr.Audio(label="🔊 Answer Playback"), gr.Textbox(label="📝 Translated Answer Text") ], title="🌍 Multilingual Voice/Text Q&A Assistant", description="""

👋 Welcome to the Multilingual Wikipedia Q&A Assistant

You can ask questions using voice or text in different languages, and get spoken and translated answers using AI + Wikipedia. 🌐

""" ) # Launch the app demo.launch()