besucoder committed on
Commit 3fed2fb · verified · 1 Parent(s): 4337770
Files changed (1)
  1. multi.py +150 -0
multi.py ADDED
@@ -0,0 +1,150 @@
+ # Imports
+ import gradio as gr
+ import wikipedia
+ import numpy as np
+ import faiss
+ from langdetect import detect
+ from gtts import gTTS
+ from transformers import pipeline
+ from sentence_transformers import SentenceTransformer
+ import tempfile, os
+ import torch
+ import speech_recognition as sr
+ from functools import lru_cache
+ from pydub import AudioSegment
+
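+ # Note: pydub needs the ffmpeg binary on PATH to decode non-WAV uploads, and
+ # both gTTS and speech_recognition's recognize_google call online Google
+ # services, so the app needs network access at runtime.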
+ # ===== Model Setup =====
+ models = {}
+ def load_models():
+     models['encoder'] = SentenceTransformer('paraphrase-multilingual-MiniLM-L12-v2')
+     models['to_en'] = pipeline('translation', model='Helsinki-NLP/opus-mt-mul-en')
+     for lang in ['fr', 'ar', 'zh', 'es']:
+         models[f'en_to_{lang}'] = pipeline(f'translation_en_to_{lang}', model=f'Helsinki-NLP/opus-mt-en-{lang}')
+     models['answer_gen'] = pipeline('text2text-generation', model='google/flan-t5-base', max_length=1024)  # increased length
+
+ load_models()
+
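+ # Loading every model up front is slow on first run (the weights are
+ # downloaded from the Hugging Face Hub) but keeps per-request latency low.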
+ # ===== Utility Functions =====
+ def detect_language(text):
+     try:
+         return detect(text)
+     except Exception:  # langdetect raises on empty or ambiguous input
+         return 'en'
+
+ def translate(text, src, tgt):
+     if src == tgt:
+         return text
+     if src != 'en':
+         text = models['to_en'](text)[0]['translation_text']
+     if f'en_to_{tgt}' in models:
+         return models[f'en_to_{tgt}'](text)[0]['translation_text']
+     return text
+
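+ # translate() pivots through English: e.g. translate("Bonjour le monde", 'fr', 'es')
+ # goes fr -> en via opus-mt-mul-en, then en -> es via opus-mt-en-es; a target
+ # without a loaded model (such as 'am') falls back to the English text.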
+ def tts_play(text, lang):
+     tts = gTTS(text=text, lang=lang)
+     # NamedTemporaryFile replaces the deprecated, race-prone tempfile.mktemp
+     with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as f:
+         path = f.name
+     tts.save(path)
+     return path
+
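+ # Caveat: gTTS coverage does not match the UI language list one-to-one; for
+ # example, some gTTS releases expect 'zh-CN'/'zh-TW' rather than 'zh', so a
+ # gTTS error here is possible for some output languages.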
+ def chunk_text(text, max_words=100):  # increased chunk size
+     sentences = text.split('. ')
+     chunks, current_chunk, current_len = [], [], 0
+     for sent in sentences:
+         words = sent.split()
+         if current_len + len(words) > max_words:
+             if current_chunk:  # skip the empty chunk an oversized first sentence would create
+                 chunks.append('. '.join(current_chunk))
+             current_chunk = [sent]
+             current_len = len(words)
+         else:
+             current_chunk.append(sent)
+             current_len += len(words)
+     if current_chunk:
+         chunks.append('. '.join(current_chunk))
+     return chunks
+
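+ # Example: chunk_text("one. two three. four", max_words=2)
+ # returns ['one', 'two three', 'four'].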
+ def build_faiss_index(chunks, model):
+     embeddings = model.encode(chunks, convert_to_numpy=True)
+     index = faiss.IndexFlatL2(embeddings.shape[1])
+     index.add(embeddings)
+     return index
+
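+ # IndexFlatL2 performs exact (brute-force) L2 search; with the page content
+ # capped at 5,000 characters a topic yields few enough chunks that an
+ # approximate index would gain nothing.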
+ @lru_cache(maxsize=20)
+ def prepare_faiss_for_topic(topic):
+     wikipedia.set_lang('en')
+     page = wikipedia.page(topic)
+     content = page.content[:5000]  # increase content for better answers
+     chunks = chunk_text(content)
+     index = build_faiss_index(chunks, models['encoder'])
+     return chunks, index
+
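+ # lru_cache keys on the topic string, so repeated questions on the same topic
+ # skip the Wikipedia fetch and re-embedding. wikipedia.page() raises for
+ # ambiguous or missing titles (DisambiguationError / PageError), which the
+ # caller below converts into a user-facing error message.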
+ def retrieve_context(question, index, chunks, model, top_k=5):  # increased top_k
+     q_emb = model.encode([question], convert_to_numpy=True)
+     _, indices = index.search(q_emb, top_k)
+     # faiss pads the result with -1 when the index holds fewer than top_k vectors
+     return ' '.join(chunks[i] for i in indices[0] if 0 <= i < len(chunks))
+
+ # ===== Main Inference Function =====
+ def qa_system(audio, text_question, topic, output_lang):
+     question = ""
+     if audio is not None:
+         try:
+             r = sr.Recognizer()
+             # Re-encode whatever Gradio recorded as WAV, which sr.AudioFile requires
+             with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as f:
+                 audio_wav_path = f.name
+             sound = AudioSegment.from_file(audio)
+             sound.export(audio_wav_path, format="wav")
+             with sr.AudioFile(audio_wav_path) as source:
+                 audio_data = r.record(source)
+             # recognize_google defaults to English; pass language=... for other locales
+             question = r.recognize_google(audio_data)
+         except Exception as e:
+             return f"❌ Could not understand the audio: {e}", None, None
+     elif text_question:
+         question = text_question.strip()
+     else:
+         return "❌ Please provide a voice or text question.", None, None
+
+     input_lang = detect_language(question)
+
+     try:
+         chunks, faiss_index = prepare_faiss_for_topic(topic)
+     except Exception as e:
+         return f"❌ Error loading topic from Wikipedia: {e}", None, None
+
+     context = retrieve_context(question, faiss_index, chunks, models['encoder'], top_k=5)
+     question_en = translate(question, input_lang, 'en')
+     prompt = f"Answer based on the context:\nContext: {context}\nQuestion: {question_en}"
+     answer_en = models['answer_gen'](prompt)[0]['generated_text']
+
+     if output_lang == 'en':
+         answer = answer_en
+     elif output_lang == 'am':
+         answer = "Amharic translation not supported."
+     else:
+         answer = translate(answer_en, 'en', output_lang)
+
+     try:
+         audio_path = tts_play(answer, output_lang)
+     except Exception:
+         audio_path = None  # gTTS may not support the selected language (e.g. 'am')
+     return f"You asked: {question}\n\nAnswer: {answer}", audio_path, answer
+
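+ # Flow recap: voice/text question -> langdetect -> retrieval with the
+ # multilingual encoder (the question embeds in its original language against
+ # the English chunks) -> flan-t5 answer in English -> translation -> gTTS.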
+ # ===== Gradio UI =====
+ lang_options = ['en', 'am', 'fr', 'ar', 'es', 'zh']
+
+ demo = gr.Interface(
+     fn=qa_system,
+     inputs=[
+         gr.Audio(type="filepath", label="🎤 Ask your Question by Voice (optional)"),
+         gr.Textbox(label="✍️ Or type your Question here (optional)"),
+         gr.Textbox(value="Artificial intelligence", label="📚 Wikipedia Topic"),
+         gr.Dropdown(choices=lang_options, value='en', label="🌍 Output Language")
+     ],
+     outputs=[
+         gr.Textbox(label="🤖 Answer Output"),
+         gr.Audio(label="🔊 Answer Playback"),
+         gr.Textbox(label="📝 Translated Answer Text")
+     ],
+     title="🌍 Multilingual Voice/Text Q&A Assistant",
+     description="""
+     <h3 style='text-align: center; font-weight: bold; font-style: italic;'>👋 Welcome to the Multilingual Wikipedia Q&A Assistant</h3>
+     <p style='text-align: center;'>You can ask questions using voice or text in different languages, and get spoken and translated answers using AI + Wikipedia. 🌐</p>
+     """
+ )
+
+ # Launch the app
+ demo.launch()
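+ # Rough dependency list (versions untested): gradio, wikipedia, faiss-cpu,
+ # langdetect, gTTS, transformers, sentence-transformers, torch,
+ # SpeechRecognition, pydub, sentencepiece (required by the opus-mt
+ # tokenizers), plus a system ffmpeg install.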