besucoder committed
Commit db6de8c · verified · 1 Parent(s): c4d47f7
Files changed (1)
  1. app.py +209 -133
app.py CHANGED
@@ -3,158 +3,234 @@ import gradio as gr
  import wikipedia
  import numpy as np
  import faiss
- from langdetect import detect
  from gtts import gTTS
- from transformers import pipeline
- from sentence_transformers import SentenceTransformer
  import tempfile
  import speech_recognition as sr
  from pydub import AudioSegment
- from functools import lru_cache
-
- # --- Load models ---
- models = {}
- def load_models():
-     models['encoder'] = SentenceTransformer('paraphrase-multilingual-MiniLM-L12-v2')
-     models['to_en'] = pipeline('translation', model='Helsinki-NLP/opus-mt-mul-en')
-     for lang in ['fr', 'ar', 'zh', 'es']:
-         models[f'en_to_{lang}'] = pipeline('translation_en_to_' + lang, model=f'Helsinki-NLP/opus-mt-en-{lang}')
-     models['answer_gen'] = pipeline('text2text-generation', model='google/flan-t5-base', max_length=1024)
- load_models()
-
- # --- Utility functions ---
- def detect_language(text):
-     try:
-         return detect(text)
-     except:
-         return 'en'
-
  def translate(text, src, tgt):
-     if src == tgt:
-         return text
-     if src != 'en':
-         text = models['to_en'](text)[0]['translation_text']
-     if f'en_to_{tgt}' in models:
-         return models[f'en_to_{tgt}'](text)[0]['translation_text']
      return text

- def tts_play(text, lang):
-     tts = gTTS(text=text, lang=lang)
-     path = tempfile.mktemp(suffix='.mp3')
-     tts.save(path)
-     return path
-
- def chunk_text(text, max_words=100):
-     sentences = text.split('. ')
-     chunks, current, length = [], [], 0
-     for sent in sentences:
-         words = sent.split()
-         if length + len(words) > max_words:
-             chunks.append(' '.join(current))
-             current, length = [sent], len(words)
-         else:
-             current.append(sent)
-             length += len(words)
-     if current: chunks.append(' '.join(current))
-     return chunks
-
- def build_faiss_index(chunks, model):
-     emb = model.encode(chunks, convert_to_numpy=True)
-     index = faiss.IndexFlatL2(emb.shape[1])
-     index.add(emb)
-     return index
-
- @lru_cache(maxsize=20)
- def prepare_faiss_for_topic(topic):
-     wikipedia.set_lang('en')
-     page = wikipedia.page(topic)
-     chunks = chunk_text(page.content)  # Use full content, no slicing limit
-     return chunks, build_faiss_index(chunks, models['encoder'])
-
- def retrieve_context(q, idx, chunks, model, top_k=5):
-     emb = model.encode([q], convert_to_numpy=True)
-     _, inds = idx.search(emb, top_k)
-     return ' '.join(chunks[i] for i in inds[0])
-
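This commit drops the FAISS retrieval path above in favor of a plain summary lookup. For reference, a minimal sketch of how the removed helpers fit together (not part of the commit; assumes the old models dict is loaded):

    chunks, index = prepare_faiss_for_topic('Artificial intelligence')   # cached per topic via lru_cache
    context = retrieve_context('Who coined the term AI?', index, chunks, models['encoder'])
    print(context[:200])   # the top_k=5 nearest chunks, joined into one context string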
- # --- Main Q&A function ---
- def qa_system(audio, text_q, topic, lang, history):
-     q = ''
-     if audio:
-         try:
-             r = sr.Recognizer()
-             wav = tempfile.mktemp('.wav')
-             AudioSegment.from_file(audio).export(wav, format='wav')
-             with sr.AudioFile(wav) as src:
-                 q = r.recognize_google(r.record(src))
-         except Exception as e:
-             return f"❌ Could not transcribe audio: {e}", None, history, ''
-     elif text_q:
-         q = text_q.strip()
-     else:
-         return '❌ Please speak or type your question.', None, history, ''
-
      try:
-         chunks, idx = prepare_faiss_for_topic(topic)
      except Exception as e:
-         return f'Error loading content: {e}', None, history, ''
-
-     ctx = retrieve_context(q, idx, chunks, models['encoder'])
-     q_en = translate(q, detect_language(q), 'en')
-
-     # Debug prints — remove in production
-     print("Question (original):", q)
-     print("Question (English):", q_en)
-     print("Retrieved context snippet:", ctx[:500], "...\n")
-
-     prompt = f"Context:\n{ctx}\n\nQuestion: {q_en}\nAnswer:"
-     ans_en = models['answer_gen'](prompt)[0]['generated_text']
-
-     print("Generated answer (English):", ans_en)
-
-     ans = ans_en if lang == 'en' else translate(ans_en, 'en', lang)
-     audio_path = tts_play(ans, lang)
-     history.append((q, ans))
-     chat = '\n\n'.join(f"Q{i+1}: {x}\nA{i+1}: {y}" for i, (x, y) in enumerate(history))
-     return f'You asked: {q}\n\nAnswer: {ans}', audio_path, history, chat
- def clear_all():
-     return None, '', None, [], ''

- # --- Gradio UI with styling ---
- css_style = """
  .gradio-container {
-     background-color: #cce7ff !important; /* Light blue */
-     border: 3px solid #000022 !important; /* Balanced blue-black border */
-     border-radius: 12px;
      padding: 20px;
  }
  """

- with gr.Blocks(css=css_style) as demo:
-     gr.Markdown("""
-     <h1 style='color:#003366; text-align:center; margin-bottom: 0;'>🌐 Multilingual Wikipedia Q&A Assistant</h1>
-     <p style='text-align:center; font-size:16px; margin-top: 0;'>Ask your questions by typing or speaking, and get answers in your language!</p>
-     """)
-
-     state = gr.State([])
-
-     with gr.Row():
-         ai = gr.Audio(type='filepath', label='🎤 Speak your question')
-         ti = gr.Textbox(lines=3, placeholder='Or type your question here')
-
-     with gr.Row():
-         tp = gr.Textbox(value='Artificial intelligence', label='Wikipedia Topic')
-         lg = gr.Dropdown(['en','am','fr','ar','es','zh'], value='en', label='Output Language')
-
      with gr.Row():
-         sb = gr.Button('🔍 Get Answer')
-         cb = gr.Button('🗑️ Clear All')
-
-     ao = gr.Textbox(label='🤖 Answer')
-     av = gr.Audio(label='🔊 Listen Answer')
-     cd = gr.Markdown(label='🗂️ Chat History')
-
-     sb.click(qa_system, inputs=[ai, ti, tp, lg, state], outputs=[ao, av, state, cd])
-     cb.click(clear_all, outputs=[ai, ti, tp, state, cd])
-
- demo.launch(share=True)

  import wikipedia
  import numpy as np
  import faiss
  from gtts import gTTS
  import tempfile
+ from langdetect import detect
  import speech_recognition as sr
  from pydub import AudioSegment
+ from transformers import pipeline  # pipeline lives in transformers
+ from sentence_transformers import SentenceTransformer  # SentenceTransformer lives in sentence_transformers, not transformers
+ import os
+ from pydub.silence import split_on_silence
+ import time  # currently unused below
+ import pytesseract  # currently unused below
+
+ # Initialize models
+ models = {
+     'translator': pipeline('translation', model='Helsinki-NLP/opus-mt-mul-en'),
+     'answer_gen': pipeline('text2text-generation', model='google/flan-t5-base'),
+     'encoder': SentenceTransformer('paraphrase-multilingual-MiniLM-L12-v2')
+ }
+
+ # Add English-to-X translation models
+ for lang in ['fr', 'ar', 'zh', 'es']:
+     models[f'en_to_{lang}'] = pipeline(f'translation_en_to_{lang}',
+                                        model=f'Helsinki-NLP/opus-mt-en-{lang}')

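Loading half a dozen pipelines at import time dominates cold-start latency, and each pipeline call returns a list of dicts. A quick smoke test of the setup (a sketch, not part of the commit):

    print(models['translator']('Bonjour le monde')[0]['translation_text'])    # mul -> en
    print(models['en_to_fr']('Hello world')[0]['translation_text'])           # en -> fr
    print(models['answer_gen']('Question: what is 2+2? Answer:')[0]['generated_text'])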
  def translate(text, src, tgt):
+     if src == tgt: return text
+     if src != 'en': text = models['translator'](text)[0]['translation_text']
+     if f'en_to_{tgt}' in models: return models[f'en_to_{tgt}'](text)[0]['translation_text']
      return text

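translate() pivots through English: non-English input is first mapped to English by the mul-en model, then to the target language if a dedicated en_to_X pipeline was loaded; otherwise the English text is returned as-is. Worked call paths (a sketch, assuming the models above):

    translate('Bonjour', 'fr', 'ar')   # two hops: mul -> en, then en -> ar
    translate('Hello', 'en', 'zh')     # one hop: en -> zh
    translate('Hola', 'es', 'es')      # src == tgt: returned unchanged
    translate('Hello', 'en', 'de')     # hypothetical: no en_to_de pipeline loaded, English comes back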
+ def text_to_speech(text, lang):
      try:
+         tts = gTTS(text=text, lang=lang)
+         audio_path = tempfile.mktemp(suffix='.mp3')
+         tts.save(audio_path)
+         return audio_path
      except Exception as e:
+         print(f"TTS Error: {e}")
+         return None
+
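tempfile.mktemp only returns a name and is documented as race-prone and deprecated. If that ever matters on a busy Space, a safer variant (a sketch under that assumption, otherwise identical in behavior):

    def text_to_speech_safe(text, lang):
        # Hypothetical alternative: NamedTemporaryFile reserves the path atomically.
        try:
            with tempfile.NamedTemporaryFile(suffix='.mp3', delete=False) as f:
                audio_path = f.name
            gTTS(text=text, lang=lang).save(audio_path)
            return audio_path
        except Exception as e:
            print(f"TTS Error: {e}")
            return None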
+ def process_audio(audio_path):
+     recognizer = sr.Recognizer()
+     sound = AudioSegment.from_file(audio_path)
+     chunks = split_on_silence(sound,
+         min_silence_len=500,
+         silence_thresh=sound.dBFS - 14,
+         keep_silence=500
+     )
+
+     full_text = ""
+     for chunk in chunks:
+         chunk_path = tempfile.mktemp(suffix='.wav')
+         chunk.export(chunk_path, format="wav")
+         with sr.AudioFile(chunk_path) as source:
+             audio = recognizer.record(source)
+             try:
+                 text = recognizer.recognize_google(audio)
+                 full_text += f" {text}"
+             except Exception:
+                 pass  # unrecognized chunk; fall through so the temp file is still removed
+         os.unlink(chunk_path)
+
+     return full_text.strip() if full_text else None
+
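process_audio splits the recording at pauses of at least 500 ms, judged against a threshold 14 dB below the clip's average loudness, pads each chunk with 500 ms of silence, and sends the chunks to Google's free recognizer one by one. A standalone check (a sketch; 'question.wav' is a placeholder path):

    text = process_audio('question.wav')
    print(text or 'nothing recognized')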
+ def get_wikipedia_content(topic):
+     try:
+         wikipedia.set_lang('en')
+         try:
+             page = wikipedia.page(topic, auto_suggest=False)
+             return page.summary[:1000]
+         except wikipedia.exceptions.DisambiguationError as e:
+             page = wikipedia.page(e.options[0])
+             return page.summary[:1000]
+     except Exception as e:
+         print(f"Wikipedia error: {e}")
+         return None
+
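Because auto_suggest=False, ambiguous titles raise DisambiguationError and the handler silently falls back to the first listed option, which may not match the user's intent. For instance (a sketch; results depend on live Wikipedia):

    print(get_wikipedia_content('Mercury'))                  # likely ambiguous: resolves via e.options[0]
    print(get_wikipedia_content('Artificial intelligence'))  # direct hit: first 1000 chars of the summary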
+ def generate_response(text, topic, lang):
+     context = get_wikipedia_content(topic)
+     if not context:
+         return "Could not find information. Please try another topic.", None
+
+     prompt = f"Context: {context}\nQuestion: {text}\nAnswer:"
+     answer = models['answer_gen'](prompt, max_length=200)[0]['generated_text']
+     translated = translate(answer, 'en', lang) if lang != 'en' else answer
+     audio_path = text_to_speech(translated, lang)
+
+     return translated, audio_path
+
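generate_response is the whole pipeline in one call: a 1000-character context, a flan-t5 answer capped at max_length=200 tokens, optional translation, then TTS. An end-to-end call (a sketch):

    answer, mp3_path = generate_response('Who coined the term AI?', 'Artificial intelligence', 'en')
    print(answer)
    print(mp3_path)   # temp .mp3 from gTTS, or None if TTS failed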
+ def handle_interaction(audio, text, topic, lang, chat_history):
+     if audio is not None:
+         recognized_text = process_audio(audio)
+         if recognized_text:
+             text = recognized_text
+         else:
+             chat_history.append(("", "Could not understand audio. Please try again."))
+             return chat_history, "", None
+
+     if not text.strip():
+         chat_history.append(("", "Please enter a question."))
+         return chat_history, "", None
+
+     response, audio_output = generate_response(text, topic, lang)
+     chat_history.append((text, response))
+
+     return chat_history, "", audio_output

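handle_interaction returns history as (user, bot) tuples, the default format gr.Chatbot accepts; error messages are appended with an empty user side, which renders as a blank user bubble. Called outside the UI (a sketch):

    history, cleared_box, audio = handle_interaction(None, 'What is AI?', 'Artificial intelligence', 'en', [])
    print(history[-1])   # ('What is AI?', '<generated answer>')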
+ # Custom CSS with light blue and dark blue theme
+ custom_css = """
  .gradio-container {
+     background: #f0f8ff !important;
+     border: 3px solid #00008b !important;
+     border-radius: 10px !important;
+     font-family: 'Arial', sans-serif;
+ }
+ .gr-box {
+     background-color: #e6f2ff !important;
+     border: 2px solid #00008b !important;
+     border-radius: 8px !important;
+ }
+ .gr-button {
+     background-color: #4d94ff !important;
+     border: 2px solid #00008b !important;
+     color: white !important;
+     border-radius: 6px !important;
+ }
+ .gr-button:hover {
+     background-color: #1a75ff !important;
+ }
+ .gr-chatbot {
+     background-color: #e6f2ff !important;
+     border: 2px solid #00008b !important;
+     border-radius: 8px !important;
+ }
+ .gr-textbox, .gr-dropdown, .gr-audio {
+     background-color: #e6f2ff !important;
+     border: 2px solid #00008b !important;
+     border-radius: 6px !important;
+ }
+ .welcome-header {
+     text-align: center;
+     color: #00008b !important;
+     margin-bottom: 20px;
+ }
+ .welcome-message {
+     background-color: #e6f2ff;
      padding: 20px;
+     border-radius: 10px;
+     border: 2px solid #00008b;
+     margin-bottom: 20px;
+ }
+ .avatar {
+     width: 80px;
+     height: 80px;
+     margin: 0 auto;
+     display: block;
  }
  """
 
+ # Welcome page content
+ welcome_html = """
+ <div class="welcome-header">
+     <img src="https://i.imgur.com/6wBs5mO.png" class="avatar" alt="AI Assistant">
+     <h1>Welcome to Your Multilingual AI Assistant! 🌍</h1>
+ </div>
+ <div class="welcome-message">
+     <h3>Hello! I'm your personal Wikipedia assistant 🤖</h3>
+     <p>I can help you find information on any topic in multiple languages. Here's what I can do:</p>
+     <ul>
+         <li>🔍 Answer questions from Wikipedia knowledge</li>
+         <li>🗣️ Understand both voice and text input</li>
+         <li>🌐 Respond in English, French, Spanish, Chinese, or Arabic</li>
+         <li>🔊 Speak answers back to you</li>
+     </ul>
+     <p>To get started, simply type your question or click the microphone to speak!</p>
+ </div>
+ """

+ with gr.Blocks(css=custom_css, title="🌍 Multilingual AI Assistant") as demo:
+     # Welcome page
+     gr.HTML(welcome_html)
+
+     # Main interface
      with gr.Row():
+         with gr.Column(scale=1):
+             audio_input = gr.Audio(
+                 sources=["microphone", "upload"],
+                 type="filepath",
+                 label="🎤 Speak or upload audio",
+                 interactive=True
+             )
+             topic_input = gr.Textbox(
+                 "Artificial Intelligence",
+                 label="📚 Wikipedia Topic"
+             )
+             lang_input = gr.Dropdown(
+                 ["en", "fr", "es", "zh", "ar"],
+                 value="en",
+                 label="🌐 Output Language"
+             )
+
+         with gr.Column(scale=2):
+             chatbot = gr.Chatbot(label="Conversation")
+             text_input = gr.Textbox(
+                 placeholder="Type your question here...",
+                 label="✏️ Or type here"
+             )
+             with gr.Row():
+                 clear_btn = gr.Button("🗑️ Clear Chat")
+                 submit_btn = gr.Button("🚀 Submit", variant="primary")
+
+     audio_output = gr.Audio(label="🔊 Answer", visible=True)
+
+     # Event handlers
+     submit_btn.click(
+         handle_interaction,
+         inputs=[audio_input, text_input, topic_input, lang_input, chatbot],
+         outputs=[chatbot, text_input, audio_output]
+     )
+     text_input.submit(
+         handle_interaction,
+         inputs=[audio_input, text_input, topic_input, lang_input, chatbot],
+         outputs=[chatbot, text_input, audio_output]
+     )
+     clear_btn.click(
+         lambda: ([], "", None),
+         outputs=[chatbot, text_input, audio_output]
+     )
+
+ demo.launch(share=True)
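On Hugging Face Spaces the share flag is redundant, since Gradio detects Spaces and serves the public URL itself; a common closing pattern for local runs is (a sketch):

    if __name__ == '__main__':
        demo.launch()   # share=True only matters when running outside Spaces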