Update app.py
Browse files
app.py
CHANGED
@@ -1,18 +1,21 @@
|
|
1 |
import gradio as gr
|
2 |
import torch
|
3 |
-
from transformers import pipeline
|
|
|
4 |
import re
|
|
|
|
|
5 |
|
6 |
-
#
|
|
|
|
|
|
|
|
|
7 |
atlas_pipe = None
|
8 |
-
transliteration_tokenizer = None
|
9 |
-
transliteration_model = None
|
10 |
|
11 |
-
def
|
12 |
-
"""Load
|
13 |
-
global atlas_pipe
|
14 |
-
|
15 |
-
# Load Atlas-Chat model
|
16 |
if atlas_pipe is None:
|
17 |
print("๐๏ธ Loading Atlas-Chat-2B model...")
|
18 |
atlas_pipe = pipeline(
|
@@ -22,15 +25,7 @@ def load_models():
|
|
22 |
device="cuda" if torch.cuda.is_available() else "cpu"
|
23 |
)
|
24 |
print("โ
Atlas-Chat model loaded!")
|
25 |
-
|
26 |
-
# Load Transliteration model
|
27 |
-
if transliteration_tokenizer is None or transliteration_model is None:
|
28 |
-
print("๐ Loading Transliteration model...")
|
29 |
-
transliteration_tokenizer = AutoTokenizer.from_pretrained("atlasia/Transliteration-Moroccan-Darija")
|
30 |
-
transliteration_model = AutoModelForSeq2SeqLM.from_pretrained("atlasia/Transliteration-Moroccan-Darija")
|
31 |
-
print("โ
Transliteration model loaded!")
|
32 |
-
|
33 |
-
return atlas_pipe, transliteration_tokenizer, transliteration_model
|
34 |
|
35 |
def detect_arabizi(text):
|
36 |
"""
|
@@ -89,40 +84,70 @@ def detect_arabizi(text):
|
|
89 |
|
90 |
return False
|
91 |
|
92 |
-
def
|
93 |
"""
|
94 |
-
Convert Arabizi text to Arabic using
|
95 |
"""
|
96 |
try:
|
97 |
-
|
|
|
|
|
|
|
98 |
|
99 |
-
|
100 |
-
|
101 |
|
102 |
-
#
|
103 |
-
|
104 |
-
|
105 |
-
|
106 |
-
max_length
|
107 |
-
num_beams
|
108 |
-
early_stopping
|
109 |
-
|
110 |
-
|
111 |
|
112 |
-
#
|
113 |
-
|
114 |
-
|
115 |
-
return arabic_text.strip()
|
116 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
117 |
except Exception as e:
|
118 |
-
print(f"โ
|
119 |
-
# Fallback to original text if conversion fails
|
120 |
return arabizi_text
|
121 |
|
122 |
def arabic_to_arabizi(arabic_text):
|
123 |
"""
|
124 |
-
Convert Arabic script to Arabizi using
|
125 |
-
(Keeping this as backup since no reverse model available)
|
126 |
"""
|
127 |
if not arabic_text:
|
128 |
return arabic_text
|
@@ -151,35 +176,17 @@ def arabic_to_arabizi(arabic_text):
|
|
151 |
'ุทุงุฌูู': 'tajine', 'ุฃุชุงู': 'atay', 'ุฎูุจุฒ': 'khobz',
|
152 |
'ูุงูู': 'kayn', 'ู
ุงูุงููุด': 'makaynsh', 'ุดู': 'chi',
|
153 |
'ุฒููู': 'zwin', 'ุฒูููุง': 'zwina', 'ู
ุฒูุงู': 'mzyan', 'ู
ุฒูุงูุง': 'mzyana',
|
154 |
-
'
|
155 |
-
'
|
156 |
-
'
|
157 |
-
'
|
158 |
-
'
|
159 |
-
'
|
160 |
-
'
|
161 |
-
'
|
162 |
-
'
|
163 |
-
'
|
164 |
-
'
|
165 |
-
'ูุชุจุช': 'ktebt', 'ูุชุจุชู': 'ktebti', 'ูุชุจ': 'kteb', 'ูุชุจุงุช': 'ktebat',
|
166 |
-
'ูุนุจุช': 'l3ebt', 'ูุนุจุชู': 'l3ebti', 'ูุนุจ': 'l3eb', 'ูุนุจุงุช': 'l3ebat',
|
167 |
-
'ุฎุฏู
ุช': 'khdmt', 'ุฎุฏู
ุชู': 'khdmti', 'ุฎุฏู
': 'khdm', 'ุฎุฏู
ุงุช': 'khdmat',
|
168 |
-
'ุตููุช': 'sllit', 'ุตููุชู': 'slliti', 'ุตูุง': 'slla', 'ุตูุงุช': 'sllat',
|
169 |
-
'ุทุจุฎุช': '6bkht', 'ุทุจุฎุชู': '6bkhti', 'ุทุจุฎ': '6bekh', 'ุทุจุฎุงุช': '6bekhat',
|
170 |
-
'ูุงุญุฏ': 'wa7ed', 'ุฌูุฌ': 'joj', 'ุชูุงุชุง': 'tlata', 'ุฑุจุนุง': 'reb3a',
|
171 |
-
'ุฎู
ุณุง': 'khamsa', 'ุณุชุง': 'setta', 'ุณุจุนุง': 'seb3a', 'ุชู
ููุง': 'tmnya',
|
172 |
-
'ุชุณุนุง': 'tes3a', 'ุนุดุฑุง': '3echra', 'ุญุฏุงุด': '7dach', 'ุทูุงุด': '6nach',
|
173 |
-
'ููุงุฑ': 'nhar', 'ูููุง': 'lila', 'ุตุจุงุญ': 'sba7', 'ุนุดูุง': '3echiya',
|
174 |
-
'ุฃู
ุณ': 'ems', 'ุงูุจุงุฑุญ': 'lbare7', 'ุบุฏุง': 'ghda', 'ุจุนุฏ ุบุฏุง': 'b3d ghda',
|
175 |
-
'ุฏุงุฑ': 'dar', 'ุจูุช': 'bit', 'ุดุงุฑุน': 'char3', 'ู
ุฏููุง': 'mdina',
|
176 |
-
'ูุฑููุจุง': 'karhouba', 'ุทูู
ูุจูู': 'tomobil', 'ูุทุงุฑ': 'q6ar', 'ุจุงุต': 'bas',
|
177 |
-
'ู
ุงููุง': 'makla', 'ุดุฑุงุจ': 'chrab', 'ูู
ุง': 'lma', 'ุนุทุด': '36ch',
|
178 |
-
'ุฌูุน': 'jo3', 'ุดุจุนุงู': 'cheb3an', 'ุนูุงู': '3yyan', 'ุตุญูุญ': 's7i7',
|
179 |
-
'ู
ุฑูุถ': 'mrid', 'ุฏูุชูุฑ': 'doktor', 'ุณุจูุทุงุฑ': 'sbitar', 'ุฏูุง': 'dwa',
|
180 |
-
'ูููุณ': 'flous', 'ุฏุฑูู
': 'derhem', 'ุฑูุงู': 'riyal', 'ุงูููุฑู': 'lyoro',
|
181 |
-
'ุฎุฏู
ุง': 'khedma', 'ู
ุนูู
': 'mo3alim', 'ุทุงูุจ': 'talib', 'ุฃุณุชุงุฐ': 'ostaz',
|
182 |
-
'ูุชุงุจ': 'ktab', 'ููู
': 'qalam', 'ูุงุบุท': 'kaghet', 'ุทุงููุง': 'tabla'
|
183 |
}
|
184 |
|
185 |
# CHARACTER MAPPINGS (Arabic โ Arabizi)
|
@@ -196,7 +203,7 @@ def arabic_to_arabizi(arabic_text):
|
|
196 |
|
197 |
result = arabic_text
|
198 |
|
199 |
-
# Step 1: Apply word mappings
|
200 |
for arabic_word, arabizi_word in word_mappings.items():
|
201 |
# Use word boundaries to avoid partial matches
|
202 |
result = re.sub(r'\b' + re.escape(arabic_word) + r'\b', arabizi_word, result)
|
@@ -208,39 +215,36 @@ def arabic_to_arabizi(arabic_text):
|
|
208 |
return result.strip()
|
209 |
|
210 |
def chat_with_atlas(message, history):
|
211 |
-
"""Generate response from Atlas-Chat model with
|
212 |
if not message.strip():
|
213 |
return "ahlan wa sahlan! kifash n9der n3awnek? / ู
ุฑุญุจุง! ูููุงุด ููุฏุฑ ูุนุงูููุ"
|
214 |
|
215 |
try:
|
216 |
-
# Load
|
217 |
-
atlas_model
|
218 |
|
219 |
# Detect if input is Arabizi
|
220 |
is_arabizi_input = detect_arabizi(message)
|
221 |
|
222 |
-
print("\n" + "="*
|
223 |
-
print("๐ DEBUG LOG
|
224 |
-
print("="*
|
225 |
-
print(f"๐ฅ
|
226 |
-
print(f"
|
227 |
|
228 |
# Prepare input for the model
|
229 |
if is_arabizi_input:
|
230 |
-
|
231 |
-
|
232 |
-
|
233 |
-
print(f"โ
ARABIC CONVERSION: '{arabic_input}'")
|
234 |
model_input = arabic_input
|
235 |
else:
|
236 |
-
|
237 |
-
print(f"\nโก๏ธ NO CONVERSION NEEDED - Using original input")
|
238 |
model_input = message
|
239 |
|
240 |
-
print(f"
|
241 |
-
print(f"๐ค MODEL INPUT: '{model_input}'")
|
242 |
|
243 |
-
# Generate response using
|
244 |
messages = [{"role": "user", "content": model_input}]
|
245 |
|
246 |
outputs = atlas_model(
|
@@ -253,28 +257,22 @@ def chat_with_atlas(message, history):
|
|
253 |
|
254 |
# Extract the response
|
255 |
response = outputs[0]["generated_text"][-1]["content"].strip()
|
256 |
-
print(f"โ
|
257 |
|
258 |
# Convert response back to Arabizi if input was Arabizi
|
259 |
if is_arabizi_input:
|
260 |
-
print(
|
261 |
arabizi_response = arabic_to_arabizi(response)
|
262 |
-
print(f"โ
FINAL
|
263 |
-
print("="*
|
264 |
-
print("๐ฏ FINAL OUTPUT TO USER:", arabizi_response)
|
265 |
-
print("="*60 + "\n")
|
266 |
return arabizi_response
|
267 |
else:
|
268 |
-
|
269 |
-
print(f"\nโก๏ธ NO BACK-CONVERSION NEEDED")
|
270 |
-
print("="*60)
|
271 |
-
print("๐ฏ FINAL OUTPUT TO USER:", response)
|
272 |
-
print("="*60 + "\n")
|
273 |
return response
|
274 |
|
275 |
except Exception as e:
|
276 |
-
print(f"\nโ ERROR
|
277 |
-
print("="*
|
278 |
# Return error in appropriate language
|
279 |
if detect_arabizi(message):
|
280 |
return f"sorry, kan chi mochkil: {str(e)}. 3awd jar'b!"
|
@@ -284,19 +282,20 @@ def chat_with_atlas(message, history):
|
|
284 |
# Create the Gradio interface
|
285 |
demo = gr.ChatInterface(
|
286 |
fn=chat_with_atlas,
|
287 |
-
title="๐๏ธ Atlas-Chat:
|
288 |
description="""
|
289 |
-
**ู
ุฑุญุจุง ุจู ูู ุฃุทูุณ
|
290 |
|
291 |
-
|
292 |
-
- **Arabic Script (ุงูุนุฑุจูุฉ)** โ
|
293 |
-
- **Arabizi (3arabi bi 7oruf latin)** โ
|
294 |
-
- **English** โ
|
295 |
|
296 |
-
**โก
|
297 |
-
-
|
298 |
-
-
|
299 |
-
-
|
|
|
300 |
|
301 |
**ุฌุฑุจ ูุฐู ุงูุฃุณุฆูุฉ / Try these questions:**
|
302 |
""",
|
|
|
1 |
import gradio as gr
|
2 |
import torch
|
3 |
+
from transformers import pipeline
|
4 |
+
import requests
|
5 |
import re
|
6 |
+
import os
|
7 |
+
from huggingface_hub import login
|
8 |
|
9 |
+
# Authenticate with Hugging Face
|
10 |
+
if "HF_TOKEN" in os.environ:
|
11 |
+
login(token=os.environ["HF_TOKEN"])
|
12 |
+
|
13 |
+
# Global variable to store the Atlas-Chat model
|
14 |
atlas_pipe = None
|
|
|
|
|
15 |
|
16 |
+
def load_atlas_model():
|
17 |
+
"""Load only the Atlas-Chat model locally"""
|
18 |
+
global atlas_pipe
|
|
|
|
|
19 |
if atlas_pipe is None:
|
20 |
print("๐๏ธ Loading Atlas-Chat-2B model...")
|
21 |
atlas_pipe = pipeline(
|
|
|
25 |
device="cuda" if torch.cuda.is_available() else "cpu"
|
26 |
)
|
27 |
print("โ
Atlas-Chat model loaded!")
|
28 |
+
return atlas_pipe
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
29 |
|
30 |
def detect_arabizi(text):
|
31 |
"""
|
|
|
84 |
|
85 |
return False
|
86 |
|
87 |
+
def arabizi_to_arabic_api(arabizi_text):
    """
    Convert Arabizi text to Arabic using Hugging Face Inference API

    The conversion is best-effort: whenever it cannot be completed (no
    HF_TOKEN in the environment, non-200 status, timeout, network error,
    unrecognised or empty payload) the input text is returned unchanged, so
    the caller always gets a string back and never ``None``.

    Args:
        arabizi_text: Text to transliterate.

    Returns:
        The stripped text reported by the API, or ``arabizi_text`` itself
        when the conversion failed.
    """
    # Check if HF_TOKEN is available
    token = os.environ.get("HF_TOKEN")
    if token is None:
        print("โ HF_TOKEN not found, falling back to original text")
        return arabizi_text

    API_URL = "https://api-inference.huggingface.co/models/atlasia/Transliteration-Moroccan-Darija"
    headers = {"Authorization": f"Bearer {token}"}

    # Prepare the payload
    payload = {
        "inputs": arabizi_text,
        "parameters": {
            "max_length": 512,
            "num_beams": 4,
            "early_stopping": True,
        },
    }

    try:
        # Make API request with timeout
        response = requests.post(API_URL, headers=headers, json=payload, timeout=30)

        if response.status_code == 200:
            result = response.json()
            converted = _extract_transliteration(result)
            if converted is not None:
                return converted
            # Every unrecognised shape ends up here instead of silently
            # falling off the end of the function and returning None.
            print(f"โ Unexpected API response format: {result}")
            return arabizi_text

        if response.status_code == 503:
            print("โณ Model is loading, falling back to original text")
            return arabizi_text

        print(f"โ API error {response.status_code}: {response.text}")
        return arabizi_text

    except requests.exceptions.Timeout:
        print("โฐ API timeout, falling back to original text")
        return arabizi_text
    except requests.exceptions.RequestException as e:
        print(f"โ API request failed: {e}")
        return arabizi_text
    except Exception as e:
        # Deliberate catch-all: this helper must never break the chat flow
        # (e.g. a 200 response whose body is not valid JSON).
        print(f"โ Unexpected error in API conversion: {e}")
        return arabizi_text


def _extract_transliteration(result):
    """Return the non-empty text carried by an API payload, or None if there is none."""
    # The API may wrap the prediction in a list; only the first entry is used.
    if isinstance(result, list) and result:
        result = result[0]

    if isinstance(result, str):
        return result.strip() or None

    if isinstance(result, dict):
        # "generated_text" is the text2text-generation key; "translation_text"
        # is what translation-task models return.
        for key in ("generated_text", "translation_text"):
            text = result.get(key)
            if isinstance(text, str) and text.strip():
                return text.strip()

    return None
|
147 |
|
148 |
def arabic_to_arabizi(arabic_text):
|
149 |
"""
|
150 |
+
Convert Arabic script to Arabizi using comprehensive hard-coded mappings
|
|
|
151 |
"""
|
152 |
if not arabic_text:
|
153 |
return arabic_text
|
|
|
176 |
'ุทุงุฌูู': 'tajine', 'ุฃุชุงู': 'atay', 'ุฎูุจุฒ': 'khobz',
|
177 |
'ูุงูู': 'kayn', 'ู
ุงูุงููุด': 'makaynsh', 'ุดู': 'chi',
|
178 |
'ุฒููู': 'zwin', 'ุฒูููุง': 'zwina', 'ู
ุฒูุงู': 'mzyan', 'ู
ุฒูุงูุง': 'mzyana',
|
179 |
+
'ูุงูููู': 'kaynin', 'ู
ุทุนู
': 'ma63am', 'ู
ุทุงุนู
': 'ma6a3im',
|
180 |
+
'ู
ุดููุฑ': 'mashhur', 'ู
ุดููุฑูู': 'mashhurin', 'ูุณุท': 'wost',
|
181 |
+
'ุงูู
ุฏููุฉ': 'lmdina', 'ู
ุฏููุฉ': 'mdina', 'ุฅูุทุงูู': 'italiy',
|
182 |
+
'ูุงุจุงูู': 'yabani', 'ู
ุบุฑุจู': 'maghribi', 'ูุฑูุณู': 'fransi',
|
183 |
+
'ุฃู
ุฑููู': 'amriki', 'ุตููู': 'sini', 'ููุฏู': 'hindi',
|
184 |
+
'ูุญู
': 'la7m', 'ุฏุฌุงุฌ': 'djaj', 'ุญูุช': '7ut', 'ุฎุถุฑุฉ': 'khodra',
|
185 |
+
'ููุงูู': 'fawakeh', 'ุฌุจู': 'jben', 'ุฒุจุฏุฉ': 'zebda', 'ุญููุจ': '7lib',
|
186 |
+
'ูููุฉ': 'qahwa', 'ุดุงู': 'atay', 'ู
ุงุก': 'ma', 'ุนุตูุฑ': '3asir',
|
187 |
+
'ุฎุจุฒ': 'khobz', 'ุฑุฒ': 'roz', 'ู
ูุฑููุฉ': 'makarona', 'ุจุทุงุทุง': 'batata',
|
188 |
+
'ุทู
ุงุทู
': 'toma6im', 'ุจุตู': 'basal', 'ุซูู
': 'tum', 'ูููู': 'felfel',
|
189 |
+
'ู
ูุญ': 'mel7', 'ุณูุฑ': 'sokkar', 'ุฒูุช': 'zit', 'ุฎู': 'khall'
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
190 |
}
|
191 |
|
192 |
# CHARACTER MAPPINGS (Arabic โ Arabizi)
|
|
|
203 |
|
204 |
result = arabic_text
|
205 |
|
206 |
+
# Step 1: Apply word mappings (most specific first)
|
207 |
for arabic_word, arabizi_word in word_mappings.items():
|
208 |
# Use word boundaries to avoid partial matches
|
209 |
result = re.sub(r'\b' + re.escape(arabic_word) + r'\b', arabizi_word, result)
|
|
|
215 |
return result.strip()
|
216 |
|
217 |
def chat_with_atlas(message, history):
|
218 |
+
"""Generate response from Atlas-Chat model with API-powered Arabizi conversion"""
|
219 |
if not message.strip():
|
220 |
return "ahlan wa sahlan! kifash n9der n3awnek? / ู
ุฑุญุจุง! ูููุงุด ููุฏุฑ ูุนุงูููุ"
|
221 |
|
222 |
try:
|
223 |
+
# Load Atlas-Chat model
|
224 |
+
atlas_model = load_atlas_model()
|
225 |
|
226 |
# Detect if input is Arabizi
|
227 |
is_arabizi_input = detect_arabizi(message)
|
228 |
|
229 |
+
print("\n" + "="*50)
|
230 |
+
print("๐ ATLAS-CHAT DEBUG LOG")
|
231 |
+
print("="*50)
|
232 |
+
print(f"๐ฅ INPUT: '{message}'")
|
233 |
+
print(f"๐ ARABIZI: {is_arabizi_input}")
|
234 |
|
235 |
# Prepare input for the model
|
236 |
if is_arabizi_input:
|
237 |
+
print("๐ Converting ArabiziโArabic via API...")
|
238 |
+
arabic_input = arabizi_to_arabic_api(message)
|
239 |
+
print(f"โ
ARABIC: '{arabic_input}'")
|
|
|
240 |
model_input = arabic_input
|
241 |
else:
|
242 |
+
print("โก๏ธ No conversion needed")
|
|
|
243 |
model_input = message
|
244 |
|
245 |
+
print(f"๐ค Sending to Atlas-Chat...")
|
|
|
246 |
|
247 |
+
# Generate response using Atlas-Chat
|
248 |
messages = [{"role": "user", "content": model_input}]
|
249 |
|
250 |
outputs = atlas_model(
|
|
|
257 |
|
258 |
# Extract the response
|
259 |
response = outputs[0]["generated_text"][-1]["content"].strip()
|
260 |
+
print(f"โ
RESPONSE: '{response[:100]}{'...' if len(response) > 100 else ''}'")
|
261 |
|
262 |
# Convert response back to Arabizi if input was Arabizi
|
263 |
if is_arabizi_input:
|
264 |
+
print("๐ Converting ArabicโArabizi...")
|
265 |
arabizi_response = arabic_to_arabizi(response)
|
266 |
+
print(f"โ
FINAL: '{arabizi_response[:100]}{'...' if len(arabizi_response) > 100 else ''}'")
|
267 |
+
print("="*50 + "\n")
|
|
|
|
|
268 |
return arabizi_response
|
269 |
else:
|
270 |
+
print("="*50 + "\n")
|
|
|
|
|
|
|
|
|
271 |
return response
|
272 |
|
273 |
except Exception as e:
|
274 |
+
print(f"\nโ ERROR: {str(e)}")
|
275 |
+
print("="*50 + "\n")
|
276 |
# Return error in appropriate language
|
277 |
if detect_arabizi(message):
|
278 |
return f"sorry, kan chi mochkil: {str(e)}. 3awd jar'b!"
|
|
|
282 |
# Create the Gradio interface
|
283 |
demo = gr.ChatInterface(
|
284 |
fn=chat_with_atlas,
|
285 |
+
title="๐๏ธ Atlas-Chat: AI-Powered Moroccan Arabic Assistant",
|
286 |
description="""
|
287 |
+
**ู
ุฑุญุจุง ุจู ูู ุฃุทูุณ ุดุงุช!** Welcome to Atlas-Chat! ๐ฒ๐ฆ
|
288 |
|
289 |
+
**๐ Powered by Hugging Face Inference API:**
|
290 |
+
- **Arabic Script (ุงูุนุฑุจูุฉ)** โ Direct conversation
|
291 |
+
- **Arabizi (3arabi bi 7oruf latin)** โ API conversion โ Arabizi response
|
292 |
+
- **English** โ Direct conversation
|
293 |
|
294 |
+
**โก Features:**
|
295 |
+
- Professional AI Arabizi conversion via API
|
296 |
+
- No local model conflicts
|
297 |
+
- Fast and reliable responses
|
298 |
+
- Comprehensive language detection
|
299 |
|
300 |
**ุฌุฑุจ ูุฐู ุงูุฃุณุฆูุฉ / Try these questions:**
|
301 |
""",
|