# ===========================================================
# VOVODO – Virtual assistant for the Chinko Reserve (CAR)
# Author: Presley Koyaweda
# Description: Gradio app with RAG (Pixtral + FAISS)
# ===========================================================
import os

import faiss
import gradio as gr
import numpy as np
from PyPDF2 import PdfReader
from huggingface_hub import login, snapshot_download
from sentence_transformers import SentenceTransformer
from mistral_common.tokens.tokenizers.mistral import MistralTokenizer
from mistral_inference.transformer import Transformer
from mistral_inference.generate import generate
from mistral_common.protocol.instruct.messages import UserMessage, TextChunk
from mistral_common.protocol.instruct.request import ChatCompletionRequest

# === HF authentication ===
# Requires HUGGINGFACEHUB_API_TOKEN in the environment; note that a .env file
# is NOT loaded automatically by this script.
login(os.environ["HUGGINGFACEHUB_API_TOKEN"])


# === 1. Load the PDF documents and split them into overlapping chunks ===
def load_chunks(folder="data", chunk_size=1000, overlap=200):
    chunks = []
    for fname in os.listdir(folder):
        if fname.endswith(".pdf"):
            with open(os.path.join(folder, fname), "rb") as f:
                reader = PdfReader(f)
                full_text = ""
                for page in reader.pages:
                    text = page.extract_text()
                    if text:
                        full_text += text + "\n"
                # Slide a window of `chunk_size` characters, stepping by
                # `chunk_size - overlap` so consecutive chunks share context.
                for i in range(0, len(full_text), chunk_size - overlap):
                    chunk = full_text[i:i + chunk_size]
                    if chunk.strip():
                        chunks.append(chunk)
    return chunks


texts = load_chunks()

# === 2. Embedding + FAISS indexing ===
# Embeddings are L2-normalized, so inner product (IndexFlatIP) is equivalent
# to cosine similarity.
embedder = SentenceTransformer("sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2")
embeddings = embedder.encode(texts, normalize_embeddings=True)
dimension = embeddings.shape[1]
index = faiss.IndexFlatIP(dimension)
index.add(np.array(embeddings))

# === 3. Load the Pixtral model locally ===
model_dir = os.path.expanduser("~/pixtral/Pixtral")
snapshot_download(
    "mistral-community/pixtral-12b-240910",
    local_dir=model_dir,
    allow_patterns=["*.json", "*.safetensors"],
)
tokenizer = MistralTokenizer.from_file(f"{model_dir}/tekken.json")
model = Transformer.from_folder(model_dir)


# === 4. Generation with retrieved context ===
def vovodo_fr(message: str, history) -> str:
    # Retrieve the 3 chunks most similar to the question
    query_embedding = embedder.encode([message], normalize_embeddings=True)
    D, I = index.search(np.array(query_embedding), k=3)
    context = "\n".join([texts[i] for i in I[0]])

    # Build the prompt (kept in French: the app serves francophone users,
    # and the answer is parsed back out on the "Réponse :" marker below)
    prompt = f"Contexte : {context}\n\nQuestion : {message}\nRéponse :"
    messages = [UserMessage(content=[TextChunk(text=prompt)])]
    req = ChatCompletionRequest(messages=messages)

    # Generate
    encoded = tokenizer.encode_chat_completion(req)
    out_tokens, _ = generate(
        [encoded.tokens],
        model,
        max_tokens=512,
        temperature=0.3,
        eos_id=tokenizer.instruct_tokenizer.tokenizer.eos_id,
    )
    output = tokenizer.decode(out_tokens[0])
    return output.split("Réponse :")[-1].strip()


# === 5. Gradio interface ===
# ChatInterface calls fn(message, history), hence the two-argument signature
# of vovodo_fr above.
gr.ChatInterface(
    fn=vovodo_fr,
    title="🌿 VOVODO – Assistant Chinko (Pixtral + FAISS)",
    description="Posez vos questions sur les documents de la Réserve de Chinko. Modèle : Pixtral 12B + MiniLM.",
    theme="soft",
).launch(share=True)
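
# --- Optional: persist the FAISS index ---------------------------------------
# Re-embedding every PDF on each startup is slow. FAISS can serialize the index
# to disk; a minimal sketch (the "chinko.index" filename is an assumption):
#
#   faiss.write_index(index, "chinko.index")    # after building the index
#   index = faiss.read_index("chinko.index")    # on later startups
#
# The `texts` list must be saved alongside (e.g., as JSON), since FAISS stores
# only the vectors, not the chunk strings they came from.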
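
# --- Optional: inspect retrieval quality --------------------------------------
# To check which chunks the model actually sees as context, query the index
# directly; the sample question is illustrative, not from the Chinko documents:
#
#   q = embedder.encode(["Quelles espèces vivent dans la réserve ?"],
#                       normalize_embeddings=True)
#   scores, ids = index.search(np.array(q), k=3)
#   for s, i in zip(scores[0], ids[0]):
#       print(f"{s:.3f}  {texts[i][:80]!r}")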
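
# --- Optional: command-line smoke test -----------------------------------------
# A quick sketch for exercising retrieval + generation without the web UI (run
# it before the blocking .launch() call above); the question is illustrative,
# and the empty list stands in for the chat history Gradio would pass:
#
#   print(vovodo_fr("Quelle est la superficie de la réserve de Chinko ?", []))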