"""Gradio front-end for table question answering over ``synthetic_profit.csv``.

The CSV is loaded once at import time, every cell is converted to a string,
and questions typed into the UI are answered by the
``google/tapas-base-finetuned-wtq`` table-question-answering pipeline.
"""

import logging

import gradio as gr
import pandas as pd
from transformers import pipeline

logger = logging.getLogger(__name__)

# 1) Load & stringify your CSV
df = pd.read_csv("synthetic_profit.csv")
table = df.astype(str).to_dict(orient="records")

# 2) Instantiate the TAPAS pipeline from Transformers
qa = pipeline(
    "table-question-answering",
    model="google/tapas-base-finetuned-wtq",
    tokenizer="google/tapas-base-finetuned-wtq",
    device=-1,  # CPU; change to 0 if you have a GPU
)

# 3) Few-shot examples teach “filter + sum” vs. “filter + mean”
# NOTE(review): TAPAS is a table-QA encoder model, not an instruction-following
# generator. This prefix is sent as part of the query text, so it presumably
# consumes input-token budget that would otherwise hold table rows. Confirm
# the examples actually improve answers; if not, send only the question.
EXAMPLES = """
Example 1:
Q: What is the total revenue for Product A in EMEA in Q1 2024?
A: Filter Product=A & Region=EMEA & FiscalYear=2024 & FiscalQuarter=Q1, then sum Revenue → 3075162.49

Example 2:
Q: What is the total cost for Product A in EMEA in Q1 2024?
A: Filter Product=A & Region=EMEA & FiscalYear=2024 & FiscalQuarter=Q1, then sum Cost → 2894321.75

Example 3:
Q: What is the total margin for Product A in EMEA in Q1 2024?
A: Filter Product=A & Region=EMEA & FiscalYear=2024 & FiscalQuarter=Q1, then sum ProfitMargin → 0.18

Example 4:
Q: What is the average profit margin for Product A in EMEA in Q1 2024?
A: Filter Product=A & Region=EMEA & FiscalYear=2024 & FiscalQuarter=Q1, then mean ProfitMargin → 0.18
"""


def answer_question(question: str) -> str:
    """Answer a natural-language question about the loaded table.

    Args:
        question: The user's question as typed into the UI.

    Returns:
        The pipeline's ``answer`` string, ``"No answer found."`` when the
        pipeline yields no (or an empty) answer, a short hint when the
        question is blank, or an error message when the pipeline raises.
    """
    cleaned = (question or "").strip()
    if not cleaned:
        # Without this guard the few-shot prefix alone would be sent to the
        # model, producing an answer to a question nobody asked.
        return "Please enter a question."

    prompt = EXAMPLES + f"\nQ: {cleaned}\nA:"
    try:
        result = qa(table=table, query=prompt)
        answer = result.get("answer")
    except Exception as e:
        # UI boundary: surface the failure to the user instead of crashing
        # the request, but keep the traceback in the server log.
        logger.exception("Table QA pipeline failed for question %r", cleaned)
        return f"❌ Pipeline error:\n{e}"

    # Treat an empty answer string the same as a missing one.
    return answer if answer else "No answer found."


# 4) Gradio UI
iface = gr.Interface(
    fn=answer_question,
    inputs=gr.Textbox(
        lines=2,
        placeholder="e.g. What is the total revenue for Product A in Q1 2024?",
    ),
    outputs=gr.Textbox(lines=3),
    title="SAP Profitability Q&A",
    description=(
        "Ask simple sum/mean questions on the synthetic SAP data. \n"
        "Powered by google/tapas-base-finetuned-wtq with four few-shot examples."
    ),
    allow_flagging="never",
)

if __name__ == "__main__":
    iface.launch(server_name="0.0.0.0", server_port=7860)