Spaces:

PD03
/

talk_to_data

Sleeping

App Files Files Community

PD03 committed on Jun 25

Commit

6a97111

verified ·

1 Parent(s): 887b999

Update app.py

Browse files

Files changed (1) hide show

app.py +54 -16

app.py CHANGED Viewed

@@ -1,29 +1,67 @@
-# app.py
-import pandas as pd
-from transformers import pipeline
-import gradio as gr
-# Load synthetic data
-df = pd.read_csv("synthetic_profit.csv")
-# Initialize TAPAS QA pipeline
 qa = pipeline(
     "table-question-answering",
     model="google/tapas-base-finetuned-sqa",
     tokenizer="google/tapas-base-finetuned-sqa"
 )
-def answer(query: str) -> str:
-    res = qa(table=df, query=query)
-    return f"**Answer:** {res['answer']} _(agg: {res['aggregate']})_"
 demo = gr.Interface(
     fn=answer,
-    inputs=gr.Textbox(lines=2, placeholder="e.g. What was profit margin for Product B in EMEA Q2 2024?"),
-    outputs="markdown",
     title="S/4HANA Profitability Chat",
-    description="Ask questions of synthetic S/4HANA data using TAPAS"
 )
-if __name__ == "__main__":
-    demo.launch()

+# 3) load TAPAS
+from transformers import pipeline
 qa = pipeline(
     "table-question-answering",
     model="google/tapas-base-finetuned-sqa",
     tokenizer="google/tapas-base-finetuned-sqa"
 )
+# 4) cast every cell to str: TAPAS tokenization expects text-only cells (avoids the regex bug on numeric values)
+df_str = df.astype(str)
+# 5) sanity check
+print( qa(table=df_str, query="What was the ProfitMargin for Product B in EMEA Q2 2024?") )
+# 6) launch Gradio
+import gradio as gr
+import re
+def answer(q: str) -> str:
+    """Answer a free-text question about the profitability table.
+
+    Questions that contain "<total|average> <metric>", "Product <A-D>" and
+    "Q<1-4> <year>" are computed directly on the numeric DataFrame ``df``.
+    Every other question, and any parsed question whose filter matches no
+    rows, is forwarded to the TAPAS pipeline ``qa`` over ``df_str``.
+
+    Args:
+        q: The question typed by the user.
+
+    Returns:
+        A single-line, human-readable answer string.
+    """
+    # --- 1. try to parse explicit total/average queries ---
+    m = re.search(r"\b(total|average)\s+(ProfitMargin|Profit|Revenue|Cost)\b", q, re.IGNORECASE)
+    p = re.search(r"\bProduct\s*([A-D])\b", q, re.IGNORECASE)
+    t = re.search(r"\b(Q[1-4])\s*(\d{4})\b", q, re.IGNORECASE)
+    if m and p and t:
+        # The patterns match case-insensitively, so every captured token is
+        # normalised back to the spelling used in the DataFrame; otherwise
+        # "total profit ... q1 2023" raises KeyError or silently matches nothing.
+        canonical_metric = {
+            "profitmargin": "ProfitMargin",
+            "profit": "Profit",
+            "revenue": "Revenue",
+            "cost": "Cost",
+        }
+        agg_type = m.group(1).lower()      # "total" or "average"
+        metric   = canonical_metric[m.group(2).lower()]  # column name
+        product  = f"Product {p.group(1).upper()}"
+        quarter  = t.group(1).upper()      # "q1" -> "Q1"
+        year     = int(t.group(2))
+        # filter the *numeric* DataFrame
+        subset = df[
+            (df["Product"]       == product) &
+            (df["FiscalQuarter"] == quarter) &
+            (df["FiscalYear"]    == year)
+        ]
+        if not subset.empty:
+            if agg_type == "total":
+                val = subset[metric].sum()
+                return f"Total {metric} for {product} in {quarter} {year}: {val:,.2f}"
+            else:  # average
+                val = subset[metric].mean()
+                # show 3 decimal places for margins, 2 for currency
+                fmt = "{:,.3f}" if metric == "ProfitMargin" else "{:,.2f}"
+                return f"Average {metric} for {product} in {quarter} {year}: " + fmt.format(val)
+        # no matching rows: fall through and let TAPAS try the question
+    # --- 2. fallback to TAPAS for everything else ---
+    res = qa(table=df_str, query=q)
+    agg = res.get("aggregator", "")
+    # only mention the aggregator when the result carries a real one
+    if agg and agg != "NONE":
+        return f"Answer: {res['answer']}  (agg: {agg})"
+    # last-resort: raw answer
+    return f"Answer: {res['answer']}"
 demo = gr.Interface(
     fn=answer,
+    inputs=gr.Textbox(lines=2, placeholder="e.g. Profit for Product A in Q1 2023?"),
+    outputs="text",
     title="S/4HANA Profitability Chat",
 )
+demo.launch(share=True, debug=True)