Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -2,79 +2,102 @@ import os
|
|
2 |
import gradio as gr
|
3 |
import pandas as pd
|
4 |
import tensorflow as tf
|
5 |
-
from tapas.scripts import prediction_utils
|
6 |
-
from tapas.utils import number_annotation_utils
|
7 |
-
from tapas.protos import interaction_pb2
|
8 |
|
9 |
-
#
|
10 |
-
|
|
|
|
|
11 |
|
|
|
12 |
df = pd.read_csv("synthetic_profit.csv")
|
13 |
-
# Ensure all values are strings
|
14 |
df = df.astype(str)
|
15 |
-
# Build TAPAS-style table: header row + data rows
|
16 |
-
table = [list(df.columns)] + df.values.tolist()
|
17 |
|
18 |
-
# 2)
|
19 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
20 |
config = tf_example_utils.ClassifierConversionConfig(
|
21 |
vocab_file="tapas_sqa_base/vocab.txt",
|
22 |
max_seq_length=512,
|
23 |
max_column_id=512,
|
24 |
max_row_id=512,
|
25 |
-
strip_column_names=False,
|
26 |
-
add_aggregation_candidates=True,
|
27 |
)
|
28 |
converter = tf_example_utils.ToClassifierTensorflowExample(config)
|
29 |
|
30 |
-
#
|
31 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
32 |
interaction = interaction_pb2.Interaction()
|
33 |
-
#
|
34 |
q = interaction.questions.add()
|
35 |
-
q.original_text =
|
36 |
-
#
|
37 |
for col in table[0]:
|
38 |
interaction.table.columns.add().text = col
|
39 |
-
#
|
40 |
-
for
|
41 |
-
|
42 |
-
for cell in
|
43 |
-
|
44 |
-
#
|
45 |
number_annotation_utils.add_numeric_values(interaction)
|
46 |
-
|
|
|
|
|
47 |
|
48 |
-
#
|
49 |
-
|
50 |
-
|
51 |
-
|
52 |
-
|
53 |
-
|
54 |
-
|
55 |
-
|
56 |
-
|
57 |
-
|
58 |
-
|
59 |
-
|
60 |
-
|
61 |
-
#
|
62 |
-
coords = prediction_utils.parse_coordinates(
|
63 |
-
#
|
64 |
answers = []
|
65 |
-
for r, c in coords:
|
|
|
66 |
answers.append(table[r+1][c])
|
67 |
-
return ", ".join(answers)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
68 |
|
69 |
-
# 6) Gradio interface
|
70 |
iface = gr.Interface(
|
71 |
-
fn=
|
72 |
-
inputs=gr.Textbox(lines=2,
|
73 |
-
outputs=gr.Textbox(
|
74 |
title="SAP Profitability Q&A (TAPAS Low-Level)",
|
75 |
description=(
|
76 |
-
"
|
77 |
-
"
|
78 |
),
|
79 |
allow_flagging="never",
|
80 |
)
|
|
|
2 |
import gradio as gr
|
3 |
import pandas as pd
|
4 |
import tensorflow as tf
|
|
|
|
|
|
|
5 |
|
6 |
+
# TAPAS imports
|
7 |
+
from tapas.protos import interaction_pb2
|
8 |
+
from tapas.utils import number_annotation_utils, tf_example_utils, prediction_utils
|
9 |
+
from tapas.scripts.run_task_main import get_classifier_model, get_task_config
|
10 |
|
11 |
+
# 1) Read the CSV and cast every column to str, so each cell is already text
#    by the time it is copied into the interaction table.
df = pd.read_csv("synthetic_profit.csv").astype(str)

# 2) Row-major "list of lists": the header row first, then one list per
#    data row.
table = [list(df.columns), *df.values.tolist()]
|
19 |
+
|
20 |
+
# 3) Settings for converting an Interaction into a TF example.
#    The two boolean flags are passed explicitly:
#      - strip_column_names=False: intended to keep the header text as-is
#      - add_aggregation_candidates=True: intended to help SUM/AVG-style
#        questions (NOTE(review): confirm the exact effect in the TAPAS docs)
_conversion_settings = {
    "vocab_file": "tapas_sqa_base/vocab.txt",
    "max_seq_length": 512,
    "max_column_id": 512,
    "max_row_id": 512,
    "strip_column_names": False,
    "add_aggregation_candidates": True,
}
config = tf_example_utils.ClassifierConversionConfig(**_conversion_settings)
converter = tf_example_utils.ToClassifierTensorflowExample(config)
|
32 |
|
33 |
+
# 4) Build the task configuration for the SQA checkpoint, reusing the vocab
#    file and sequence length from the conversion config, then load the model.
#    NOTE(review): confirm that `get_task_config` and `get_classifier_model`
#    exist with these signatures in the installed tapas package; this cannot
#    be verified from this file.
task_config = get_task_config(
    task="sqa",
    vocab_file=config.vocab_file,
    max_seq_length=config.max_seq_length,
    init_checkpoint="tapas_sqa_base/model.ckpt-0",
    bsz=1,
)
_model_and_tokenizer = get_classifier_model(task_config)
model, tokenizer = _model_and_tokenizer
|
43 |
+
|
44 |
+
# 5) Convert a single (table, query) into a TF Example
|
45 |
+
def make_tf_example(table, query):
    """Convert one (table, query) pair into a TAPAS example.

    Args:
        table: List of rows; ``table[0]`` holds the column headers and every
            following row holds the cell texts of one data row.
        query: Question text, stored as the interaction's only question.

    Returns:
        The value produced by the module-level ``converter`` for question 0.
        NOTE(review): upstream TAPAS returns a ``tf.train.Example`` here
        rather than serialized bytes -- confirm against the installed version.

    Raises:
        ValueError: If ``table`` is empty, i.e. has no header row.
    """
    if not table:
        raise ValueError("table must contain at least a header row")

    header, *body = table
    interaction = interaction_pb2.Interaction()

    # a) the single question
    interaction.questions.add().original_text = query

    # b) column headers
    for col in header:
        interaction.table.columns.add().text = col

    # c) data rows, one cell per value
    for row_vals in body:
        row = interaction.table.rows.add()
        for cell in row_vals:
            row.cells.add().text = cell

    # d) annotate numeric values on the interaction
    number_annotation_utils.add_numeric_values(interaction)

    # e) convert() takes the index of the question to encode; there is
    #    exactly one question on this interaction, so that index is 0.
    return converter.convert(interaction, 0)
|
63 |
|
64 |
+
# 6) Run TAPAS and parse its coordinate output
|
65 |
+
def predict_answer(query):
    """Answer ``query`` against the module-level ``table``.

    Builds an example for the query, runs the module-level ``model`` on it,
    parses the predicted answer coordinates and maps each one back to the
    text of the matching table cell.

    Args:
        query: Question text.

    Returns:
        The selected cell texts joined with ", " in row-major order, or
        "No answer found." when no coordinates were predicted.
    """
    example = make_tf_example(table, query)

    # NOTE(review): confirm `tf_example_utils.input_fn_builder` exists with
    # this signature in the installed tapas package.
    input_fn = tf_example_utils.input_fn_builder(
        [example],
        is_training=False,
        drop_remainder=False,
        batch_size=1,
        seq_length=config.max_seq_length,
    )

    # NOTE(review): the indexing below assumes predict() returns a sequence
    # of dicts; verify against the actual model object.
    preds = model.predict(input_fn)
    coords = prediction_utils.parse_coordinates(preds[0]["answer_coordinates"])

    # Iterate in sorted (row, column) order so the answer string does not
    # depend on the iteration order of the parsed coordinate collection.
    # table[0] is the header row, so data row r lives at table[r + 1].
    answers = [table[r + 1][c] for r, c in sorted(coords)]
    return ", ".join(answers) if answers else "No answer found."
|
85 |
+
|
86 |
+
# 7) Gradio interface
|
87 |
+
def answer_fn(question: str) -> str:
    """Gradio callback: return the answer text, or an error message.

    Any exception raised by ``predict_answer`` is turned into a string so
    the UI shows the failure instead of crashing.

    Args:
        question: Text typed into the question box.

    Returns:
        The result of ``predict_answer(question)``, or "❌ Error: ..."
        carrying the exception message.
    """
    try:
        answer = predict_answer(question)
    except Exception as exc:
        return f"❌ Error: {exc}"
    return answer
|
92 |
|
|
|
93 |
# Build the Gradio UI: one two-line question textbox in, one answer textbox
# out, served by answer_fn.
_question_box = gr.Textbox(lines=2, label="Your question")
_answer_box = gr.Textbox(label="Answer")
_description = (
    "Uses TAPAS’s Interaction + Converter APIs with aggregation candidates "
    "and numeric annotations to reliably answer sum/average queries."
)
iface = gr.Interface(
    fn=answer_fn,
    inputs=_question_box,
    outputs=_answer_box,
    title="SAP Profitability Q&A (TAPAS Low-Level)",
    description=_description,
    allow_flagging="never",
)
|