Spaces:
Sleeping
Sleeping
import os | |
import gradio as gr | |
import pandas as pd | |
import tensorflow as tf | |
from tapas.scripts import prediction_utils | |
from tapas.utils import number_annotation_utils | |
from tapas.protos import interaction_pb2 | |
# 1) Load the profitability CSV and shape it as a list-of-lists table.
# Every cell is cast to ``str`` right after loading. ``table`` holds the
# header row first, followed by one list per data row.
df = pd.read_csv("synthetic_profit.csv").astype(str)
table = [df.columns.tolist(), *df.values.tolist()]
# 2) Configure TAPAS conversion with aggregation support
# The conversion classes live in ``tapas.utils.tf_example_utils``; the
# previous import pointed at ``tapas.utils.example_utils``, which is not the
# module that defines them.
from tapas.utils import tf_example_utils

config = tf_example_utils.ClassifierConversionConfig(
    vocab_file="tapas_sqa_base/vocab.txt",  # path is relative to the working directory
    max_seq_length=512,
    max_column_id=512,
    max_row_id=512,
    strip_column_names=False,  # Keep header names
    add_aggregation_candidates=True,  # Propose SUM/AVERAGE operations
)
# Converter that turns an Interaction proto into a classifier TF example.
converter = tf_example_utils.ToClassifierTensorflowExample(config)
# 3) Helper: wrap one question plus the module-level table in an Interaction
def interaction_from_query(question: str):
    """Build a TAPAS ``Interaction`` proto for ``question`` over ``table``.

    The proto receives a single question whose ``original_text`` is
    ``question``, one column per entry of ``table[0]`` and one row (with one
    cell per value) per entry of ``table[1:]``. It is then passed through
    ``number_annotation_utils.add_numeric_values`` before being returned.

    Args:
        question: Natural-language question to ask about the table.

    Returns:
        The populated ``interaction_pb2.Interaction`` message.
    """
    proto = interaction_pb2.Interaction()
    proto.questions.add().original_text = question

    header, body = table[0], table[1:]
    for name in header:
        proto.table.columns.add().text = name
    for values in body:
        cells = proto.table.rows.add().cells
        for value in values:
            cells.add().text = value

    # Numeric annotation is applied in place on the finished proto.
    number_annotation_utils.add_numeric_values(proto)
    return proto
# 4) Instantiate TAPAS model and tokenizer
from transformers import TapasTokenizer, TFTapasForQuestionAnswering

MODEL = "google/tapas-base-finetuned-wtq"
tokenizer = TapasTokenizer.from_pretrained(MODEL)
# The WTQ checkpoint is a table question-answering model, so it must be
# loaded with the question-answering head. A sequence-classification head
# does not produce the per-token cell-selection logits needed below.
model = TFTapasForQuestionAnswering.from_pretrained(MODEL)


# 5) Prediction helper
def predict_answer(question: str) -> str:
    """Answer ``question`` against the module-level table.

    The question and the all-string DataFrame ``df`` are encoded with the
    TAPAS tokenizer, run through the model, and the cell-selection logits
    are converted back into ``(row, column)`` coordinates by the tokenizer.
    Those coordinates index the data rows (header excluded), hence the
    ``+ 1`` offset when reading from ``table``.

    Args:
        question: Natural-language question about the table.

    Returns:
        The text of the selected cells joined by ``", "`` in row-major
        order, or an empty string when the question is blank or no cell is
        selected.

    NOTE(review): the aggregation head of the checkpoint is not consulted,
    so only the selected cell values are returned, as before.
    """
    if not question or not question.strip():
        return ""

    inputs = tokenizer(
        table=df,
        queries=[question],
        padding="max_length",
        # Drop trailing rows instead of raising when the table does not fit
        # into the model's maximum sequence length.
        truncation="drop_rows_to_fit",
        return_tensors="tf",
    )
    outputs = model(**inputs)

    # First element of the result is the per-query coordinate list; a single
    # query was submitted, so take its entry.
    coords = tokenizer.convert_logits_to_predictions(inputs, outputs.logits)[0][0]

    return ", ".join(table[r + 1][c] for r, c in sorted(coords))
# 6) Gradio interface: one question box in, one answer box out.
_DESCRIPTION = (
    "Low-level TAPAS: list-of-lists input, numeric annotations, "
    "aggregation candidates, and coordinate post-processing."
)
_question_box = gr.Textbox(lines=2, placeholder="Ask a question…")
_answer_box = gr.Textbox(lines=3)

iface = gr.Interface(
    fn=predict_answer,
    inputs=_question_box,
    outputs=_answer_box,
    title="SAP Profitability Q&A (TAPAS Low-Level)",
    description=_DESCRIPTION,
    allow_flagging="never",
)

# Start the web server only when the file is executed as a script.
if __name__ == "__main__":
    iface.launch(server_name="0.0.0.0", server_port=7860)