# talk_to_data / app.py
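# Gradio app: natural-language Q&A over a profitability CSV
# (synthetic_profit.csv) with Google's TAPAS table question-answering model.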
import os
import gradio as gr
import pandas as pd
import tensorflow as tf
from tapas.scripts import prediction_utils
from tapas.utils import number_annotation_utils
from tapas.protos import interaction_pb2
# 1) Read CSV and build list-of-lists table
df = pd.read_csv("synthetic_profit.csv")
# Ensure all values are strings
df = df.astype(str)
# Build TAPAS-style table: header row + data rows
table = [list(df.columns)] + df.values.tolist()
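# `table` is now a list of lists: table[0] holds the header names and
# table[1:] the data rows, with every cell coerced to a string.
assert len(table) == len(df) + 1 and len(table[0]) == len(df.columns)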
# 2) Configure TAPAS conversion with aggregation support
from tapas.utils import example_utils as tf_example_utils
config = tf_example_utils.ClassifierConversionConfig(
    vocab_file="tapas_sqa_base/vocab.txt",
    max_seq_length=512,
    max_column_id=512,
    max_row_id=512,
    strip_column_names=False,         # Keep header names
    add_aggregation_candidates=True,  # Propose SUM/AVERAGE operations
)
converter = tf_example_utils.ToClassifierTensorflowExample(config)
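# The converter serialises an Interaction (table + question) into a
# tf.train.Example with the feature layout of the TAPAS research pipeline
# (token ids, column/row ids, numeric-value features, aggregation candidates).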
# 3) Helper: build a TAPAS Interaction proto (table + question) for one query
def interaction_from_query(question: str):
    interaction = interaction_pb2.Interaction()
    # Add question
    q = interaction.questions.add()
    q.original_text = question
    # Add table columns
    for col in table[0]:
        interaction.table.columns.add().text = col
    # Add table rows/cells
    for row in table[1:]:
        r = interaction.table.rows.add()
        for cell in row:
            r.cells.add().text = cell
    # Annotate numeric values
    number_annotation_utils.add_numeric_values(interaction)
    return interaction
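# Example (sketch): the research converter would serialise the first question
# of an interaction into a tf.train.Example for the original TF1 pipeline,
# assuming a convert(interaction, index) signature:
#   example = converter.convert(interaction_from_query("total profit"), 0)
# The Gradio prediction path below goes through the transformers tokenizer
# instead.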
# 4) Instantiate TAPAS model and tokenizer
# The WTQ checkpoint performs cell selection plus aggregation (a table
# question-answering head), so load it with the TAPAS QA classes rather than
# a sequence-classification head
from transformers import TapasTokenizer, TFTapasForQuestionAnswering
MODEL = "google/tapas-base-finetuned-wtq"
tokenizer = TapasTokenizer.from_pretrained(MODEL)
model = TFTapasForQuestionAnswering.from_pretrained(MODEL)
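# WTQ-finetuned TAPAS predicts one of four aggregation operators alongside
# the selected cells; this id-to-label mapping follows the TAPAS documentation.
AGGREGATION_LABELS = {0: "NONE", 1: "SUM", 2: "AVERAGE", 3: "COUNT"}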
# 5) Prediction helper
def predict_answer(question: str):
    # Tokenize the question against the DataFrame; the transformers TAPAS
    # tokenizer flattens the table and annotates numeric values itself
    inputs = tokenizer(
        table=df,
        queries=[question],
        padding="max_length",
        truncation=True,
        return_tensors="tf",
    )
    # Run prediction (cell-selection logits + aggregation logits)
    outputs = model(**inputs)
    # Parse answer coordinates and the predicted aggregation operator
    coords, agg_indices = tokenizer.convert_logits_to_predictions(
        inputs, outputs.logits, outputs.logits_aggregation
    )
    # Coordinates are 0-based (row, column) indices into the data rows,
    # so offset the row by one to skip the header row of `table`
    answers = [table[r + 1][c] for r, c in coords[0]]
    operator = AGGREGATION_LABELS.get(int(agg_indices[0]), "NONE")
    if operator != "NONE" and answers:
        return f"{operator} of: " + ", ".join(answers)
    return ", ".join(answers)
# 6) Gradio interface
iface = gr.Interface(
    fn=predict_answer,
    inputs=gr.Textbox(lines=2, placeholder="Ask a question…"),
    outputs=gr.Textbox(lines=3),
    title="SAP Profitability Q&A (TAPAS Low-Level)",
    description=(
        "Low-level TAPAS: list-of-lists input, numeric annotations, "
        "aggregation candidates, and coordinate post-processing."
    ),
    allow_flagging="never",
)
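# Hugging Face Spaces expose Gradio apps on port 7860, hence the explicit
# server_name/server_port in the launch call below.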
if __name__ == "__main__":
    iface.launch(server_name="0.0.0.0", server_port=7860)