Spaces:

albertmartinez
/

openalex-topic-classification

Running

App Files Files Community

openalex-topic-classification / app.py

albertmartinez

Upgrade gradio

e36317e 2 months ago

raw

history blame contribute delete

3.84 kB

	import gradio as gr
	from transformers import pipeline
	import logging

	# Configure logging once at import time and grab a module-scoped logger.
	logging.basicConfig(level=logging.INFO)
	logger = logging.getLogger(__name__)

	# Hugging Face model-card URLs, keyed by the publishing account name.
	MODEL_LINKS = dict(
	    OpenAlex="https://huggingface.co/OpenAlex/bert-base-multilingual-cased-finetuned-openalex-topic-classification-title-abstract",
	    albertmartinez="https://huggingface.co/albertmartinez/openalex-topic-classification-title-abstract",
	)

	# Build both text-classification pipelines a single time, when the module
	# is imported, so individual requests never pay the loading cost.
	# A failure is logged and then re-raised so the app does not start without
	# its models.
	try:
	    model = pipeline(
	        task="text-classification",
	        model="OpenAlex/bert-base-multilingual-cased-finetuned-openalex-topic-classification-title-abstract",
	    )
	    model2 = pipeline(
	        task="text-classification",
	        model="albertmartinez/openalex-topic-classification-title-abstract",
	    )
	except Exception as e:
	    logger.error(f"Error loading models: {str(e)}")
	    raise
	else:
	    logger.info("Models loaded successfully")

	def _label_scores(classifier, text, top_k):
	    """Run one pipeline on *text* and map each predicted label to its score."""
	    # truncation/max_length are passed through with the call so that long
	    # abstracts are cut to the 512-token limit instead of failing.
	    predictions = classifier(text, top_k=top_k, truncation=True, max_length=512)
	    return {p["label"]: p["score"] for p in predictions}


	def classify_text(text, top_k):
	    """
	    Classify the given text using the two module-level models.

	    Args:
	        text (str): Text to classify, written as a "<TITLE> {title}" line
	            followed by an "<ABSTRACT> {abstract}" line. Must contain at
	            least one non-whitespace character.
	        top_k (int): Number of classifications to return per model (>= 1).

	    Returns:
	        list: Two dictionaries mapping label -> score, the first from
	        ``model`` and the second from ``model2``.

	    Raises:
	        gr.Error: If validation or inference fails. The message is
	            "Classification error: <reason>" and the original exception
	            is attached as ``__cause__``.
	    """
	    try:
	        # Check the type first so .strip() is only called on real strings;
	        # whitespace-only input carries nothing to classify.
	        if not isinstance(text, str) or not text.strip():
	            raise ValueError("Input text must be a non-empty string")

	        if not isinstance(top_k, int) or top_k < 1:
	            raise ValueError("top_k must be a positive integer")

	        return [
	            _label_scores(model, text, top_k),
	            _label_scores(model2, text, top_k),
	        ]
	    except Exception as e:
	        # logger.exception records the traceback alongside the message, and
	        # "from e" keeps the root cause attached to the user-facing error.
	        logger.exception(f"Classification error: {str(e)}")
	        raise gr.Error(f"Classification error: {str(e)}") from e

	# Sample input in the expected "<TITLE> ... / <ABSTRACT> ..." layout; used
	# both as the textbox's initial value and as the first clickable example.
	EXAMPLE_TEXT = """<TITLE> Machine Learning Applications in Healthcare
	<ABSTRACT> This paper explores the use of machine learning algorithms in healthcare systems for disease prediction and diagnosis."""

	# Description text for the interface; the two {…_link} placeholders are
	# filled in from MODEL_LINKS when the interface is built.
	_DESCRIPTION_TEMPLATE = """
	Enter a text with title and abstract to get its topic classification.

	Input format:
	```
	<TITLE> Your title here
	<ABSTRACT> Your abstract here
	```

	The system uses two different models to provide a more robust classification:

	1. [OpenAlex Model]({openalex_link}): Based on BERT multilingual model, fine-tuned on OpenAlex data
	2. [AlbertMartinez Model]({albert_link}): Based on BERT multilingual model, fine-tuned on [OpenAlex data](https://huggingface.co/datasets/albertmartinez/openalex-topic-title-abstract)

	For more information about the models and their performance, visit their Hugging Face pages.
	"""

	# Input widgets, created in the same order they are handed to classify_text.
	_text_input = gr.Textbox(
	    label="Text",
	    lines=5,
	    placeholder="<TITLE> {title}\n<ABSTRACT> {abstract}",
	    value=EXAMPLE_TEXT,
	)
	_top_k_input = gr.Number(
	    label="Number of classifications (top_k)",
	    value=10,
	    minimum=1,
	    maximum=20,
	    precision=0,
	)

	# One Label output per model; classify_text returns its results in this order.
	demo = gr.Interface(
	    fn=classify_text,
	    inputs=[_text_input, _top_k_input],
	    outputs=[
	        gr.Label(label="Model 1: OpenAlex"),
	        gr.Label(label="Model 2: albertmartinez"),
	    ],
	    title="OpenAlex Topic Classification",
	    description=_DESCRIPTION_TEMPLATE.format(
	        openalex_link=MODEL_LINKS["OpenAlex"],
	        albert_link=MODEL_LINKS["albertmartinez"],
	    ),
	    examples=[
	        [EXAMPLE_TEXT, 5],
	        ["<TITLE> Climate Change Impact\n<ABSTRACT> Study of global warming effects on biodiversity", 3],
	    ],
	    flagging_mode="never",
	    api_name="classify",
	)

	# Start the web server only when executed as a script, not on import.
	if __name__ == "__main__":
	    logger.info(f"Gradio version: {gr.__version__}")
	    demo.launch()