import gradio as gr
from transformers import pipeline
import logging
# Module-level logger for this app; the root logger is asked to emit
# INFO-level records and above.
logger = logging.getLogger(__name__)
logging.basicConfig(level=logging.INFO)
# Hugging Face page URL for each of the two models, keyed by a short name.
MODEL_LINKS = dict(
    OpenAlex="https://huggingface.co/OpenAlex/bert-base-multilingual-cased-finetuned-openalex-topic-classification-title-abstract",
    albertmartinez="https://huggingface.co/albertmartinez/openalex-topic-classification-title-abstract",
)
# Build both text-classification pipelines once, at import time, so that every
# call to the classifier reuses them. A loading failure is logged and then
# re-raised, which aborts the import.
try:
    model = pipeline(
        task="text-classification",
        model="OpenAlex/bert-base-multilingual-cased-finetuned-openalex-topic-classification-title-abstract",
    )
    model2 = pipeline(
        task="text-classification",
        model="albertmartinez/openalex-topic-classification-title-abstract",
    )
    logger.info("Models loaded successfully")
except Exception as load_error:
    logger.error(f"Error loading models: {load_error!s}")
    raise
def _scores_by_label(classifier, text, top_k):
    """Run one pipeline on *text* and return its predictions as {label: score}."""
    predictions = classifier(text, top_k=top_k, truncation=True, max_length=512)
    return {p["label"]: p["score"] for p in predictions}


def classify_text(text, top_k):
    """
    Classify the given text with both loaded models.

    Args:
        text (str): Text to classify, title on the first line and abstract
            on the following line(s). Must contain at least one
            non-whitespace character.
        top_k (int): Number of classifications to return per model. Must be
            a positive integer; an integral float such as ``5.0`` is
            accepted and converted, booleans are rejected.

    Returns:
        list: Two dictionaries (first model, then second model), each
        mapping a predicted label to its score.

    Raises:
        gr.Error: If the inputs are invalid or either model call fails. The
            original exception is attached as the cause.
    """
    try:
        # Whitespace-only input carries no content to classify, so treat it
        # the same as an empty string.
        if not isinstance(text, str) or not text.strip():
            raise ValueError("Input text must be a non-empty string")

        # Tolerate callers that hand over an integral float (e.g. from JSON).
        if isinstance(top_k, float) and top_k.is_integer():
            top_k = int(top_k)
        # bool is a subclass of int, so it has to be excluded explicitly.
        if isinstance(top_k, bool) or not isinstance(top_k, int) or top_k < 1:
            raise ValueError("top_k must be a positive integer")

        return [
            _scores_by_label(model, text, top_k),
            _scores_by_label(model2, text, top_k),
        ]
    except Exception as e:
        logger.error("Classification error: %s", e)
        # Re-raise as gr.Error so the message reaches the UI, keeping the
        # original exception chained for debugging.
        raise gr.Error(f"Classification error: {e}") from e
# Default text shown in the input box: a title line followed by an abstract.
# NOTE(review): the string begins with a single space, which may be a leftover
# from stripped "<TITLE>"/"<ABSTRACT>" markers - confirm against the model cards.
EXAMPLE_TEXT = (
    " Machine Learning Applications in Healthcare\n"
    "This paper explores the use of machine learning algorithms in healthcare"
    " systems for disease prediction and diagnosis."
)
# Gradio UI: one text box and a top_k selector feed classify_text, whose two
# result dictionaries are rendered in two Label components (one per model).
# The description is Markdown with links taken from MODEL_LINKS.
demo = gr.Interface(
    fn=classify_text,
    title="OpenAlex Topic Classification",
    description=f"""
Enter a text with title and abstract to get its topic classification.
Input format:
```
Your title here
Your abstract here
```
The system uses two different models to provide a more robust classification:
1. [OpenAlex Model]({MODEL_LINKS['OpenAlex']}): Based on BERT multilingual model, fine-tuned on OpenAlex data
2. [AlbertMartinez Model]({MODEL_LINKS['albertmartinez']}): Based on BERT multilingual model, fine-tuned on [OpenAlex data](https://huggingface.co/datasets/albertmartinez/openalex-topic-title-abstract)
For more information about the models and their performance, visit their Hugging Face pages.
""",
    inputs=[
        # Free-text input, pre-filled with the example.
        gr.Textbox(
            label="Text",
            lines=5,
            placeholder=" {title}\n {abstract}",
            value=EXAMPLE_TEXT,
        ),
        # Whole-number selector (precision=0) limited to 1..20.
        gr.Number(
            label="Number of classifications (top_k)",
            value=10,
            minimum=1,
            maximum=20,
            precision=0,
        ),
    ],
    outputs=[
        gr.Label(label="Model 1: OpenAlex"),
        gr.Label(label="Model 2: albertmartinez"),
    ],
    examples=[
        [EXAMPLE_TEXT, 5],
        [" Climate Change Impact\n Study of global warming effects on biodiversity", 3],
    ],
    flagging_mode="never",
    api_name="classify",
)
# Script entry point: record the installed Gradio version, then start the app.
if __name__ == "__main__":
    logger.info("Gradio version: %s", gr.__version__)
    demo.launch()