# Spaces: Runtime error
# (presumably a hosting-page status banner captured along with the source; not part of the program)
import gradio as gr
import requests
from bs4 import BeautifulSoup
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer, pipeline
# Lightweight instruction-tuned checkpoint used for summarization.
# The tokenizer and the seq2seq model are loaded from the same checkpoint id
# and then wrapped in a single text2text-generation pipeline.
_CHECKPOINT = "google/flan-t5-base"
tokenizer = AutoTokenizer.from_pretrained(_CHECKPOINT)
model = AutoModelForSeq2SeqLM.from_pretrained(_CHECKPOINT)
llm_pipeline = pipeline(
    task="text2text-generation",
    tokenizer=tokenizer,
    model=model,
)
def extract_arxiv_abstract(url):
    """Download a page and return the text of its arXiv-style abstract.

    Args:
        url: Address of the page to fetch (for example an
            ``https://arxiv.org/abs/...`` link).

    Returns:
        The abstract as a single string with runs of whitespace collapsed
        and the leading ``Abstract:`` label removed.

        This function does not raise for download problems; it returns a
        human-readable message instead:

        * ``"Failed to fetch abstract: ..."`` when the request fails
          (invalid URL, connection error, timeout, or HTTP error status).
        * ``"Abstract not found on this page. Please check the URL."`` when
          the page has no ``<blockquote class="abstract">`` element.
    """
    headers = {"User-Agent": "Mozilla/5.0"}
    try:
        # Without a timeout a stalled connection would block the caller
        # indefinitely.
        response = requests.get(url, headers=headers, timeout=15)
        # Report 4xx/5xx replies as fetch failures instead of parsing the
        # error page and claiming the abstract is merely missing.
        response.raise_for_status()
    except requests.RequestException as exc:
        return f"Failed to fetch abstract: {exc}"

    soup = BeautifulSoup(response.text, "html.parser")
    abstract = soup.find("blockquote", class_="abstract")
    if abstract is None:
        return "Abstract not found on this page. Please check the URL."

    # Collapse whitespace on the full text rather than stripping each text
    # node separately, so words around inline tags are not glued together.
    text = " ".join(abstract.get_text().split())

    # Drop only the leading label; an "Abstract:" occurring later in the
    # body is real content and must be kept.
    label = "Abstract:"
    if text.startswith(label):
        text = text[len(label):].lstrip()
    return text
def summarize_research_paper(arxiv_url):
    """Fetch the abstract behind ``arxiv_url`` and summarize it with the LLM.

    Args:
        arxiv_url: URL typed by the user; surrounding whitespace is ignored
            and ``None`` is treated as an empty string.

    Returns:
        A string containing the original abstract followed by the generated
        summary. If the abstract could not be retrieved, the message
        produced by ``extract_arxiv_abstract`` is returned unchanged and the
        model is not invoked.
    """
    # Pasted URLs often carry stray spaces or newlines.
    abstract_text = extract_arxiv_abstract((arxiv_url or "").strip())

    # Match the exact messages extract_arxiv_abstract uses for its two
    # failure modes. Checking only for "Failed" let the "Abstract not found"
    # message through to the model, and would also misclassify a genuine
    # abstract that happens to begin with the word "Failed".
    error_prefixes = (
        "Failed to fetch abstract:",
        "Abstract not found on this page.",
    )
    if abstract_text.startswith(error_prefixes):
        return abstract_text

    prompt = f"summarize: {abstract_text}"
    summary = llm_pipeline(prompt, max_new_tokens=256)[0]["generated_text"]
    # NOTE(review): the leading glyphs in the two headings below look like
    # mis-decoded emoji; they are kept as found - confirm the intended
    # characters before changing them.
    return f"π Original Abstract:\n{abstract_text}\n\nπ§ Summary:\n{summary}"
# Web UI: a single text box for the paper URL in, a multi-line text box with
# the abstract and its summary out.
url_box = gr.Textbox(
    label="arXiv Paper URL",
    placeholder="https://arxiv.org/abs/2306.10001",
)
summary_box = gr.Textbox(label="Summary", lines=15)

demo = gr.Interface(
    fn=summarize_research_paper,
    inputs=url_box,
    outputs=summary_box,
    title="π§ Research Paper Summarizer",
    description="Summarizes arXiv paper abstracts using FLAN-T5. Works fast on CPU Hugging Face Spaces.",
)

# Start the app as soon as the module is executed.
demo.launch()