"""Gradio app: summarize arXiv paper abstracts with FLAN-T5.

Fetches the abstract from an arXiv /abs/ page, then runs it through an
instruction-tuned seq2seq model. Designed to run on CPU (e.g. Hugging
Face Spaces).
"""

import requests
import gradio as gr
from bs4 import BeautifulSoup
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline

# Lightweight instruction-tuned model; loaded once at startup so every
# request reuses the same weights.
MODEL_NAME = "google/flan-t5-base"
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModelForSeq2SeqLM.from_pretrained(MODEL_NAME)
llm_pipeline = pipeline("text2text-generation", model=model, tokenizer=tokenizer)

# Prefix used to flag any fetch/parse failure so the caller can detect it.
_ERROR_PREFIX = "Failed to fetch abstract:"


def extract_arxiv_abstract(url: str) -> str:
    """Fetch an arXiv abstract page and return the abstract text.

    Args:
        url: An arXiv abstract URL, e.g. "https://arxiv.org/abs/2306.10001".

    Returns:
        The abstract text on success, or an error message beginning with
        "Failed to fetch abstract:" on any failure (bad URL, HTTP error,
        timeout, or missing abstract element).
    """
    try:
        headers = {"User-Agent": "Mozilla/5.0"}
        # timeout prevents the request from hanging the app indefinitely.
        response = requests.get(url, headers=headers, timeout=15)
        # Treat HTTP 4xx/5xx as failures instead of parsing an error page.
        response.raise_for_status()
        soup = BeautifulSoup(response.text, "html.parser")
        # arXiv abstract pages wrap the abstract in <blockquote class="abstract">.
        abstract = soup.find("blockquote", class_="abstract")
        if abstract:
            return abstract.get_text(strip=True).replace("Abstract:", "")
        # Use the error prefix so summarize_research_paper() can detect this
        # case too (the original code only detected the exception branch and
        # would have "summarized" this error message).
        return f"{_ERROR_PREFIX} abstract not found on this page. Please check the URL."
    except requests.RequestException as e:
        return f"{_ERROR_PREFIX} {e}"


def summarize_research_paper(arxiv_url: str) -> str:
    """Summarize the abstract of the paper at *arxiv_url*.

    Args:
        arxiv_url: An arXiv abstract page URL.

    Returns:
        The original abstract followed by the model-generated summary, or
        the fetch error message verbatim if the abstract could not be
        retrieved.
    """
    abstract_text = extract_arxiv_abstract(arxiv_url)
    # Propagate fetch/parse errors instead of feeding them to the model.
    if abstract_text.startswith(_ERROR_PREFIX) or abstract_text.startswith("Failed"):
        return abstract_text
    prompt = f"summarize: {abstract_text}"
    summary = llm_pipeline(prompt, max_new_tokens=256)[0]["generated_text"]
    return f"šŸ“„ Original Abstract:\n{abstract_text}\n\n🧠 Summary:\n{summary}"


# Launch only when executed as a script (this is how HF Spaces runs it);
# importing this module no longer starts a web server as a side effect.
if __name__ == "__main__":
    gr.Interface(
        fn=summarize_research_paper,
        inputs=gr.Textbox(
            label="arXiv Paper URL",
            placeholder="https://arxiv.org/abs/2306.10001",
        ),
        outputs=gr.Textbox(label="Summary", lines=15),
        title="🧠 Research Paper Summarizer",
        description="Summarizes arXiv paper abstracts using FLAN-T5. Works fast on CPU Hugging Face Spaces.",
    ).launch()