Spaces:

himel7
/

biasdetect

Sleeping

App Files Files Community

himel7 committed 5 days ago

Commit

208382e

verified ·

1 Parent(s): 3dec021

Update app.py

Browse files

Files changed (1) hide show

app.py +47 -10

app.py CHANGED Viewed

@@ -4,12 +4,24 @@ import fitz  # PyMuPDF
 import re
 import pandas as pd
-# Load models
 bias_detector = pipeline("text-classification", model="himel7/bias-detector")
 bias_type_classifier = pipeline("text-classification", model="maximuspowers/bias-type-classifier")
 def extract_text_from_pdf(pdf_file):
-    """Extract text from a PDF file using PyMuPDF"""
     text = ""
     with fitz.open(pdf_file) as pdf:
         for page in pdf:
@@ -17,12 +29,10 @@ def extract_text_from_pdf(pdf_file):
     return text
 def split_into_sentences(text):
-    """Split text into sentences (basic split by .!? with spaces)"""
     sentences = re.split(r'(?<=[.!?])\s+', text.strip())
     return [s for s in sentences if s]
 def analyze_sentence(sentence):
-    """Run bias detection and (if biased) bias type classification"""
     detection_result = bias_detector(sentence)[0]
     label = detection_result['label']
     score = detection_result['score']
@@ -46,7 +56,6 @@ def analyze_sentence(sentence):
         }
 def analyze_pdf(pdf_file):
-    """Full pipeline: extract text, split sentences, analyze bias"""
     text = extract_text_from_pdf(pdf_file)
     sentences = split_into_sentences(text)
@@ -64,16 +73,29 @@ def analyze_pdf(pdf_file):
     - **Unbiased Sentences:** {unbiased} ({(unbiased/total)*100:.1f}%)
     """
-    # Create a DataFrame for table display
     df = pd.DataFrame(results)
     return stats_md, df
 def analyze_text(text):
-    """Single text input analysis"""
     return analyze_sentence(text)
-# Top HTML badges
 badges_html = """
 <p align="center">
   <a href="https://huggingface.co/himel7/bias-detector">
@@ -91,9 +113,11 @@ badges_html = """
 </p>
 """
 with gr.Blocks() as demo:
     gr.HTML(badges_html)
-    gr.Markdown("## Bias Detector + Bias Type Classifier")
     with gr.Tab("Single Sentence"):
         text_input = gr.Textbox(lines=3, placeholder="Enter a sentence...")
@@ -101,6 +125,12 @@ with gr.Blocks() as demo:
         btn = gr.Button("Analyze")
         btn.click(analyze_text, inputs=text_input, outputs=output)
     with gr.Tab("Analyze PDF"):
         pdf_input = gr.File(label="Upload PDF", file_types=[".pdf"])
         stats_output = gr.Markdown()
@@ -108,5 +138,12 @@ with gr.Blocks() as demo:
         analyze_btn = gr.Button("Analyze PDF")
         analyze_btn.click(analyze_pdf, inputs=pdf_input, outputs=[stats_output, table_output])
 if __name__ == "__main__":
     demo.launch()

 import re
 import pandas as pd
+# Load detection models (both pipelines are constructed here, at import time)
 bias_detector = pipeline("text-classification", model="himel7/bias-detector")
 bias_type_classifier = pipeline("text-classification", model="maximuspowers/bias-type-classifier")
+# Neutralizer registry: display name -> model id. Nothing is loaded here; get_neutralizer() builds each pipeline on first use and caches it in `neutralizers`.
+neutralizer_models = {
+    "BART Neutralizer": "himel7/bias-neutralizer-bart",
+    "T5 Small Neutralizer": "himel7/bias-neutralizer-t5s"
+}
+neutralizers = {}
+def get_neutralizer(model_name):
+    if model_name not in neutralizers:
+        neutralizers[model_name] = pipeline("text2text-generation", model=neutralizer_models[model_name])
+    return neutralizers[model_name]
+# Utils
 def extract_text_from_pdf(pdf_file):
     text = ""
     with fitz.open(pdf_file) as pdf:
         for page in pdf:
     return text
 def split_into_sentences(text):
     """Split text into sentences at whitespace that follows '.', '!' or '?'.

     Surrounding whitespace is stripped first and empty pieces are dropped,
     so an empty or all-whitespace input yields an empty list.
     """
     pieces = re.split(r'(?<=[.!?])\s+', text.strip())
     # filter(None, ...) keeps only the non-empty strings.
     return list(filter(None, pieces))
 def analyze_sentence(sentence):
     detection_result = bias_detector(sentence)[0]
     label = detection_result['label']
     score = detection_result['score']
         }
 def analyze_pdf(pdf_file):
     text = extract_text_from_pdf(pdf_file)
     sentences = split_into_sentences(text)
     - **Unbiased Sentences:** {unbiased} ({(unbiased/total)*100:.1f}%)
     """
     df = pd.DataFrame(results)
     return stats_md, df
 def analyze_text(text):
     """Analyze a single text input by delegating to ``analyze_sentence``."""
     analysis = analyze_sentence(text)
     return analysis
+# New: Neutralize Bias
+def neutralize_text(text, model_choice):
+    neutralizer = get_neutralizer(model_choice)
+    result = neutralizer(text, max_length=512, do_sample=False)
+    return result[0]["generated_text"]
+def neutralize_pdf(pdf_file, model_choice):
+    text = extract_text_from_pdf(pdf_file)
+    sentences = split_into_sentences(text)
+    neutralizer = get_neutralizer(model_choice)
+    neutralized_sentences = [neutralizer(s, max_length=512, do_sample=False)[0]["generated_text"] for s in sentences]
+    neutralized_text = " ".join(neutralized_sentences)
+    return neutralized_text
+# Top badges
 badges_html = """
 <p align="center">
   <a href="https://huggingface.co/himel7/bias-detector">
 </p>
 """
+# Build UI
 with gr.Blocks() as demo:
     gr.HTML(badges_html)
+    gr.Markdown("## Bias Analyzer & Neutralizer")
+    gr.Markdown("### This app helps you to detect biases in sentences, analyse them, and neutralize sentences.")
     with gr.Tab("Single Sentence"):
         text_input = gr.Textbox(lines=3, placeholder="Enter a sentence...")
         btn = gr.Button("Analyze")
         btn.click(analyze_text, inputs=text_input, outputs=output)
+        gr.Markdown("### Neutralize Bias")
+        model_choice = gr.Dropdown(list(neutralizer_models.keys()), label="Neutralizer Model", value="BART Neutralizer")
+        neutral_output = gr.Textbox(label="Neutralized Sentence", lines=3)
+        neutral_btn = gr.Button("Neutralize")
+        neutral_btn.click(neutralize_text, inputs=[text_input, model_choice], outputs=neutral_output)
     with gr.Tab("Analyze PDF"):
         pdf_input = gr.File(label="Upload PDF", file_types=[".pdf"])
         stats_output = gr.Markdown()
         analyze_btn = gr.Button("Analyze PDF")
         analyze_btn.click(analyze_pdf, inputs=pdf_input, outputs=[stats_output, table_output])
+        gr.Markdown("### Neutralize Entire PDF")
+        model_choice_pdf = gr.Dropdown(list(neutralizer_models.keys()), label="Neutralizer Model", value="BART Neutralizer")
+        neutral_pdf_output = gr.Textbox(label="Neutralized PDF Text", lines=15)
+        neutral_pdf_btn = gr.Button("Neutralize PDF")
+        neutral_pdf_btn.click(neutralize_pdf, inputs=[pdf_input, model_choice_pdf], outputs=neutral_pdf_output)
 # Start the Gradio app only when this file is executed as a script.
 if __name__ == "__main__":
     demo.launch()