himel7 committed on
Commit
208382e
·
verified ·
1 Parent(s): 3dec021

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +47 -10
app.py CHANGED
@@ -4,12 +4,24 @@ import fitz # PyMuPDF
4
  import re
5
  import pandas as pd
6
 
7
- # Load models
8
  bias_detector = pipeline("text-classification", model="himel7/bias-detector")
9
  bias_type_classifier = pipeline("text-classification", model="maximuspowers/bias-type-classifier")
10
 
 
 
 
 
 
 
 
 
 
 
 
 
 
11
  def extract_text_from_pdf(pdf_file):
12
- """Extract text from a PDF file using PyMuPDF"""
13
  text = ""
14
  with fitz.open(pdf_file) as pdf:
15
  for page in pdf:
@@ -17,12 +29,10 @@ def extract_text_from_pdf(pdf_file):
17
  return text
18
 
19
  def split_into_sentences(text):
20
- """Split text into sentences (basic split by .!? with spaces)"""
21
  sentences = re.split(r'(?<=[.!?])\s+', text.strip())
22
  return [s for s in sentences if s]
23
 
24
  def analyze_sentence(sentence):
25
- """Run bias detection and (if biased) bias type classification"""
26
  detection_result = bias_detector(sentence)[0]
27
  label = detection_result['label']
28
  score = detection_result['score']
@@ -46,7 +56,6 @@ def analyze_sentence(sentence):
46
  }
47
 
48
  def analyze_pdf(pdf_file):
49
- """Full pipeline: extract text, split sentences, analyze bias"""
50
  text = extract_text_from_pdf(pdf_file)
51
  sentences = split_into_sentences(text)
52
 
@@ -64,16 +73,29 @@ def analyze_pdf(pdf_file):
64
  - **Unbiased Sentences:** {unbiased} ({(unbiased/total)*100:.1f}%)
65
  """
66
 
67
- # Create a DataFrame for table display
68
  df = pd.DataFrame(results)
69
  return stats_md, df
70
 
71
-
72
  def analyze_text(text):
73
- """Single text input analysis"""
74
  return analyze_sentence(text)
75
 
76
- # Top HTML badges
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
77
  badges_html = """
78
  <p align="center">
79
  <a href="https://huggingface.co/himel7/bias-detector">
@@ -91,9 +113,11 @@ badges_html = """
91
  </p>
92
  """
93
 
 
94
  with gr.Blocks() as demo:
95
  gr.HTML(badges_html)
96
- gr.Markdown("## Bias Detector + Bias Type Classifier")
 
97
 
98
  with gr.Tab("Single Sentence"):
99
  text_input = gr.Textbox(lines=3, placeholder="Enter a sentence...")
@@ -101,6 +125,12 @@ with gr.Blocks() as demo:
101
  btn = gr.Button("Analyze")
102
  btn.click(analyze_text, inputs=text_input, outputs=output)
103
 
 
 
 
 
 
 
104
  with gr.Tab("Analyze PDF"):
105
  pdf_input = gr.File(label="Upload PDF", file_types=[".pdf"])
106
  stats_output = gr.Markdown()
@@ -108,5 +138,12 @@ with gr.Blocks() as demo:
108
  analyze_btn = gr.Button("Analyze PDF")
109
  analyze_btn.click(analyze_pdf, inputs=pdf_input, outputs=[stats_output, table_output])
110
 
 
 
 
 
 
 
 
111
  if __name__ == "__main__":
112
  demo.launch()
 
4
  import re
5
  import pandas as pd
6
 
7
+ # Load detection models
8
  bias_detector = pipeline("text-classification", model="himel7/bias-detector")
9
  bias_type_classifier = pipeline("text-classification", model="maximuspowers/bias-type-classifier")
10
 
11
+ # Neutralizer model registry (pipelines are loaded lazily on first use to keep startup fast)
12
+ neutralizer_models = {
13
+ "BART Neutralizer": "himel7/bias-neutralizer-bart",
14
+ "T5 Small Neutralizer": "himel7/bias-neutralizer-t5s"
15
+ }
16
+ neutralizers = {}
17
+
18
+ def get_neutralizer(model_name):
19
+ if model_name not in neutralizers:
20
+ neutralizers[model_name] = pipeline("text2text-generation", model=neutralizer_models[model_name])
21
+ return neutralizers[model_name]
22
+
23
+ # Utils
24
  def extract_text_from_pdf(pdf_file):
 
25
  text = ""
26
  with fitz.open(pdf_file) as pdf:
27
  for page in pdf:
 
29
  return text
30
 
31
  def split_into_sentences(text):
 
32
  sentences = re.split(r'(?<=[.!?])\s+', text.strip())
33
  return [s for s in sentences if s]
34
 
35
  def analyze_sentence(sentence):
 
36
  detection_result = bias_detector(sentence)[0]
37
  label = detection_result['label']
38
  score = detection_result['score']
 
56
  }
57
 
58
  def analyze_pdf(pdf_file):
 
59
  text = extract_text_from_pdf(pdf_file)
60
  sentences = split_into_sentences(text)
61
 
 
73
  - **Unbiased Sentences:** {unbiased} ({(unbiased/total)*100:.1f}%)
74
  """
75
 
 
76
  df = pd.DataFrame(results)
77
  return stats_md, df
78
 
 
79
  def analyze_text(text):
 
80
  return analyze_sentence(text)
81
 
82
+ # New: Neutralize Bias
83
+ def neutralize_text(text, model_choice):
84
+ neutralizer = get_neutralizer(model_choice)
85
+ result = neutralizer(text, max_length=512, do_sample=False)
86
+ return result[0]["generated_text"]
87
+
88
+ def neutralize_pdf(pdf_file, model_choice):
89
+ text = extract_text_from_pdf(pdf_file)
90
+ sentences = split_into_sentences(text)
91
+
92
+ neutralizer = get_neutralizer(model_choice)
93
+ neutralized_sentences = [neutralizer(s, max_length=512, do_sample=False)[0]["generated_text"] for s in sentences]
94
+ neutralized_text = " ".join(neutralized_sentences)
95
+ return neutralized_text
96
+
97
+
98
+ # Top badges
99
  badges_html = """
100
  <p align="center">
101
  <a href="https://huggingface.co/himel7/bias-detector">
 
113
  </p>
114
  """
115
 
116
+ # Build UI
117
  with gr.Blocks() as demo:
118
  gr.HTML(badges_html)
119
+ gr.Markdown("## Bias Analyzer & Neutralizer")
120
+ gr.Markdown("### This app helps you to detect biases in sentences, analyse them, and neutralize sentences.")
121
 
122
  with gr.Tab("Single Sentence"):
123
  text_input = gr.Textbox(lines=3, placeholder="Enter a sentence...")
 
125
  btn = gr.Button("Analyze")
126
  btn.click(analyze_text, inputs=text_input, outputs=output)
127
 
128
+ gr.Markdown("### Neutralize Bias")
129
+ model_choice = gr.Dropdown(list(neutralizer_models.keys()), label="Neutralizer Model", value="BART Neutralizer")
130
+ neutral_output = gr.Textbox(label="Neutralized Sentence", lines=3)
131
+ neutral_btn = gr.Button("Neutralize")
132
+ neutral_btn.click(neutralize_text, inputs=[text_input, model_choice], outputs=neutral_output)
133
+
134
  with gr.Tab("Analyze PDF"):
135
  pdf_input = gr.File(label="Upload PDF", file_types=[".pdf"])
136
  stats_output = gr.Markdown()
 
138
  analyze_btn = gr.Button("Analyze PDF")
139
  analyze_btn.click(analyze_pdf, inputs=pdf_input, outputs=[stats_output, table_output])
140
 
141
+ gr.Markdown("### Neutralize Entire PDF")
142
+ model_choice_pdf = gr.Dropdown(list(neutralizer_models.keys()), label="Neutralizer Model", value="BART Neutralizer")
143
+ neutral_pdf_output = gr.Textbox(label="Neutralized PDF Text", lines=15)
144
+ neutral_pdf_btn = gr.Button("Neutralize PDF")
145
+ neutral_pdf_btn.click(neutralize_pdf, inputs=[pdf_input, model_choice_pdf], outputs=neutral_pdf_output)
146
+
147
+
148
  if __name__ == "__main__":
149
  demo.launch()