Armando Medina committed on
Commit
ada15f5
·
verified ·
1 Parent(s): d61acd9

Update app.py

Browse files

reverted back to known good working version

Files changed (1) hide show
  1. app.py +13 -11
app.py CHANGED
@@ -25,7 +25,6 @@ COMPLIANCE_ENTITIES = {
25
  SUPPORTED_FILE_TYPES = [".pdf", ".docx", ".txt", ".png", ".jpg", ".jpeg"]
26
 
27
  def extract_text(doc):
28
- MAX_OCR_CHARS = 5000 # Prevent large OCR outputs from images
29
  if not hasattr(doc, "name"):
30
  return "ERROR: No file uploaded."
31
  try:
@@ -43,9 +42,6 @@ def extract_text(doc):
43
  elif fname.endswith((".png", ".jpg", ".jpeg")):
44
  img = Image.open(doc.name)
45
  text = pytesseract.image_to_string(img)
46
- # Truncate large OCR blobs
47
- if len(text) > MAX_OCR_CHARS:
48
- text = text[:MAX_OCR_CHARS] + "\n...[truncated]"
49
  else:
50
  return "ERROR: Unsupported file type."
51
  if not text.strip():
@@ -251,7 +247,6 @@ def executive_summary_template(findings, score, regime):
251
  return " ".join(summary_lines)
252
 
253
  def agentic_compliance(doc, regime):
254
- MAX_FINDINGS_DISPLAY = 20 # Only show 20 findings max
255
  text = extract_text(doc)
256
  if text.startswith("ERROR"):
257
  return text, None, None, None
@@ -266,13 +261,10 @@ def agentic_compliance(doc, regime):
266
  summary = summarize_narrative(relevant, regime)
267
  exec_summary = executive_summary_template(relevant, score, regime)
268
 
269
- findings_sample = relevant[:MAX_FINDINGS_DISPLAY]
270
  findings_md = "\n".join([
271
  f"- **{f['entity']}** (`{f['text']}`), score: {f.get('score', 0):.2f}"
272
- for f in findings_sample
273
- ]) if findings_sample else "No relevant PII found for this regime."
274
- if len(relevant) > MAX_FINDINGS_DISPLAY:
275
- findings_md += f"\n...and {len(relevant) - MAX_FINDINGS_DISPLAY} more not shown."
276
 
277
  fixes_md = "\n".join([f"- {fix}" for fix in fixes]) if fixes else "No action needed."
278
  legend_md = score_legend()
@@ -292,21 +284,31 @@ def agentic_compliance(doc, regime):
292
  redacted_image = None # No inline preview for PDFs
293
 
294
  md = f"""### Compliance Regime: **{regime}**
 
295
  **Executive Summary:**
296
  {exec_summary}
297
- **Findings (showing up to {MAX_FINDINGS_DISPLAY}):**
 
298
  {findings_md}
 
299
  **Risk Score:** {score}
 
300
  **Actionable Recommendations:**
301
  {fixes_md}
 
302
  **Summary:**
303
  {summary}
 
304
  ---
 
305
  {legend_md}
 
306
  ---
 
307
  **Redacted Document Preview:**
308
  <details>
309
  <summary>Show/Hide Redacted Text</summary>
 
310
  </details>
311
  """
312
  return md.strip(), redacted_path, redacted_file_path, redacted_image
 
25
  SUPPORTED_FILE_TYPES = [".pdf", ".docx", ".txt", ".png", ".jpg", ".jpeg"]
26
 
27
  def extract_text(doc):
 
28
  if not hasattr(doc, "name"):
29
  return "ERROR: No file uploaded."
30
  try:
 
42
  elif fname.endswith((".png", ".jpg", ".jpeg")):
43
  img = Image.open(doc.name)
44
  text = pytesseract.image_to_string(img)
 
 
 
45
  else:
46
  return "ERROR: Unsupported file type."
47
  if not text.strip():
 
247
  return " ".join(summary_lines)
248
 
249
  def agentic_compliance(doc, regime):
 
250
  text = extract_text(doc)
251
  if text.startswith("ERROR"):
252
  return text, None, None, None
 
261
  summary = summarize_narrative(relevant, regime)
262
  exec_summary = executive_summary_template(relevant, score, regime)
263
 
 
264
  findings_md = "\n".join([
265
  f"- **{f['entity']}** (`{f['text']}`), score: {f.get('score', 0):.2f}"
266
+ for f in relevant
267
+ ]) if relevant else "No relevant PII found for this regime."
 
 
268
 
269
  fixes_md = "\n".join([f"- {fix}" for fix in fixes]) if fixes else "No action needed."
270
  legend_md = score_legend()
 
284
  redacted_image = None # No inline preview for PDFs
285
 
286
  md = f"""### Compliance Regime: **{regime}**
287
+
288
  **Executive Summary:**
289
  {exec_summary}
290
+
291
+ **Findings:**
292
  {findings_md}
293
+
294
  **Risk Score:** {score}
295
+
296
  **Actionable Recommendations:**
297
  {fixes_md}
298
+
299
  **Summary:**
300
  {summary}
301
+
302
  ---
303
+
304
  {legend_md}
305
+
306
  ---
307
+
308
  **Redacted Document Preview:**
309
  <details>
310
  <summary>Show/Hide Redacted Text</summary>
311
+
312
  </details>
313
  """
314
  return md.strip(), redacted_path, redacted_file_path, redacted_image