Spaces:

Agents-MCP-Hackathon
/

PII_Compliance_Check_MCP

Running

App Files Files Community

Armando Medina committed on Jun 3

Commit

ada15f5

verified ·

1 Parent(s): d61acd9

Update app.py

Browse files

Reverted back to the known-good working version.

Files changed (1) hide show

app.py +13 -11

app.py CHANGED Viewed

@@ -25,7 +25,6 @@ COMPLIANCE_ENTITIES = {
 SUPPORTED_FILE_TYPES = [".pdf", ".docx", ".txt", ".png", ".jpg", ".jpeg"]
 def extract_text(doc):
-    MAX_OCR_CHARS = 5000  # Prevent large OCR outputs from images
     if not hasattr(doc, "name"):
         return "ERROR: No file uploaded."
     try:
@@ -43,9 +42,6 @@ def extract_text(doc):
         elif fname.endswith((".png", ".jpg", ".jpeg")):
             img = Image.open(doc.name)
             text = pytesseract.image_to_string(img)
-            # Truncate large OCR blobs
-            if len(text) > MAX_OCR_CHARS:
-                text = text[:MAX_OCR_CHARS] + "\n...[truncated]"
         else:
             return "ERROR: Unsupported file type."
         if not text.strip():
@@ -251,7 +247,6 @@ def executive_summary_template(findings, score, regime):
     return " ".join(summary_lines)
 def agentic_compliance(doc, regime):
-    MAX_FINDINGS_DISPLAY = 20  # Only show 20 findings max
     text = extract_text(doc)
     if text.startswith("ERROR"):
         return text, None, None, None
@@ -266,13 +261,10 @@ def agentic_compliance(doc, regime):
     summary = summarize_narrative(relevant, regime)
     exec_summary = executive_summary_template(relevant, score, regime)
-    findings_sample = relevant[:MAX_FINDINGS_DISPLAY]
     findings_md = "\n".join([
         f"- **{f['entity']}** (`{f['text']}`), score: {f.get('score', 0):.2f}"
-        for f in findings_sample
-    ]) if findings_sample else "No relevant PII found for this regime."
-    if len(relevant) > MAX_FINDINGS_DISPLAY:
-        findings_md += f"\n...and {len(relevant) - MAX_FINDINGS_DISPLAY} more not shown."
     fixes_md = "\n".join([f"- {fix}" for fix in fixes]) if fixes else "No action needed."
     legend_md = score_legend()
@@ -292,21 +284,31 @@ def agentic_compliance(doc, regime):
             redacted_image = None  # No inline preview for PDFs
     md = f"""### Compliance Regime: **{regime}**
 **Executive Summary:**
 {exec_summary}
-**Findings (showing up to {MAX_FINDINGS_DISPLAY}):**
 {findings_md}
 **Risk Score:** {score}
 **Actionable Recommendations:**
 {fixes_md}
 **Summary:**
 {summary}
 ---
 {legend_md}
 ---
 **Redacted Document Preview:**
 <details>
 <summary>Show/Hide Redacted Text</summary>
 </details>
 """
     return md.strip(), redacted_path, redacted_file_path, redacted_image

 SUPPORTED_FILE_TYPES = [".pdf", ".docx", ".txt", ".png", ".jpg", ".jpeg"]
 def extract_text(doc):
     if not hasattr(doc, "name"):
         return "ERROR: No file uploaded."
     try:
         elif fname.endswith((".png", ".jpg", ".jpeg")):
             img = Image.open(doc.name)
             text = pytesseract.image_to_string(img)
         else:
             return "ERROR: Unsupported file type."
         if not text.strip():
     return " ".join(summary_lines)
 def agentic_compliance(doc, regime):
     text = extract_text(doc)
     if text.startswith("ERROR"):
         return text, None, None, None
     summary = summarize_narrative(relevant, regime)
     exec_summary = executive_summary_template(relevant, score, regime)
     findings_md = "\n".join([
         f"- **{f['entity']}** (`{f['text']}`), score: {f.get('score', 0):.2f}"
+        for f in relevant
+    ]) if relevant else "No relevant PII found for this regime."
     fixes_md = "\n".join([f"- {fix}" for fix in fixes]) if fixes else "No action needed."
     legend_md = score_legend()
             redacted_image = None  # No inline preview for PDFs
     md = f"""### Compliance Regime: **{regime}**
 **Executive Summary:**
 {exec_summary}
+**Findings:**
 {findings_md}
 **Risk Score:** {score}
 **Actionable Recommendations:**
 {fixes_md}
 **Summary:**
 {summary}
 ---
 {legend_md}
 ---
 **Redacted Document Preview:**
 <details>
 <summary>Show/Hide Redacted Text</summary>
 </details>
 """
     return md.strip(), redacted_path, redacted_file_path, redacted_image