AIxBI_AI_Plagiarism_detection_and_resolution

Sleeping

App Files Files Community

mohitrulzz committed 9 days ago

Commit

2ca7d6c

verified ·

1 Parent(s): fd16747

Update app.py

Browse files

Files changed (1) hide show

app.py +184 -85

app.py CHANGED Viewed

@@ -417,118 +417,217 @@ def check_duplicate_submission(document_hash: str) -> Optional[dict]:
     return None
 # -----------------------------
-# ENHANCED PDF REPORT
 # -----------------------------
 class EnhancedPDF(FPDF):
     def header(self):
         if os.path.exists(LOGO_PATH):
-            self.image(LOGO_PATH, 10, 8, 20)
         self.set_font('Arial', 'B', 15)
-        self.cell(0, 10, 'AIxBI - Professional Plagiarism Analysis Report', 0, 1, 'C')
         self.ln(10)
     def footer(self):
         self.set_y(-15)
         self.set_font('Arial', 'I', 8)
-        self.cell(0, 10, f'Page {self.page_no()} | Generated on {datetime.now().strftime("%Y-%m-%d %H:%M:%S")}',
-                 0, 0, 'C')
     def add_section_header(self, title: str):
         self.set_font('Arial', 'B', 12)
         self.set_fill_color(200, 220, 255)
-        self.cell(0, 10, title, 0, 1, 'L', 1)
         self.ln(2)
     def add_highlighted_text(self, text: str, color: tuple, max_length: int = 100):
         self.set_fill_color(*color)
-        # Truncate long text
-        display_text = text[:max_length] + "..." if len(text) > max_length else text
-        self.multi_cell(0, 8, display_text, 1, 'L', 1)
         self.ln(2)
 def generate_enhanced_pdf_report(student_name: str, student_id: str, ai_score: float,
                                plagiarism_score: float, suspicious_results: List[dict],
                                metadata: dict, ai_details: dict, output_path: str):
-    """Generate comprehensive PDF report"""
-    pdf = EnhancedPDF()
-    pdf.add_page()
-    # Executive Summary
-    pdf.add_section_header("EXECUTIVE SUMMARY")
-    pdf.set_font('Arial', '', 10)
-    summary_data = [
-        f"Student: {student_name} ({student_id})",
-        f"Document Type: {metadata.get('file_type', 'Unknown').upper()}",
-        f"Word Count: {metadata.get('word_count', 0):,}",
-        f"AI Detection Score: {ai_score:.1f}% (Confidence: {ai_details.get('confidence', 'N/A')})",
-        f"Plagiarism Score: {plagiarism_score:.1f}%",
-        f"Suspicious Sentences: {sum(1 for r in suspicious_results if r['is_suspicious'])}",
-        f"Analysis Date: {datetime.now().strftime('%B %d, %Y at %H:%M:%S')}"
-    ]
-    for item in summary_data:
-        pdf.cell(0, 6, item, 0, 1)
-    pdf.ln(5)
-    # Risk Assessment
-    pdf.add_section_header("RISK ASSESSMENT")
-    pdf.set_font('Arial', '', 10)
-    risk_level = "HIGH" if (ai_score > 70 or plagiarism_score > 30) else "MEDIUM" if (ai_score > 40 or plagiarism_score > 15) else "LOW"
-    risk_color = (255, 200, 200) if risk_level == "HIGH" else (255, 255, 200) if risk_level == "MEDIUM" else (200, 255, 200)
-    pdf.set_fill_color(*risk_color)
-    pdf.cell(0, 10, f"Overall Risk Level: {risk_level}", 1, 1, 'C', 1)
-    pdf.ln(5)
-    # AI Detection Details
-    if ai_details.get('chunk_scores'):
-        pdf.add_section_header("AI DETECTION ANALYSIS")
-        pdf.set_font('Arial', '', 9)
-        pdf.cell(0, 6, f"Chunks Analyzed: {len(ai_details['chunk_scores'])}", 0, 1)
-        pdf.cell(0, 6, f"Score Consistency (Std Dev): {ai_details.get('std_deviation', 'N/A')}", 0, 1)
-        pdf.ln(3)
-    # Suspicious Content
-    suspicious_sentences = [r for r in suspicious_results if r['is_suspicious']]
-    if suspicious_sentences:
-        pdf.add_section_header("FLAGGED CONTENT")
-        pdf.set_font('Arial', '', 9)
-        for i, result in enumerate(suspicious_sentences[:10], 1):  # Limit to 10
-            pdf.cell(0, 6, f"Issue #{i} (Confidence: {result['confidence']:.1f})", 0, 1)
-            pdf.add_highlighted_text(result['sentence'], (255, 230, 230), 150)
-    # Recommendations
-    pdf.add_section_header("RECOMMENDATIONS")
-    pdf.set_font('Arial', '', 10)
-    recommendations = []
-    if ai_score > 50:
-        recommendations.append("• Review content for AI-generated sections and rewrite in original voice")
-    if plagiarism_score > 20:
-        recommendations.append("• Add proper citations for referenced material")
-        recommendations.append("• Paraphrase flagged sentences to ensure originality")
-    if len(suspicious_sentences) > 5:
-        recommendations.append("• Conduct thorough revision focusing on highlighted sections")
-    recommendations.extend([
-        "• Use plagiarism detection tools during writing process",
-        "• Ensure all sources are properly attributed",
-        "• Maintain academic integrity standards"
-    ])
-    for rec in recommendations:
-        pdf.multi_cell(0, 6, rec)
-        pdf.ln(1)
     try:
         pdf.output(output_path)
-        logger.info(f"PDF report generated: {output_path}")
     except Exception as e:
         logger.error(f"Error generating PDF report: {e}")
-        raise
 # -----------------------------
 # ENHANCED APP LOGIC

     return None
 # -----------------------------
+# ENHANCED PDF REPORT WITH UNICODE SUPPORT
 # -----------------------------
# Single-character substitutions applied before the Latin-1 filter.  Keys are
# written as escapes so that tools which "normalise" typographic characters
# cannot silently turn them into ASCII look-alikes (which would make the quote
# entries no-ops or even syntax errors).
_PDF_CHAR_REPLACEMENTS = {
    '\u2022': '-',         # bullet point
    '\u2013': '-',         # en dash
    '\u2014': '-',         # em dash
    '\u201c': '"',         # left double quote
    '\u201d': '"',         # right double quote
    '\u2018': "'",         # left single quote
    '\u2019': "'",         # right single quote
    '\u2026': '...',       # ellipsis
    '\u00ae': '(R)',       # registered trademark
    '\u00a9': '(C)',       # copyright
    '\u2122': '(TM)',      # trademark
    '\u20ac': 'EUR',       # euro sign
    '\u00a3': 'GBP',       # pound sign
    '\u00a5': 'JPY',       # yen sign
    '\u00a7': 'Section',   # section sign
    '\u00b6': 'Para',      # paragraph sign
    '\u2020': '+',         # dagger
    '\u2021': '++',        # double dagger
    '\u00b0': ' degrees',  # degree sign
    '\u00b1': '+/-',       # plus-minus
    '\u00f7': '/',         # division sign
    '\u00d7': 'x',         # multiplication sign
    '\u2264': '<=',        # less than or equal
    '\u2265': '>=',        # greater than or equal
    '\u2260': '!=',        # not equal
    '\u221e': 'infinity',  # infinity
    # Greek letters
    '\u03b1': 'alpha',
    '\u03b2': 'beta',
    '\u03b3': 'gamma',
    '\u03b4': 'delta',
    '\u03bb': 'lambda',
    '\u03bc': 'mu',
    '\u03c0': 'pi',
    '\u03c3': 'sigma',
    '\u03a9': 'Omega',
}
# Built once; str.translate applies every substitution in a single pass.
_PDF_TRANSLATION_TABLE = str.maketrans(_PDF_CHAR_REPLACEMENTS)


def clean_text_for_pdf(text: str) -> str:
    """Return ``text`` made safe for FPDF's Latin-1 text encoding.

    Common typographic and mathematical characters are replaced by ASCII
    equivalents (see ``_PDF_CHAR_REPLACEMENTS``).  Any character that still
    cannot be encoded as Latin-1 afterwards is dropped; characters that are
    valid Latin-1 (for example accented letters) are kept.

    Args:
        text: The text to clean.  ``None`` yields ``""``; other non-string
            values are converted with ``str()``.

    Returns:
        A string that can always be encoded as Latin-1.
    """
    if text is None:
        return ""
    if not isinstance(text, str):
        text = str(text)
    text = text.translate(_PDF_TRANSLATION_TABLE)
    # Drop only what Latin-1 cannot represent, rather than all non-ASCII text.
    return text.encode('latin-1', 'ignore').decode('latin-1')
 class EnhancedPDF(FPDF):
     def header(self):
         if os.path.exists(LOGO_PATH):
+            try:
+                self.image(LOGO_PATH, 10, 8, 20)
+            except:
+                pass  # Skip logo if there's an issue
         self.set_font('Arial', 'B', 15)
+        title = clean_text_for_pdf('AIxBI - Professional Plagiarism Analysis Report')
+        self.cell(0, 10, title, 0, 1, 'C')
         self.ln(10)
     def footer(self):
         self.set_y(-15)
         self.set_font('Arial', 'I', 8)
+        footer_text = clean_text_for_pdf(f'Page {self.page_no()} | Generated on {datetime.now().strftime("%Y-%m-%d %H:%M:%S")}')
+        self.cell(0, 10, footer_text, 0, 0, 'C')
     def add_section_header(self, title: str):
         self.set_font('Arial', 'B', 12)
         self.set_fill_color(200, 220, 255)
+        clean_title = clean_text_for_pdf(title)
+        self.cell(0, 10, clean_title, 0, 1, 'L', 1)
         self.ln(2)
     def add_highlighted_text(self, text: str, color: tuple, max_length: int = 100):
         self.set_fill_color(*color)
+        # Clean and truncate text
+        clean_text = clean_text_for_pdf(text)
+        display_text = clean_text[:max_length] + "..." if len(clean_text) > max_length else clean_text
+        try:
+            self.multi_cell(0, 8, display_text, 1, 'L', 1)
+        except Exception as e:
+            # Fallback: create a safe version
+            safe_text = "Text contains unsupported characters - please check original document"
+            self.multi_cell(0, 8, safe_text, 1, 'L', 1)
         self.ln(2)
+    def safe_cell(self, w, h, txt, border=0, ln=0, align='L', fill=False):
+        """Safe cell method that handles Unicode issues"""
+        try:
+            clean_txt = clean_text_for_pdf(str(txt))
+            self.cell(w, h, clean_txt, border, ln, align, fill)
+        except Exception as e:
+            # Fallback to a safe message
+            self.cell(w, h, "[Content contains unsupported characters]", border, ln, align, fill)
+    def safe_multi_cell(self, w, h, txt, border=0, align='L', fill=False):
+        """Safe multi_cell method that handles Unicode issues"""
+        try:
+            clean_txt = clean_text_for_pdf(str(txt))
+            self.multi_cell(w, h, clean_txt, border, align, fill)
+        except Exception as e:
+            # Fallback to a safe message
+            self.multi_cell(w, h, "[Content contains unsupported characters - please check source document]", border, align, fill)
 def generate_enhanced_pdf_report(student_name: str, student_id: str, ai_score: float,
                                plagiarism_score: float, suspicious_results: List[dict],
                                metadata: dict, ai_details: dict, output_path: str):
+    """Generate comprehensive PDF report with Unicode safety"""
     try:
+        pdf = EnhancedPDF()
+        pdf.add_page()
+        # Executive Summary
+        pdf.add_section_header("EXECUTIVE SUMMARY")
+        pdf.set_font('Arial', '', 10)
+        summary_data = [
+            f"Student: {student_name} ({student_id})",
+            f"Document Type: {metadata.get('file_type', 'Unknown').upper()}",
+            f"Word Count: {metadata.get('word_count', 0):,}",
+            f"AI Detection Score: {ai_score:.1f}% (Confidence: {ai_details.get('confidence', 'N/A')})",
+            f"Plagiarism Score: {plagiarism_score:.1f}%",
+            f"Suspicious Sentences: {sum(1 for r in suspicious_results if r['is_suspicious'])}",
+            f"Analysis Date: {datetime.now().strftime('%B %d, %Y at %H:%M:%S')}"
+        ]
+        for item in summary_data:
+            pdf.safe_cell(0, 6, item, 0, 1)
+        pdf.ln(5)
+        # Risk Assessment
+        pdf.add_section_header("RISK ASSESSMENT")
+        pdf.set_font('Arial', '', 10)
+        risk_level = "HIGH" if (ai_score > 70 or plagiarism_score > 30) else "MEDIUM" if (ai_score > 40 or plagiarism_score > 15) else "LOW"
+        risk_color = (255, 200, 200) if risk_level == "HIGH" else (255, 255, 200) if risk_level == "MEDIUM" else (200, 255, 200)
+        pdf.set_fill_color(*risk_color)
+        pdf.safe_cell(0, 10, f"Overall Risk Level: {risk_level}", 1, 1, 'C', 1)
+        pdf.ln(5)
+        # AI Detection Details
+        if ai_details.get('chunk_scores'):
+            pdf.add_section_header("AI DETECTION ANALYSIS")
+            pdf.set_font('Arial', '', 9)
+            pdf.safe_cell(0, 6, f"Chunks Analyzed: {len(ai_details['chunk_scores'])}", 0, 1)
+            pdf.safe_cell(0, 6, f"Score Consistency (Std Dev): {ai_details.get('std_deviation', 'N/A')}", 0, 1)
+            pdf.ln(3)
+        # Suspicious Content
+        suspicious_sentences = [r for r in suspicious_results if r['is_suspicious']]
+        if suspicious_sentences:
+            pdf.add_section_header("FLAGGED CONTENT")
+            pdf.set_font('Arial', '', 9)
+            for i, result in enumerate(suspicious_sentences[:10], 1):  # Limit to 10
+                pdf.safe_cell(0, 6, f"Issue #{i} (Confidence: {result['confidence']:.1f})", 0, 1)
+                pdf.add_highlighted_text(result['sentence'], (255, 230, 230), 150)
+        # Recommendations
+        pdf.add_section_header("RECOMMENDATIONS")
+        pdf.set_font('Arial', '', 10)
+        recommendations = []
+        if ai_score > 50:
+            recommendations.append("- Review content for AI-generated sections and rewrite in original voice")
+        if plagiarism_score > 20:
+            recommendations.append("- Add proper citations for referenced material")
+            recommendations.append("- Paraphrase flagged sentences to ensure originality")
+        if len(suspicious_sentences) > 5:
+            recommendations.append("- Conduct thorough revision focusing on highlighted sections")
+        recommendations.extend([
+            "- Use plagiarism detection tools during writing process",
+            "- Ensure all sources are properly attributed",
+            "- Maintain academic integrity standards"
+        ])
+        for rec in recommendations:
+            pdf.safe_multi_cell(0, 6, rec)
+            pdf.ln(1)
+        # Generate PDF with error handling
         pdf.output(output_path)
+        logger.info(f"PDF report generated successfully: {output_path}")
     except Exception as e:
         logger.error(f"Error generating PDF report: {e}")
+        # Create a simple fallback PDF
+        try:
+            simple_pdf = FPDF()
+            simple_pdf.add_page()
+            simple_pdf.set_font('Arial', 'B', 16)
+            simple_pdf.cell(0, 10, 'AIxBI Analysis Report', 0, 1, 'C')
+            simple_pdf.ln(10)
+            simple_pdf.set_font('Arial', '', 12)
+            simple_pdf.cell(0, 10, f'Student: {clean_text_for_pdf(student_name)}', 0, 1)
+            simple_pdf.cell(0, 10, f'Student ID: {clean_text_for_pdf(student_id)}', 0, 1)
+            simple_pdf.cell(0, 10, f'AI Score: {ai_score:.1f}%', 0, 1)
+            simple_pdf.cell(0, 10, f'Plagiarism Score: {plagiarism_score:.1f}%', 0, 1)
+            simple_pdf.cell(0, 10, f'Date: {datetime.now().strftime("%Y-%m-%d %H:%M:%S")}', 0, 1)
+            simple_pdf.ln(10)
+            simple_pdf.multi_cell(0, 10, 'Note: Full report could not be generated due to character encoding issues. Please contact administrator if this persists.')
+            simple_pdf.output(output_path)
+            logger.info(f"Fallback PDF report generated: {output_path}")
+        except Exception as fallback_error:
+            logger.error(f"Even fallback PDF generation failed: {fallback_error}")
+            raise Exception(f"PDF generation failed: {e}")
 # -----------------------------
 # ENHANCED APP LOGIC