mohitrulzz committed on
Commit
2ca7d6c
·
verified ·
1 Parent(s): fd16747

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +184 -85
app.py CHANGED
@@ -417,118 +417,217 @@ def check_duplicate_submission(document_hash: str) -> Optional[dict]:
417
  return None
418
 
419
  # -----------------------------
420
- # ENHANCED PDF REPORT
421
  # -----------------------------
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
422
  class EnhancedPDF(FPDF):
423
  def header(self):
424
  if os.path.exists(LOGO_PATH):
425
- self.image(LOGO_PATH, 10, 8, 20)
 
 
 
426
  self.set_font('Arial', 'B', 15)
427
- self.cell(0, 10, 'AIxBI - Professional Plagiarism Analysis Report', 0, 1, 'C')
 
428
  self.ln(10)
429
 
430
  def footer(self):
431
  self.set_y(-15)
432
  self.set_font('Arial', 'I', 8)
433
- self.cell(0, 10, f'Page {self.page_no()} | Generated on {datetime.now().strftime("%Y-%m-%d %H:%M:%S")}',
434
- 0, 0, 'C')
435
 
436
  def add_section_header(self, title: str):
437
  self.set_font('Arial', 'B', 12)
438
  self.set_fill_color(200, 220, 255)
439
- self.cell(0, 10, title, 0, 1, 'L', 1)
 
440
  self.ln(2)
441
 
442
  def add_highlighted_text(self, text: str, color: tuple, max_length: int = 100):
443
  self.set_fill_color(*color)
444
- # Truncate long text
445
- display_text = text[:max_length] + "..." if len(text) > max_length else text
446
- self.multi_cell(0, 8, display_text, 1, 'L', 1)
 
 
 
 
 
 
447
  self.ln(2)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
448
 
449
  def generate_enhanced_pdf_report(student_name: str, student_id: str, ai_score: float,
450
  plagiarism_score: float, suspicious_results: List[dict],
451
  metadata: dict, ai_details: dict, output_path: str):
452
- """Generate comprehensive PDF report"""
453
- pdf = EnhancedPDF()
454
- pdf.add_page()
455
-
456
- # Executive Summary
457
- pdf.add_section_header("EXECUTIVE SUMMARY")
458
- pdf.set_font('Arial', '', 10)
459
-
460
- summary_data = [
461
- f"Student: {student_name} ({student_id})",
462
- f"Document Type: {metadata.get('file_type', 'Unknown').upper()}",
463
- f"Word Count: {metadata.get('word_count', 0):,}",
464
- f"AI Detection Score: {ai_score:.1f}% (Confidence: {ai_details.get('confidence', 'N/A')})",
465
- f"Plagiarism Score: {plagiarism_score:.1f}%",
466
- f"Suspicious Sentences: {sum(1 for r in suspicious_results if r['is_suspicious'])}",
467
- f"Analysis Date: {datetime.now().strftime('%B %d, %Y at %H:%M:%S')}"
468
- ]
469
-
470
- for item in summary_data:
471
- pdf.cell(0, 6, item, 0, 1)
472
- pdf.ln(5)
473
-
474
- # Risk Assessment
475
- pdf.add_section_header("RISK ASSESSMENT")
476
- pdf.set_font('Arial', '', 10)
477
-
478
- risk_level = "HIGH" if (ai_score > 70 or plagiarism_score > 30) else "MEDIUM" if (ai_score > 40 or plagiarism_score > 15) else "LOW"
479
- risk_color = (255, 200, 200) if risk_level == "HIGH" else (255, 255, 200) if risk_level == "MEDIUM" else (200, 255, 200)
480
-
481
- pdf.set_fill_color(*risk_color)
482
- pdf.cell(0, 10, f"Overall Risk Level: {risk_level}", 1, 1, 'C', 1)
483
- pdf.ln(5)
484
-
485
- # AI Detection Details
486
- if ai_details.get('chunk_scores'):
487
- pdf.add_section_header("AI DETECTION ANALYSIS")
488
- pdf.set_font('Arial', '', 9)
489
- pdf.cell(0, 6, f"Chunks Analyzed: {len(ai_details['chunk_scores'])}", 0, 1)
490
- pdf.cell(0, 6, f"Score Consistency (Std Dev): {ai_details.get('std_deviation', 'N/A')}", 0, 1)
491
- pdf.ln(3)
492
-
493
- # Suspicious Content
494
- suspicious_sentences = [r for r in suspicious_results if r['is_suspicious']]
495
- if suspicious_sentences:
496
- pdf.add_section_header("FLAGGED CONTENT")
497
- pdf.set_font('Arial', '', 9)
498
-
499
- for i, result in enumerate(suspicious_sentences[:10], 1): # Limit to 10
500
- pdf.cell(0, 6, f"Issue #{i} (Confidence: {result['confidence']:.1f})", 0, 1)
501
- pdf.add_highlighted_text(result['sentence'], (255, 230, 230), 150)
502
-
503
- # Recommendations
504
- pdf.add_section_header("RECOMMENDATIONS")
505
- pdf.set_font('Arial', '', 10)
506
-
507
- recommendations = []
508
- if ai_score > 50:
509
- recommendations.append("• Review content for AI-generated sections and rewrite in original voice")
510
- if plagiarism_score > 20:
511
- recommendations.append("• Add proper citations for referenced material")
512
- recommendations.append("• Paraphrase flagged sentences to ensure originality")
513
- if len(suspicious_sentences) > 5:
514
- recommendations.append("• Conduct thorough revision focusing on highlighted sections")
515
-
516
- recommendations.extend([
517
- "• Use plagiarism detection tools during writing process",
518
- "• Ensure all sources are properly attributed",
519
- "• Maintain academic integrity standards"
520
- ])
521
-
522
- for rec in recommendations:
523
- pdf.multi_cell(0, 6, rec)
524
- pdf.ln(1)
525
-
526
  try:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
527
  pdf.output(output_path)
528
- logger.info(f"PDF report generated: {output_path}")
 
529
  except Exception as e:
530
  logger.error(f"Error generating PDF report: {e}")
531
- raise
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
532
 
533
  # -----------------------------
534
  # ENHANCED APP LOGIC
 
417
  return None
418
 
419
  # -----------------------------
420
+ # ENHANCED PDF REPORT WITH UNICODE SUPPORT
421
  # -----------------------------
422
# Single-character substitutions applied before encoding. Every key is one
# non-ASCII code point and every replacement is pure ASCII, so one
# ``str.translate`` pass is equivalent to applying the replacements one by one.
# The typographic quotes are written as escapes so they cannot be silently
# normalised to plain ASCII quotes by an editor or a copy/paste.
_PDF_CHAR_REPLACEMENTS = str.maketrans({
    '•': '-',            # bullet point
    '–': '-',            # en dash
    '—': '-',            # em dash
    '\u201c': '"',       # left double quote
    '\u201d': '"',       # right double quote
    '\u2018': "'",       # left single quote
    '\u2019': "'",       # right single quote
    '…': '...',          # ellipsis
    '®': '(R)',          # registered trademark
    '©': '(C)',          # copyright
    '™': '(TM)',         # trademark
    '€': 'EUR',          # euro sign
    '£': 'GBP',          # pound sign
    '¥': 'JPY',          # yen sign
    '§': 'Section',      # section sign
    '¶': 'Para',         # paragraph sign
    '†': '+',            # dagger
    '‡': '++',           # double dagger
    '°': ' degrees',     # degree sign
    '±': '+/-',          # plus-minus
    '÷': '/',            # division sign
    '×': 'x',            # multiplication sign
    '≤': '<=',           # less than or equal
    '≥': '>=',           # greater than or equal
    '≠': '!=',           # not equal
    '∞': 'infinity',     # infinity
    # Greek letters
    'α': 'alpha', 'β': 'beta', 'γ': 'gamma', 'δ': 'delta',
    'λ': 'lambda', 'μ': 'mu', 'π': 'pi', 'σ': 'sigma', 'Ω': 'Omega',
})


def clean_text_for_pdf(text: str) -> str:
    """Return *text* restricted to characters that can be encoded as latin-1.

    Common typographic and mathematical symbols are first replaced with ASCII
    equivalents (for example a bullet becomes ``-`` and a curly quote becomes
    a straight quote). Any character that still cannot be encoded as latin-1
    is then dropped, while characters that latin-1 does support (such as
    accented letters) are kept.

    Args:
        text: The text to sanitise. ``None`` is treated as an empty string and
            non-string values are converted with ``str()``.

    Returns:
        A string that ``str.encode('latin-1')`` accepts without error.
    """
    if text is None:
        return ""
    if not isinstance(text, str):
        # Callers interpolate IDs and scores; accept them instead of failing
        # on the missing str methods.
        text = str(text)

    text = text.translate(_PDF_CHAR_REPLACEMENTS)

    # Drop only what latin-1 cannot represent. Encoding to ASCII here would
    # also discard valid latin-1 characters (e.g. accented names) whenever a
    # single unsupported character appears elsewhere in the string.
    return text.encode('latin-1', 'ignore').decode('latin-1')
469
+
470
class EnhancedPDF(FPDF):
    """FPDF subclass providing the page chrome and text helpers for the report.

    All text drawn through this class is passed through
    ``clean_text_for_pdf`` first. The ``safe_*`` helpers and
    ``add_highlighted_text`` additionally fall back to a fixed placeholder
    message if drawing the cleaned text still raises.
    """

    def header(self):
        """Draw the optional logo and the centred report title."""
        if os.path.exists(LOGO_PATH):
            try:
                self.image(LOGO_PATH, 10, 8, 20)
            except Exception as exc:
                # A broken logo must not abort the report: skip it, but record
                # why. ``Exception`` (not a bare except) lets KeyboardInterrupt
                # and SystemExit propagate.
                logger.warning(f"Skipping report logo {LOGO_PATH}: {exc}")
        self.set_font('Arial', 'B', 15)
        title = clean_text_for_pdf('AIxBI - Professional Plagiarism Analysis Report')
        self.cell(0, 10, title, 0, 1, 'C')
        self.ln(10)

    def footer(self):
        """Draw the page number and generation timestamp 15 units above the bottom."""
        self.set_y(-15)
        self.set_font('Arial', 'I', 8)
        footer_text = clean_text_for_pdf(f'Page {self.page_no()} | Generated on {datetime.now().strftime("%Y-%m-%d %H:%M:%S")}')
        self.cell(0, 10, footer_text, 0, 0, 'C')

    def add_section_header(self, title: str):
        """Draw *title* as a bold, filled, left-aligned section heading."""
        self.set_font('Arial', 'B', 12)
        self.set_fill_color(200, 220, 255)
        clean_title = clean_text_for_pdf(title)
        self.cell(0, 10, clean_title, 0, 1, 'L', 1)
        self.ln(2)

    def add_highlighted_text(self, text: str, color: tuple, max_length: int = 100):
        """Draw *text* in a bordered box filled with *color*.

        Args:
            text: Text to display; it is cleaned for the PDF encoding first.
            color: Fill colour components, unpacked into ``set_fill_color``.
            max_length: Cleaned text longer than this is cut to this many
                characters and suffixed with ``"..."``.
        """
        self.set_fill_color(*color)
        # Clean before truncating so the length limit applies to what is drawn.
        clean_text = clean_text_for_pdf(text)
        if len(clean_text) > max_length:
            display_text = clean_text[:max_length] + "..."
        else:
            display_text = clean_text
        try:
            self.multi_cell(0, 8, display_text, 1, 'L', 1)
        except Exception as exc:
            # Keep the report going with a placeholder, but leave a trace.
            logger.warning(f"Could not render highlighted text: {exc}")
            safe_text = "Text contains unsupported characters - please check original document"
            self.multi_cell(0, 8, safe_text, 1, 'L', 1)
        self.ln(2)

    def safe_cell(self, w, h, txt, border=0, ln=0, align='L', fill=False):
        """Draw a single cell, substituting a placeholder if *txt* cannot be drawn.

        Takes the same arguments as ``cell``; *txt* is converted with ``str()``
        and cleaned before being drawn.
        """
        try:
            clean_txt = clean_text_for_pdf(str(txt))
            self.cell(w, h, clean_txt, border, ln, align, fill)
        except Exception as exc:
            logger.warning(f"Could not render cell text: {exc}")
            # Fallback to a safe message
            self.cell(w, h, "[Content contains unsupported characters]", border, ln, align, fill)

    def safe_multi_cell(self, w, h, txt, border=0, align='L', fill=False):
        """Draw a multi-line cell, substituting a placeholder if *txt* cannot be drawn.

        Takes the same arguments as ``multi_cell``; *txt* is converted with
        ``str()`` and cleaned before being drawn.
        """
        try:
            clean_txt = clean_text_for_pdf(str(txt))
            self.multi_cell(w, h, clean_txt, border, align, fill)
        except Exception as exc:
            logger.warning(f"Could not render multi-line text: {exc}")
            # Fallback to a safe message
            self.multi_cell(w, h, "[Content contains unsupported characters - please check source document]", border, align, fill)
525
 
526
def generate_enhanced_pdf_report(student_name: str, student_id: str, ai_score: float,
                                 plagiarism_score: float, suspicious_results: List[dict],
                                 metadata: dict, ai_details: dict, output_path: str):
    """Write the plagiarism/AI analysis report for one submission to *output_path*.

    The full report contains an executive summary, a risk assessment, optional
    AI-detection details, up to ten flagged sentences and a list of
    recommendations. If building or writing the full report raises, a minimal
    fallback report (student, scores, date and a notice) is written to the
    same path instead.

    Args:
        student_name: Student name shown in the summary.
        student_id: Student identifier shown in the summary.
        ai_score: AI-detection score, rendered as a percentage.
        plagiarism_score: Plagiarism score, rendered as a percentage.
        suspicious_results: Per-sentence results. Each dict is read for
            ``'is_suspicious'``; flagged entries are also read for
            ``'confidence'`` and ``'sentence'``.
        metadata: Document metadata; ``'file_type'`` and ``'word_count'`` are
            read with defaults.
        ai_details: AI-detection details; ``'confidence'``, ``'chunk_scores'``
            and ``'std_deviation'`` are read with defaults.
        output_path: Destination path of the PDF.

    Raises:
        Exception: If both the full report and the fallback report fail. The
            fallback failure is attached as the cause.
    """
    try:
        pdf = EnhancedPDF()
        pdf.add_page()

        # Filter once; the summary count and the flagged-content section both use it.
        suspicious_sentences = [r for r in suspicious_results if r['is_suspicious']]

        # Executive Summary
        pdf.add_section_header("EXECUTIVE SUMMARY")
        pdf.set_font('Arial', '', 10)

        summary_data = [
            f"Student: {student_name} ({student_id})",
            f"Document Type: {metadata.get('file_type', 'Unknown').upper()}",
            f"Word Count: {metadata.get('word_count', 0):,}",
            f"AI Detection Score: {ai_score:.1f}% (Confidence: {ai_details.get('confidence', 'N/A')})",
            f"Plagiarism Score: {plagiarism_score:.1f}%",
            f"Suspicious Sentences: {len(suspicious_sentences)}",
            f"Analysis Date: {datetime.now().strftime('%B %d, %Y at %H:%M:%S')}",
        ]

        for item in summary_data:
            pdf.safe_cell(0, 6, item, 0, 1)
        pdf.ln(5)

        # Risk Assessment
        pdf.add_section_header("RISK ASSESSMENT")
        pdf.set_font('Arial', '', 10)

        # Either score crossing its threshold is enough to raise the level.
        if ai_score > 70 or plagiarism_score > 30:
            risk_level, risk_color = "HIGH", (255, 200, 200)
        elif ai_score > 40 or plagiarism_score > 15:
            risk_level, risk_color = "MEDIUM", (255, 255, 200)
        else:
            risk_level, risk_color = "LOW", (200, 255, 200)

        pdf.set_fill_color(*risk_color)
        pdf.safe_cell(0, 10, f"Overall Risk Level: {risk_level}", 1, 1, 'C', 1)
        pdf.ln(5)

        # AI Detection Details (only when per-chunk scores are available)
        if ai_details.get('chunk_scores'):
            pdf.add_section_header("AI DETECTION ANALYSIS")
            pdf.set_font('Arial', '', 9)
            pdf.safe_cell(0, 6, f"Chunks Analyzed: {len(ai_details['chunk_scores'])}", 0, 1)
            pdf.safe_cell(0, 6, f"Score Consistency (Std Dev): {ai_details.get('std_deviation', 'N/A')}", 0, 1)
            pdf.ln(3)

        # Suspicious Content
        if suspicious_sentences:
            pdf.add_section_header("FLAGGED CONTENT")
            pdf.set_font('Arial', '', 9)

            for i, result in enumerate(suspicious_sentences[:10], 1):  # Limit to 10
                pdf.safe_cell(0, 6, f"Issue #{i} (Confidence: {result['confidence']:.1f})", 0, 1)
                pdf.add_highlighted_text(result['sentence'], (255, 230, 230), 150)

        # Recommendations
        pdf.add_section_header("RECOMMENDATIONS")
        pdf.set_font('Arial', '', 10)

        recommendations = []
        if ai_score > 50:
            recommendations.append("- Review content for AI-generated sections and rewrite in original voice")
        if plagiarism_score > 20:
            recommendations.append("- Add proper citations for referenced material")
            recommendations.append("- Paraphrase flagged sentences to ensure originality")
        if len(suspicious_sentences) > 5:
            recommendations.append("- Conduct thorough revision focusing on highlighted sections")

        # General advice is always included.
        recommendations.extend([
            "- Use plagiarism detection tools during writing process",
            "- Ensure all sources are properly attributed",
            "- Maintain academic integrity standards",
        ])

        for rec in recommendations:
            pdf.safe_multi_cell(0, 6, rec)
            pdf.ln(1)

        pdf.output(output_path)
        logger.info(f"PDF report generated successfully: {output_path}")

    except Exception as e:
        # logger.exception keeps the traceback of the primary failure.
        logger.exception(f"Error generating PDF report: {e}")
        # Create a simple fallback PDF
        try:
            simple_pdf = FPDF()
            simple_pdf.add_page()
            simple_pdf.set_font('Arial', 'B', 16)
            simple_pdf.cell(0, 10, 'AIxBI Analysis Report', 0, 1, 'C')
            simple_pdf.ln(10)
            simple_pdf.set_font('Arial', '', 12)
            simple_pdf.cell(0, 10, f'Student: {clean_text_for_pdf(student_name)}', 0, 1)
            simple_pdf.cell(0, 10, f'Student ID: {clean_text_for_pdf(student_id)}', 0, 1)
            simple_pdf.cell(0, 10, f'AI Score: {ai_score:.1f}%', 0, 1)
            simple_pdf.cell(0, 10, f'Plagiarism Score: {plagiarism_score:.1f}%', 0, 1)
            simple_pdf.cell(0, 10, f'Date: {datetime.now().strftime("%Y-%m-%d %H:%M:%S")}', 0, 1)
            simple_pdf.ln(10)
            simple_pdf.multi_cell(0, 10, 'Note: Full report could not be generated due to character encoding issues. Please contact administrator if this persists.')
            simple_pdf.output(output_path)
            logger.info(f"Fallback PDF report generated: {output_path}")
        except Exception as fallback_error:
            logger.error(f"Even fallback PDF generation failed: {fallback_error}")
            # Chain explicitly so both the fallback failure and (via its
            # context) the original failure appear in the traceback.
            raise Exception(f"PDF generation failed: {e}") from fallback_error
630
+
631
 
632
  # -----------------------------
633
  # ENHANCED APP LOGIC