Update app.py
Browse files
app.py
CHANGED
@@ -417,118 +417,217 @@ def check_duplicate_submission(document_hash: str) -> Optional[dict]:
|
|
417 |
return None
|
418 |
|
419 |
# -----------------------------
|
420 |
-
# ENHANCED PDF REPORT
|
421 |
# -----------------------------
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
422 |
class EnhancedPDF(FPDF):
|
423 |
def header(self):
|
424 |
if os.path.exists(LOGO_PATH):
|
425 |
-
|
|
|
|
|
|
|
426 |
self.set_font('Arial', 'B', 15)
|
427 |
-
|
|
|
428 |
self.ln(10)
|
429 |
|
430 |
def footer(self):
|
431 |
self.set_y(-15)
|
432 |
self.set_font('Arial', 'I', 8)
|
433 |
-
|
434 |
-
|
435 |
|
436 |
def add_section_header(self, title: str):
|
437 |
self.set_font('Arial', 'B', 12)
|
438 |
self.set_fill_color(200, 220, 255)
|
439 |
-
|
|
|
440 |
self.ln(2)
|
441 |
|
442 |
def add_highlighted_text(self, text: str, color: tuple, max_length: int = 100):
|
443 |
self.set_fill_color(*color)
|
444 |
-
#
|
445 |
-
|
446 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
447 |
self.ln(2)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
448 |
|
449 |
def generate_enhanced_pdf_report(student_name: str, student_id: str, ai_score: float,
|
450 |
plagiarism_score: float, suspicious_results: List[dict],
|
451 |
metadata: dict, ai_details: dict, output_path: str):
|
452 |
-
"""Generate comprehensive PDF report"""
|
453 |
-
pdf = EnhancedPDF()
|
454 |
-
pdf.add_page()
|
455 |
-
|
456 |
-
# Executive Summary
|
457 |
-
pdf.add_section_header("EXECUTIVE SUMMARY")
|
458 |
-
pdf.set_font('Arial', '', 10)
|
459 |
-
|
460 |
-
summary_data = [
|
461 |
-
f"Student: {student_name} ({student_id})",
|
462 |
-
f"Document Type: {metadata.get('file_type', 'Unknown').upper()}",
|
463 |
-
f"Word Count: {metadata.get('word_count', 0):,}",
|
464 |
-
f"AI Detection Score: {ai_score:.1f}% (Confidence: {ai_details.get('confidence', 'N/A')})",
|
465 |
-
f"Plagiarism Score: {plagiarism_score:.1f}%",
|
466 |
-
f"Suspicious Sentences: {sum(1 for r in suspicious_results if r['is_suspicious'])}",
|
467 |
-
f"Analysis Date: {datetime.now().strftime('%B %d, %Y at %H:%M:%S')}"
|
468 |
-
]
|
469 |
-
|
470 |
-
for item in summary_data:
|
471 |
-
pdf.cell(0, 6, item, 0, 1)
|
472 |
-
pdf.ln(5)
|
473 |
-
|
474 |
-
# Risk Assessment
|
475 |
-
pdf.add_section_header("RISK ASSESSMENT")
|
476 |
-
pdf.set_font('Arial', '', 10)
|
477 |
-
|
478 |
-
risk_level = "HIGH" if (ai_score > 70 or plagiarism_score > 30) else "MEDIUM" if (ai_score > 40 or plagiarism_score > 15) else "LOW"
|
479 |
-
risk_color = (255, 200, 200) if risk_level == "HIGH" else (255, 255, 200) if risk_level == "MEDIUM" else (200, 255, 200)
|
480 |
-
|
481 |
-
pdf.set_fill_color(*risk_color)
|
482 |
-
pdf.cell(0, 10, f"Overall Risk Level: {risk_level}", 1, 1, 'C', 1)
|
483 |
-
pdf.ln(5)
|
484 |
-
|
485 |
-
# AI Detection Details
|
486 |
-
if ai_details.get('chunk_scores'):
|
487 |
-
pdf.add_section_header("AI DETECTION ANALYSIS")
|
488 |
-
pdf.set_font('Arial', '', 9)
|
489 |
-
pdf.cell(0, 6, f"Chunks Analyzed: {len(ai_details['chunk_scores'])}", 0, 1)
|
490 |
-
pdf.cell(0, 6, f"Score Consistency (Std Dev): {ai_details.get('std_deviation', 'N/A')}", 0, 1)
|
491 |
-
pdf.ln(3)
|
492 |
-
|
493 |
-
# Suspicious Content
|
494 |
-
suspicious_sentences = [r for r in suspicious_results if r['is_suspicious']]
|
495 |
-
if suspicious_sentences:
|
496 |
-
pdf.add_section_header("FLAGGED CONTENT")
|
497 |
-
pdf.set_font('Arial', '', 9)
|
498 |
-
|
499 |
-
for i, result in enumerate(suspicious_sentences[:10], 1): # Limit to 10
|
500 |
-
pdf.cell(0, 6, f"Issue #{i} (Confidence: {result['confidence']:.1f})", 0, 1)
|
501 |
-
pdf.add_highlighted_text(result['sentence'], (255, 230, 230), 150)
|
502 |
-
|
503 |
-
# Recommendations
|
504 |
-
pdf.add_section_header("RECOMMENDATIONS")
|
505 |
-
pdf.set_font('Arial', '', 10)
|
506 |
-
|
507 |
-
recommendations = []
|
508 |
-
if ai_score > 50:
|
509 |
-
recommendations.append("• Review content for AI-generated sections and rewrite in original voice")
|
510 |
-
if plagiarism_score > 20:
|
511 |
-
recommendations.append("• Add proper citations for referenced material")
|
512 |
-
recommendations.append("• Paraphrase flagged sentences to ensure originality")
|
513 |
-
if len(suspicious_sentences) > 5:
|
514 |
-
recommendations.append("• Conduct thorough revision focusing on highlighted sections")
|
515 |
-
|
516 |
-
recommendations.extend([
|
517 |
-
"• Use plagiarism detection tools during writing process",
|
518 |
-
"• Ensure all sources are properly attributed",
|
519 |
-
"• Maintain academic integrity standards"
|
520 |
-
])
|
521 |
-
|
522 |
-
for rec in recommendations:
|
523 |
-
pdf.multi_cell(0, 6, rec)
|
524 |
-
pdf.ln(1)
|
525 |
-
|
526 |
try:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
527 |
pdf.output(output_path)
|
528 |
-
logger.info(f"PDF report generated: {output_path}")
|
|
|
529 |
except Exception as e:
|
530 |
logger.error(f"Error generating PDF report: {e}")
|
531 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
532 |
|
533 |
# -----------------------------
|
534 |
# ENHANCED APP LOGIC
|
|
|
417 |
return None
|
418 |
|
419 |
# -----------------------------
|
420 |
+
# ENHANCED PDF REPORT WITH UNICODE-SAFE TEXT HANDLING
|
421 |
# -----------------------------
|
422 |
+
# Single-character substitutions applied before the Latin-1 filter.
# The four "smart" quotes are written as escapes on purpose: tools that
# normalise typographic quotes to ASCII would otherwise turn those entries
# into no-ops (or into a syntax error for the single quotes).
_PDF_CHAR_REPLACEMENTS = str.maketrans({
    '•': '-',             # bullet point
    '–': '-',             # en dash
    '—': '-',             # em dash
    '\u201c': '"',        # left double quote
    '\u201d': '"',        # right double quote
    '\u2018': "'",        # left single quote
    '\u2019': "'",        # right single quote
    '…': '...',           # ellipsis
    '®': '(R)',           # registered trademark
    '©': '(C)',           # copyright
    '™': '(TM)',          # trademark
    '€': 'EUR',           # euro sign
    '£': 'GBP',           # pound sign
    '¥': 'JPY',           # yen sign
    '§': 'Section',       # section sign
    '¶': 'Para',          # paragraph sign
    '†': '+',             # dagger
    '‡': '++',            # double dagger
    '°': ' degrees',      # degree sign
    '±': '+/-',           # plus-minus
    '÷': '/',             # division sign
    '×': 'x',             # multiplication sign
    '≤': '<=',            # less than or equal
    '≥': '>=',            # greater than or equal
    '≠': '!=',            # not equal
    '∞': 'infinity',      # infinity
    # Greek letters
    'α': 'alpha',
    'β': 'beta',
    'γ': 'gamma',
    'δ': 'delta',
    'λ': 'lambda',
    'μ': 'mu',
    'π': 'pi',
    'σ': 'sigma',
    'Ω': 'Omega',
})


def clean_text_for_pdf(text: str) -> str:
    """Return *text* restricted to characters that can be encoded as Latin-1.

    Common typographic and mathematical symbols are first transliterated to
    ASCII equivalents (see ``_PDF_CHAR_REPLACEMENTS``).  Any character that
    still cannot be encoded as Latin-1 is then dropped, while characters that
    Latin-1 can represent (for example accented letters) are preserved.

    Args:
        text: The text to sanitise.  ``None`` yields an empty string and any
            other non-string value is converted with ``str()`` first.

    Returns:
        A string that is guaranteed to encode as Latin-1.
    """
    if text is None:
        return ""
    if not isinstance(text, str):
        text = str(text)

    # One pass over the string instead of one ``replace`` call per symbol.
    text = text.translate(_PDF_CHAR_REPLACEMENTS)

    # Drop only the characters Latin-1 cannot represent; text that is already
    # Latin-1 clean round-trips unchanged.
    return text.encode('latin-1', 'ignore').decode('latin-1')
|
469 |
+
|
470 |
class EnhancedPDF(FPDF):
    """FPDF subclass that renders the AIxBI report layout.

    Adds a branded page header, a page-number/timestamp footer, and helper
    methods that pass every string through ``clean_text_for_pdf`` before
    handing it to FPDF, falling back to a placeholder message if FPDF still
    rejects the text.
    """

    def header(self):
        """Draw the optional logo and the report title at the top of a page."""
        if os.path.exists(LOGO_PATH):
            try:
                self.image(LOGO_PATH, 10, 8, 20)
            except Exception as exc:
                # The logo is decorative: report the problem and carry on
                # rather than aborting the whole report.
                logger.warning(f"Skipping report logo {LOGO_PATH}: {exc}")
        self.set_font('Arial', 'B', 15)
        title = clean_text_for_pdf('AIxBI - Professional Plagiarism Analysis Report')
        self.cell(0, 10, title, 0, 1, 'C')
        self.ln(10)

    def footer(self):
        """Draw the centred page number and generation timestamp."""
        self.set_y(-15)
        self.set_font('Arial', 'I', 8)
        footer_text = clean_text_for_pdf(
            f'Page {self.page_no()} | Generated on {datetime.now().strftime("%Y-%m-%d %H:%M:%S")}'
        )
        self.cell(0, 10, footer_text, 0, 0, 'C')

    def add_section_header(self, title: str):
        """Write *title* as a bold, filled section heading."""
        self.set_font('Arial', 'B', 12)
        self.set_fill_color(200, 220, 255)
        clean_title = clean_text_for_pdf(title)
        self.cell(0, 10, clean_title, 0, 1, 'L', 1)
        self.ln(2)

    def add_highlighted_text(self, text: str, color: tuple, max_length: int = 100):
        """Write *text* in a bordered box filled with *color*.

        Args:
            text: Content to display; it is sanitised and, when longer than
                *max_length* characters, cut and suffixed with ``"..."``.
            color: Fill colour components, unpacked into ``set_fill_color``.
            max_length: Maximum number of characters kept before truncation.
        """
        self.set_fill_color(*color)
        clean_text = clean_text_for_pdf(text)
        if len(clean_text) > max_length:
            display_text = clean_text[:max_length] + "..."
        else:
            display_text = clean_text
        try:
            self.multi_cell(0, 8, display_text, 1, 'L', 1)
        except Exception:
            # Last resort: emit a fixed ASCII notice in place of the content.
            safe_text = "Text contains unsupported characters - please check original document"
            self.multi_cell(0, 8, safe_text, 1, 'L', 1)
        self.ln(2)

    def safe_cell(self, w, h, txt, border=0, ln=0, align='L', fill=False):
        """Call ``cell`` with sanitised text, using a placeholder on failure.

        *txt* is converted with ``str()`` and cleaned first; the remaining
        arguments are forwarded to ``cell`` unchanged.
        """
        try:
            clean_txt = clean_text_for_pdf(str(txt))
            self.cell(w, h, clean_txt, border, ln, align, fill)
        except Exception:
            # Fall back to a fixed ASCII message so the layout is preserved.
            self.cell(w, h, "[Content contains unsupported characters]", border, ln, align, fill)

    def safe_multi_cell(self, w, h, txt, border=0, align='L', fill=False):
        """Call ``multi_cell`` with sanitised text, using a placeholder on failure.

        *txt* is converted with ``str()`` and cleaned first; the remaining
        arguments are forwarded to ``multi_cell`` unchanged.
        """
        try:
            clean_txt = clean_text_for_pdf(str(txt))
            self.multi_cell(w, h, clean_txt, border, align, fill)
        except Exception:
            # Fall back to a fixed ASCII message so the layout is preserved.
            self.multi_cell(w, h, "[Content contains unsupported characters - please check source document]", border, align, fill)
|
525 |
|
526 |
def _write_fallback_pdf(student_name, student_id, ai_score, plagiarism_score, output_path):
    """Write a minimal one-page summary PDF using plain FPDF calls only."""
    simple_pdf = FPDF()
    simple_pdf.add_page()
    simple_pdf.set_font('Arial', 'B', 16)
    simple_pdf.cell(0, 10, 'AIxBI Analysis Report', 0, 1, 'C')
    simple_pdf.ln(10)
    simple_pdf.set_font('Arial', '', 12)
    simple_pdf.cell(0, 10, f'Student: {clean_text_for_pdf(student_name)}', 0, 1)
    simple_pdf.cell(0, 10, f'Student ID: {clean_text_for_pdf(student_id)}', 0, 1)
    simple_pdf.cell(0, 10, f'AI Score: {ai_score:.1f}%', 0, 1)
    simple_pdf.cell(0, 10, f'Plagiarism Score: {plagiarism_score:.1f}%', 0, 1)
    simple_pdf.cell(0, 10, f'Date: {datetime.now().strftime("%Y-%m-%d %H:%M:%S")}', 0, 1)
    simple_pdf.ln(10)
    simple_pdf.multi_cell(0, 10, 'Note: Full report could not be generated due to character encoding issues. Please contact administrator if this persists.')
    simple_pdf.output(output_path)


def generate_enhanced_pdf_report(student_name: str, student_id: str, ai_score: float,
                                 plagiarism_score: float, suspicious_results: List[dict],
                                 metadata: dict, ai_details: dict, output_path: str):
    """Generate the full analysis report as a PDF at *output_path*.

    The report contains an executive summary, a colour-coded risk level, AI
    detection details (when ``ai_details['chunk_scores']`` is non-empty), up
    to ten flagged sentences, and a list of recommendations.  If building the
    full report fails for any reason, a minimal fallback PDF is written to the
    same path instead.

    Args:
        student_name: Name shown in the summary.
        student_id: Identifier shown next to the name.
        ai_score: AI detection score, formatted as a percentage.
        plagiarism_score: Plagiarism score, formatted as a percentage.
        suspicious_results: Per-sentence results; entries whose
            ``'is_suspicious'`` value is truthy are listed, reading their
            ``'confidence'`` and ``'sentence'`` keys.
        metadata: Read for ``'file_type'`` and ``'word_count'``.
        ai_details: Read for ``'confidence'``, ``'chunk_scores'`` and
            ``'std_deviation'``.
        output_path: Destination file path for the PDF.

    Raises:
        RuntimeError: If both the full report and the fallback report fail.
    """
    try:
        pdf = EnhancedPDF()
        pdf.add_page()

        # Filter once; the result feeds the summary count, the flagged-content
        # section and the recommendations.  ``.get`` treats entries without
        # the flag as not suspicious instead of aborting the whole report.
        suspicious_sentences = [r for r in suspicious_results if r.get('is_suspicious')]

        # Executive Summary
        pdf.add_section_header("EXECUTIVE SUMMARY")
        pdf.set_font('Arial', '', 10)

        # ``or`` guards against keys that are present but set to None.
        file_type = str(metadata.get('file_type') or 'Unknown').upper()
        word_count = metadata.get('word_count') or 0

        summary_data = [
            f"Student: {student_name} ({student_id})",
            f"Document Type: {file_type}",
            f"Word Count: {word_count:,}",
            f"AI Detection Score: {ai_score:.1f}% (Confidence: {ai_details.get('confidence', 'N/A')})",
            f"Plagiarism Score: {plagiarism_score:.1f}%",
            f"Suspicious Sentences: {len(suspicious_sentences)}",
            f"Analysis Date: {datetime.now().strftime('%B %d, %Y at %H:%M:%S')}",
        ]

        for item in summary_data:
            pdf.safe_cell(0, 6, item, 0, 1)
        pdf.ln(5)

        # Risk Assessment
        pdf.add_section_header("RISK ASSESSMENT")
        pdf.set_font('Arial', '', 10)

        if ai_score > 70 or plagiarism_score > 30:
            risk_level, risk_color = "HIGH", (255, 200, 200)
        elif ai_score > 40 or plagiarism_score > 15:
            risk_level, risk_color = "MEDIUM", (255, 255, 200)
        else:
            risk_level, risk_color = "LOW", (200, 255, 200)

        pdf.set_fill_color(*risk_color)
        pdf.safe_cell(0, 10, f"Overall Risk Level: {risk_level}", 1, 1, 'C', 1)
        pdf.ln(5)

        # AI Detection Details
        if ai_details.get('chunk_scores'):
            pdf.add_section_header("AI DETECTION ANALYSIS")
            pdf.set_font('Arial', '', 9)
            pdf.safe_cell(0, 6, f"Chunks Analyzed: {len(ai_details['chunk_scores'])}", 0, 1)
            pdf.safe_cell(0, 6, f"Score Consistency (Std Dev): {ai_details.get('std_deviation', 'N/A')}", 0, 1)
            pdf.ln(3)

        # Suspicious Content
        if suspicious_sentences:
            pdf.add_section_header("FLAGGED CONTENT")
            pdf.set_font('Arial', '', 9)

            for i, result in enumerate(suspicious_sentences[:10], 1):  # Limit to 10
                pdf.safe_cell(0, 6, f"Issue #{i} (Confidence: {result['confidence']:.1f})", 0, 1)
                pdf.add_highlighted_text(result['sentence'], (255, 230, 230), 150)

        # Recommendations
        pdf.add_section_header("RECOMMENDATIONS")
        pdf.set_font('Arial', '', 10)

        recommendations = []
        if ai_score > 50:
            recommendations.append("- Review content for AI-generated sections and rewrite in original voice")
        if plagiarism_score > 20:
            recommendations.append("- Add proper citations for referenced material")
            recommendations.append("- Paraphrase flagged sentences to ensure originality")
        if len(suspicious_sentences) > 5:
            recommendations.append("- Conduct thorough revision focusing on highlighted sections")

        recommendations.extend([
            "- Use plagiarism detection tools during writing process",
            "- Ensure all sources are properly attributed",
            "- Maintain academic integrity standards",
        ])

        for rec in recommendations:
            pdf.safe_multi_cell(0, 6, rec)
            pdf.ln(1)

        pdf.output(output_path)
        logger.info(f"PDF report generated successfully: {output_path}")

    except Exception as e:
        # ``exception`` keeps the traceback so the real cause is diagnosable.
        logger.exception(f"Error generating PDF report: {e}")
        # Create a simple fallback PDF
        try:
            _write_fallback_pdf(student_name, student_id, ai_score, plagiarism_score, output_path)
            logger.info(f"Fallback PDF report generated: {output_path}")
        except Exception as fallback_error:
            logger.error(f"Even fallback PDF generation failed: {fallback_error}")
            # The message reports the original failure; the fallback failure
            # is attached as the explicit cause.
            raise RuntimeError(f"PDF generation failed: {e}") from fallback_error
|
630 |
+
|
631 |
|
632 |
# -----------------------------
|
633 |
# ENHANCED APP LOGIC
|