om440 committed on
Commit
5b046b1
·
verified ·
1 Parent(s): 2e1e6c9

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +15 -1
app.py CHANGED
@@ -39,7 +39,21 @@ def ocr_on_image(image):
39
  prompt2 =( "Extract all visible text from the image, including both handwritten and printed content."
40
  "Do not translate the text — preserve the original language exactly as it appears."
41
  "Return only the extracted text, with no explanation, no formatting, and no additions." )
42
- messages = [{"role": "user", "content": [{"type": "text", "text": prompt2}, {"type": "image"}]}]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
43
  texts = processor.apply_chat_template(messages, add_generation_prompt=True)
44
  inputs = processor(text=texts, images=[image], return_tensors="pt").to(device)
45
  outputs = model.generate(**inputs, max_new_tokens=250)
 
39
  prompt2 =( "Extract all visible text from the image, including both handwritten and printed content."
40
  "Do not translate the text — preserve the original language exactly as it appears."
41
  "Return only the extracted text, with no explanation, no formatting, and no additions." )
42
+ prompt3 = (
43
+ "Output ONLY the raw text as it appears in the image, nothing else."
44
+ "You have an image containing both handwritten and printed text in French and/or English, and alsos punctuation and underscores.\n"
45
+ "Your task: transcribe EXACTLY all visible text, preserving all characters, accents, punctuation, spacing, and line breaks.\n"
46
+ "Include tables and forms clearly if present.\n"
47
+ "Do NOT add any explanations, comments, summaries, or extra text.\n"
48
+ "Check the output first to not duplicate results."
49
+ "Preserve the original reading order, including line breaks and the natural layout of tables or forms. Output the text exactly as it appears visually, maintaining the structure."
50
+ "Don't indicate blank space."
51
+ "Don't separate handwritten and printex text."
52
+ "DO NOT confuse between '.' a point and '|' a boder"
53
+ "Extract only the raw text with and do not add any comment"
54
+ "Extract only the data available"
55
+ )
56
+ messages = [{"role": "user", "content": [{"type": "text", "text": prompt3}, {"type": "image"}]}]
57
  texts = processor.apply_chat_template(messages, add_generation_prompt=True)
58
  inputs = processor(text=texts, images=[image], return_tensors="pt").to(device)
59
  outputs = model.generate(**inputs, max_new_tokens=250)