Update app.py
Browse files
app.py
CHANGED
@@ -39,7 +39,21 @@ def ocr_on_image(image):
|
|
39 |
# Short OCR instruction prompt: extract all visible text verbatim, without
# translation or commentary.
# Python joins adjacent string literals with nothing in between, so each
# sentence carries its own trailing space to avoid fusing into the next one.
prompt2 = (
    "Extract all visible text from the image, including both handwritten and printed content. "
    "Do not translate the text — preserve the original language exactly as it appears. "
    "Return only the extracted text, with no explanation, no formatting, and no additions."
)
|
42 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
43 |
texts = processor.apply_chat_template(messages, add_generation_prompt=True)
|
44 |
inputs = processor(text=texts, images=[image], return_tensors="pt").to(device)
|
45 |
outputs = model.generate(**inputs, max_new_tokens=250)
|
|
|
39 |
# Short OCR instruction prompt: extract all visible text verbatim, without
# translation or commentary.
# Python joins adjacent string literals with nothing in between, so each
# sentence carries its own trailing space to avoid fusing into the next one.
prompt2 = (
    "Extract all visible text from the image, including both handwritten and printed content. "
    "Do not translate the text — preserve the original language exactly as it appears. "
    "Return only the extracted text, with no explanation, no formatting, and no additions."
)
|
42 |
+
# Detailed OCR instruction prompt: asks for a verbatim transcription of mixed
# handwritten/printed French and/or English text, preserving layout, with no
# commentary added.
# Python concatenates adjacent string literals with no separator, so every
# instruction except the last ends in "\n"; without it consecutive sentences
# fuse together (e.g. "nothing else.You have an image ...").
prompt3 = (
    "Output ONLY the raw text as it appears in the image, nothing else.\n"
    "You have an image containing both handwritten and printed text in French and/or English, and also punctuation and underscores.\n"
    "Your task: transcribe EXACTLY all visible text, preserving all characters, accents, punctuation, spacing, and line breaks.\n"
    "Include tables and forms clearly if present.\n"
    "Do NOT add any explanations, comments, summaries, or extra text.\n"
    "Check the output first to not duplicate results.\n"
    "Preserve the original reading order, including line breaks and the natural layout of tables or forms. Output the text exactly as it appears visually, maintaining the structure.\n"
    "Don't indicate blank space.\n"
    "Don't separate handwritten and printed text.\n"
    "DO NOT confuse between '.' a point and '|' a border\n"
    "Extract only the raw text and do not add any comment\n"
    "Extract only the data available"
)
|
56 |
+
messages = [{"role": "user", "content": [{"type": "text", "text": prompt3}, {"type": "image"}]}]
|
57 |
texts = processor.apply_chat_template(messages, add_generation_prompt=True)
|
58 |
inputs = processor(text=texts, images=[image], return_tensors="pt").to(device)
|
59 |
outputs = model.generate(**inputs, max_new_tokens=250)
|