Spaces:
Running
Running
Commit
·
065043f
1
Parent(s):
78227e9
added few changes and folder
Browse files
app.py
CHANGED
@@ -147,6 +147,9 @@ def process_folders(folder_paths, progress=gr.Progress()):
|
|
147 |
all_json_paths = []
|
148 |
all_errors = []
|
149 |
|
|
|
|
|
|
|
150 |
for i, folder_path in enumerate(folder_paths):
|
151 |
progress(i/len(folder_paths), desc=f"Processing folder {i+1}/{len(folder_paths)}")
|
152 |
json_data, json_path, errors = process_folder(folder_path, progress)
|
@@ -319,20 +322,20 @@ Respond with ONLY the CSV data (including header ONLY in the first chunk).
|
|
319 |
)
|
320 |
|
321 |
# Gradio interface
|
322 |
-
with gr.Blocks(title="Hindi
|
323 |
-
gr.Markdown("## π Hindi
|
324 |
gr.Markdown("Process folders of PDFs to extract text and convert to structured CSV using LLM")
|
325 |
|
326 |
with gr.Tab("PDF Processing"):
|
327 |
with gr.Row():
|
328 |
with gr.Column():
|
329 |
folder_input = gr.File(
|
330 |
-
label="Upload Folder(s) (
|
331 |
-
file_count="
|
332 |
-
file_types=["
|
333 |
-
max_files=5
|
334 |
)
|
335 |
pdf_submit = gr.Button("Process PDF Folders")
|
|
|
336 |
|
337 |
with gr.Column():
|
338 |
json_display = gr.JSON(label="Extracted JSON Data")
|
@@ -356,9 +359,24 @@ with gr.Blocks(title="Hindi Electrol Processing") as demo:
|
|
356 |
api_debug = gr.Textbox(label="Debug Information", visible=False)
|
357 |
api_status = gr.Textbox(label="API Status", visible=False)
|
358 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
359 |
# PDF Processing
|
360 |
pdf_submit.click(
|
361 |
-
|
362 |
inputs=[folder_input],
|
363 |
outputs=[json_display, json_download, pdf_errors]
|
364 |
)
|
|
|
147 |
all_json_paths = []
|
148 |
all_errors = []
|
149 |
|
150 |
+
# Ensure we don't process more than 5 folders
|
151 |
+
folder_paths = folder_paths[:5]
|
152 |
+
|
153 |
for i, folder_path in enumerate(folder_paths):
|
154 |
progress(i/len(folder_paths), desc=f"Processing folder {i+1}/{len(folder_paths)}")
|
155 |
json_data, json_path, errors = process_folder(folder_path, progress)
|
|
|
322 |
)
|
323 |
|
324 |
# Gradio interface
|
325 |
+
with gr.Blocks(title="Hindi PDF Folder Processor with LLM API") as demo:
|
326 |
+
gr.Markdown("## π Hindi PDF Folder Processor with LLM API")
|
327 |
gr.Markdown("Process folders of PDFs to extract text and convert to structured CSV using LLM")
|
328 |
|
329 |
with gr.Tab("PDF Processing"):
|
330 |
with gr.Row():
|
331 |
with gr.Column():
|
332 |
folder_input = gr.File(
|
333 |
+
label="Upload Folder(s) (Select multiple)",
|
334 |
+
file_count="multiple",
|
335 |
+
file_types=[".pdf"]
|
|
|
336 |
)
|
337 |
pdf_submit = gr.Button("Process PDF Folders")
|
338 |
+
gr.Markdown("Note: Please select multiple folders (up to 5) containing PDFs")
|
339 |
|
340 |
with gr.Column():
|
341 |
json_display = gr.JSON(label="Extracted JSON Data")
|
|
|
359 |
api_debug = gr.Textbox(label="Debug Information", visible=False)
|
360 |
api_status = gr.Textbox(label="API Status", visible=False)
|
361 |
|
362 |
+
def process_selected_folders(files):
    """Pick the uploaded directory paths (at most 5) and process them.

    Args:
        files: Entries delivered by the upload component wired to this
            handler. Each entry is either an object exposing its path as
            ``.name`` or a plain path string. ``None`` or an empty list
            means nothing was uploaded.

    Returns:
        The result of ``process_folders`` for the selected folders, or
        ``(None, None, <message>)`` when no entry is a directory.
    """
    max_folders = 5
    folder_paths = []

    # The component value is None when the button is clicked before anything
    # has been uploaded; treat that as an empty selection rather than letting
    # the loop raise TypeError.
    for file_info in files or []:
        # Accept both file wrappers (path stored in ``.name``) and bare paths.
        file_path = getattr(file_info, "name", file_info)
        if not isinstance(file_path, (str, os.PathLike)):
            continue
        # NOTE(review): the upload widget is restricted to ".pdf" files, so
        # entries may never be directories and this filter may reject every
        # upload -- confirm how folders are expected to arrive here.
        if os.path.isdir(file_path):
            folder_paths.append(file_path)
            if len(folder_paths) >= max_folders:
                break

    if not folder_paths:
        return None, None, "No valid folders selected or found in the upload"

    return process_folders(folder_paths)
|
376 |
+
|
377 |
# PDF Processing
|
378 |
pdf_submit.click(
|
379 |
+
process_selected_folders,
|
380 |
inputs=[folder_input],
|
381 |
outputs=[json_display, json_download, pdf_errors]
|
382 |
)
|