shivam0109 committed on
Commit
065043f
·
1 Parent(s): 78227e9

added few changes and folder

Browse files
Files changed (1) hide show
  1. app.py +25 -7
app.py CHANGED
@@ -147,6 +147,9 @@ def process_folders(folder_paths, progress=gr.Progress()):
147
  all_json_paths = []
148
  all_errors = []
149
 
 
 
 
150
  for i, folder_path in enumerate(folder_paths):
151
  progress(i/len(folder_paths), desc=f"Processing folder {i+1}/{len(folder_paths)}")
152
  json_data, json_path, errors = process_folder(folder_path, progress)
@@ -319,20 +322,20 @@ Respond with ONLY the CSV data (including header ONLY in the first chunk).
319
  )
320
 
321
  # Gradio interface
322
- with gr.Blocks(title="Hindi Electrol Processing") as demo:
323
- gr.Markdown("## 📄 Hindi Electrol PDF Folder Processor")
324
  gr.Markdown("Process folders of PDFs to extract text and convert to structured CSV using LLM")
325
 
326
  with gr.Tab("PDF Processing"):
327
  with gr.Row():
328
  with gr.Column():
329
  folder_input = gr.File(
330
- label="Upload Folder(s) (Up to 5)",
331
- file_count="directory",
332
- file_types=["folder"],
333
- max_files=5
334
  )
335
  pdf_submit = gr.Button("Process PDF Folders")
 
336
 
337
  with gr.Column():
338
  json_display = gr.JSON(label="Extracted JSON Data")
@@ -356,9 +359,24 @@ with gr.Blocks(title="Hindi Electrol Processing") as demo:
356
  api_debug = gr.Textbox(label="Debug Information", visible=False)
357
  api_status = gr.Textbox(label="API Status", visible=False)
358
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
359
  # PDF Processing
360
  pdf_submit.click(
361
- process_folders,
362
  inputs=[folder_input],
363
  outputs=[json_display, json_download, pdf_errors]
364
  )
 
147
  all_json_paths = []
148
  all_errors = []
149
 
150
+ # Ensure we don't process more than 5 folders
151
+ folder_paths = folder_paths[:5]
152
+
153
  for i, folder_path in enumerate(folder_paths):
154
  progress(i/len(folder_paths), desc=f"Processing folder {i+1}/{len(folder_paths)}")
155
  json_data, json_path, errors = process_folder(folder_path, progress)
 
322
  )
323
 
324
  # Gradio interface
325
+ with gr.Blocks(title="Hindi PDF Folder Processor with LLM API") as demo:
326
+ gr.Markdown("## 📄 Hindi PDF Folder Processor with LLM API")
327
  gr.Markdown("Process folders of PDFs to extract text and convert to structured CSV using LLM")
328
 
329
  with gr.Tab("PDF Processing"):
330
  with gr.Row():
331
  with gr.Column():
332
  folder_input = gr.File(
333
+ label="Upload Folder(s) (Select multiple)",
334
+ file_count="multiple",
335
+ file_types=[".pdf"]
 
336
  )
337
  pdf_submit = gr.Button("Process PDF Folders")
338
+ gr.Markdown("Note: Please select multiple folders (up to 5) containing PDFs")
339
 
340
  with gr.Column():
341
  json_display = gr.JSON(label="Extracted JSON Data")
 
359
  api_debug = gr.Textbox(label="Debug Information", visible=False)
360
  api_status = gr.Textbox(label="API Status", visible=False)
361
 
362
+ def process_selected_folders(files):
363
+ # Filter out non-directory files and limit to 5 folders
364
+ folder_paths = []
365
+ for file_info in files:
366
+ file_path = file_info.name
367
+ if os.path.isdir(file_path):
368
+ folder_paths.append(file_path)
369
+ if len(folder_paths) >= 5:
370
+ break
371
+
372
+ if not folder_paths:
373
+ return None, None, "No valid folders selected or found in the upload"
374
+
375
+ return process_folders(folder_paths)
376
+
377
  # PDF Processing
378
  pdf_submit.click(
379
+ process_selected_folders,
380
  inputs=[folder_input],
381
  outputs=[json_display, json_download, pdf_errors]
382
  )