awacke1 committed
Commit d1eb676 · verified · 1 Parent(s): 84fbea4

Update app.py.v2

Files changed (1)
  1. app.py.v2 +37 -47
app.py.v2 CHANGED
@@ -1,3 +1,4 @@
  import gradio as gr
  import pandas as pd
  import requests
@@ -10,6 +11,7 @@ import polars as pl
  import warnings
  import traceback
  import json

  # 🤫 Let's ignore those pesky warnings, shall we?
  warnings.filterwarnings("ignore")
@@ -18,7 +20,7 @@ warnings.filterwarnings("ignore")
  DATASET_CONFIG = {
  "caselaw": {
  "name": "common-pile/caselaw_access_project", "emoji": "⚖️",
- "methods": ["💨 API (requests)", "� Dask", "🥐 Croissant"], "is_public": True,
  },
  "prompts": {
  "name": "fka/awesome-chatgpt-prompts", "emoji": "🤖",
@@ -44,31 +46,41 @@ def get_auth_headers():
  token = get_token()
  return {"Authorization": f"Bearer {token}"} if token else {}

  def dataframe_to_outputs(df: pd.DataFrame):
  if df.empty:
  return "No results found. 🤷", None, None, "No results to copy."
  df_str = df.astype(str)
  markdown_output = df_str.to_markdown(index=False)
- csv_buffer = io.StringIO()
- df.to_csv(csv_buffer, index=False)
- csv_buffer.seek(0)
- excel_buffer = io.BytesIO()
- df.to_excel(excel_buffer, index=False, engine='openpyxl')
- excel_buffer.seek(0)
  tab_delimited_output = df.to_csv(sep='\t', index=False)
  return (
  markdown_output,
- gr.File.from_bytes(csv_buffer.getvalue(), "results.csv"),
- gr.File.from_bytes(excel_buffer.getvalue(), "results.xlsx"),
  tab_delimited_output,
  )

- # --- ✨ NEW Enhanced Error Handler with Debug Logging ---
  def handle_error(e: Exception, request=None, response=None):
  """
  😱 Oh no! An error! This function now creates a detailed debug log.
  """
- # Basic error info
  error_message = f"🚨 An error occurred: {str(e)}\n"
  auth_tip = "🔑 For gated datasets, did you log in? Try `huggingface-cli login` in your terminal."
  full_trace = traceback.format_exc()
@@ -76,41 +88,20 @@ def handle_error(e: Exception, request=None, response=None):
  if "401" in str(e) or "Gated" in str(e):
  error_message += auth_tip

- # Detailed debug log
- debug_log = f"""
- --- 🐞 DEBUG LOG ---
- Traceback:
- {full_trace}
-
- Exception Type: {type(e).__name__}
- Exception Details: {e}
- """
  if request:
- debug_log += f"""
- --- REQUEST ---
- Method: {request.method}
- URL: {request.url}
- Headers: {json.dumps(dict(request.headers), indent=2)}
- """
  if response is not None:
  try:
- response_json = response.json()
- response_text = json.dumps(response_json, indent=2)
  except json.JSONDecodeError:
  response_text = response.text
- debug_log += f"""
- --- RESPONSE ---
- Status Code: {response.status_code}
- Headers: {json.dumps(dict(response.headers), indent=2)}
- Content:
- {response_text}
- """

- # Return a tuple of 9 to match the outputs
  return (
  pd.DataFrame(), gr.Gallery(None), f"### 🚨 Error\nAn error occurred. See the debug log below for details.",
  "", None, None, "", f"```python\n# 🚨 Error during execution:\n# {e}\n```",
- gr.Code(value=debug_log, visible=True) # Make the debug log visible
  )

  def search_dataframe(df: pd.DataFrame, query: str):
@@ -129,10 +120,8 @@ def fetch_data(dataset_key: str, access_method: str, query: str):
  """
  🚀 Main mission control. Always yields a tuple of 9 values to match the UI components.
  """
- # Initialize the state for all 9 output components
  outputs = [pd.DataFrame(), None, "🏁 Ready.", "", None, None, "", "", gr.Code(visible=False)]
-
- req, res = None, None # To hold request/response for debugging
  try:
  config = DATASET_CONFIG[dataset_key]
  repo_id = config["name"]
@@ -158,8 +147,7 @@ def fetch_data(dataset_key: str, access_method: str, query: str):

  res = requests.get(url, headers=headers)
  req = res.request
- res.raise_for_status() # Will raise an exception for 4xx/5xx errors
-
  data = res.json()

  if not data.get('rows'):
@@ -167,7 +155,11 @@ def fetch_data(dataset_key: str, access_method: str, query: str):
  yield tuple(outputs)
  break

- page_df = pd.json_normalize(data['rows'], record_path='row')
  found_in_page = search_dataframe(page_df, query)

  if not found_in_page.empty:
@@ -189,7 +181,6 @@ def fetch_data(dataset_key: str, access_method: str, query: str):
  yield tuple(outputs)

  df = pd.DataFrame()
- # Simplified for brevity - expand if needed
  if "Pandas" in access_method:
  file_path = f"hf://datasets/{repo_id}/"
  if repo_id == "fka/awesome-chatgpt-prompts": file_path += "prompts.csv"; df = pd.read_csv(file_path)
@@ -248,7 +239,6 @@ def create_dataset_tab(dataset_key: str):

  code_output = gr.Code(label="💻 Python Code Snippet", language="python")

- # --- ✨ NEW Debug Log UI Component (language parameter removed) ---
  debug_log_output = gr.Code(label="🐞 Debug Log", visible=False)

  fetch_button.click(
@@ -257,7 +247,7 @@ def create_dataset_tab(dataset_key: str):
  outputs=[
  df_output, gallery_output, status_output, markdown_output,
  csv_output, xlsx_output, copy_output, code_output,
- debug_log_output # Add the new output here
  ]
  )

@@ -273,4 +263,4 @@ with gr.Blocks(theme=gr.themes.Soft(), title="Hugging Face Dataset Explorer") as
  create_dataset_tab(key)

  if __name__ == "__main__":
- demo.launch(debug=True)

+ # app.py
  import gradio as gr
  import pandas as pd
  import requests

  import warnings
  import traceback
  import json
+ import tempfile # Added for creating temporary files

  # 🤫 Let's ignore those pesky warnings, shall we?
  warnings.filterwarnings("ignore")
 
  DATASET_CONFIG = {
  "caselaw": {
  "name": "common-pile/caselaw_access_project", "emoji": "⚖️",
+ "methods": ["💨 API (requests)", "🧊 Dask", "🥐 Croissant"], "is_public": True,
  },
  "prompts": {
  "name": "fka/awesome-chatgpt-prompts", "emoji": "🤖",
 
  token = get_token()
  return {"Authorization": f"Bearer {token}"} if token else {}

+ # --- ✨ FIXED: dataframe_to_outputs to use temporary files ---
  def dataframe_to_outputs(df: pd.DataFrame):
+ """
+ 📜 Takes a DataFrame and transforms it into various formats.
+ Now uses temporary files for maximum Gradio compatibility.
+ """
  if df.empty:
  return "No results found. 🤷", None, None, "No results to copy."
+
  df_str = df.astype(str)
  markdown_output = df_str.to_markdown(index=False)
+
+ # Create a temporary CSV file
+ with tempfile.NamedTemporaryFile(mode='w+', delete=False, suffix='.csv', encoding='utf-8') as tmp_csv:
+ df.to_csv(tmp_csv.name, index=False)
+ csv_path = tmp_csv.name
+
+ # Create a temporary XLSX file
+ with tempfile.NamedTemporaryFile(delete=False, suffix='.xlsx') as tmp_xlsx:
+ df.to_excel(tmp_xlsx.name, index=False, engine='openpyxl')
+ xlsx_path = tmp_xlsx.name
+
  tab_delimited_output = df.to_csv(sep='\t', index=False)
+
  return (
  markdown_output,
+ csv_path,
+ xlsx_path,
  tab_delimited_output,
  )

  def handle_error(e: Exception, request=None, response=None):
  """
  😱 Oh no! An error! This function now creates a detailed debug log.
  """
  error_message = f"🚨 An error occurred: {str(e)}\n"
  auth_tip = "🔑 For gated datasets, did you log in? Try `huggingface-cli login` in your terminal."
  full_trace = traceback.format_exc()

  if "401" in str(e) or "Gated" in str(e):
  error_message += auth_tip

+ debug_log = f"""--- 🐞 DEBUG LOG ---\nTraceback:\n{full_trace}\n\nException Type: {type(e).__name__}\nException Details: {e}\n"""
  if request:
+ debug_log += f"""\n--- REQUEST ---\nMethod: {request.method}\nURL: {request.url}\nHeaders: {json.dumps(dict(request.headers), indent=2)}\n"""
  if response is not None:
  try:
+ response_text = json.dumps(response.json(), indent=2)
  except json.JSONDecodeError:
  response_text = response.text
+ debug_log += f"""\n--- RESPONSE ---\nStatus Code: {response.status_code}\nHeaders: {json.dumps(dict(response.headers), indent=2)}\nContent:\n{response_text}\n"""

  return (
  pd.DataFrame(), gr.Gallery(None), f"### 🚨 Error\nAn error occurred. See the debug log below for details.",
  "", None, None, "", f"```python\n# 🚨 Error during execution:\n# {e}\n```",
+ gr.Code(value=debug_log, visible=True)
  )

  def search_dataframe(df: pd.DataFrame, query: str):
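The updated dataframe_to_outputs above swaps the in-memory StringIO/BytesIO buffers for temporary files and returns plain file paths. As a quick reference, here is a minimal standalone sketch of that pattern, assuming pandas and openpyxl are installed; the sample DataFrame and the export_dataframe name are illustrative, not part of the app:

import tempfile
import pandas as pd

def export_dataframe(df: pd.DataFrame):
    """Write a DataFrame to temporary CSV/XLSX files and return their paths;
    a Gradio File component can serve a plain file path returned by a handler."""
    # delete=False keeps the files on disk so they can still be downloaded
    # after the function returns.
    with tempfile.NamedTemporaryFile(mode='w+', delete=False, suffix='.csv', encoding='utf-8') as tmp_csv:
        df.to_csv(tmp_csv.name, index=False)
        csv_path = tmp_csv.name
    with tempfile.NamedTemporaryFile(delete=False, suffix='.xlsx') as tmp_xlsx:
        df.to_excel(tmp_xlsx.name, index=False, engine='openpyxl')  # requires openpyxl
        xlsx_path = tmp_xlsx.name
    return csv_path, xlsx_path

sample = pd.DataFrame({"act": ["Linux Terminal"], "prompt": ["I want you to act as a Linux terminal."]})
print(export_dataframe(sample))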
 
  """
  🚀 Main mission control. Always yields a tuple of 9 values to match the UI components.
  """
  outputs = [pd.DataFrame(), None, "🏁 Ready.", "", None, None, "", "", gr.Code(visible=False)]
+ req, res = None, None
  try:
  config = DATASET_CONFIG[dataset_key]
  repo_id = config["name"]
 

  res = requests.get(url, headers=headers)
  req = res.request
+ res.raise_for_status()
  data = res.json()

  if not data.get('rows'):
 
  yield tuple(outputs)
  break

+ # --- ✨ FIXED: JSON processing logic ---
+ # Extract the actual data from the 'row' key of each item in the list
+ rows_data = [item['row'] for item in data['rows']]
+ page_df = pd.json_normalize(rows_data)
+
  found_in_page = search_dataframe(page_df, query)

  if not found_in_page.empty:
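The reworked row handling above unwraps each item's 'row' field before flattening. A small sketch of that step, using a made-up payload shaped the way the code expects the rows response to look (the field names are illustrative):

import pandas as pd

# Made-up payload mirroring the shape the code above assumes: a 'rows' list
# whose items wrap the actual record under a 'row' key.
data = {
    "rows": [
        {"row_idx": 0, "row": {"act": "Linux Terminal", "prompt": "Act as a Linux terminal."}},
        {"row_idx": 1, "row": {"act": "Translator", "prompt": "Act as an English translator."}},
    ]
}

rows_data = [item["row"] for item in data["rows"]]  # strip the 'row' wrapper
page_df = pd.json_normalize(rows_data)              # nested dicts become dotted column names
print(page_df.columns.tolist())                     # ['act', 'prompt']
print(len(page_df))                                 # 2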
 
  yield tuple(outputs)

  df = pd.DataFrame()
  if "Pandas" in access_method:
  file_path = f"hf://datasets/{repo_id}/"
  if repo_id == "fka/awesome-chatgpt-prompts": file_path += "prompts.csv"; df = pd.read_csv(file_path)
 

  code_output = gr.Code(label="💻 Python Code Snippet", language="python")

  debug_log_output = gr.Code(label="🐞 Debug Log", visible=False)

  fetch_button.click(
 
  outputs=[
  df_output, gallery_output, status_output, markdown_output,
  csv_output, xlsx_output, copy_output, code_output,
+ debug_log_output
  ]
  )

 
  create_dataset_tab(key)

  if __name__ == "__main__":
+ demo.launch(debug=True)
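Because the handlers always yield one value per output component and debug_log_output is now appended to the click outputs, the wiring follows the usual Blocks pattern. A minimal sketch with stand-in component names and only three outputs instead of the app's nine:

import gradio as gr
import pandas as pd

# The generator yields one value per component listed in `outputs`, in order;
# yielding a gr.Code(...) instance updates that component (here, revealing it),
# mirroring what handle_error does with the debug log.
def fake_fetch(query):
    yield pd.DataFrame({"query": [query]}), "🏁 Done.", gr.Code(value="(no errors)", visible=True)

with gr.Blocks() as demo:
    query_box = gr.Textbox(label="Query")
    fetch_button = gr.Button("Fetch")
    df_output = gr.Dataframe()
    status_output = gr.Markdown()
    debug_log_output = gr.Code(label="🐞 Debug Log", visible=False)
    fetch_button.click(fake_fetch, inputs=[query_box], outputs=[df_output, status_output, debug_log_output])

if __name__ == "__main__":
    demo.launch()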