cenozoic

Runtime error

App Files Community

phanerozoic committed on Feb 18

Commit

c13fff8

verified ·

1 Parent(s): 94fac5d

Update tools/visit_webpage.py

Browse files

Files changed (1) hide show

tools/visit_webpage.py +12 -40

tools/visit_webpage.py CHANGED Viewed

@@ -1,57 +1,29 @@
 from typing import Any, Optional
 from smolagents.tools import Tool
-import re
 import markdownify
-import time
 from smolagents.utils import truncate_content
-# Import Selenium modules for JavaScript rendering
-from selenium import webdriver
-from selenium.webdriver.chrome.options import Options
-from selenium.common.exceptions import WebDriverException
 class VisitWebpageTool(Tool):
     name = "visit_webpage"
-    description = (
-        "Visits a webpage at the given URL and returns its content as a markdown string, "
-        "using Selenium for JavaScript rendering if needed."
-    )
-    inputs = {'url': {'type': 'string', 'description': 'The URL of the webpage to visit.'}}
     output_type = "string"
     def forward(self, url: str) -> str:
-        # Attempt to render the page using Selenium to capture JavaScript-loaded content
         try:
-            chrome_options = Options()
-            chrome_options.add_argument("--headless")
-            chrome_options.add_argument("--disable-gpu")
-            chrome_options.add_argument("--no-sandbox")
-            # Initialize the Chrome webdriver; adjust executable_path if needed
-            driver = webdriver.Chrome(options=chrome_options)
-            driver.set_page_load_timeout(30)
-            driver.get(url)
-            # Wait a few seconds for dynamic content to load
-            time.sleep(5)
-            html = driver.page_source
-            driver.quit()
-        except WebDriverException as e:
-            # Fallback: if Selenium fails, use requests
-            try:
-                import requests
-                from requests.exceptions import RequestException
-                response = requests.get(url, timeout=20)
-                response.raise_for_status()
-                html = response.text
-            except Exception as ex:
-                return f"Error fetching the webpage with requests: {str(ex)}"
         except Exception as e:
-            return f"An unexpected error occurred during rendering: {str(e)}"
         try:
             # Convert the HTML content to Markdown
-            markdown_content = markdownify.markdownify(html, heading_style="ATX").strip()
-            # Clean up excessive newlines
-            markdown_content = re.sub(r"\n{3,}", "\n\n", markdown_content)
             return truncate_content(markdown_content, 10000)
         except Exception as e:
             return f"Error processing content: {str(e)}"

 from typing import Any, Optional
 from smolagents.tools import Tool
+import requests
 import markdownify
+import re
 from smolagents.utils import truncate_content
 class VisitWebpageTool(Tool):
     name = "visit_webpage"
+    description = "Visits a webpage at the given url and reads its content as a markdown string. Use this to browse webpages."
+    inputs = {'url': {'type': 'string', 'description': 'The url of the webpage to visit.'}}
     output_type = "string"
     def forward(self, url: str) -> str:
         try:
+            # Send a GET request to the URL with a 20-second timeout
+            response = requests.get(url, timeout=20)
+            response.raise_for_status()  # Raise an exception for bad status codes
+            html = response.text
         except Exception as e:
+            return f"Error fetching the webpage: {str(e)}"
         try:
             # Convert the HTML content to Markdown
+            markdown_content = markdownify.markdownify(html).strip()
+            # Remove multiple line breaks
+            markdown_content = re.sub(r'\n{3,}', '\n\n', markdown_content)
             return truncate_content(markdown_content, 10000)
         except Exception as e:
             return f"Error processing content: {str(e)}"