cenozoic

Runtime error

App Files Files Community

phanerozoic committed on Feb 18

Commit

d89e740

verified ·

1 Parent(s): 9b825c2

Update tools/visit_webpage.py

Browse files

Files changed (1) hide show

tools/visit_webpage.py +41 -27

tools/visit_webpage.py CHANGED Viewed

@@ -1,46 +1,60 @@
 from typing import Any, Optional
 from smolagents.tools import Tool
-import requests
-import markdownify
-import smolagents
 import re
 class VisitWebpageTool(Tool):
     # Pre-change side of the diff: lines with a leading `-` were removed by this commit.
     # This version fetches the page with `requests` and converts the HTML to Markdown.
     name = "visit_webpage"
-    description = "Visits a webpage at the given url and reads its content as a markdown string. Use this to browse webpages."
-    inputs = {'url': {'type': 'string', 'description': 'The url of the webpage to visit.'}}
     output_type = "string"
     # Fetch `url` and return its content as Markdown. Request failures are
     # reported as returned message strings; a missing dependency raises ImportError.
     def forward(self, url: str) -> str:
         try:
             # Imports are deferred so a missing package produces the install hint below.
-            import requests
-            from markdownify import markdownify
-            from requests.exceptions import RequestException
-            from smolagents.utils import truncate_content
-        except ImportError as e:
-            raise ImportError(
-                "You must install packages `markdownify` and `requests` to run this tool: for instance run `pip install markdownify requests`."
-            ) from e
         try:
-            # Send a GET request to the URL with a 20-second timeout
-            response = requests.get(url, timeout=20)
-            response.raise_for_status()  # Raise an exception for bad status codes
             # Convert the HTML content to Markdown
-            markdown_content = markdownify(response.text).strip()
-            # Remove multiple line breaks
             markdown_content = re.sub(r"\n{3,}", "\n\n", markdown_content)
             # NOTE(review): 10000 is presumably a character limit; confirm against truncate_content.
             return truncate_content(markdown_content, 10000)
-        except requests.exceptions.Timeout:
-            return "The request timed out. Please try again later or check the URL."
-        except RequestException as e:
-            return f"Error fetching the webpage: {str(e)}"
         except Exception as e:
-            return f"An unexpected error occurred: {str(e)}"
     # Sets `is_initialized` to False; the positional and keyword arguments are accepted but unused.
     def __init__(self, *args, **kwargs):
         self.is_initialized = False

 from typing import Any, Optional
 from smolagents.tools import Tool
 import re
+import markdownify
+import time
+from smolagents.utils import truncate_content
+# Import Selenium modules for JavaScript rendering
+from selenium import webdriver
+from selenium.webdriver.chrome.options import Options
+from selenium.common.exceptions import WebDriverException
 class VisitWebpageTool(Tool):
     """Tool that loads a web page and returns its content as Markdown.

     The page is first rendered in headless Chrome through Selenium so that
     JavaScript-generated content is captured. If Selenium fails with a
     ``WebDriverException``, the raw HTML is fetched with ``requests`` instead.
     """

     name = "visit_webpage"
     description = (
         "Visits a webpage at the given URL and returns its content as a markdown string, "
         "using Selenium for JavaScript rendering if needed."
     )
     inputs = {'url': {'type': 'string', 'description': 'The URL of the webpage to visit.'}}
     output_type = "string"

     def forward(self, url: str) -> str:
         """Return the Markdown rendering of the page at ``url``.

         Args:
             url: Address of the page to load.

         Returns:
             The page content converted to Markdown, with runs of three or
             more newlines collapsed and the result passed through
             ``truncate_content(..., 10000)``. On failure a human-readable
             error message is returned instead; this method does not raise
             for fetch or conversion errors.
         """
         html = None
         driver = None
         try:
             chrome_options = Options()
             chrome_options.add_argument("--headless")
             chrome_options.add_argument("--disable-gpu")
             chrome_options.add_argument("--no-sandbox")
             # Initialize the Chrome webdriver; adjust executable_path if needed
             driver = webdriver.Chrome(options=chrome_options)
             driver.set_page_load_timeout(30)
             driver.get(url)
             # Wait a few seconds for dynamic content to load
             time.sleep(5)
             html = driver.page_source
         except WebDriverException:
             # Selenium could not render the page; fall through to the
             # plain HTTP fetch below.
             html = None
         except Exception as e:
             return f"An unexpected error occurred during rendering: {str(e)}"
         finally:
             # Always shut the browser down, including when loading the page
             # failed; otherwise every failed visit leaks a Chrome process.
             if driver is not None:
                 try:
                     driver.quit()
                 except Exception:
                     # Best-effort cleanup: a failing quit() must not mask
                     # the HTML already captured or the error being returned.
                     pass

         if html is None:
             # Fallback: if Selenium fails, use requests
             try:
                 import requests
                 response = requests.get(url, timeout=20)
                 response.raise_for_status()
                 html = response.text
             except Exception as ex:
                 return f"Error fetching the webpage with requests: {str(ex)}"

         try:
             # Convert the HTML content to Markdown
             markdown_content = markdownify.markdownify(html, heading_style="ATX").strip()
             # Clean up excessive newlines
             markdown_content = re.sub(r"\n{3,}", "\n\n", markdown_content)
             return truncate_content(markdown_content, 10000)
         except Exception as e:
             return f"Error processing content: {str(e)}"

     def __init__(self, *args, **kwargs):
         # Arguments are accepted for compatibility but unused; only the
         # initialization flag is set here.
         self.is_initialized = False