feat: Add ReadWebPageRaw tool to fetch raw content from URLs with error handling

This commit is contained in:
Slipstream 2025-06-01 17:21:35 -06:00
parent f6038cbc37
commit cb7fd42b75
Signed by: slipstream
GPG Key ID: 13E498CE010AC6FD

View File

@@ -3,6 +3,7 @@ from discord.ext import commands
import re
import os
import asyncio
import aiohttp
import subprocess
import json
import base64
@@ -251,7 +252,16 @@ IMPORTANT: Do NOT wrap your XML tool calls in markdown code blocks (e.g., ```xml
```
(System will report if path is writable/creatable or any permission issues.)
17. **TaskComplete:** Signals that the current multi-step task is considered complete by the AI.
17. **ReadWebPageRaw:** Reads the raw text content from a given URL.
* Ideal for fetching raw code or data from services like GitHub raw user content.
```xml
<ReadWebPageRaw>
<url>https://example.com/raw/file.txt</url>
</ReadWebPageRaw>
```
(System will provide page content or error in ToolResponse)
18. **TaskComplete:** Signals that the current multi-step task is considered complete by the AI.
```xml
<TaskComplete>
<message>A brief summary of what was accomplished or the final status.</message>
@@ -836,6 +846,13 @@ class AICodeAgentCog(commands.Cog):
tool_output = await self._execute_tool_dry_run_write_file(file_path)
return "TOOL_OUTPUT", f"ToolResponse: DryRunWriteFile\nPath: {file_path}\n---\n{tool_output}"
elif tool_name == "ReadWebPageRaw":
url_param = parameters.get("url")
if not url_param:
return "TOOL_OUTPUT", "ToolResponse: Error\n---\nReadWebPageRaw: Missing 'url' parameter."
tool_output = await self._execute_tool_read_web_page_raw(url_param)
return "TOOL_OUTPUT", f"ToolResponse: ReadWebPageRaw\nURL: {url_param}\n---\n{tool_output}"
else:
# Unknown tool name found in XML
return "TOOL_OUTPUT", f"ToolResponse: Error\n---\nUnknown tool: {tool_name} in XML: {clean_ai_response_text[:200]}"
@@ -1546,6 +1563,49 @@ class AICodeAgentCog(commands.Cog):
except Exception as e:
return f"Error during DryRunWriteFile check for '{path}': {type(e).__name__} - {e}"
async def _execute_tool_read_web_page_raw(self, url: str) -> str:
print(f"AICodeAgentCog: _execute_tool_read_web_page_raw for URL: {url}")
if not url.startswith(("http://", "https://")):
return "Error: Invalid URL. Must start with http:// or https://"
try:
async with aiohttp.ClientSession() as session:
# Set a timeout for the request
timeout = aiohttp.ClientTimeout(total=30) # 30 seconds total timeout
async with session.get(url, timeout=timeout) as response:
if response.status == 200:
# Limit the size of the content to prevent memory issues
# Max 1MB for raw content, can be adjusted
max_content_size = 1 * 1024 * 1024
content_length = response.headers.get('Content-Length')
if content_length and int(content_length) > max_content_size:
return f"Error: Content at URL is too large (>{max_content_size / (1024*1024):.0f}MB). Size: {content_length} bytes."
# Read content chunk by chunk to enforce max_content_size if Content-Length is missing/unreliable
content = b""
async for chunk in response.content.iter_chunked(1024): # Read 1KB chunks
content += chunk
if len(content) > max_content_size:
return f"Error: Content at URL is too large (exceeded {max_content_size / (1024*1024):.0f}MB during download)."
# Try to decode as UTF-8, replace errors
return content.decode('utf-8', errors='replace')
else:
# Try to read a snippet of the error response body
error_body_snippet = ""
try:
error_body_snippet = await response.text()
error_body_snippet = error_body_snippet[:200] # Limit snippet length
except Exception:
error_body_snippet = "(Could not read error response body)"
return f"Error: Failed to fetch URL. Status code: {response.status}. Response snippet: {error_body_snippet}"
except asyncio.TimeoutError:
return f"Error: Request to URL '{url}' timed out after 30 seconds."
except aiohttp.ClientError as e:
return f"Error: aiohttp client error while fetching URL '{url}': {type(e).__name__} - {e}"
except Exception as e:
return f"Error fetching content from URL '{url}': {type(e).__name__} - {e}"
# --- End of New Tool Execution Methods ---
async def _process_agent_interaction(self, ctx: commands.Context, initial_prompt_text: str):