feat: Add ReadWebPageRaw tool to fetch raw content from URLs with error handling

This commit is contained in:
Slipstream 2025-06-01 17:21:35 -06:00
parent f6038cbc37
commit cb7fd42b75
Signed by: slipstream
GPG Key ID: 13E498CE010AC6FD

View File

@@ -3,6 +3,7 @@ from discord.ext import commands
import re
import os
import asyncio
import aiohttp
import subprocess
import json
import base64
@@ -251,7 +252,16 @@ IMPORTANT: Do NOT wrap your XML tool calls in markdown code blocks (e.g., ```xml
```
(System will report if path is writable/creatable or any permission issues.)
17. **TaskComplete:** Signals that the current multi-step task is considered complete by the AI.
17. **ReadWebPageRaw:** Reads the raw text content from a given URL.
* Ideal for fetching raw code or data from services like GitHub raw user content.
```xml
<ReadWebPageRaw>
<url>https://example.com/raw/file.txt</url>
</ReadWebPageRaw>
```
(System will provide page content or error in ToolResponse)
18. **TaskComplete:** Signals that the current multi-step task is considered complete by the AI.
```xml
<TaskComplete>
<message>A brief summary of what was accomplished or the final status.</message>
@@ -836,6 +846,13 @@ class AICodeAgentCog(commands.Cog):
tool_output = await self._execute_tool_dry_run_write_file(file_path)
return "TOOL_OUTPUT", f"ToolResponse: DryRunWriteFile\nPath: {file_path}\n---\n{tool_output}"
elif tool_name == "ReadWebPageRaw":
url_param = parameters.get("url")
if not url_param:
return "TOOL_OUTPUT", "ToolResponse: Error\n---\nReadWebPageRaw: Missing 'url' parameter."
tool_output = await self._execute_tool_read_web_page_raw(url_param)
return "TOOL_OUTPUT", f"ToolResponse: ReadWebPageRaw\nURL: {url_param}\n---\n{tool_output}"
else:
# Unknown tool name found in XML
return "TOOL_OUTPUT", f"ToolResponse: Error\n---\nUnknown tool: {tool_name} in XML: {clean_ai_response_text[:200]}"
@@ -1546,6 +1563,49 @@ class AICodeAgentCog(commands.Cog):
except Exception as e:
return f"Error during DryRunWriteFile check for '{path}': {type(e).__name__} - {e}"
async def _execute_tool_read_web_page_raw(self, url: str) -> str:
print(f"AICodeAgentCog: _execute_tool_read_web_page_raw for URL: {url}")
if not url.startswith(("http://", "https://")):
return "Error: Invalid URL. Must start with http:// or https://"
try:
async with aiohttp.ClientSession() as session:
# Set a timeout for the request
timeout = aiohttp.ClientTimeout(total=30) # 30 seconds total timeout
async with session.get(url, timeout=timeout) as response:
if response.status == 200:
# Limit the size of the content to prevent memory issues
# Max 1MB for raw content, can be adjusted
max_content_size = 1 * 1024 * 1024
content_length = response.headers.get('Content-Length')
if content_length and int(content_length) > max_content_size:
return f"Error: Content at URL is too large (>{max_content_size / (1024*1024):.0f}MB). Size: {content_length} bytes."
# Read content chunk by chunk to enforce max_content_size if Content-Length is missing/unreliable
content = b""
async for chunk in response.content.iter_chunked(1024): # Read 1KB chunks
content += chunk
if len(content) > max_content_size:
return f"Error: Content at URL is too large (exceeded {max_content_size / (1024*1024):.0f}MB during download)."
# Try to decode as UTF-8, replace errors
return content.decode('utf-8', errors='replace')
else:
# Try to read a snippet of the error response body
error_body_snippet = ""
try:
error_body_snippet = await response.text()
error_body_snippet = error_body_snippet[:200] # Limit snippet length
except Exception:
error_body_snippet = "(Could not read error response body)"
return f"Error: Failed to fetch URL. Status code: {response.status}. Response snippet: {error_body_snippet}"
except asyncio.TimeoutError:
return f"Error: Request to URL '{url}' timed out after 30 seconds."
except aiohttp.ClientError as e:
return f"Error: aiohttp client error while fetching URL '{url}': {type(e).__name__} - {e}"
except Exception as e:
return f"Error fetching content from URL '{url}': {type(e).__name__} - {e}"
# --- End of New Tool Execution Methods ---
async def _process_agent_interaction(self, ctx: commands.Context, initial_prompt_text: str):