AAAA

2025-04-27 19:09:36 -06:00 · 2025-04-27 19:09:36 -06:00 · 3cc672a0d7
commit 3cc672a0d7
parent 9b36c7edee
4 changed files with 122 additions and 35 deletions
--- a/gurt/api.py
+++ b/gurt/api.py
@ -389,6 +389,7 @@ async def get_ai_response(cog: 'GurtCog', message: discord.Message, model_name:

    channel_id = message.channel.id
    user_id = message.author.id
+    initial_parsed_data = None # Added to store initial parsed result
    final_parsed_data = None
    error_message = None
    fallback_response = None
@ -525,6 +526,14 @@ async def get_ai_response(cog: 'GurtCog', message: discord.Message, model_name:
        if not initial_response or not initial_response.candidates:
             raise Exception("Initial API call returned no response or candidates.")

+        # --- Attempt to parse initial response (might be placeholder or final if no tool call) ---
+        initial_response_text = initial_response.text
+        # Use relaxed validation? For now, try standard schema. Might fail if it's just a tool call trigger.
+        initial_parsed_data = parse_and_validate_json_response(
+            initial_response_text, RESPONSE_SCHEMA['schema'], "initial response check"
+        )
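+        # If initial parsing fails but a tool call happens, initial_parsed_data will be None, which is okay.
+        # NOTE(review): some SDK versions raise from `.text` when the response contains only a
+        # function call; that would skip the tool-call check below and fall into the error
+        # handler instead of leaving initial_parsed_data as None — confirm against the SDK in use.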
+
        # Check for function call request
        candidate = initial_response.candidates[0]
        # Use getattr for safer access in case candidate structure varies or finish_reason is None
@ -601,18 +610,18 @@ async def get_ai_response(cog: 'GurtCog', message: discord.Message, model_name:
                         print("Critical Error: Re-prompted response still failed validation.")
                         error_message = "Failed to get valid JSON response after re-prompting."
                else:
-                     error_message = "Failed to get response after re-prompting."
+                 error_message = "Failed to get response after re-prompting."
+            # final_parsed_data is now set (or None if failed) after tool use


        else:
-            # No tool call requested, the first response is the final one (but needs validation)
+            # No tool call requested, the first response IS the final one.
+            # We already attempted to parse it into initial_parsed_data.
            print("No tool call requested by AI.")
-            final_response_text = initial_response.text
-            final_parsed_data = parse_and_validate_json_response(
-                final_response_text, RESPONSE_SCHEMA['schema'], "initial response (no tools)"
-            )
+            final_parsed_data = initial_parsed_data # The initial parse IS the final result here.

            if final_parsed_data is None:
+                 # This means the initial_parsed_data failed validation earlier
                 print("Critical Error: Initial response failed validation (no tools).")
                 error_message = "Failed to parse/validate initial AI JSON response."
                 # Create a basic fallback if the bot was mentioned
@ -626,12 +635,15 @@ async def get_ai_response(cog: 'GurtCog', message: discord.Message, model_name:
        print(error_message)
        import traceback
        traceback.print_exc()
-        final_parsed_data = None # Ensure no data is returned on error
+        # Ensure both are None on critical error
+        initial_parsed_data = None
+        final_parsed_data = None

    return {
-        "final_response": final_parsed_data,
+        "initial_response": initial_parsed_data, # Return parsed initial data
+        "final_response": final_parsed_data,    # Return parsed final data
        "error": error_message,
-        "fallback_initial": fallback_response # Pass fallback if created
+        "fallback_initial": fallback_response
    }


--- a/gurt/config.py
+++ b/gurt/config.py
@ -27,6 +27,11 @@ TAVILY_API_KEY = os.getenv("TAVILY_API_KEY", "")
 PISTON_API_URL = os.getenv("PISTON_API_URL") # For run_python_code tool
 PISTON_API_KEY = os.getenv("PISTON_API_KEY") # Optional key for Piston

+# --- Tavily Configuration ---
+# Used as the default `search_depth` / `max_results` arguments of the `web_search` tool.
+TAVILY_DEFAULT_SEARCH_DEPTH = os.getenv("TAVILY_DEFAULT_SEARCH_DEPTH", "basic")
+# int() runs at import time, so a non-numeric environment value raises ValueError on load.
+TAVILY_DEFAULT_MAX_RESULTS = int(os.getenv("TAVILY_DEFAULT_MAX_RESULTS", 5))
+# True only when the environment value is "true" (case-insensitive); anything else leaves it off.
+TAVILY_DISABLE_ADVANCED = os.getenv("TAVILY_DISABLE_ADVANCED", "false").lower() == "true" # For cost control
+
 # --- Model Configuration ---
 DEFAULT_MODEL = os.getenv("GURT_DEFAULT_MODEL", "gemini-2.5-pro-preview-03-25")
 FALLBACK_MODEL = os.getenv("GURT_FALLBACK_MODEL", "gemini-2.5-pro-preview-03-25")
--- a/gurt/prompt.py
+++ b/gurt/prompt.py
@ -132,7 +132,8 @@ You can use the tools you have to gather additional context for your messages if
 - `get_user_interaction_history`: See past interactions between users.
 - `get_conversation_summary`: Get a summary of the chat.
 - `get_message_context`: Get messages around a specific message.
- `web_search`: Search the web for current information, facts, or context about topics mentioned.
+- `web_search`: Search the web using Tavily. Can specify search depth (basic/advanced), max results, topic (general/news), include/exclude domains, request an AI answer, raw content, or images. Example: `web_search(query="latest game patch notes", search_depth="advanced", topic="news")`.
+- `extract_web_content`: Extract the full text content from one or more URLs using Tavily. Can specify extraction depth (basic/advanced) and request images. Useful for getting full articles or page content found via web_search. Example: `extract_web_content(urls=["https://example.com/article"], extract_depth="basic")`.
 - `remember_user_fact`: Store a specific, concise fact about a user (e.g., "likes pineapple pizza", "is studying calculus"). Use this when you learn something potentially useful for future interactions.
 - `get_user_facts`: Retrieve stored facts about a user. Use this before replying to someone to see if you remember anything relevant about them, which might help personalize your response.
 - `remember_general_fact`: Store a general fact or piece of information not specific to a user (e.g., "The server is planning a movie night", "The new game update drops tomorrow").
@ -146,16 +147,7 @@ You can use the tools you have to gather additional context for your messages if

 **Discord Action Tool Guidelines:** Use Discord action tools (polls, timeouts, etc.) appropriately. Do not perform disruptive actions, even as a joke. Ensure the action is relevant and contextually appropriate.

-**NEW TOOL USAGE RULE:** Instead of using the API's built-in tool calling mechanism, you will request tools via the `tool_requests` field in your JSON response.
- When you decide to perform an action for which a tool exists (like timing out a user, searching the web, remembering/retrieving facts, getting context, calculating, running code, creating polls, running terminal commands, etc.), you **MUST** include a `tool_requests` array in your JSON response.
- Each object in the `tool_requests` array should have a `name` (the tool name) and `arguments` (a JSON object with the parameters).
- If you include `tool_requests`, your `content` field should usually be a brief placeholder message (e.g., "hold on lemme check that", "aight bet", "one sec...") or null/empty. The actual response to the user will be generated in a subsequent step after the tool results are provided back to you.
- Do **NOT** describe the action in your `content` field if you are requesting a tool. Use the `tool_requests` field instead.
- Example: To search the web for "latest discord updates", your JSON might look like:
-  `{ "should_respond": true, "content": "lemme see...", "react_with_emoji": null, "tool_requests": [{ "name": "web_search", "arguments": { "query": "latest discord updates" } }] }`
- The *final* response you generate *after* receiving tool results should **NOT** contain the `tool_requests` field.
-
-Try to use the `remember_user_fact` and `remember_general_fact` tools frequently via the `tool_requests` field, even for details that don't seem immediately critical. This helps you build a better memory and personality over time.
+**Tool Usage:** Use the available tools when needed to gather information or perform actions. The API will handle the tool execution process natively.

 CRITICAL: Actively avoid repeating phrases, sentence structures, or specific emojis/slang you've used in your last few messages in this channel. Keep your responses fresh and varied.

@ -171,14 +163,8 @@ DO NOT fall into these patterns:

 {
  "should_respond": true, // Whether to send a text message in response.
-  "content": "example message",  // The text content of the bot's response. Can be empty or a placeholder if tool_requests is present.
-  "react_with_emoji": "👍", // Optional: A standard Discord emoji to react with, or null if no reaction.
-  "tool_requests": [         // Optional: List of tools to execute.
-    {
-      "name": "web_search",  // Name of the tool.
-      "arguments": {"query": "example search"}  // JSON object of arguments for the tool.
-    }
-  ]
+  "content": "example message",  // The text content of the bot's response.
+  "react_with_emoji": "👍" // Optional: A standard Discord emoji to react with, or null if no reaction.
 }

 **Do NOT include any other text, explanations, or markdown formatting outside of this JSON structure.**
--- a/gurt/tools.py
+++ b/gurt/tools.py
@ -14,7 +14,6 @@ from typing import Dict, List, Any, Optional, Tuple, Union # Added Union

 # Third-party imports for tools
 from tavily import TavilyClient
-from asteval import Interpreter
 import docker
 import aiodocker # Use aiodocker for async operations

@ -23,7 +22,9 @@ from .memory import MemoryManager # Import from local memory.py
 from .config import (
    TAVILY_API_KEY, PISTON_API_URL, PISTON_API_KEY, SAFETY_CHECK_MODEL,
    DOCKER_EXEC_IMAGE, DOCKER_COMMAND_TIMEOUT, DOCKER_CPU_LIMIT, DOCKER_MEM_LIMIT,
-    SUMMARY_CACHE_TTL, SUMMARY_API_TIMEOUT, DEFAULT_MODEL # Removed API_KEY, OPENROUTER_API_URL
+    SUMMARY_CACHE_TTL, SUMMARY_API_TIMEOUT, DEFAULT_MODEL,
+    # Tavily search defaults and the advanced-search cost-control switch
+    TAVILY_DEFAULT_SEARCH_DEPTH, TAVILY_DEFAULT_MAX_RESULTS, TAVILY_DISABLE_ADVANCED
 )
 # Assume these helpers will be moved or are accessible via cog
 # We might need to pass 'cog' to these tool functions if they rely on cog state heavily
@ -343,14 +344,64 @@ async def get_message_context(cog: commands.Cog, message_id: str, before_count:
    except Exception as e:
        return {"error": f"Error getting message context: {str(e)}"}

-async def web_search(cog: commands.Cog, query: str) -> Dict[str, Any]:
+async def web_search(cog: commands.Cog, query: str, search_depth: str = TAVILY_DEFAULT_SEARCH_DEPTH, max_results: int = TAVILY_DEFAULT_MAX_RESULTS, topic: str = "general", include_domains: Optional[List[str]] = None, exclude_domains: Optional[List[str]] = None, include_answer: bool = True, include_raw_content: bool = False, include_images: bool = False) -> Dict[str, Any]:
    """Search the web using Tavily API"""
    if not hasattr(cog, 'tavily_client') or not cog.tavily_client:
        return {"error": "Tavily client not initialized.", "timestamp": datetime.datetime.now().isoformat()}
+
+    # Cost control / Logging for advanced search
+    final_search_depth = search_depth
+    if search_depth.lower() == "advanced":
+        if TAVILY_DISABLE_ADVANCED:
+            print(f"Warning: Advanced Tavily search requested but disabled by config. Falling back to basic.")
+            final_search_depth = "basic"
+        else:
+            print(f"Performing advanced Tavily search (cost: 10 credits) for query: '{query}'")
+    elif search_depth.lower() != "basic":
+        print(f"Warning: Invalid search_depth '{search_depth}' provided. Using 'basic'.")
+        final_search_depth = "basic"
+
+    # Validate max_results
+    final_max_results = max(5, min(20, max_results)) # Clamp between 5 and 20
+
    try:
-        response = await asyncio.to_thread(cog.tavily_client.search, query=query, search_depth="basic", max_results=5)
-        results = [{"title": r.get("title"), "url": r.get("url"), "content": r.get("content")} for r in response.get("results", [])]
-        return {"query": query, "results": results, "count": len(results), "timestamp": datetime.datetime.now().isoformat()}
+        # Pass parameters to Tavily search
+        response = await asyncio.to_thread(
+            cog.tavily_client.search,
+            query=query,
+            search_depth=final_search_depth, # Use validated depth
+            max_results=final_max_results, # Use validated results count
+            topic=topic,
+            include_domains=include_domains,
+            exclude_domains=exclude_domains,
+            include_answer=include_answer,
+            include_raw_content=include_raw_content,
+            include_images=include_images
+        )
+        # Extract relevant information from results
+        results = []
+        for r in response.get("results", []):
+            result = {"title": r.get("title"), "url": r.get("url"), "content": r.get("content"), "score": r.get("score"), "published_date": r.get("published_date")}
+            if include_raw_content: result["raw_content"] = r.get("raw_content")
+            if include_images: result["images"] = r.get("images")
+            results.append(result)
+
+        return {
+            "query": query,
+            "search_depth": search_depth,
+            "max_results": max_results,
+            "topic": topic,
+            "include_domains": include_domains,
+            "exclude_domains": exclude_domains,
+            "include_answer": include_answer,
+            "include_raw_content": include_raw_content,
+            "include_images": include_images,
+            "results": results,
+            "answer": response.get("answer"),
+            "follow_up_questions": response.get("follow_up_questions"),
+            "count": len(results),
+            "timestamp": datetime.datetime.now().isoformat()
+        }
    except Exception as e:
        error_message = f"Error during Tavily search for '{query}': {str(e)}"
        print(error_message)
@ -729,5 +780,38 @@ TOOL_MAPPING = {
    "run_python_code": run_python_code,
    "create_poll": create_poll,
    "run_terminal_command": run_terminal_command,
-    "remove_timeout": remove_timeout
+    "remove_timeout": remove_timeout,
+    "extract_web_content": extract_web_content
 }
+
+async def extract_web_content(cog: commands.Cog, urls: Union[str, List[str]], extract_depth: str = "basic", include_images: bool = False) -> Dict[str, Any]:
+    """Extract content from URLs using Tavily API"""
+    if not hasattr(cog, 'tavily_client') or not cog.tavily_client:
+        return {"error": "Tavily client not initialized.", "timestamp": datetime.datetime.now().isoformat()}
+
+    # Cost control / Logging for advanced extract
+    final_extract_depth = extract_depth
+    if extract_depth.lower() == "advanced":
+        if TAVILY_DISABLE_ADVANCED:
+            print(f"Warning: Advanced Tavily extract requested but disabled by config. Falling back to basic.")
+            final_extract_depth = "basic"
+        else:
+            print(f"Performing advanced Tavily extract (cost: 2 credits per 5 URLs) for URLs: {urls}")
+    elif extract_depth.lower() != "basic":
+        print(f"Warning: Invalid extract_depth '{extract_depth}' provided. Using 'basic'.")
+        final_extract_depth = "basic"
+
+    try:
+        response = await asyncio.to_thread(
+            cog.tavily_client.extract,
+            urls=urls,
+            extract_depth=final_extract_depth, # Use validated depth
+            include_images=include_images
+        )
+        results = [{"url": r.get("url"), "raw_content": r.get("raw_content"), "images": r.get("images")} for r in response.get("results", [])]
+        failed_results = response.get("failed_results", [])
+        return {"urls": urls, "extract_depth": extract_depth, "include_images": include_images, "results": results, "failed_results": failed_results, "timestamp": datetime.datetime.now().isoformat()}
+    except Exception as e:
+        error_message = f"Error during Tavily extract for '{urls}': {str(e)}"
+        print(error_message)
+        return {"error": error_message, "timestamp": datetime.datetime.now().isoformat()}