aaad

2025-04-28 18:09:51 -06:00 · 2025-04-28 18:09:51 -06:00 · b5a2295b05
commit b5a2295b05
parent ed461871a7
4 changed files with 155 additions and 28 deletions
--- a/gurt/context.py
+++ b/gurt/context.py
@ -25,8 +25,35 @@ def gather_conversation_context(cog: 'GurtCog', channel_id: int, current_message

        for msg_data in context_messages_data:
            role = "assistant" if msg_data['author']['id'] == str(cog.bot.user.id) else "user"
-            # Simplified content for context
-            content = f"{msg_data['author']['display_name']}: {msg_data['content']}"
+
+            # Build the content string, including reply and attachment info
+            content_parts = []
+            author_name = msg_data['author']['display_name']
+
+            # Add reply prefix if applicable
+            if msg_data.get("is_reply"):
+                # The reply keys can be present but hold None (the referenced
+                # message was not resolved), in which case a .get() default
+                # never applies and len(None) would raise. Fall back with `or`.
+                reply_author = msg_data.get('replied_to_author_name') or 'Unknown User'
+                reply_snippet = msg_data.get('replied_to_content_snippet') or '...'
+                # Keep snippet very short for context (at most 28 characters)
+                reply_snippet_short = (reply_snippet[:25] + '...') if len(reply_snippet) > 28 else reply_snippet
+                content_parts.append(f"{author_name} (replying to {reply_author} '{reply_snippet_short}'):")
+            else:
+                content_parts.append(f"{author_name}:")
+
+            # Add main message content
+            if msg_data.get('content'):
+                content_parts.append(msg_data['content'])
+
+            # Add attachment descriptions (tolerate a missing or None list)
+            attachments = msg_data.get("attachment_descriptions") or []
+            if attachments:
+                # Join descriptions into a single string
+                attachment_str = " ".join(att['description'] for att in attachments)
+                content_parts.append(attachment_str)
+
+            # Join all parts with spaces
+            content = " ".join(content_parts).strip()
+
            context_api_messages.append({"role": role, "content": content})
    return context_api_messages

@ -162,6 +189,30 @@ async def get_memory_context(cog: 'GurtCog', message: discord.Message) -> Option
                if len(semantic_memory_parts) > 1: memory_parts.append("\n".join(semantic_memory_parts))
    except Exception as e: print(f"Error retrieving semantic memory context: {e}")

+    # 10. Add information about recent attachments
+    try:
+        channel_messages = cog.message_cache['by_channel'].get(channel_id, [])
+        # Keep only the five most recent cached messages that carry attachments
+        recent_attachments = [msg for msg in channel_messages if msg.get("attachment_descriptions")][-5:]
+        if recent_attachments:
+            attachment_memory_parts = ["Recently Shared Files/Images:"]
+            for msg in recent_attachments:
+                author_name = msg.get('author', {}).get('display_name', 'Unknown User')
+                timestamp_str = 'Unknown time'
+                if msg.get('created_at'):
+                    try:
+                        timestamp_str = datetime.datetime.fromisoformat(msg['created_at']).strftime('%H:%M')
+                    except (ValueError, TypeError):
+                        # Malformed or non-string timestamp: keep the placeholder
+                        # instead of letting the outer handler drop the whole section.
+                        pass
+
+                # Skip entries without a description rather than aborting on KeyError
+                descriptions = " ".join(
+                    att['description'] for att in msg['attachment_descriptions'] if att.get('description')
+                )
+                attachment_memory_parts.append(f"- By {author_name} (at {timestamp_str}): {descriptions}")
+
+            memory_parts.append("\n".join(attachment_memory_parts))
+    except Exception as e: print(f"Error retrieving recent attachments for memory context: {e}")
+
+
    if not memory_parts: return None
    memory_context_str = "--- Memory Context ---\n" + "\n\n".join(memory_parts) + "\n--- End Memory Context ---"
    return memory_context_str
--- a/gurt/listeners.py
+++ b/gurt/listeners.py
@ -165,9 +165,10 @@ async def on_message_listener(cog: 'GurtCog', message: discord.Message):
                "guild_id": str(message.guild.id) if message.guild else None,
                "timestamp": message.created_at.timestamp()
            }
+            # Pass the entire formatted_message dictionary now
            asyncio.create_task(
                cog.memory_manager.add_message_embedding(
-                    message_id=str(message.id), text=message.content, metadata=semantic_metadata
+                    message_id=str(message.id), formatted_message_data=formatted_message, metadata=semantic_metadata
                )
            )

--- a/gurt/utils.py
+++ b/gurt/utils.py
@ -21,38 +21,72 @@ def replace_mentions_with_names(cog: 'GurtCog', content: str, message: discord.M
        return content

    processed_content = content
+    # Sort by length of ID to handle potential overlaps correctly (longer IDs first)
+    # Although Discord IDs are fixed length, this is safer if formats change
    sorted_mentions = sorted(message.mentions, key=lambda m: len(str(m.id)), reverse=True)

    for member in sorted_mentions:
+        # Use display_name for better readability
        processed_content = processed_content.replace(f'<@{member.id}>', member.display_name)
-        processed_content = processed_content.replace(f'<@!{member.id}>', member.display_name)
+        processed_content = processed_content.replace(f'<@!{member.id}>', member.display_name) # Handle nickname mention format
    return processed_content

-def format_message(cog: 'GurtCog', message: discord.Message) -> Dict[str, Any]:
-    """Helper function to format a discord.Message object into a dictionary."""
-    processed_content = replace_mentions_with_names(cog, message.content, message) # Pass cog
-    mentioned_users_details = [
-        {"id": str(m.id), "name": m.name, "display_name": m.display_name}
-        for m in message.mentions
-    ]
+def _format_attachment_size(size_bytes: int) -> str:
+    """Return a human-readable size string for an attachment.
+
+    Sizes below 1024 bytes are reported as whole bytes ("B"), sizes below
+    1024 * 1024 bytes as kilobytes ("KB"), and anything larger as megabytes
+    ("MB"); KB and MB values are shown with one decimal place.
+    """
+    kib = 1024
+    mib = kib * kib
+    if size_bytes >= mib:
+        return f"{size_bytes / mib:.1f} MB"
+    if size_bytes >= kib:
+        return f"{size_bytes / kib:.1f} KB"
+    return f"{size_bytes} B"

+def format_message(cog: 'GurtCog', message: discord.Message) -> Dict[str, Any]:
+    """
+    Helper function to format a discord.Message object into a dictionary,
+    including detailed reply info and attachment descriptions.
+    """
+    # Process content first to replace mentions
+    processed_content = replace_mentions_with_names(cog, message.content, message) # Pass cog
+
+    # --- Attachment Processing ---
+    attachment_descriptions = []
+    for a in message.attachments:
+        size_str = _format_attachment_size(a.size)
+        file_type = "Image" if a.content_type and a.content_type.startswith("image/") else "File"
+        description = f"[{file_type}: {a.filename} ({a.content_type or 'unknown type'}, {size_str})]"
+        attachment_descriptions.append({
+            "description": description,
+            "filename": a.filename,
+            "content_type": a.content_type,
+            "size": a.size,
+            "url": a.url # Keep URL for potential future use (e.g., vision model)
+        })
+    # --- End Attachment Processing ---
+
+    # Basic message structure
    formatted_msg = {
        "id": str(message.id),
        "author": {
-            "id": str(message.author.id), "name": message.author.name,
-            "display_name": message.author.display_name, "bot": message.author.bot
+            "id": str(message.author.id),
+            "name": message.author.name,
+            "display_name": message.author.display_name,
+            "bot": message.author.bot
        },
-        "content": processed_content,
+        "content": processed_content, # Use processed content
        "created_at": message.created_at.isoformat(),
-        "attachments": [{"filename": a.filename, "url": a.url} for a in message.attachments],
+        "attachment_descriptions": attachment_descriptions, # Use new descriptions list
+        # "attachments": [{"filename": a.filename, "url": a.url} for a in message.attachments], # REMOVED old field
        "embeds": len(message.embeds) > 0,
-        "mentions": [{"id": str(m.id), "name": m.name} for m in message.mentions], # Keep original simple list too
-        "mentioned_users_details": mentioned_users_details,
-        "replied_to_message_id": None, "replied_to_author_id": None,
-        "replied_to_author_name": None, "replied_to_content": None,
+        "mentions": [{"id": str(m.id), "name": m.name, "display_name": m.display_name} for m in message.mentions], # Keep detailed mentions
+        # Reply fields initialized
+        "replied_to_message_id": None,
+        "replied_to_author_id": None,
+        "replied_to_author_name": None,
+        "replied_to_content_snippet": None, # Changed field name for clarity
        "is_reply": False
    }

+    # --- Reply Processing ---
    if message.reference and message.reference.message_id:
        formatted_msg["replied_to_message_id"] = str(message.reference.message_id)
        formatted_msg["is_reply"] = True
@ -61,14 +95,21 @@ def format_message(cog: 'GurtCog', message: discord.Message) -> Dict[str, Any]:
        if isinstance(ref_msg, discord.Message): # Check if resolved is a Message
            formatted_msg["replied_to_author_id"] = str(ref_msg.author.id)
            formatted_msg["replied_to_author_name"] = ref_msg.author.display_name
-            formatted_msg["replied_to_content"] = ref_msg.content
+            # Create a snippet of the replied-to content
+            snippet = ref_msg.content
+            if len(snippet) > 80: # Truncate long replies
+                snippet = snippet[:77] + "..."
+            formatted_msg["replied_to_content_snippet"] = snippet
        # else: print(f"Referenced message {message.reference.message_id} not resolved.") # Optional debug
+    # --- End Reply Processing ---

    return formatted_msg

 def update_relationship(cog: 'GurtCog', user_id_1: str, user_id_2: str, change: float):
    """Updates the relationship score between two users."""
+    # Ensure consistent key order
    if user_id_1 > user_id_2: user_id_1, user_id_2 = user_id_2, user_id_1
+    # Initialize user_id_1's dict if not present
    if user_id_1 not in cog.user_relationships: cog.user_relationships[user_id_1] = {}

    current_score = cog.user_relationships[user_id_1].get(user_id_2, 0.0)
@ -81,8 +122,17 @@ async def simulate_human_typing(cog: 'GurtCog', channel, text: str):
    # Minimal delay to ensure the typing indicator shows up reliably
    # but doesn't add noticeable latency to the response.
    # The actual sending of the message happens immediately after this.
-    async with channel.typing():
-        await asyncio.sleep(0.1) # Very short sleep, just to ensure typing shows
+    # Pre-check permissions only where they are resolved here (guild text
+    # channels); for any other channel type, attempt typing and rely on the
+    # exception handlers below.
+    perms = channel.permissions_for(channel.guild.me) if isinstance(channel, discord.TextChannel) else None
+    # Gate on send_messages only: send_tts_messages governs TTS messages, not the
+    # typing indicator, and requiring it skipped typing for bots without TTS rights.
+    if perms is None or perms.send_messages:
+        try:
+            async with channel.typing():
+                await asyncio.sleep(0.1) # Very short sleep, just to ensure typing shows
+        except discord.Forbidden:
+            print(f"Warning: Missing permissions to type in channel {channel.id}")
+        except Exception as e:
+            print(f"Warning: Error during typing simulation in {channel.id}: {e}")
+    # else: print(f"Skipping typing simulation in {channel.id} due to missing permissions.") # Optional debug

 async def log_internal_api_call(cog: 'GurtCog', task_description: str, payload: Dict[str, Any], response_data: Optional[Dict[str, Any]], error: Optional[Exception] = None):
    """Helper function to log internal API calls to a file."""
@ -117,6 +167,8 @@ async def log_internal_api_call(cog: 'GurtCog', task_description: str, payload:
        if error: log_entry += f"Error: {str(error)}\n"
        log_entry += "---\n\n"

+        # NOTE: this helper is a coroutine, so the synchronous open()/write() below
+        # blocks the event loop while it runs. Kept synchronous for simplicity, on
+        # the assumption that logging is not on a performance-critical path.
        with open(log_file, "a", encoding="utf-8") as f: f.write(log_entry)
    except Exception as log_e: print(f"!!! Failed to write to internal API log file {log_file}: {log_e}")

--- a/gurt_memory.py
+++ b/gurt_memory.py
@ -734,19 +734,42 @@ class MemoryManager:

    # --- Semantic Memory Methods (ChromaDB) ---

-    async def add_message_embedding(self, message_id: str, text: str, metadata: Dict[str, Any]) -> Dict[str, Any]:
-        """Generates embedding and stores a message in ChromaDB."""
+    async def add_message_embedding(self, message_id: str, formatted_message_data: Dict[str, Any], metadata: Dict[str, Any]) -> Dict[str, Any]:
+        """
+        Generates embedding and stores a message (including attachment descriptions)
+        in ChromaDB.
+        """
        if not self.semantic_collection:
            return {"error": "Semantic memory (ChromaDB) is not initialized."}
-        if not text:
-             return {"error": "Cannot add empty text to semantic memory."}

-        logger.info(f"Adding message {message_id} to semantic memory.")
+        # Construct the text to embed: message content, then attachment descriptions
+        content_text = formatted_message_data.get('content') or ''
+        attachment_text = " ".join(
+            att['description']
+            for att in (formatted_message_data.get('attachment_descriptions') or [])
+            if att.get('description')
+        )
+        # Join the non-empty sections with a bare newline. Putting a "\n" element
+        # into a space-joined list would embed " \n " and leave stray whitespace.
+        text_to_embed = "\n".join(part for part in (content_text, attachment_text) if part).strip()
+
+        if not text_to_embed:
+            # Nothing to embed: the message has neither text content nor any
+            # attachment descriptions. Log it and report the error to the caller.
+            logger.warning(f"Message {message_id} resulted in empty text_to_embed. Original data: {formatted_message_data}")
+            return {"error": "Cannot add empty derived text to semantic memory."}
+
+        logger.info(f"Adding message {message_id} to semantic memory (including attachments).")
        try:
            # ChromaDB expects lists for inputs
            await asyncio.to_thread(
                self.semantic_collection.add,
-                documents=[text],
+                documents=[text_to_embed], # Embed the combined text
                metadatas=[metadata],
                ids=[message_id]
            )