diff --git a/gurt/context.py b/gurt/context.py index cff5395..01b36b4 100644 --- a/gurt/context.py +++ b/gurt/context.py @@ -25,8 +25,35 @@ def gather_conversation_context(cog: 'GurtCog', channel_id: int, current_message for msg_data in context_messages_data: role = "assistant" if msg_data['author']['id'] == str(cog.bot.user.id) else "user" - # Simplified content for context - content = f"{msg_data['author']['display_name']}: {msg_data['content']}" + + # Build the content string, including reply and attachment info + content_parts = [] + author_name = msg_data['author']['display_name'] + + # Add reply prefix if applicable (reply fields are stored as None when the reference was not resolved, hence `or` fallbacks) + if msg_data.get("is_reply"): + reply_author = msg_data.get('replied_to_author_name') or 'Unknown User' + reply_snippet = msg_data.get('replied_to_content_snippet') or '...' + # Keep snippet very short for context + reply_snippet_short = (reply_snippet[:25] + '...') if len(reply_snippet) > 28 else reply_snippet + content_parts.append(f"{author_name} (replying to {reply_author} '{reply_snippet_short}'):") + else: + content_parts.append(f"{author_name}:") + + # Add main message content + if msg_data.get('content'): + content_parts.append(msg_data['content']) + + # Add attachment descriptions + attachments = msg_data.get("attachment_descriptions", []) + if attachments: + # Join descriptions into a single string + attachment_str = " ".join([att['description'] for att in attachments]) + content_parts.append(attachment_str) + + # Join all parts with spaces + content = " ".join(content_parts).strip() + + context_api_messages.append({"role": role, "content": content}) return context_api_messages @@ -162,6 +189,30 @@ async def get_memory_context(cog: 'GurtCog', message: discord.Message) -> Option if len(semantic_memory_parts) > 1: memory_parts.append("\n".join(semantic_memory_parts)) except Exception as e: print(f"Error retrieving semantic memory context: {e}") + # 10. 
Add information about recent attachments + try: + channel_messages = cog.message_cache['by_channel'].get(channel_id, []) + messages_with_attachments = [msg for msg in channel_messages if msg.get("attachment_descriptions")] + if messages_with_attachments: + recent_attachments = messages_with_attachments[-5:] # Get last 5 + attachment_memory_parts = ["Recently Shared Files/Images:"] + for msg in recent_attachments: + author_name = msg.get('author', {}).get('display_name', 'Unknown User') + timestamp_str = 'Unknown time' + try: + # Safely parse timestamp + if msg.get('created_at'): + timestamp_str = datetime.datetime.fromisoformat(msg['created_at']).strftime('%H:%M') + except ValueError: pass # Ignore invalid timestamp format + + descriptions = " ".join([att['description'] for att in msg.get('attachment_descriptions', [])]) + attachment_memory_parts.append(f"- By {author_name} (at {timestamp_str}): {descriptions}") + + if len(attachment_memory_parts) > 1: + memory_parts.append("\n".join(attachment_memory_parts)) + except Exception as e: print(f"Error retrieving recent attachments for memory context: {e}") + + if not memory_parts: return None memory_context_str = "--- Memory Context ---\n" + "\n\n".join(memory_parts) + "\n--- End Memory Context ---" return memory_context_str diff --git a/gurt/listeners.py b/gurt/listeners.py index e65a49b..057507e 100644 --- a/gurt/listeners.py +++ b/gurt/listeners.py @@ -165,9 +165,10 @@ async def on_message_listener(cog: 'GurtCog', message: discord.Message): "guild_id": str(message.guild.id) if message.guild else None, "timestamp": message.created_at.timestamp() } + # Pass the entire formatted_message dictionary now asyncio.create_task( cog.memory_manager.add_message_embedding( - message_id=str(message.id), text=message.content, metadata=semantic_metadata + message_id=str(message.id), formatted_message_data=formatted_message, metadata=semantic_metadata ) ) diff --git a/gurt/utils.py b/gurt/utils.py index 1949e1b..af039cc 100644 --- 
a/gurt/utils.py +++ b/gurt/utils.py @@ -21,38 +21,72 @@ def replace_mentions_with_names(cog: 'GurtCog', content: str, message: discord.M return content processed_content = content + # Sort by length of ID to handle potential overlaps correctly (longer IDs first) + # Although Discord IDs are fixed length, this is safer if formats change sorted_mentions = sorted(message.mentions, key=lambda m: len(str(m.id)), reverse=True) for member in sorted_mentions: + # Use display_name for better readability processed_content = processed_content.replace(f'<@{member.id}>', member.display_name) - processed_content = processed_content.replace(f'<@!{member.id}>', member.display_name) + processed_content = processed_content.replace(f'<@!{member.id}>', member.display_name) # Handle nickname mention format return processed_content -def format_message(cog: 'GurtCog', message: discord.Message) -> Dict[str, Any]: - """Helper function to format a discord.Message object into a dictionary.""" - processed_content = replace_mentions_with_names(cog, message.content, message) # Pass cog - mentioned_users_details = [ - {"id": str(m.id), "name": m.name, "display_name": m.display_name} - for m in message.mentions - ] +def _format_attachment_size(size_bytes: int) -> str: + """Formats attachment size into KB or MB.""" + if size_bytes < 1024: + return f"{size_bytes} B" + elif size_bytes < 1024 * 1024: + return f"{size_bytes / 1024:.1f} KB" + else: + return f"{size_bytes / (1024 * 1024):.1f} MB" +def format_message(cog: 'GurtCog', message: discord.Message) -> Dict[str, Any]: + """ + Helper function to format a discord.Message object into a dictionary, + including detailed reply info and attachment descriptions. 
+ """ + # Process content first to replace mentions + processed_content = replace_mentions_with_names(cog, message.content, message) # Pass cog + + # --- Attachment Processing --- + attachment_descriptions = [] + for a in message.attachments: + size_str = _format_attachment_size(a.size) + file_type = "Image" if a.content_type and a.content_type.startswith("image/") else "File" + description = f"[{file_type}: {a.filename} ({a.content_type or 'unknown type'}, {size_str})]" + attachment_descriptions.append({ + "description": description, + "filename": a.filename, + "content_type": a.content_type, + "size": a.size, + "url": a.url # Keep URL for potential future use (e.g., vision model) + }) + # --- End Attachment Processing --- + + # Basic message structure formatted_msg = { "id": str(message.id), "author": { - "id": str(message.author.id), "name": message.author.name, - "display_name": message.author.display_name, "bot": message.author.bot + "id": str(message.author.id), + "name": message.author.name, + "display_name": message.author.display_name, + "bot": message.author.bot }, - "content": processed_content, + "content": processed_content, # Use processed content "created_at": message.created_at.isoformat(), - "attachments": [{"filename": a.filename, "url": a.url} for a in message.attachments], + "attachment_descriptions": attachment_descriptions, # Use new descriptions list + # "attachments": [{"filename": a.filename, "url": a.url} for a in message.attachments], # REMOVED old field "embeds": len(message.embeds) > 0, - "mentions": [{"id": str(m.id), "name": m.name} for m in message.mentions], # Keep original simple list too - "mentioned_users_details": mentioned_users_details, - "replied_to_message_id": None, "replied_to_author_id": None, - "replied_to_author_name": None, "replied_to_content": None, + "mentions": [{"id": str(m.id), "name": m.name, "display_name": m.display_name} for m in message.mentions], # Keep detailed mentions + # Reply fields initialized + 
"replied_to_message_id": None, + "replied_to_author_id": None, + "replied_to_author_name": None, + "replied_to_content_snippet": None, # Changed field name for clarity "is_reply": False } + # --- Reply Processing --- if message.reference and message.reference.message_id: formatted_msg["replied_to_message_id"] = str(message.reference.message_id) formatted_msg["is_reply"] = True @@ -61,14 +95,21 @@ def format_message(cog: 'GurtCog', message: discord.Message) -> Dict[str, Any]: if isinstance(ref_msg, discord.Message): # Check if resolved is a Message formatted_msg["replied_to_author_id"] = str(ref_msg.author.id) formatted_msg["replied_to_author_name"] = ref_msg.author.display_name - formatted_msg["replied_to_content"] = ref_msg.content + # Create a snippet of the replied-to content + snippet = ref_msg.content + if len(snippet) > 80: # Truncate long replies + snippet = snippet[:77] + "..." + formatted_msg["replied_to_content_snippet"] = snippet # else: print(f"Referenced message {message.reference.message_id} not resolved.") # Optional debug + # --- End Reply Processing --- return formatted_msg def update_relationship(cog: 'GurtCog', user_id_1: str, user_id_2: str, change: float): """Updates the relationship score between two users.""" + # Ensure consistent key order if user_id_1 > user_id_2: user_id_1, user_id_2 = user_id_2, user_id_1 + # Initialize user_id_1's dict if not present if user_id_1 not in cog.user_relationships: cog.user_relationships[user_id_1] = {} current_score = cog.user_relationships[user_id_1].get(user_id_2, 0.0) @@ -81,8 +122,17 @@ async def simulate_human_typing(cog: 'GurtCog', channel, text: str): # Minimal delay to ensure the typing indicator shows up reliably # but doesn't add noticeable latency to the response. # The actual sending of the message happens immediately after this. 
- async with channel.typing(): - await asyncio.sleep(0.1) # Very short sleep, just to ensure typing shows + # Check that the bot may send messages here before showing the typing indicator + perms = channel.permissions_for(channel.guild.me) if isinstance(channel, discord.TextChannel) else None + if perms is None or perms.send_messages: # Send TTS Messages is unrelated to typing; discord.Forbidden is still handled below + try: + async with channel.typing(): + await asyncio.sleep(0.1) # Very short sleep, just to ensure typing shows + except discord.Forbidden: + print(f"Warning: Missing permissions to type in channel {channel.id}") + except Exception as e: + print(f"Warning: Error during typing simulation in {channel.id}: {e}") + # else: print(f"Skipping typing simulation in {channel.id} due to missing permissions.") # Optional debug async def log_internal_api_call(cog: 'GurtCog', task_description: str, payload: Dict[str, Any], response_data: Optional[Dict[str, Any]], error: Optional[Exception] = None): """Helper function to log internal API calls to a file.""" @@ -117,6 +167,8 @@ async def log_internal_api_call(cog: 'GurtCog', task_description: str, payload: if error: log_entry += f"Error: {str(error)}\n" log_entry += "---\n\n" + # Use async file writing if in async context, but this helper might be called from sync code? + # Sticking to sync file I/O for simplicity here, assuming logging isn't performance critical path. with open(log_file, "a", encoding="utf-8") as f: f.write(log_entry) except Exception as log_e: print(f"!!! 
Failed to write to internal API log file {log_file}: {log_e}") diff --git a/gurt_memory.py b/gurt_memory.py index d282353..fc36553 100644 --- a/gurt_memory.py +++ b/gurt_memory.py @@ -734,19 +734,42 @@ class MemoryManager: # --- Semantic Memory Methods (ChromaDB) --- - async def add_message_embedding(self, message_id: str, text: str, metadata: Dict[str, Any]) -> Dict[str, Any]: - """Generates embedding and stores a message in ChromaDB.""" + async def add_message_embedding(self, message_id: str, formatted_message_data: Dict[str, Any], metadata: Dict[str, Any]) -> Dict[str, Any]: + """ + Generates embedding and stores a message (including attachment descriptions) + in ChromaDB. + """ if not self.semantic_collection: return {"error": "Semantic memory (ChromaDB) is not initialized."} - if not text: - return {"error": "Cannot add empty text to semantic memory."} - logger.info(f"Adding message {message_id} to semantic memory.") + # Construct the text to embed: content + attachment descriptions + text_to_embed_parts = [] + if formatted_message_data.get('content'): + text_to_embed_parts.append(formatted_message_data['content']) + + attachment_descs = formatted_message_data.get('attachment_descriptions', []) + if attachment_descs: + # Add a separator if there's content AND attachments + if text_to_embed_parts: + text_to_embed_parts.append("\n") # Add newline separator + # Append descriptions + for att in attachment_descs: + text_to_embed_parts.append(att.get('description', '')) + + text_to_embed = " ".join(text_to_embed_parts).strip() + + if not text_to_embed: + # This might happen if a message ONLY contains attachments and no text content, + # but format_message should always produce descriptions. Log if empty. + logger.warning(f"Message {message_id} resulted in empty text_to_embed. 
Original data: {formatted_message_data}") + return {"error": "Cannot add empty derived text to semantic memory."} + + logger.info(f"Adding message {message_id} to semantic memory (including attachments).") try: # ChromaDB expects lists for inputs await asyncio.to_thread( self.semantic_collection.add, - documents=[text], + documents=[text_to_embed], # Embed the combined text metadatas=[metadata], ids=[message_id] )