From 19d03a204d01aedcf482498f186630cf155a1146 Mon Sep 17 00:00:00 2001
From: Slipstream
Date: Thu, 29 May 2025 11:08:16 -0600
Subject: [PATCH] feat: Implement image description generation for emojis and stickers, add listeners for guild asset updates

---
Review notes (commentary only, not part of the change; placed between the
"---" marker and the diffstat, where patch commentary conventionally goes).

NOTE(review): line breaks and indentation in this copy were restored from a
whitespace-collapsed paste, guided by the hunk line counts. Python nesting
(for example whether the MIME check sits inside the "async with") and the
indentation of unchanged context lines are best guesses - verify against
the original commit before applying.

What the patch does
* gurt/api.py: adds generate_image_description(), which downloads the image
  through cog.session, sends the bytes plus a text prompt to DEFAULT_MODEL
  via call_google_genai_api_with_retry(), and returns the stripped response
  text. Every failure path (missing client/session, non-200 status, network
  error, timeout, empty response, any other exception) returns None.
* gurt/cog.py: registers on_guild_join / on_guild_emojis_update /
  on_guild_stickers_update handlers, and adds
  _fetch_and_process_guild_assets() and initial_emoji_sticker_scan().
* gurt/emojis.py: EmojiManager entries gain "url" and "description";
  add_emoji()/add_sticker() take them as optional keyword-compatible
  parameters defaulting to None, so existing four/three-argument callers
  still work.
* gurt/listeners.py: adds the three listener functions and calls the initial
  scan from on_ready_listener().

Points to confirm or follow up
* Task references: the three new listeners discard the result of
  asyncio.create_task(), and initial_emoji_sticker_scan() keeps its tasks
  only in a local list. The asyncio documentation advises keeping a strong
  reference to a task until it finishes.
* Removals are not handled: the comment in on_guild_emojis_update_listener()
  says add, remove and name change are all caught, but
  _fetch_and_process_guild_assets() only walks the current guild.emojis /
  guild.stickers and only calls add_emoji()/add_sticker(). Nothing in this
  patch calls remove_emoji()/remove_sticker(), so deleted or renamed assets
  stay in the store.
* Key collisions: entries are keyed by ":name:" alone. Two guilds that each
  have an asset with the same name overwrite one another, and because the
  skip check compares the stored URL, each scan would regenerate the
  description for whichever guild is processed second.
* Lottie stickers are never skipped: their placeholder description is in the
  exclusion list of the "already processed" check, so they go through
  add_sticker() again on every scan (a no-op there when nothing changed).
  "Unsupported format" entries, by contrast, are skipped on later scans.
* Sticker formats: only png, apng and lottie are handled explicitly.
  Presumably newer discord.py versions also define a GIF sticker format,
  which would land in the "unsupported" branch - TODO confirm for the
  pinned version.
* Overlapping scans: every emoji or sticker update event starts a full
  re-scan of the guild as a new task, and no guard against concurrent scans
  of the same guild is visible in this patch.
* Event registration: the handlers are attached with @self.bot.event.
  Presumably this assigns the single handler for that event name on the bot
  and would replace one set elsewhere; Cog.listener() / bot.add_listener()
  is the additive form - confirm against the discord.py docs.
* Persistence cost: add_emoji()/add_sticker() return self._save_data()
  without awaiting it, so the save runs synchronously once per changed
  asset during a scan. Presumably that is blocking file I/O on the event
  loop - verify against _save_data().
* generate_image_description(): the comment says the provided mime_type is
  used as-is, while the Blob is built from clean_mime_type (parameters
  stripped, lower-cased). timeout=15 is passed as a bare number; presumably
  aiohttp prefers aiohttp.ClientTimeout - confirm for the pinned version.

 gurt/api.py       | 112 ++++++++++++++++++++++++++++++++++++++++++++++
 gurt/cog.py       | 101 ++++++++++++++++++++++++++++++++++++++---
 gurt/emojis.py    |  47 +++++++++++++------
 gurt/listeners.py |  28 ++++++++++++
 4 files changed, 269 insertions(+), 19 deletions(-)

diff --git a/gurt/api.py b/gurt/api.py
index 93029f5..049985c 100644
--- a/gurt/api.py
+++ b/gurt/api.py
@@ -1850,6 +1850,118 @@ async def get_proactive_ai_response(cog: 'GurtCog', message: discord.Message, tr
     return final_parsed_data, sticker_ids_to_send_proactive
 
 
+# --- AI Image Description Function ---
+async def generate_image_description(
+    cog: 'GurtCog',
+    image_url: str,
+    item_name: str,
+    item_type: str, # "emoji" or "sticker"
+    mime_type: str # e.g., "image/png", "image/gif"
+) -> Optional[str]:
+    """
+    Generates a textual description for an image URL using a multimodal AI model.
+
+    Args:
+        cog: The GurtCog instance.
+        image_url: The URL of the image to describe.
+        item_name: The name of the item (e.g., emoji name) for context.
+        item_type: The type of item ("emoji" or "sticker") for context.
+        mime_type: The MIME type of the image.
+
+    Returns:
+        The AI-generated description string, or None if an error occurs.
+    """
+    if not genai_client:
+        print("Error in generate_image_description: Google GenAI Client not initialized.")
+        return None
+    if not cog.session:
+        print("Error in generate_image_description: aiohttp session not initialized in cog.")
+        return None
+
+    print(f"Attempting to generate description for {item_type} '{item_name}' from URL: {image_url}")
+
+    try:
+        # 1. Download image data
+        async with cog.session.get(image_url, timeout=15) as response:
+            if response.status != 200:
+                print(f"Failed to download image from {image_url}. Status: {response.status}")
+                return None
+            image_bytes = await response.read()
+            # Validate MIME type (optional, but good practice for Gemini)
+            supported_image_mimes = ["image/png", "image/jpeg", "image/webp", "image/heic", "image/heif", "image/gif"]
+            clean_mime_type = mime_type.split(';')[0].lower()
+            if clean_mime_type not in supported_image_mimes:
+                print(f"Warning: Image MIME type '{clean_mime_type}' from {image_url} might not be fully supported for description. Proceeding anyway.")
+                # Fallback to a generic type if not in list, or handle error
+                # For now, we'll proceed with the provided mime_type
+
+        # 2. Prepare contents for AI
+        # Ensure item_name is escaped if it contains characters that could break the prompt string.
+        # For simplicity, we assume item_name is generally safe or will be handled by the f-string.
+        prompt_text = (
+            f"This image is a Discord {item_type} named '{item_name}'. "
+            "Provide a concise, factual, and unbiased textual description of its visual elements. "
+            "Focus solely on what is depicted. Avoid any interpretation, censorship, or subjective commentary. "
+            "Do not mention that it is an emoji or sticker in your description, just describe the visual content."
+        )
+
+        image_part = types.Part(inline_data=types.Blob(data=image_bytes, mime_type=clean_mime_type))
+        text_part = types.Part(text=prompt_text)
+        description_contents: List[types.Content] = [types.Content(role="user", parts=[image_part, text_part])]
+
+        # 3. Prepare Generation Config
+        # We want a plain text response, no JSON schema. Safety settings are standard (BLOCK_NONE).
+        # System prompt is not strictly needed here as the user prompt is direct.
+        description_gen_config = types.GenerateContentConfig(
+            temperature=0.4, # Lower temperature for more factual description
+            max_output_tokens=256, # Descriptions should be concise
+            safety_settings=STANDARD_SAFETY_SETTINGS,
+            # No response_mime_type or response_schema needed for plain text
+            tools=None, # No tools for this task
+            tool_config=None
+        )
+
+        # 4. Call AI
+        # Use a multimodal model, e.g., DEFAULT_MODEL if it's Gemini 1.5 Pro or similar
+        # If DEFAULT_MODEL is tuned for JSON, we might need to specify a base multimodal model here.
+        # For now, assume DEFAULT_MODEL can handle this.
+        model_to_use = DEFAULT_MODEL # Or specify a known multimodal model like "models/gemini-1.5-pro-preview-0409"
+
+        print(f"Calling AI for image description ({item_name}) using model: {model_to_use}")
+        ai_response_obj = await call_google_genai_api_with_retry(
+            cog=cog,
+            model_name=model_to_use,
+            contents=description_contents,
+            generation_config=description_gen_config,
+            request_desc=f"Image description for {item_type} '{item_name}'"
+        )
+
+        # 5. Extract text
+        if not ai_response_obj:
+            print(f"AI call for image description of '{item_name}' returned no response object.")
+            return None
+
+        description_text = _get_response_text(ai_response_obj)
+        if description_text:
+            print(f"Successfully generated description for '{item_name}': {description_text[:100]}...")
+            return description_text.strip()
+        else:
+            print(f"AI response for '{item_name}' contained no usable text. Response: {ai_response_obj}")
+            return None
+
+    except aiohttp.ClientError as client_e:
+        print(f"Network error downloading image {image_url} for description: {client_e}")
+        return None
+    except asyncio.TimeoutError:
+        print(f"Timeout downloading image {image_url} for description.")
+        return None
+    except Exception as e:
+        print(f"Unexpected error in generate_image_description for '{item_name}': {type(e).__name__}: {e}")
+        import traceback
+        traceback.print_exc()
+        return None
+
+
 # --- Internal AI Call for Specific Tasks ---
 async def get_internal_ai_json_response(
     cog: 'GurtCog',
diff --git a/gurt/cog.py b/gurt/cog.py
index 5f7004d..dff2676 100644
--- a/gurt/cog.py
+++ b/gurt/cog.py
@@ -30,14 +30,19 @@ from .config import (
     IGNORED_CHANNEL_IDS, update_ignored_channels_file # Import for ignored channels
 )
 # Import functions/classes from other modules
-from .memory import MemoryManager # Import from local memory.py
-from .emojis import EmojiManager # Import EmojiManager
+from .memory import MemoryManager
+from .emojis import EmojiManager
 from .background import background_processing_task
-from .commands import setup_commands # Import the setup helper
-from .listeners import on_ready_listener, on_message_listener, on_reaction_add_listener, on_reaction_remove_listener # Import listener functions
-from . import config as GurtConfig # Import config module for get_gurt_stats
+from .commands import setup_commands
+from .listeners import (
+    on_ready_listener, on_message_listener, on_reaction_add_listener,
+    on_reaction_remove_listener, on_guild_join_listener, # Added on_guild_join_listener
+    on_guild_emojis_update_listener, on_guild_stickers_update_listener # Added emoji/sticker update listeners
+)
+from . import api # Import api to access generate_image_description
+from . import config as GurtConfig
 # Tool mapping is used internally by api.py/process_requested_tools, no need to import here directly unless cog methods call tools directly (they shouldn't)
-# Analysis, context, prompt, api, utils functions are called by listeners/commands/background task, not directly by cog methods here usually.
+# Analysis, context, prompt, utils functions are called by listeners/commands/background task, not directly by cog methods here usually.
 
 # Load environment variables (might be loaded globally in main bot script too)
 load_dotenv()
@@ -212,6 +217,21 @@ class GurtCog(commands.Cog, name="Gurt"): # Added explicit Cog name
         # This ensures the bot's application_id is properly set before syncing
         print("GurtCog: Commands will be synced when the bot is ready.")
 
+        # Add new listeners
+        @self.bot.event
+        async def on_guild_join(guild):
+            await on_guild_join_listener(self, guild)
+
+        @self.bot.event
+        async def on_guild_emojis_update(guild, before, after):
+            await on_guild_emojis_update_listener(self, guild, before, after)
+
+        @self.bot.event
+        async def on_guild_stickers_update(guild, before, after):
+            await on_guild_stickers_update_listener(self, guild, before, after)
+
+        print("GurtCog: Additional guild event listeners added.")
+
         # Start background task
         if self.background_task is None or self.background_task.done():
             self.background_task = asyncio.create_task(background_processing_task(self))
@@ -248,6 +268,75 @@ class GurtCog(commands.Cog, name="Gurt"): # Added explicit Cog name
         self.user_relationships[user_id_1][user_id_2] = new_score
         # print(f"Updated relationship {user_id_1}-{user_id_2}: {current_score:.1f} -> {new_score:.1f} ({change:+.1f})") # Debug log
 
+    async def _fetch_and_process_guild_assets(self, guild: discord.Guild):
+        """Iterates through a guild's emojis and stickers, generates descriptions, and updates EmojiManager."""
+        print(f"Processing assets for guild: {guild.name} ({guild.id})")
+        processed_count = 0
+        # Emojis
+        for emoji in guild.emojis:
+            try:
+                name_key = f":{emoji.name}:"
+                emoji_url = str(emoji.url)
+                mime_type = "image/gif" if emoji.animated else "image/png"
+
+                # Check if already processed with a description to avoid re-processing unless necessary
+                existing_emoji = await self.emoji_manager.get_emoji(name_key)
+                if existing_emoji and existing_emoji.get("url") == emoji_url and existing_emoji.get("description") and existing_emoji.get("description") != "No description generated.":
+                    # print(f"Skipping already processed emoji: {name_key} in guild {guild.name}")
+                    continue
+
+                print(f"Generating description for emoji: {name_key} in guild {guild.name}")
+                description = await api.generate_image_description(self, emoji_url, emoji.name, "emoji", mime_type)
+                await self.emoji_manager.add_emoji(name_key, str(emoji.id), emoji.animated, guild.id, emoji_url, description or "No description generated.")
+                processed_count +=1
+                await asyncio.sleep(1) # Rate limiting
+            except Exception as e:
+                print(f"Error processing emoji {emoji.name} in guild {guild.name}: {e}")
+
+        # Stickers
+        for sticker in guild.stickers:
+            try:
+                name_key = f":{sticker.name}:"
+                sticker_url = str(sticker.url)
+
+                existing_sticker = await self.emoji_manager.get_sticker(name_key)
+                if existing_sticker and existing_sticker.get("url") == sticker_url and existing_sticker.get("description") and existing_sticker.get("description") not in ["No description generated.", "Lottie animation, visual description not applicable."]:
+                    # print(f"Skipping already processed sticker: {name_key} in guild {guild.name}")
+                    continue
+
+                print(f"Generating description for sticker: {sticker.name} in guild {guild.name}")
+                if sticker.format == discord.StickerFormatType.png or sticker.format == discord.StickerFormatType.apng:
+                    mime_type = "image/png" # APNG is also fine as image/png for Gemini
+                    description = await api.generate_image_description(self, sticker_url, sticker.name, "sticker", mime_type)
+                    await self.emoji_manager.add_sticker(name_key, str(sticker.id), guild.id, sticker_url, description or "No description generated.")
+                elif sticker.format == discord.StickerFormatType.lottie:
+                    await self.emoji_manager.add_sticker(name_key, str(sticker.id), guild.id, sticker_url, "Lottie animation, visual description not applicable.")
+                else:
+                    print(f"Skipping sticker {sticker.name} due to unsupported format: {sticker.format}")
+                    await self.emoji_manager.add_sticker(name_key, str(sticker.id), guild.id, sticker_url, f"Unsupported format: {sticker.format}, visual description not applicable.")
+                processed_count += 1
+                await asyncio.sleep(1) # Rate limiting
+            except Exception as e:
+                print(f"Error processing sticker {sticker.name} in guild {guild.name}: {e}")
+        print(f"Finished processing {processed_count} new/updated assets for guild: {guild.name} ({guild.id})")
+
+    async def initial_emoji_sticker_scan(self):
+        """Scans all guilds GURT is in on startup for emojis and stickers."""
+        print("Starting initial scan of emojis and stickers for all guilds...")
+        # Create a list of tasks to run them concurrently but not all at once to avoid overwhelming APIs
+        tasks = []
+        for guild in self.bot.guilds:
+            # Create a task for each guild
+            task = asyncio.create_task(self._fetch_and_process_guild_assets(guild))
+            tasks.append(task)
+
+        # Optionally, wait for all tasks to complete if needed, or let them run in background
+        # For a startup scan, it's probably fine to let them run without blocking on_ready too long.
+        # If you need to ensure all are done before something else, you can await asyncio.gather(*tasks)
+        # For now, just creating them to run concurrently.
+        print(f"Created {len(tasks)} tasks for initial emoji/sticker scan.")
+
+
     async def get_gurt_stats(self) -> Dict[str, Any]:
         """Collects various internal stats for Gurt."""
         stats = {"config": {}, "runtime": {}, "memory": {}, "api_stats": {}, "tool_stats": {}}
diff --git a/gurt/emojis.py b/gurt/emojis.py
index ca0b1b5..a87d18c 100644
--- a/gurt/emojis.py
+++ b/gurt/emojis.py
@@ -7,7 +7,7 @@ DATA_FILE_PATH = "data/custom_emojis_stickers.json"
 class EmojiManager:
     def __init__(self, data_file: str = DATA_FILE_PATH):
         self.data_file = data_file
-        # Adjusted type hint for self.data to accommodate guild_id
+        # Adjusted type hint for self.data to accommodate guild_id, url, and description
         self.data: Dict[str, Dict[str, Dict[str, Any]]] = {"emojis": {}, "stickers": {}}
         self._load_data()
 
@@ -22,15 +22,19 @@ class EmojiManager:
                     self.data["emojis"] = {
                         name: {
                             "id": data.get("id"),
-                            "animated": data.get("animated", False), # Default animated to False
-                            "guild_id": data.get("guild_id") # Will be None if not present
+                            "animated": data.get("animated", False),
+                            "guild_id": data.get("guild_id"),
+                            "url": data.get("url"), # Load new field
+                            "description": data.get("description") # Load new field
                         }
                         for name, data in loaded_json.get("emojis", {}).items() if isinstance(data, dict)
                     }
                     self.data["stickers"] = {
                         name: {
                             "id": data.get("id"),
-                            "guild_id": data.get("guild_id") # Will be None if not present
+                            "guild_id": data.get("guild_id"),
+                            "url": data.get("url"), # Load new field
+                            "description": data.get("description") # Load new field
                         }
                         for name, data in loaded_json.get("stickers", {}).items() if isinstance(data, dict)
                     }
@@ -63,14 +67,23 @@ class EmojiManager:
             print(f"Error saving emoji/sticker data: {e}")
             return False
 
-    async def add_emoji(self, name: str, emoji_id: str, is_animated: bool, guild_id: Optional[int]) -> bool:
-        """Adds a custom emoji with its guild ID."""
+    async def add_emoji(self, name: str, emoji_id: str, is_animated: bool, guild_id: Optional[int], url: Optional[str] = None, description: Optional[str] = None) -> bool:
+        """Adds a custom emoji with its guild ID, URL, and description."""
         if name in self.data["emojis"]:
-            # Allow update if guild_id was None and is now being set, or if ID changes
             existing_data = self.data["emojis"][name]
-            if existing_data.get("id") == emoji_id and existing_data.get("guild_id") == guild_id and existing_data.get("animated") == is_animated:
+            if (existing_data.get("id") == emoji_id and
+                existing_data.get("guild_id") == guild_id and
+                existing_data.get("animated") == is_animated and
+                existing_data.get("url") == url and
+                existing_data.get("description") == description):
                 return False # No change
-        self.data["emojis"][name] = {"id": emoji_id, "animated": is_animated, "guild_id": guild_id}
+        self.data["emojis"][name] = {
+            "id": emoji_id,
+            "animated": is_animated,
+            "guild_id": guild_id,
+            "url": url,
+            "description": description
+        }
         return self._save_data()
 
     async def remove_emoji(self, name: str) -> bool:
@@ -88,13 +101,21 @@ class EmojiManager:
         """Gets a specific custom emoji by name."""
         return self.data["emojis"].get(name)
 
-    async def add_sticker(self, name: str, sticker_id: str, guild_id: Optional[int]) -> bool:
-        """Adds a custom sticker with its guild ID."""
+    async def add_sticker(self, name: str, sticker_id: str, guild_id: Optional[int], url: Optional[str] = None, description: Optional[str] = None) -> bool:
+        """Adds a custom sticker with its guild ID, URL, and description."""
         if name in self.data["stickers"]:
             existing_data = self.data["stickers"][name]
-            if existing_data.get("id") == sticker_id and existing_data.get("guild_id") == guild_id:
+            if (existing_data.get("id") == sticker_id and
+                existing_data.get("guild_id") == guild_id and
+                existing_data.get("url") == url and
+                existing_data.get("description") == description):
                 return False # No change
-        self.data["stickers"][name] = {"id": sticker_id, "guild_id": guild_id}
+        self.data["stickers"][name] = {
+            "id": sticker_id,
+            "guild_id": guild_id,
+            "url": url,
+            "description": description
+        }
         return self._save_data()
 
     async def remove_sticker(self, name: str) -> bool:
diff --git a/gurt/listeners.py b/gurt/listeners.py
index a816397..cbda241 100644
--- a/gurt/listeners.py
+++ b/gurt/listeners.py
@@ -48,6 +48,8 @@ async def on_ready_listener(cog: 'GurtCog'):
         traceback.print_exc()
 
     # --- Message history pre-loading removed ---
+    # Call the initial emoji/sticker scan
+    await cog.initial_emoji_sticker_scan()
 
 
 async def on_message_listener(cog: 'GurtCog', message: discord.Message):
@@ -634,3 +636,29 @@ async def on_reaction_remove_listener(cog: 'GurtCog', reaction: discord.Reaction
             if sentiment == "positive": cog.gurt_message_reactions[message_id]["positive"] = max(0, cog.gurt_message_reactions[message_id]["positive"] - 1)
             elif sentiment == "negative": cog.gurt_message_reactions[message_id]["negative"] = max(0, cog.gurt_message_reactions[message_id]["negative"] - 1)
             print(f"Reaction removed from Gurt msg ({message_id}). Sentiment: {sentiment}")
+
+
+# --- New Listener Functions for Guild Asset Updates ---
+
+async def on_guild_join_listener(cog: 'GurtCog', guild: discord.Guild):
+    """Listener function for on_guild_join."""
+    print(f"Gurt joined a new guild: {guild.name} ({guild.id})")
+    print(f"Processing emojis and stickers for new guild: {guild.name}")
+    # Schedule the processing as a background task to avoid blocking
+    asyncio.create_task(cog._fetch_and_process_guild_assets(guild))
+
+async def on_guild_emojis_update_listener(cog: 'GurtCog', guild: discord.Guild, before: List[discord.Emoji], after: List[discord.Emoji]):
+    """Listener function for on_guild_emojis_update."""
+    print(f"Emojis updated in guild: {guild.name} ({guild.id}). Before: {len(before)}, After: {len(after)}")
+    # For simplicity and to ensure all changes (add, remove, name change) are caught,
+    # re-process all emojis for the guild.
+    # A more optimized approach could diff 'before' and 'after' lists.
+    print(f"Re-processing all emojis for guild: {guild.name}")
+    asyncio.create_task(cog._fetch_and_process_guild_assets(guild)) # This will re-process stickers too, which is fine.
+
+async def on_guild_stickers_update_listener(cog: 'GurtCog', guild: discord.Guild, before: List[discord.StickerItem], after: List[discord.StickerItem]):
+    """Listener function for on_guild_stickers_update."""
+    print(f"Stickers updated in guild: {guild.name} ({guild.id}). Before: {len(before)}, After: {len(after)}")
+    # Similar to emojis, re-process all assets for simplicity.
+    print(f"Re-processing all stickers (and emojis) for guild: {guild.name}")
+    asyncio.create_task(cog._fetch_and_process_guild_assets(guild))