feat: Implement image description generation for emojis and stickers, add listeners for guild asset updates

2025-05-29 11:08:16 -06:00 · 2025-05-29 11:08:16 -06:00 · 19d03a204d
commit 19d03a204d
parent f7a1a2134e
4 changed files with 269 additions and 19 deletions
--- a/gurt/api.py
+++ b/gurt/api.py
@ -1850,6 +1850,118 @@ async def get_proactive_ai_response(cog: 'GurtCog', message: discord.Message, tr
    return final_parsed_data, sticker_ids_to_send_proactive


+# --- AI Image Description Function ---
+async def generate_image_description(
+    cog: 'GurtCog',
+    image_url: str,
+    item_name: str,
+    item_type: str, # "emoji" or "sticker"
+    mime_type: str # e.g., "image/png", "image/gif"
+) -> Optional[str]:
+    """
+    Generates a textual description for an image URL using a multimodal AI model.
+
+    The image is downloaded with the cog's aiohttp session and sent inline,
+    together with a short prompt, to DEFAULT_MODEL through
+    call_google_genai_api_with_retry. Ordinary failures are logged with print()
+    and reported as None instead of being raised.
+
+    Args:
+        cog: The GurtCog instance (its aiohttp ``session`` is used for the download).
+        image_url: The URL of the image to describe.
+        item_name: The name of the item (e.g., emoji name) for context.
+        item_type: The type of item ("emoji" or "sticker") for context.
+        mime_type: The MIME type of the image. Anything after ';' is dropped and
+            the value is lower-cased before it is attached to the image data.
+
+    Returns:
+        The stripped AI-generated description string, or None if the GenAI client
+        or session is missing, the download fails or is empty, or the model
+        returns no usable text.
+    """
+    if not genai_client:
+        print("Error in generate_image_description: Google GenAI Client not initialized.")
+        return None
+    if not cog.session:
+        print("Error in generate_image_description: aiohttp session not initialized in cog.")
+        return None
+
+    print(f"Attempting to generate description for {item_type} '{item_name}' from URL: {image_url}")
+
+    try:
+        # Normalise the MIME type before touching the network; an unknown type
+        # is only warned about and the request still goes ahead.
+        supported_image_mimes = ("image/png", "image/jpeg", "image/webp", "image/heic", "image/heif", "image/gif")
+        clean_mime_type = mime_type.split(';')[0].strip().lower()
+        if clean_mime_type not in supported_image_mimes:
+            print(f"Warning: Image MIME type '{clean_mime_type}' from {image_url} might not be fully supported for description. Proceeding anyway.")
+
+        # 1. Download image data
+        # The network-specific handlers wrap ONLY the download, so a timeout or
+        # client error raised later by the AI call is not misreported as a
+        # download failure.
+        try:
+            download_timeout = aiohttp.ClientTimeout(total=15)
+            async with cog.session.get(image_url, timeout=download_timeout) as response:
+                if response.status != 200:
+                    print(f"Failed to download image from {image_url}. Status: {response.status}")
+                    return None
+                image_bytes = await response.read()
+        except aiohttp.ClientError as client_e:
+            print(f"Network error downloading image {image_url} for description: {client_e}")
+            return None
+        except asyncio.TimeoutError:
+            print(f"Timeout downloading image {image_url} for description.")
+            return None
+
+        if not image_bytes:
+            # Nothing to describe; do not spend an AI request on an empty blob.
+            print(f"Downloaded image from {image_url} is empty. Skipping description.")
+            return None
+
+        # 2. Prepare contents for AI
+        # item_name is interpolated into the prompt as-is.
+        prompt_text = (
+            f"This image is a Discord {item_type} named '{item_name}'. "
+            "Provide a concise, factual, and unbiased textual description of its visual elements. "
+            "Focus solely on what is depicted. Avoid any interpretation, censorship, or subjective commentary. "
+            "Do not mention that it is an emoji or sticker in your description, just describe the visual content."
+        )
+
+        image_part = types.Part(inline_data=types.Blob(data=image_bytes, mime_type=clean_mime_type))
+        text_part = types.Part(text=prompt_text)
+        description_contents: List[types.Content] = [types.Content(role="user", parts=[image_part, text_part])]
+
+        # 3. Prepare Generation Config
+        # Plain text response: no response schema, no tools. Safety settings
+        # come from STANDARD_SAFETY_SETTINGS.
+        description_gen_config = types.GenerateContentConfig(
+            temperature=0.4, # Lower temperature for more factual description
+            max_output_tokens=256, # Descriptions should be concise
+            safety_settings=STANDARD_SAFETY_SETTINGS,
+            tools=None, # No tools for this task
+            tool_config=None
+        )
+
+        # 4. Call AI
+        # NOTE(review): assumes DEFAULT_MODEL accepts image input and is not
+        # restricted to JSON output - confirm against the configured model.
+        model_to_use = DEFAULT_MODEL
+
+        print(f"Calling AI for image description ({item_name}) using model: {model_to_use}")
+        ai_response_obj = await call_google_genai_api_with_retry(
+            cog=cog,
+            model_name=model_to_use,
+            contents=description_contents,
+            generation_config=description_gen_config,
+            request_desc=f"Image description for {item_type} '{item_name}'"
+        )
+
+        # 5. Extract text
+        if not ai_response_obj:
+            print(f"AI call for image description of '{item_name}' returned no response object.")
+            return None
+
+        description_text = _get_response_text(ai_response_obj)
+        if not description_text:
+            print(f"AI response for '{item_name}' contained no usable text. Response: {ai_response_obj}")
+            return None
+
+        print(f"Successfully generated description for '{item_name}': {description_text[:100]}...")
+        return description_text.strip()
+
+    except Exception as e:
+        # Last-resort boundary: callers rely on None rather than an exception.
+        print(f"Unexpected error in generate_image_description for '{item_name}': {type(e).__name__}: {e}")
+        import traceback
+        traceback.print_exc()
+        return None
+
+
 # --- Internal AI Call for Specific Tasks ---
 async def get_internal_ai_json_response(
    cog: 'GurtCog',
--- a/gurt/cog.py
+++ b/gurt/cog.py
@ -30,14 +30,19 @@ from .config import (
    IGNORED_CHANNEL_IDS, update_ignored_channels_file # Import for ignored channels
 )
 # Import functions/classes from other modules
-from .memory import MemoryManager # Import from local memory.py
-from .emojis import EmojiManager # Import EmojiManager
+from .memory import MemoryManager
+from .emojis import EmojiManager
 from .background import background_processing_task
-from .commands import setup_commands # Import the setup helper
-from .listeners import on_ready_listener, on_message_listener, on_reaction_add_listener, on_reaction_remove_listener # Import listener functions
-from . import config as GurtConfig # Import config module for get_gurt_stats
+from .commands import setup_commands
+from .listeners import (
+    on_ready_listener, on_message_listener, on_reaction_add_listener,
+    on_reaction_remove_listener, on_guild_join_listener, # Added on_guild_join_listener
+    on_guild_emojis_update_listener, on_guild_stickers_update_listener # Added emoji/sticker update listeners
+)
+from . import api # Import api to access generate_image_description
+from . import config as GurtConfig
 # Tool mapping is used internally by api.py/process_requested_tools, no need to import here directly unless cog methods call tools directly (they shouldn't)
-# Analysis, context, prompt, api, utils functions are called by listeners/commands/background task, not directly by cog methods here usually.
+# Analysis, context, prompt, utils functions are called by listeners/commands/background task, not directly by cog methods here usually.

 # Load environment variables (might be loaded globally in main bot script too)
 load_dotenv()
@ -212,6 +217,21 @@ class GurtCog(commands.Cog, name="Gurt"): # Added explicit Cog name
        # This ensures the bot's application_id is properly set before syncing
        print("GurtCog: Commands will be synced when the bot is ready.")

+        # Add new listeners
+        @self.bot.event
+        async def on_guild_join(guild):
+            await on_guild_join_listener(self, guild)
+
+        @self.bot.event
+        async def on_guild_emojis_update(guild, before, after):
+            await on_guild_emojis_update_listener(self, guild, before, after)
+
+        @self.bot.event
+        async def on_guild_stickers_update(guild, before, after):
+            await on_guild_stickers_update_listener(self, guild, before, after)
+
+        print("GurtCog: Additional guild event listeners added.")
+
        # Start background task
        if self.background_task is None or self.background_task.done():
            self.background_task = asyncio.create_task(background_processing_task(self))
@ -248,6 +268,75 @@ class GurtCog(commands.Cog, name="Gurt"): # Added explicit Cog name
        self.user_relationships[user_id_1][user_id_2] = new_score
        # print(f"Updated relationship {user_id_1}-{user_id_2}: {current_score:.1f} -> {new_score:.1f} ({change:+.1f})") # Debug log

+    async def _fetch_and_process_guild_assets(self, guild: discord.Guild):
+        """Generate and store descriptions for a guild's emojis and stickers.
+
+        Each emoji and each PNG/APNG sticker is sent to
+        api.generate_image_description and the result is saved through
+        self.emoji_manager. Lottie and other sticker formats are stored with a
+        fixed placeholder description and no AI call. Entries whose stored URL
+        still matches and which already carry a real description are skipped.
+        An error on one asset is logged and does not stop the scan.
+
+        Args:
+            guild: The guild whose emojis and stickers are scanned.
+        """
+        # Placeholder descriptions; an entry holding one of the first two is
+        # treated as "not yet described" and is processed again.
+        no_description = "No description generated."
+        lottie_description = "Lottie animation, visual description not applicable."
+
+        print(f"Processing assets for guild: {guild.name} ({guild.id})")
+        processed_count = 0
+
+        # Emojis
+        for emoji in guild.emojis:
+            try:
+                name_key = f":{emoji.name}:"
+                emoji_url = str(emoji.url)
+                mime_type = "image/gif" if emoji.animated else "image/png"
+
+                # Skip emojis that already have a real description for this URL.
+                existing_emoji = await self.emoji_manager.get_emoji(name_key)
+                if existing_emoji and existing_emoji.get("url") == emoji_url:
+                    existing_description = existing_emoji.get("description")
+                    if existing_description and existing_description != no_description:
+                        continue
+
+                print(f"Generating description for emoji: {name_key} in guild {guild.name}")
+                description = await api.generate_image_description(self, emoji_url, emoji.name, "emoji", mime_type)
+                changed = await self.emoji_manager.add_emoji(
+                    name_key, str(emoji.id), emoji.animated, guild.id, emoji_url, description or no_description
+                )
+                if changed:
+                    processed_count += 1
+                await asyncio.sleep(1)  # Rate limiting between AI requests
+            except Exception as e:
+                print(f"Error processing emoji {emoji.name} in guild {guild.name}: {e}")
+
+        # Stickers
+        for sticker in guild.stickers:
+            try:
+                name_key = f":{sticker.name}:"
+                sticker_url = str(sticker.url)
+
+                # Skip stickers that already have a real description for this URL.
+                existing_sticker = await self.emoji_manager.get_sticker(name_key)
+                if existing_sticker and existing_sticker.get("url") == sticker_url:
+                    existing_description = existing_sticker.get("description")
+                    if existing_description and existing_description not in [no_description, lottie_description]:
+                        continue
+
+                print(f"Generating description for sticker: {sticker.name} in guild {guild.name}")
+                called_api = False
+                if sticker.format == discord.StickerFormatType.png or sticker.format == discord.StickerFormatType.apng:
+                    # APNG stickers are submitted as image/png as well.
+                    description = await api.generate_image_description(self, sticker_url, sticker.name, "sticker", "image/png")
+                    description = description or no_description
+                    called_api = True
+                elif sticker.format == discord.StickerFormatType.lottie:
+                    description = lottie_description
+                else:
+                    print(f"Skipping sticker {sticker.name} due to unsupported format: {sticker.format}")
+                    description = f"Unsupported format: {sticker.format}, visual description not applicable."
+
+                changed = await self.emoji_manager.add_sticker(name_key, str(sticker.id), guild.id, sticker_url, description)
+                if changed:
+                    processed_count += 1
+                if called_api:
+                    # Only throttle when an AI request was actually made.
+                    await asyncio.sleep(1)  # Rate limiting between AI requests
+            except Exception as e:
+                print(f"Error processing sticker {sticker.name} in guild {guild.name}: {e}")
+        print(f"Finished processing {processed_count} new/updated assets for guild: {guild.name} ({guild.id})")
+
+    async def initial_emoji_sticker_scan(self):
+        """Schedule a background emoji/sticker scan for every guild the bot is in.
+
+        One task per guild is created and all of them start immediately; this
+        method returns without awaiting them, so the caller is not blocked
+        while the scans run.
+        """
+        print("Starting initial scan of emojis and stickers for all guilds...")
+        # The event loop keeps only weak references to tasks. Hold strong
+        # references here until each task finishes so none is garbage-collected
+        # mid-scan. The set is created lazily so __init__ needs no change.
+        pending = getattr(self, "_asset_scan_tasks", None)
+        if pending is None:
+            pending = self._asset_scan_tasks = set()
+
+        tasks = []
+        for guild in self.bot.guilds:
+            task = asyncio.create_task(self._fetch_and_process_guild_assets(guild))
+            pending.add(task)
+            task.add_done_callback(pending.discard)
+            tasks.append(task)
+
+        print(f"Created {len(tasks)} tasks for initial emoji/sticker scan.")
+
+
    async def get_gurt_stats(self) -> Dict[str, Any]:
        """Collects various internal stats for Gurt."""
        stats = {"config": {}, "runtime": {}, "memory": {}, "api_stats": {}, "tool_stats": {}}
--- a/gurt/emojis.py
+++ b/gurt/emojis.py
@ -7,7 +7,7 @@ DATA_FILE_PATH = "data/custom_emojis_stickers.json"
 class EmojiManager:
    def __init__(self, data_file: str = DATA_FILE_PATH):
        self.data_file = data_file
-        # Adjusted type hint for self.data to accommodate guild_id
+        # Adjusted type hint for self.data to accommodate guild_id, url, and description
        self.data: Dict[str, Dict[str, Dict[str, Any]]] = {"emojis": {}, "stickers": {}}
        self._load_data()

@ -22,15 +22,19 @@ class EmojiManager:
                        self.data["emojis"] = {
                            name: {
                                "id": data.get("id"),
-                                "animated": data.get("animated", False), # Default animated to False
-                                "guild_id": data.get("guild_id") # Will be None if not present
+                                "animated": data.get("animated", False),
+                                "guild_id": data.get("guild_id"),
+                                "url": data.get("url"), # Load new field
+                                "description": data.get("description") # Load new field
                            }
                            for name, data in loaded_json.get("emojis", {}).items() if isinstance(data, dict)
                        }
                        self.data["stickers"] = {
                            name: {
                                "id": data.get("id"),
-                                "guild_id": data.get("guild_id") # Will be None if not present
+                                "guild_id": data.get("guild_id"),
+                                "url": data.get("url"), # Load new field
+                                "description": data.get("description") # Load new field
                            }
                            for name, data in loaded_json.get("stickers", {}).items() if isinstance(data, dict)
                        }
@ -63,14 +67,23 @@ class EmojiManager:
            print(f"Error saving emoji/sticker data: {e}")
            return False

-    async def add_emoji(self, name: str, emoji_id: str, is_animated: bool, guild_id: Optional[int]) -> bool:
-        """Adds a custom emoji with its guild ID."""
+    async def add_emoji(self, name: str, emoji_id: str, is_animated: bool, guild_id: Optional[int], url: Optional[str] = None, description: Optional[str] = None) -> bool:
+        """Adds a custom emoji with its guild ID, URL, and description."""
        if name in self.data["emojis"]:
-            # Allow update if guild_id was None and is now being set, or if ID changes
            existing_data = self.data["emojis"][name]
-            if existing_data.get("id") == emoji_id and existing_data.get("guild_id") == guild_id and existing_data.get("animated") == is_animated:
+            if (existing_data.get("id") == emoji_id and
+                existing_data.get("guild_id") == guild_id and
+                existing_data.get("animated") == is_animated and
+                existing_data.get("url") == url and
+                existing_data.get("description") == description):
                return False # No change
-        self.data["emojis"][name] = {"id": emoji_id, "animated": is_animated, "guild_id": guild_id}
+        self.data["emojis"][name] = {
+            "id": emoji_id,
+            "animated": is_animated,
+            "guild_id": guild_id,
+            "url": url,
+            "description": description
+        }
        return self._save_data()

    async def remove_emoji(self, name: str) -> bool:
@ -88,13 +101,21 @@ class EmojiManager:
        """Gets a specific custom emoji by name."""
        return self.data["emojis"].get(name)

-    async def add_sticker(self, name: str, sticker_id: str, guild_id: Optional[int]) -> bool:
-        """Adds a custom sticker with its guild ID."""
+    async def add_sticker(self, name: str, sticker_id: str, guild_id: Optional[int], url: Optional[str] = None, description: Optional[str] = None) -> bool:
+        """Adds a custom sticker with its guild ID, URL, and description."""
        if name in self.data["stickers"]:
            existing_data = self.data["stickers"][name]
-            if existing_data.get("id") == sticker_id and existing_data.get("guild_id") == guild_id:
+            if (existing_data.get("id") == sticker_id and
+                existing_data.get("guild_id") == guild_id and
+                existing_data.get("url") == url and
+                existing_data.get("description") == description):
                return False # No change
-        self.data["stickers"][name] = {"id": sticker_id, "guild_id": guild_id}
+        self.data["stickers"][name] = {
+            "id": sticker_id,
+            "guild_id": guild_id,
+            "url": url,
+            "description": description
+        }
        return self._save_data()

    async def remove_sticker(self, name: str) -> bool:
--- a/gurt/listeners.py
+++ b/gurt/listeners.py
@ -48,6 +48,8 @@ async def on_ready_listener(cog: 'GurtCog'):
        traceback.print_exc()

    # --- Message history pre-loading removed ---
+    # Call the initial emoji/sticker scan
+    await cog.initial_emoji_sticker_scan()


 async def on_message_listener(cog: 'GurtCog', message: discord.Message):
@ -634,3 +636,29 @@ async def on_reaction_remove_listener(cog: 'GurtCog', reaction: discord.Reaction
        if sentiment == "positive": cog.gurt_message_reactions[message_id]["positive"] = max(0, cog.gurt_message_reactions[message_id]["positive"] - 1)
        elif sentiment == "negative": cog.gurt_message_reactions[message_id]["negative"] = max(0, cog.gurt_message_reactions[message_id]["negative"] - 1)
        print(f"Reaction removed from Gurt msg ({message_id}). Sentiment: {sentiment}")
+
+
+# --- New Listener Functions for Guild Asset Updates ---
+
+def _schedule_guild_asset_scan(cog: 'GurtCog', guild: discord.Guild) -> asyncio.Task:
+    """Start a background asset scan for ``guild`` and keep the task referenced until it finishes."""
+    # The event loop keeps only weak references to tasks, so a task nobody
+    # holds can be garbage-collected before it completes.
+    pending = getattr(cog, "_asset_scan_tasks", None)
+    if pending is None:
+        pending = cog._asset_scan_tasks = set()
+    task = asyncio.create_task(cog._fetch_and_process_guild_assets(guild))
+    pending.add(task)
+    task.add_done_callback(pending.discard)
+    return task
+
+async def on_guild_join_listener(cog: 'GurtCog', guild: discord.Guild):
+    """Listener function for on_guild_join: scans the new guild's emojis and stickers in the background."""
+    print(f"Gurt joined a new guild: {guild.name} ({guild.id})")
+    print(f"Processing emojis and stickers for new guild: {guild.name}")
+    # Schedule the processing as a background task to avoid blocking
+    _schedule_guild_asset_scan(cog, guild)
+
+async def on_guild_emojis_update_listener(cog: 'GurtCog', guild: discord.Guild, before: List[discord.Emoji], after: List[discord.Emoji]):
+    """Listener function for on_guild_emojis_update: re-scans the guild's assets in the background."""
+    print(f"Emojis updated in guild: {guild.name} ({guild.id}). Before: {len(before)}, After: {len(after)}")
+    # For simplicity the whole guild is re-processed instead of diffing
+    # 'before' and 'after'. The scan only adds or updates entries: an emoji
+    # that was deleted or renamed keeps its old entry in EmojiManager.
+    print(f"Re-processing all emojis for guild: {guild.name}")
+    _schedule_guild_asset_scan(cog, guild)  # This re-processes stickers too.
+
+async def on_guild_stickers_update_listener(cog: 'GurtCog', guild: discord.Guild, before: List[discord.StickerItem], after: List[discord.StickerItem]):
+    """Listener function for on_guild_stickers_update: re-scans the guild's assets in the background."""
+    print(f"Stickers updated in guild: {guild.name} ({guild.id}). Before: {len(before)}, After: {len(after)}")
+    # Same approach as for emojis: re-process all assets; stale sticker
+    # entries are not removed by the scan.
+    print(f"Re-processing all stickers (and emojis) for guild: {guild.name}")
+    _schedule_guild_asset_scan(cog, guild)