feat: Implement image description generation for emojis and stickers, add listeners for guild asset updates

2025-05-29 11:08:16 -06:00 · 2025-05-29 11:08:16 -06:00 · 19d03a204d
commit 19d03a204d
parent f7a1a2134e
4 changed files with 269 additions and 19 deletions
--- a/gurt/api.py
+++ b/gurt/api.py
@ -1850,6 +1850,118 @@ async def get_proactive_ai_response(cog: 'GurtCog', message: discord.Message, tr
    return final_parsed_data, sticker_ids_to_send_proactive
 # --- AI Image Description Function ---
 async def generate_image_description(
    cog: 'GurtCog',
    image_url: str,
    item_name: str,
    item_type: str, # "emoji" or "sticker"
    mime_type: str # e.g., "image/png", "image/gif"
 ) -> Optional[str]:
    """
    Generates a textual description for an image URL using a multimodal AI model.

    The image is downloaded through ``cog.session``, sent together with a short
    prompt to ``DEFAULT_MODEL`` via ``call_google_genai_api_with_retry`` and the
    text of the answer is returned. Every failure path is logged with ``print``
    and reported to the caller as ``None``; no exception escapes this function.

    Args:
        cog: The GurtCog instance (provides the shared aiohttp ``session``).
        image_url: The URL of the image to describe.
        item_name: The name of the item (e.g., emoji name) for context.
        item_type: The type of item ("emoji" or "sticker") for context.
        mime_type: The MIME type of the image. Parameters after ``;`` are dropped.

    Returns:
        The stripped AI-generated description, or None if the clients are not
        initialised, the download fails or is empty, or the model returns no text.
    """
    if not genai_client:
        print("Error in generate_image_description: Google GenAI Client not initialized.")
        return None
    if not cog.session:
        print("Error in generate_image_description: aiohttp session not initialized in cog.")
        return None
    print(f"Attempting to generate description for {item_type} '{item_name}' from URL: {image_url}")
    try:
        # 0. Normalise the MIME type before doing any network work.
        # Unknown types only produce a warning; the request is still attempted.
        supported_image_mimes = ("image/png", "image/jpeg", "image/webp", "image/heic", "image/heif", "image/gif")
        clean_mime_type = mime_type.split(';')[0].strip().lower()
        if clean_mime_type not in supported_image_mimes:
            print(f"Warning: Image MIME type '{clean_mime_type}' from {image_url} might not be fully supported for description. Proceeding anyway.")

        # 1. Download image data
        # aiohttp expects a ClientTimeout object; passing a bare number is deprecated.
        download_timeout = aiohttp.ClientTimeout(total=15)
        async with cog.session.get(image_url, timeout=download_timeout) as response:
            if response.status != 200:
                print(f"Failed to download image from {image_url}. Status: {response.status}")
                return None
            image_bytes = await response.read()
        if not image_bytes:
            # An empty body cannot be described; avoid spending an AI call on it.
            print(f"Downloaded image from {image_url} is empty. Skipping description.")
            return None

        # 2. Prepare contents for AI
        # item_name is interpolated verbatim; it is only used as context for the model.
        prompt_text = (
            f"This image is a Discord {item_type} named '{item_name}'. "
            "Provide a concise, factual, and unbiased textual description of its visual elements. "
            "Focus solely on what is depicted. Avoid any interpretation, censorship, or subjective commentary. "
            "Do not mention that it is an emoji or sticker in your description, just describe the visual content."
        )
        image_part = types.Part(inline_data=types.Blob(data=image_bytes, mime_type=clean_mime_type))
        text_part = types.Part(text=prompt_text)
        description_contents: List[types.Content] = [types.Content(role="user", parts=[image_part, text_part])]

        # 3. Prepare Generation Config
        # Plain text answer: no response schema, no tools, shared safety settings.
        description_gen_config = types.GenerateContentConfig(
            temperature=0.4, # Lower temperature for more factual description
            max_output_tokens=256, # Descriptions should be concise
            safety_settings=STANDARD_SAFETY_SETTINGS,
            tools=None, # No tools for this task
            tool_config=None
        )

        # 4. Call AI
        # NOTE(review): assumes DEFAULT_MODEL accepts image input and plain-text output - confirm.
        model_to_use = DEFAULT_MODEL
        print(f"Calling AI for image description ({item_name}) using model: {model_to_use}")
        ai_response_obj = await call_google_genai_api_with_retry(
            cog=cog,
            model_name=model_to_use,
            contents=description_contents,
            generation_config=description_gen_config,
            request_desc=f"Image description for {item_type} '{item_name}'"
        )

        # 5. Extract text
        if not ai_response_obj:
            print(f"AI call for image description of '{item_name}' returned no response object.")
            return None
        raw_text = _get_response_text(ai_response_obj)
        # Strip before the emptiness check so a whitespace-only answer counts as "no text".
        description_text = raw_text.strip() if raw_text else ""
        if not description_text:
            print(f"AI response for '{item_name}' contained no usable text. Response: {ai_response_obj}")
            return None
        print(f"Successfully generated description for '{item_name}': {description_text[:100]}...")
        return description_text
    except aiohttp.ClientError as client_e:
        print(f"Network error downloading image {image_url} for description: {client_e}")
        return None
    except asyncio.TimeoutError:
        print(f"Timeout downloading image {image_url} for description.")
        return None
    except Exception as e:
        # Last-resort boundary: callers rely on None rather than an exception.
        print(f"Unexpected error in generate_image_description for '{item_name}': {type(e).__name__}: {e}")
        import traceback
        traceback.print_exc()
        return None
 # --- Internal AI Call for Specific Tasks ---
 async def get_internal_ai_json_response(
    cog: 'GurtCog',
--- a/gurt/cog.py
+++ b/gurt/cog.py
@ -30,14 +30,19 @@ from .config import (
    IGNORED_CHANNEL_IDS, update_ignored_channels_file # Import for ignored channels
 )
 # Import functions/classes from other modules
-from .memory import MemoryManager # Import from local memory.py
+from .memory import MemoryManager
-from .emojis import EmojiManager # Import EmojiManager
+from .emojis import EmojiManager
 from .background import background_processing_task
-from .commands import setup_commands # Import the setup helper
+from .commands import setup_commands
-from .listeners import on_ready_listener, on_message_listener, on_reaction_add_listener, on_reaction_remove_listener # Import listener functions
+from .listeners import (
-from . import config as GurtConfig # Import config module for get_gurt_stats
+    on_ready_listener, on_message_listener, on_reaction_add_listener,
    on_reaction_remove_listener, on_guild_join_listener, # Added on_guild_join_listener
    on_guild_emojis_update_listener, on_guild_stickers_update_listener # Added emoji/sticker update listeners
 )
 from . import api # Import api to access generate_image_description
 from . import config as GurtConfig
 # Tool mapping is used internally by api.py/process_requested_tools, no need to import here directly unless cog methods call tools directly (they shouldn't)
-# Analysis, context, prompt, api, utils functions are called by listeners/commands/background task, not directly by cog methods here usually.
+# Analysis, context, prompt, utils functions are called by listeners/commands/background task, not directly by cog methods here usually.
 # Load environment variables (might be loaded globally in main bot script too)
 load_dotenv()
@ -212,6 +217,21 @@ class GurtCog(commands.Cog, name="Gurt"): # Added explicit Cog name
        # This ensures the bot's application_id is properly set before syncing
        print("GurtCog: Commands will be synced when the bot is ready.")
        # Add new listeners
        # Each wrapper below forwards a guild event to the matching listener
        # function imported from .listeners, passing this cog as the first argument.
        # NOTE(review): `bot.event` is understood to keep a single handler per
        # event name, so these would replace any `on_guild_join` /
        # `on_guild_emojis_update` / `on_guild_stickers_update` handler registered
        # elsewhere through `bot.event` - confirm that is intended
        # (`bot.add_listener` is the additive alternative).
        @self.bot.event
        async def on_guild_join(guild):
            await on_guild_join_listener(self, guild)
        @self.bot.event
        async def on_guild_emojis_update(guild, before, after):
            await on_guild_emojis_update_listener(self, guild, before, after)
        @self.bot.event
        async def on_guild_stickers_update(guild, before, after):
            await on_guild_stickers_update_listener(self, guild, before, after)
        print("GurtCog: Additional guild event listeners added.")
        # Start background task
        if self.background_task is None or self.background_task.done():
            self.background_task = asyncio.create_task(background_processing_task(self))
@ -248,6 +268,75 @@ class GurtCog(commands.Cog, name="Gurt"): # Added explicit Cog name
        self.user_relationships[user_id_1][user_id_2] = new_score
        # print(f"Updated relationship {user_id_1}-{user_id_2}: {current_score:.1f} -> {new_score:.1f} ({change:+.1f})") # Debug log
    async def _fetch_and_process_guild_assets(self, guild: discord.Guild):
        """Generate and store descriptions for a guild's custom emojis and stickers.

        Every asset is stored in ``self.emoji_manager`` under the key ``:name:``.
        An asset is skipped when it is already stored with the same URL and a real
        description. Placeholder descriptions (generation failed, Lottie,
        unsupported format) never count as "done", so those assets are retried on
        the next scan. A failure on one asset is logged and the scan continues.

        The one-second pause is applied only after an AI description request, so
        bookkeeping-only writes (Lottie / unsupported stickers) do not slow the scan.

        Args:
            guild: The guild whose ``emojis`` and ``stickers`` are processed.
        """
        no_description = "No description generated."
        lottie_description = "Lottie animation, visual description not applicable."
        unsupported_prefix = "Unsupported format:"
        # GIF stickers are only defined by newer discord.py releases; look the
        # member up defensively so older installations keep working.
        gif_format = getattr(discord.StickerFormatType, "gif", None)

        print(f"Processing assets for guild: {guild.name} ({guild.id})")
        processed_count = 0

        # Emojis
        for emoji in guild.emojis:
            try:
                name_key = f":{emoji.name}:"
                emoji_url = str(emoji.url)
                mime_type = "image/gif" if emoji.animated else "image/png"

                # Skip emojis that already carry a real description for this URL.
                existing_emoji = await self.emoji_manager.get_emoji(name_key)
                if (
                    existing_emoji
                    and existing_emoji.get("url") == emoji_url
                    and existing_emoji.get("description")
                    and existing_emoji.get("description") != no_description
                ):
                    continue

                print(f"Generating description for emoji: {name_key} in guild {guild.name}")
                description = await api.generate_image_description(self, emoji_url, emoji.name, "emoji", mime_type)
                stored = await self.emoji_manager.add_emoji(
                    name_key, str(emoji.id), emoji.animated, guild.id, emoji_url, description or no_description
                )
                if stored:
                    # add_emoji reports False when nothing changed (or saving failed).
                    processed_count += 1
                await asyncio.sleep(1)  # Rate limiting between AI requests
            except Exception as e:
                print(f"Error processing emoji {emoji.name} in guild {guild.name}: {e}")

        # Stickers
        for sticker in guild.stickers:
            try:
                name_key = f":{sticker.name}:"
                sticker_url = str(sticker.url)

                existing_sticker = await self.emoji_manager.get_sticker(name_key)
                existing_description = existing_sticker.get("description") if existing_sticker else None
                if (
                    existing_sticker
                    and existing_sticker.get("url") == sticker_url
                    and existing_description
                    and existing_description not in (no_description, lottie_description)
                    # Stickers stored as "unsupported" are re-checked so that formats
                    # which have become describable get a real description.
                    and not str(existing_description).startswith(unsupported_prefix)
                ):
                    continue

                print(f"Generating description for sticker: {sticker.name} in guild {guild.name}")
                sticker_format = sticker.format
                if sticker_format in (discord.StickerFormatType.png, discord.StickerFormatType.apng):
                    mime_type = "image/png" # APNG is sent as image/png
                elif gif_format is not None and sticker_format == gif_format:
                    mime_type = "image/gif"
                else:
                    mime_type = None

                requested_ai = False
                if mime_type is not None:
                    description = await api.generate_image_description(self, sticker_url, sticker.name, "sticker", mime_type)
                    requested_ai = True
                    stored = await self.emoji_manager.add_sticker(
                        name_key, str(sticker.id), guild.id, sticker_url, description or no_description
                    )
                elif sticker_format == discord.StickerFormatType.lottie:
                    # Lottie stickers are vector animations, not raster images.
                    stored = await self.emoji_manager.add_sticker(
                        name_key, str(sticker.id), guild.id, sticker_url, lottie_description
                    )
                else:
                    print(f"Skipping sticker {sticker.name} due to unsupported format: {sticker.format}")
                    stored = await self.emoji_manager.add_sticker(
                        name_key, str(sticker.id), guild.id, sticker_url,
                        f"Unsupported format: {sticker.format}, visual description not applicable."
                    )

                if stored:
                    processed_count += 1
                if requested_ai:
                    await asyncio.sleep(1)  # Rate limiting between AI requests
            except Exception as e:
                print(f"Error processing sticker {sticker.name} in guild {guild.name}: {e}")

        print(f"Finished processing {processed_count} new/updated assets for guild: {guild.name} ({guild.id})")
    async def initial_emoji_sticker_scan(self):
        """Start a background emoji/sticker scan for every guild the bot is in.

        One task per guild is created and the method returns immediately, so the
        caller (the ready handler) is not blocked. All guild scans start at once;
        pacing happens inside ``_fetch_and_process_guild_assets``.

        The tasks are kept in ``self._asset_scan_tasks`` until they finish: the
        event loop only holds weak references to tasks, so a task nobody
        references may be garbage-collected before it completes.
        """
        print("Starting initial scan of emojis and stickers for all guilds...")

        # Created lazily so the method does not depend on __init__ defining the set.
        scan_tasks = getattr(self, "_asset_scan_tasks", None)
        if scan_tasks is None:
            scan_tasks = set()
            self._asset_scan_tasks = scan_tasks

        created = 0
        for guild in self.bot.guilds:
            task = asyncio.create_task(self._fetch_and_process_guild_assets(guild))
            scan_tasks.add(task)
            # Drop the strong reference again as soon as the scan is done.
            task.add_done_callback(scan_tasks.discard)
            created += 1

        print(f"Created {created} tasks for initial emoji/sticker scan.")
    async def get_gurt_stats(self) -> Dict[str, Any]:
        """Collects various internal stats for Gurt."""
        stats = {"config": {}, "runtime": {}, "memory": {}, "api_stats": {}, "tool_stats": {}}
--- a/gurt/emojis.py
+++ b/gurt/emojis.py
@ -7,7 +7,7 @@ DATA_FILE_PATH = "data/custom_emojis_stickers.json"
 class EmojiManager:
    def __init__(self, data_file: str = DATA_FILE_PATH):
        self.data_file = data_file
-        # Adjusted type hint for self.data to accommodate guild_id
+        # Adjusted type hint for self.data to accommodate guild_id, url, and description
        self.data: Dict[str, Dict[str, Dict[str, Any]]] = {"emojis": {}, "stickers": {}}
        self._load_data()
@ -22,15 +22,19 @@ class EmojiManager:
                        self.data["emojis"] = {
                            name: {
                                "id": data.get("id"),
-                                "animated": data.get("animated", False), # Default animated to False
+                                "animated": data.get("animated", False),
-                                "guild_id": data.get("guild_id") # Will be None if not present
+                                "guild_id": data.get("guild_id"),
                                "url": data.get("url"), # Load new field
                                "description": data.get("description") # Load new field
                            }
                            for name, data in loaded_json.get("emojis", {}).items() if isinstance(data, dict)
                        }
                        self.data["stickers"] = {
                            name: {
                                "id": data.get("id"),
-                                "guild_id": data.get("guild_id") # Will be None if not present
+                                "guild_id": data.get("guild_id"),
                                "url": data.get("url"), # Load new field
                                "description": data.get("description") # Load new field
                            }
                            for name, data in loaded_json.get("stickers", {}).items() if isinstance(data, dict)
                        }
@ -63,14 +67,23 @@ class EmojiManager:
            print(f"Error saving emoji/sticker data: {e}")
            return False
-    async def add_emoji(self, name: str, emoji_id: str, is_animated: bool, guild_id: Optional[int]) -> bool:
+    async def add_emoji(self, name: str, emoji_id: str, is_animated: bool, guild_id: Optional[int], url: Optional[str] = None, description: Optional[str] = None) -> bool:
-        """Adds a custom emoji with its guild ID."""
+        """Adds a custom emoji with its guild ID, URL, and description."""
        if name in self.data["emojis"]:
            # Allow update if guild_id was None and is now being set, or if ID changes
            existing_data = self.data["emojis"][name]
-            if existing_data.get("id") == emoji_id and existing_data.get("guild_id") == guild_id and existing_data.get("animated") == is_animated:
+            if (existing_data.get("id") == emoji_id and
                existing_data.get("guild_id") == guild_id and
                existing_data.get("animated") == is_animated and
                existing_data.get("url") == url and
                existing_data.get("description") == description):
                return False # No change
-        self.data["emojis"][name] = {"id": emoji_id, "animated": is_animated, "guild_id": guild_id}
+        self.data["emojis"][name] = {
            "id": emoji_id,
            "animated": is_animated,
            "guild_id": guild_id,
            "url": url,
            "description": description
        }
        return self._save_data()
    async def remove_emoji(self, name: str) -> bool:
@ -88,13 +101,21 @@ class EmojiManager:
        """Gets a specific custom emoji by name."""
        return self.data["emojis"].get(name)
-    async def add_sticker(self, name: str, sticker_id: str, guild_id: Optional[int]) -> bool:
+    async def add_sticker(self, name: str, sticker_id: str, guild_id: Optional[int], url: Optional[str] = None, description: Optional[str] = None) -> bool:
-        """Adds a custom sticker with its guild ID."""
+        """Adds a custom sticker with its guild ID, URL, and description."""
        if name in self.data["stickers"]:
            existing_data = self.data["stickers"][name]
-            if existing_data.get("id") == sticker_id and existing_data.get("guild_id") == guild_id:
+            if (existing_data.get("id") == sticker_id and
                existing_data.get("guild_id") == guild_id and
                existing_data.get("url") == url and
                existing_data.get("description") == description):
                return False # No change
-        self.data["stickers"][name] = {"id": sticker_id, "guild_id": guild_id}
+        self.data["stickers"][name] = {
            "id": sticker_id,
            "guild_id": guild_id,
            "url": url,
            "description": description
        }
        return self._save_data()
    async def remove_sticker(self, name: str) -> bool:
--- a/gurt/listeners.py
+++ b/gurt/listeners.py
@ -48,6 +48,8 @@ async def on_ready_listener(cog: 'GurtCog'):
        traceback.print_exc()
    # --- Message history pre-loading removed ---
    # Call the initial emoji/sticker scan
    await cog.initial_emoji_sticker_scan()
 async def on_message_listener(cog: 'GurtCog', message: discord.Message):
@ -634,3 +636,29 @@ async def on_reaction_remove_listener(cog: 'GurtCog', reaction: discord.Reaction
        if sentiment == "positive": cog.gurt_message_reactions[message_id]["positive"] = max(0, cog.gurt_message_reactions[message_id]["positive"] - 1)
        elif sentiment == "negative": cog.gurt_message_reactions[message_id]["negative"] = max(0, cog.gurt_message_reactions[message_id]["negative"] - 1)
        print(f"Reaction removed from Gurt msg ({message_id}). Sentiment: {sentiment}")
 # --- New Listener Functions for Guild Asset Updates ---
 async def on_guild_join_listener(cog: 'GurtCog', guild: discord.Guild):
    """Listener function for on_guild_join.

    Schedules a background scan of the new guild's emojis and stickers and
    returns immediately so the event handler is not blocked.

    Args:
        cog: The GurtCog instance that owns the asset scan.
        guild: The guild the bot has just joined.
    """
    print(f"Gurt joined a new guild: {guild.name} ({guild.id})")
    print(f"Processing emojis and stickers for new guild: {guild.name}")
    # Schedule the processing as a background task to avoid blocking
    task = asyncio.create_task(cog._fetch_and_process_guild_assets(guild))
    # The event loop only keeps weak references to tasks; hold a strong one on
    # the cog until the scan finishes so it cannot be garbage-collected mid-run.
    pending_scans = getattr(cog, "_asset_scan_tasks", None)
    if pending_scans is None:
        pending_scans = set()
        cog._asset_scan_tasks = pending_scans
    pending_scans.add(task)
    task.add_done_callback(pending_scans.discard)
 async def on_guild_emojis_update_listener(cog: 'GurtCog', guild: discord.Guild, before: List[discord.Emoji], after: List[discord.Emoji]):
    """Listener function for on_guild_emojis_update.

    First removes stored entries for emojis that were deleted or renamed, then
    schedules a background re-scan of the guild so added or changed emojis get
    a description.

    Args:
        cog: The GurtCog instance (provides ``emoji_manager`` and the asset scan).
        guild: The guild whose emoji list changed.
        before: The guild's emojis before the update.
        after: The guild's emojis after the update.
    """
    print(f"Emojis updated in guild: {guild.name} ({guild.id}). Before: {len(before)}, After: {len(after)}")

    # The re-scan below only adds or refreshes entries, so emojis that vanished
    # (deleted, or renamed and therefore stored under an old key) are dropped here.
    still_present = {(emoji.id, emoji.name) for emoji in after}
    for old_emoji in before:
        if (old_emoji.id, old_emoji.name) in still_present:
            continue
        stale_key = f":{old_emoji.name}:"
        try:
            stored = await cog.emoji_manager.get_emoji(stale_key)
            # Keys are not guild-scoped: only remove the entry when it really
            # belongs to the emoji that disappeared.
            if stored and str(stored.get("id")) == str(old_emoji.id):
                await cog.emoji_manager.remove_emoji(stale_key)
                print(f"Removed stale emoji {stale_key} for guild: {guild.name}")
        except Exception as e:
            print(f"Error removing stale emoji {old_emoji.name} in guild {guild.name}: {e}")

    # Re-process the whole guild for simplicity; this re-checks stickers too.
    print(f"Re-processing all emojis for guild: {guild.name}")
    task = asyncio.create_task(cog._fetch_and_process_guild_assets(guild))
    # The event loop only keeps weak references to tasks; hold a strong one on
    # the cog until the scan finishes so it cannot be garbage-collected mid-run.
    pending_scans = getattr(cog, "_asset_scan_tasks", None)
    if pending_scans is None:
        pending_scans = set()
        cog._asset_scan_tasks = pending_scans
    pending_scans.add(task)
    task.add_done_callback(pending_scans.discard)
 async def on_guild_stickers_update_listener(cog: 'GurtCog', guild: discord.Guild, before: List[discord.StickerItem], after: List[discord.StickerItem]):
    """Listener function for on_guild_stickers_update.

    First removes stored entries for stickers that were deleted or renamed,
    then schedules a background re-scan of the guild so added or changed
    stickers get a description.

    Args:
        cog: The GurtCog instance (provides ``emoji_manager`` and the asset scan).
        guild: The guild whose sticker list changed.
        before: The guild's stickers before the update.
        after: The guild's stickers after the update.
    """
    print(f"Stickers updated in guild: {guild.name} ({guild.id}). Before: {len(before)}, After: {len(after)}")

    # The re-scan below only adds or refreshes entries, so stickers that vanished
    # (deleted, or renamed and therefore stored under an old key) are dropped here.
    still_present = {(sticker.id, sticker.name) for sticker in after}
    for old_sticker in before:
        if (old_sticker.id, old_sticker.name) in still_present:
            continue
        stale_key = f":{old_sticker.name}:"
        try:
            stored = await cog.emoji_manager.get_sticker(stale_key)
            # Keys are not guild-scoped: only remove the entry when it really
            # belongs to the sticker that disappeared.
            if stored and str(stored.get("id")) == str(old_sticker.id):
                await cog.emoji_manager.remove_sticker(stale_key)
                print(f"Removed stale sticker {stale_key} for guild: {guild.name}")
        except Exception as e:
            print(f"Error removing stale sticker {old_sticker.name} in guild {guild.name}: {e}")

    # Similar to emojis, re-process all assets for simplicity.
    print(f"Re-processing all stickers (and emojis) for guild: {guild.name}")
    task = asyncio.create_task(cog._fetch_and_process_guild_assets(guild))
    # The event loop only keeps weak references to tasks; hold a strong one on
    # the cog until the scan finishes so it cannot be garbage-collected mid-run.
    pending_scans = getattr(cog, "_asset_scan_tasks", None)
    if pending_scans is None:
        pending_scans = set()
        cog._asset_scan_tasks = pending_scans
    pending_scans.add(task)
    task.add_done_callback(pending_scans.discard)