feat: Implement image description generation for emojis and stickers, add listeners for guild asset updates

This commit is contained in:
Slipstream 2025-05-29 11:08:16 -06:00
parent f7a1a2134e
commit 19d03a204d
Signed by: slipstream
GPG Key ID: 13E498CE010AC6FD
4 changed files with 269 additions and 19 deletions

View File

@ -1850,6 +1850,118 @@ async def get_proactive_ai_response(cog: 'GurtCog', message: discord.Message, tr
return final_parsed_data, sticker_ids_to_send_proactive return final_parsed_data, sticker_ids_to_send_proactive
# --- AI Image Description Function ---
async def generate_image_description(
    cog: 'GurtCog',
    image_url: str,
    item_name: str,
    item_type: str,  # "emoji" or "sticker"
    mime_type: str  # e.g., "image/png", "image/gif"
) -> Optional[str]:
    """
    Generates a textual description for an image URL using a multimodal AI model.

    The image is downloaded through the cog's aiohttp session, sent inline to
    the model together with a short instruction prompt, and the model's text
    reply is returned stripped of surrounding whitespace.

    Args:
        cog: The GurtCog instance; its ``session`` is used for the download.
        image_url: The URL of the image to describe.
        item_name: The name of the item (e.g., emoji name) for context.
        item_type: The type of item ("emoji" or "sticker") for context.
        mime_type: The MIME type of the image; any ";parameter" suffix is dropped.

    Returns:
        The AI-generated description string, or None if the client or session
        is missing, the download fails, the model returns no text, or any
        other error occurs. This function logs failures and does not raise.
    """
    if not genai_client:
        print("Error in generate_image_description: Google GenAI Client not initialized.")
        return None
    if not cog.session:
        print("Error in generate_image_description: aiohttp session not initialized in cog.")
        return None

    print(f"Attempting to generate description for {item_type} '{item_name}' from URL: {image_url}")
    try:
        # 1. Download image data. Network/timeout handling lives in the helper so
        #    that a failure of the AI call below is never reported as a download problem.
        image_bytes = await _download_image_bytes(cog, image_url)
        if image_bytes is None:
            return None

        # Validate MIME type. An unlisted type only produces a warning; the
        # request is still sent with the caller-provided (cleaned) type.
        supported_image_mimes = ["image/png", "image/jpeg", "image/webp", "image/heic", "image/heif", "image/gif"]
        clean_mime_type = mime_type.split(';')[0].lower()
        if clean_mime_type not in supported_image_mimes:
            print(f"Warning: Image MIME type '{clean_mime_type}' from {image_url} might not be fully supported for description. Proceeding anyway.")

        # 2. Prepare contents for AI
        # NOTE(review): item_name is interpolated verbatim into the prompt; it is
        # assumed to be a harmless Discord asset name.
        prompt_text = (
            f"This image is a Discord {item_type} named '{item_name}'. "
            "Provide a concise, factual, and unbiased textual description of its visual elements. "
            "Focus solely on what is depicted. Avoid any interpretation, censorship, or subjective commentary. "
            "Do not mention that it is an emoji or sticker in your description, just describe the visual content."
        )
        image_part = types.Part(inline_data=types.Blob(data=image_bytes, mime_type=clean_mime_type))
        text_part = types.Part(text=prompt_text)
        description_contents: List[types.Content] = [types.Content(role="user", parts=[image_part, text_part])]

        # 3. Prepare Generation Config
        # Plain-text response: no response schema, no tools, no system prompt.
        description_gen_config = types.GenerateContentConfig(
            temperature=0.4,  # Lower temperature for more factual description
            max_output_tokens=256,  # Descriptions should be concise
            safety_settings=STANDARD_SAFETY_SETTINGS,
            tools=None,  # No tools for this task
            tool_config=None
        )

        # 4. Call AI
        # NOTE(review): assumes DEFAULT_MODEL accepts image input - confirm.
        model_to_use = DEFAULT_MODEL
        print(f"Calling AI for image description ({item_name}) using model: {model_to_use}")
        ai_response_obj = await call_google_genai_api_with_retry(
            cog=cog,
            model_name=model_to_use,
            contents=description_contents,
            generation_config=description_gen_config,
            request_desc=f"Image description for {item_type} '{item_name}'"
        )

        # 5. Extract text
        if not ai_response_obj:
            print(f"AI call for image description of '{item_name}' returned no response object.")
            return None

        description_text = _get_response_text(ai_response_obj)
        if description_text:
            print(f"Successfully generated description for '{item_name}': {description_text[:100]}...")
            return description_text.strip()

        print(f"AI response for '{item_name}' contained no usable text. Response: {ai_response_obj}")
        return None
    except Exception as e:
        # Best-effort boundary: callers treat None as "no description available".
        print(f"Unexpected error in generate_image_description for '{item_name}': {type(e).__name__}: {e}")
        import traceback
        traceback.print_exc()
        return None


async def _download_image_bytes(cog: 'GurtCog', image_url: str) -> Optional[bytes]:
    """Fetch the raw bytes at image_url, or return None (after logging) on any download failure."""
    try:
        # Explicit ClientTimeout instead of a bare number; 15 seconds total, as before.
        request_timeout = aiohttp.ClientTimeout(total=15)
        async with cog.session.get(image_url, timeout=request_timeout) as response:
            if response.status != 200:
                print(f"Failed to download image from {image_url}. Status: {response.status}")
                return None
            image_bytes = await response.read()
    except aiohttp.ClientError as client_e:
        print(f"Network error downloading image {image_url} for description: {client_e}")
        return None
    except asyncio.TimeoutError:
        print(f"Timeout downloading image {image_url} for description.")
        return None

    if not image_bytes:
        # An empty body cannot be described; skip the AI call entirely.
        print(f"Downloaded image from {image_url} is empty; skipping description.")
        return None
    return image_bytes
# --- Internal AI Call for Specific Tasks --- # --- Internal AI Call for Specific Tasks ---
async def get_internal_ai_json_response( async def get_internal_ai_json_response(
cog: 'GurtCog', cog: 'GurtCog',

View File

@ -30,14 +30,19 @@ from .config import (
IGNORED_CHANNEL_IDS, update_ignored_channels_file # Import for ignored channels IGNORED_CHANNEL_IDS, update_ignored_channels_file # Import for ignored channels
) )
# Import functions/classes from other modules # Import functions/classes from other modules
from .memory import MemoryManager # Import from local memory.py from .memory import MemoryManager
from .emojis import EmojiManager # Import EmojiManager from .emojis import EmojiManager
from .background import background_processing_task from .background import background_processing_task
from .commands import setup_commands # Import the setup helper from .commands import setup_commands
from .listeners import on_ready_listener, on_message_listener, on_reaction_add_listener, on_reaction_remove_listener # Import listener functions from .listeners import (
from . import config as GurtConfig # Import config module for get_gurt_stats on_ready_listener, on_message_listener, on_reaction_add_listener,
on_reaction_remove_listener, on_guild_join_listener, # Added on_guild_join_listener
on_guild_emojis_update_listener, on_guild_stickers_update_listener # Added emoji/sticker update listeners
)
from . import api # Import api to access generate_image_description
from . import config as GurtConfig
# Tool mapping is used internally by api.py/process_requested_tools, no need to import here directly unless cog methods call tools directly (they shouldn't) # Tool mapping is used internally by api.py/process_requested_tools, no need to import here directly unless cog methods call tools directly (they shouldn't)
# Analysis, context, prompt, api, utils functions are called by listeners/commands/background task, not directly by cog methods here usually. # Analysis, context, prompt, utils functions are called by listeners/commands/background task, not directly by cog methods here usually.
# Load environment variables (might be loaded globally in main bot script too) # Load environment variables (might be loaded globally in main bot script too)
load_dotenv() load_dotenv()
@ -212,6 +217,21 @@ class GurtCog(commands.Cog, name="Gurt"): # Added explicit Cog name
# This ensures the bot's application_id is properly set before syncing # This ensures the bot's application_id is properly set before syncing
print("GurtCog: Commands will be synced when the bot is ready.") print("GurtCog: Commands will be synced when the bot is ready.")
# Add new listeners
@self.bot.event
async def on_guild_join(guild):
await on_guild_join_listener(self, guild)
@self.bot.event
async def on_guild_emojis_update(guild, before, after):
await on_guild_emojis_update_listener(self, guild, before, after)
@self.bot.event
async def on_guild_stickers_update(guild, before, after):
await on_guild_stickers_update_listener(self, guild, before, after)
print("GurtCog: Additional guild event listeners added.")
# Start background task # Start background task
if self.background_task is None or self.background_task.done(): if self.background_task is None or self.background_task.done():
self.background_task = asyncio.create_task(background_processing_task(self)) self.background_task = asyncio.create_task(background_processing_task(self))
@ -248,6 +268,75 @@ class GurtCog(commands.Cog, name="Gurt"): # Added explicit Cog name
self.user_relationships[user_id_1][user_id_2] = new_score self.user_relationships[user_id_1][user_id_2] = new_score
# print(f"Updated relationship {user_id_1}-{user_id_2}: {current_score:.1f} -> {new_score:.1f} ({change:+.1f})") # Debug log # print(f"Updated relationship {user_id_1}-{user_id_2}: {current_score:.1f} -> {new_score:.1f} ({change:+.1f})") # Debug log
async def _fetch_and_process_guild_assets(self, guild: discord.Guild):
    """Describe every emoji and sticker of a guild and record the result in EmojiManager.

    Assets whose stored URL is unchanged and which already carry a real
    description are skipped. Each processed asset is followed by a one second
    pause, and a failure on one asset is logged without stopping the others.
    """
    print(f"Processing assets for guild: {guild.name} ({guild.id})")
    handled = 0

    # Emojis
    for emoji in guild.emojis:
        try:
            key = f":{emoji.name}:"
            url = str(emoji.url)
            if emoji.animated:
                mime_type = "image/gif"
            else:
                mime_type = "image/png"

            # Skip when the stored entry matches this URL and holds a real description.
            stored = await self.emoji_manager.get_emoji(key)
            if stored and stored.get("url") == url:
                stored_description = stored.get("description")
                if stored_description and stored_description != "No description generated.":
                    continue

            print(f"Generating description for emoji: {key} in guild {guild.name}")
            generated = await api.generate_image_description(self, url, emoji.name, "emoji", mime_type)
            await self.emoji_manager.add_emoji(
                key,
                str(emoji.id),
                emoji.animated,
                guild.id,
                url,
                generated or "No description generated.",
            )
            handled += 1
            await asyncio.sleep(1)  # Rate limiting
        except Exception as e:
            print(f"Error processing emoji {emoji.name} in guild {guild.name}: {e}")

    # Stickers
    for sticker in guild.stickers:
        try:
            key = f":{sticker.name}:"
            url = str(sticker.url)

            # Placeholder descriptions do not count as "already processed".
            stored = await self.emoji_manager.get_sticker(key)
            if stored and stored.get("url") == url:
                stored_description = stored.get("description")
                if stored_description and stored_description not in [
                    "No description generated.",
                    "Lottie animation, visual description not applicable.",
                ]:
                    continue

            print(f"Generating description for sticker: {sticker.name} in guild {guild.name}")
            sticker_id = str(sticker.id)
            if sticker.format in (discord.StickerFormatType.png, discord.StickerFormatType.apng):
                # APNG is sent with the image/png MIME type as well.
                mime_type = "image/png"
                generated = await api.generate_image_description(self, url, sticker.name, "sticker", mime_type)
                await self.emoji_manager.add_sticker(
                    key, sticker_id, guild.id, url, generated or "No description generated."
                )
            elif sticker.format == discord.StickerFormatType.lottie:
                await self.emoji_manager.add_sticker(
                    key, sticker_id, guild.id, url, "Lottie animation, visual description not applicable."
                )
            else:
                print(f"Skipping sticker {sticker.name} due to unsupported format: {sticker.format}")
                await self.emoji_manager.add_sticker(
                    key,
                    sticker_id,
                    guild.id,
                    url,
                    f"Unsupported format: {sticker.format}, visual description not applicable.",
                )
            handled += 1
            await asyncio.sleep(1)  # Rate limiting
        except Exception as e:
            print(f"Error processing sticker {sticker.name} in guild {guild.name}: {e}")

    print(f"Finished processing {handled} new/updated assets for guild: {guild.name} ({guild.id})")
async def initial_emoji_sticker_scan(self):
    """Scans all guilds GURT is in on startup for emojis and stickers.

    One background task per guild is started immediately (all guilds run
    concurrently) and this method returns without waiting for them, so the
    caller is not blocked by the scan.

    The event loop only keeps weak references to tasks, so every task is also
    stored in ``self._guild_asset_tasks`` until it finishes; otherwise a
    running scan could be garbage-collected before completion.
    """
    print("Starting initial scan of emojis and stickers for all guilds...")

    # Created lazily so this method does not depend on __init__ defining the set.
    running = getattr(self, "_guild_asset_tasks", None)
    if running is None:
        running = set()
        self._guild_asset_tasks = running

    scheduled = 0
    for guild in self.bot.guilds:
        task = asyncio.create_task(self._fetch_and_process_guild_assets(guild))
        running.add(task)
        # Drop the strong reference once the scan for this guild is done.
        task.add_done_callback(running.discard)
        scheduled += 1

    print(f"Created {scheduled} tasks for initial emoji/sticker scan.")
async def get_gurt_stats(self) -> Dict[str, Any]: async def get_gurt_stats(self) -> Dict[str, Any]:
"""Collects various internal stats for Gurt.""" """Collects various internal stats for Gurt."""
stats = {"config": {}, "runtime": {}, "memory": {}, "api_stats": {}, "tool_stats": {}} stats = {"config": {}, "runtime": {}, "memory": {}, "api_stats": {}, "tool_stats": {}}

View File

@ -7,7 +7,7 @@ DATA_FILE_PATH = "data/custom_emojis_stickers.json"
class EmojiManager: class EmojiManager:
def __init__(self, data_file: str = DATA_FILE_PATH): def __init__(self, data_file: str = DATA_FILE_PATH):
self.data_file = data_file self.data_file = data_file
# Adjusted type hint for self.data to accommodate guild_id # Adjusted type hint for self.data to accommodate guild_id, url, and description
self.data: Dict[str, Dict[str, Dict[str, Any]]] = {"emojis": {}, "stickers": {}} self.data: Dict[str, Dict[str, Dict[str, Any]]] = {"emojis": {}, "stickers": {}}
self._load_data() self._load_data()
@ -22,15 +22,19 @@ class EmojiManager:
self.data["emojis"] = { self.data["emojis"] = {
name: { name: {
"id": data.get("id"), "id": data.get("id"),
"animated": data.get("animated", False), # Default animated to False "animated": data.get("animated", False),
"guild_id": data.get("guild_id") # Will be None if not present "guild_id": data.get("guild_id"),
"url": data.get("url"), # Load new field
"description": data.get("description") # Load new field
} }
for name, data in loaded_json.get("emojis", {}).items() if isinstance(data, dict) for name, data in loaded_json.get("emojis", {}).items() if isinstance(data, dict)
} }
self.data["stickers"] = { self.data["stickers"] = {
name: { name: {
"id": data.get("id"), "id": data.get("id"),
"guild_id": data.get("guild_id") # Will be None if not present "guild_id": data.get("guild_id"),
"url": data.get("url"), # Load new field
"description": data.get("description") # Load new field
} }
for name, data in loaded_json.get("stickers", {}).items() if isinstance(data, dict) for name, data in loaded_json.get("stickers", {}).items() if isinstance(data, dict)
} }
@ -63,14 +67,23 @@ class EmojiManager:
print(f"Error saving emoji/sticker data: {e}") print(f"Error saving emoji/sticker data: {e}")
return False return False
async def add_emoji(self, name: str, emoji_id: str, is_animated: bool, guild_id: Optional[int], url: Optional[str] = None, description: Optional[str] = None) -> bool:
    """Store (or overwrite) a custom emoji entry and persist the data file.

    Returns False without saving when an entry with the same name already
    holds exactly these values; otherwise returns the result of the save.
    """
    entry = {
        "id": emoji_id,
        "animated": is_animated,
        "guild_id": guild_id,
        "url": url,
        "description": description,
    }
    emojis = self.data["emojis"]
    if name in emojis:
        current = emojis[name]
        # Field-by-field comparison, so extra keys on the stored entry are ignored.
        if all(current.get(field) == value for field, value in entry.items()):
            return False  # No change
    emojis[name] = entry
    return self._save_data()
async def remove_emoji(self, name: str) -> bool: async def remove_emoji(self, name: str) -> bool:
@ -88,13 +101,21 @@ class EmojiManager:
"""Gets a specific custom emoji by name.""" """Gets a specific custom emoji by name."""
return self.data["emojis"].get(name) return self.data["emojis"].get(name)
async def add_sticker(self, name: str, sticker_id: str, guild_id: Optional[int], url: Optional[str] = None, description: Optional[str] = None) -> bool:
    """Store (or overwrite) a custom sticker entry and persist the data file.

    Returns False without saving when an entry with the same name already
    holds exactly these values; otherwise returns the result of the save.
    """
    entry = {
        "id": sticker_id,
        "guild_id": guild_id,
        "url": url,
        "description": description,
    }
    stickers = self.data["stickers"]
    if name in stickers:
        current = stickers[name]
        # Field-by-field comparison, so extra keys on the stored entry are ignored.
        if all(current.get(field) == value for field, value in entry.items()):
            return False  # No change
    stickers[name] = entry
    return self._save_data()
async def remove_sticker(self, name: str) -> bool: async def remove_sticker(self, name: str) -> bool:

View File

@ -48,6 +48,8 @@ async def on_ready_listener(cog: 'GurtCog'):
traceback.print_exc() traceback.print_exc()
# --- Message history pre-loading removed --- # --- Message history pre-loading removed ---
# Call the initial emoji/sticker scan
await cog.initial_emoji_sticker_scan()
async def on_message_listener(cog: 'GurtCog', message: discord.Message): async def on_message_listener(cog: 'GurtCog', message: discord.Message):
@ -634,3 +636,29 @@ async def on_reaction_remove_listener(cog: 'GurtCog', reaction: discord.Reaction
if sentiment == "positive": cog.gurt_message_reactions[message_id]["positive"] = max(0, cog.gurt_message_reactions[message_id]["positive"] - 1) if sentiment == "positive": cog.gurt_message_reactions[message_id]["positive"] = max(0, cog.gurt_message_reactions[message_id]["positive"] - 1)
elif sentiment == "negative": cog.gurt_message_reactions[message_id]["negative"] = max(0, cog.gurt_message_reactions[message_id]["negative"] - 1) elif sentiment == "negative": cog.gurt_message_reactions[message_id]["negative"] = max(0, cog.gurt_message_reactions[message_id]["negative"] - 1)
print(f"Reaction removed from Gurt msg ({message_id}). Sentiment: {sentiment}") print(f"Reaction removed from Gurt msg ({message_id}). Sentiment: {sentiment}")
# --- New Listener Functions for Guild Asset Updates ---
def _schedule_guild_asset_refresh(cog: 'GurtCog', guild: "discord.Guild") -> "asyncio.Task":
    """Start a background emoji/sticker scan for guild and keep it referenced until it finishes."""
    # The event loop holds tasks only weakly, so an unreferenced task may be
    # garbage-collected mid-run. The set is created lazily on the cog so this
    # helper does not depend on GurtCog.__init__ defining it.
    running = getattr(cog, "_guild_asset_tasks", None)
    if running is None:
        running = set()
        cog._guild_asset_tasks = running
    task = asyncio.create_task(cog._fetch_and_process_guild_assets(guild))
    running.add(task)
    task.add_done_callback(running.discard)
    return task


async def on_guild_join_listener(cog: 'GurtCog', guild: "discord.Guild"):
    """Listener function for on_guild_join.

    Logs the join and schedules a scan of the new guild's emojis and stickers
    as a background task, so the event handler itself returns immediately.
    """
    print(f"Gurt joined a new guild: {guild.name} ({guild.id})")
    print(f"Processing emojis and stickers for new guild: {guild.name}")
    _schedule_guild_asset_refresh(cog, guild)


async def on_guild_emojis_update_listener(cog: 'GurtCog', guild: "discord.Guild", before: "List[discord.Emoji]", after: "List[discord.Emoji]"):
    """Listener function for on_guild_emojis_update.

    The before/after lists are only used for logging: instead of diffing them,
    all assets of the guild are re-processed in a background task (this covers
    stickers too).
    """
    print(f"Emojis updated in guild: {guild.name} ({guild.id}). Before: {len(before)}, After: {len(after)}")
    print(f"Re-processing all emojis for guild: {guild.name}")
    _schedule_guild_asset_refresh(cog, guild)


async def on_guild_stickers_update_listener(cog: 'GurtCog', guild: "discord.Guild", before: "List[discord.StickerItem]", after: "List[discord.StickerItem]"):
    """Listener function for on_guild_stickers_update.

    As with emoji updates, the before/after lists are only logged and every
    asset of the guild is re-processed in a background task.
    """
    print(f"Stickers updated in guild: {guild.name} ({guild.id}). Before: {len(before)}, After: {len(after)}")
    print(f"Re-processing all stickers (and emojis) for guild: {guild.name}")
    _schedule_guild_asset_refresh(cog, guild)