feat: Implement image description generation for emojis and stickers, add listeners for guild asset updates
This commit is contained in:
parent
f7a1a2134e
commit
19d03a204d
112
gurt/api.py
112
gurt/api.py
@ -1850,6 +1850,118 @@ async def get_proactive_ai_response(cog: 'GurtCog', message: discord.Message, tr
|
||||
return final_parsed_data, sticker_ids_to_send_proactive
|
||||
|
||||
|
||||
# --- AI Image Description Function ---
|
||||
async def generate_image_description(
    cog: 'GurtCog',
    image_url: str,
    item_name: str,
    item_type: str,  # "emoji" or "sticker"
    mime_type: str  # e.g., "image/png", "image/gif"
) -> Optional[str]:
    """
    Generates a textual description for an image URL using a multimodal AI model.

    The image is downloaded through ``cog.session``, sent inline together with
    a short instruction prompt to ``DEFAULT_MODEL`` via
    ``call_google_genai_api_with_retry``, and the text of the reply is returned.

    Args:
        cog: The GurtCog instance (its ``session`` attribute is used for the download).
        image_url: The URL of the image to describe.
        item_name: The name of the item (e.g., emoji name) for context.
        item_type: The type of item ("emoji" or "sticker") for context.
        mime_type: The MIME type of the image. Any ``;parameter`` suffix is dropped
            and the value is lower-cased before it is sent to the model.

    Returns:
        The stripped AI-generated description, or None if the client/session is
        missing, the download fails or is empty, the model returns no usable
        text, or any error occurs. This function never raises.
    """
    if not genai_client:
        print("Error in generate_image_description: Google GenAI Client not initialized.")
        return None
    if not cog.session:
        print("Error in generate_image_description: aiohttp session not initialized in cog.")
        return None

    print(f"Attempting to generate description for {item_type} '{item_name}' from URL: {image_url}")

    try:
        # 1. Download image data
        # aiohttp documents ClientTimeout as the timeout type; a bare number is
        # only accepted for backward compatibility.
        download_timeout = aiohttp.ClientTimeout(total=15)
        async with cog.session.get(image_url, timeout=download_timeout) as response:
            if response.status != 200:
                print(f"Failed to download image from {image_url}. Status: {response.status}")
                return None
            image_bytes = await response.read()

        if not image_bytes:
            # Nothing to describe; do not spend an AI call on an empty payload.
            print(f"Failed to download image from {image_url}. Status: {response.status} (empty body)")
            return None

        # Validate MIME type. Unknown types only produce a warning; the request
        # is still attempted with the cleaned value.
        supported_image_mimes = ["image/png", "image/jpeg", "image/webp", "image/heic", "image/heif", "image/gif"]
        clean_mime_type = mime_type.split(';')[0].strip().lower()
        if clean_mime_type not in supported_image_mimes:
            print(f"Warning: Image MIME type '{clean_mime_type}' from {image_url} might not be fully supported for description. Proceeding anyway.")

        # 2. Prepare contents for AI
        # item_name is interpolated into the prompt as-is (no escaping).
        prompt_text = (
            f"This image is a Discord {item_type} named '{item_name}'. "
            "Provide a concise, factual, and unbiased textual description of its visual elements. "
            "Focus solely on what is depicted. Avoid any interpretation, censorship, or subjective commentary. "
            "Do not mention that it is an emoji or sticker in your description, just describe the visual content."
        )

        image_part = types.Part(inline_data=types.Blob(data=image_bytes, mime_type=clean_mime_type))
        text_part = types.Part(text=prompt_text)
        description_contents: List[types.Content] = [types.Content(role="user", parts=[image_part, text_part])]

        # 3. Prepare Generation Config
        # Plain-text reply: no response schema, no tools, no system prompt.
        description_gen_config = types.GenerateContentConfig(
            temperature=0.4,  # Lower temperature for more factual description
            max_output_tokens=256,  # Descriptions should be concise
            safety_settings=STANDARD_SAFETY_SETTINGS,
            tools=None,  # No tools for this task
            tool_config=None
        )

        # 4. Call AI
        # NOTE(review): assumes DEFAULT_MODEL accepts image input and plain-text
        # output; substitute a known multimodal model here if that is not the case.
        model_to_use = DEFAULT_MODEL

        print(f"Calling AI for image description ({item_name}) using model: {model_to_use}")
        ai_response_obj = await call_google_genai_api_with_retry(
            cog=cog,
            model_name=model_to_use,
            contents=description_contents,
            generation_config=description_gen_config,
            request_desc=f"Image description for {item_type} '{item_name}'"
        )

        # 5. Extract text
        if not ai_response_obj:
            print(f"AI call for image description of '{item_name}' returned no response object.")
            return None

        raw_text = _get_response_text(ai_response_obj)
        # Strip before testing so a whitespace-only reply counts as "no text"
        # instead of being returned as an empty description.
        description_text = raw_text.strip() if raw_text else ""
        if not description_text:
            print(f"AI response for '{item_name}' contained no usable text. Response: {ai_response_obj}")
            return None

        print(f"Successfully generated description for '{item_name}': {description_text[:100]}...")
        return description_text

    except aiohttp.ClientError as client_e:
        print(f"Network error downloading image {image_url} for description: {client_e}")
        return None
    except asyncio.TimeoutError:
        print(f"Timeout downloading image {image_url} for description.")
        return None
    except Exception as e:
        # Last-resort boundary: callers treat None as "no description".
        print(f"Unexpected error in generate_image_description for '{item_name}': {type(e).__name__}: {e}")
        import traceback
        traceback.print_exc()
        return None
|
||||
|
||||
|
||||
# --- Internal AI Call for Specific Tasks ---
|
||||
async def get_internal_ai_json_response(
|
||||
cog: 'GurtCog',
|
||||
|
101
gurt/cog.py
101
gurt/cog.py
@ -30,14 +30,19 @@ from .config import (
|
||||
IGNORED_CHANNEL_IDS, update_ignored_channels_file # Import for ignored channels
|
||||
)
|
||||
# Import functions/classes from other modules
|
||||
from .memory import MemoryManager # Import from local memory.py
|
||||
from .emojis import EmojiManager # Import EmojiManager
|
||||
from .memory import MemoryManager
|
||||
from .emojis import EmojiManager
|
||||
from .background import background_processing_task
|
||||
from .commands import setup_commands # Import the setup helper
|
||||
from .listeners import on_ready_listener, on_message_listener, on_reaction_add_listener, on_reaction_remove_listener # Import listener functions
|
||||
from . import config as GurtConfig # Import config module for get_gurt_stats
|
||||
from .commands import setup_commands
|
||||
from .listeners import (
|
||||
on_ready_listener, on_message_listener, on_reaction_add_listener,
|
||||
on_reaction_remove_listener, on_guild_join_listener, # Added on_guild_join_listener
|
||||
on_guild_emojis_update_listener, on_guild_stickers_update_listener # Added emoji/sticker update listeners
|
||||
)
|
||||
from . import api # Import api to access generate_image_description
|
||||
from . import config as GurtConfig
|
||||
# Tool mapping is used internally by api.py/process_requested_tools, no need to import here directly unless cog methods call tools directly (they shouldn't)
|
||||
# Analysis, context, prompt, api, utils functions are called by listeners/commands/background task, not directly by cog methods here usually.
|
||||
# Analysis, context, prompt, utils functions are called by listeners/commands/background task, not directly by cog methods here usually.
|
||||
|
||||
# Load environment variables (might be loaded globally in main bot script too)
|
||||
load_dotenv()
|
||||
@ -212,6 +217,21 @@ class GurtCog(commands.Cog, name="Gurt"): # Added explicit Cog name
|
||||
# This ensures the bot's application_id is properly set before syncing
|
||||
print("GurtCog: Commands will be synced when the bot is ready.")
|
||||
|
||||
# Add new listeners
|
||||
@self.bot.event
|
||||
async def on_guild_join(guild):
|
||||
await on_guild_join_listener(self, guild)
|
||||
|
||||
@self.bot.event
|
||||
async def on_guild_emojis_update(guild, before, after):
|
||||
await on_guild_emojis_update_listener(self, guild, before, after)
|
||||
|
||||
@self.bot.event
|
||||
async def on_guild_stickers_update(guild, before, after):
|
||||
await on_guild_stickers_update_listener(self, guild, before, after)
|
||||
|
||||
print("GurtCog: Additional guild event listeners added.")
|
||||
|
||||
# Start background task
|
||||
if self.background_task is None or self.background_task.done():
|
||||
self.background_task = asyncio.create_task(background_processing_task(self))
|
||||
@ -248,6 +268,75 @@ class GurtCog(commands.Cog, name="Gurt"): # Added explicit Cog name
|
||||
self.user_relationships[user_id_1][user_id_2] = new_score
|
||||
# print(f"Updated relationship {user_id_1}-{user_id_2}: {current_score:.1f} -> {new_score:.1f} ({change:+.1f})") # Debug log
|
||||
|
||||
async def _fetch_and_process_guild_assets(self, guild: discord.Guild):
    """Iterates through a guild's emojis and stickers, generates descriptions, and updates EmojiManager.

    For every emoji and raster sticker (PNG/APNG, and GIF where the installed
    discord library defines that format) that does not yet have a real
    description stored for its current URL, an AI description is requested via
    ``api.generate_image_description`` and the result is stored through
    ``self.emoji_manager``. Lottie and other non-raster stickers are stored
    with a fixed placeholder description and never trigger an AI call.

    A one-second pause follows each AI call as rate limiting; assets that need
    no AI call do not pause. Errors are logged per asset and do not abort the
    scan.

    Args:
        guild: The guild whose ``emojis`` and ``stickers`` are scanned.
    """
    print(f"Processing assets for guild: {guild.name} ({guild.id})")
    processed_count = 0  # number of entries actually added/changed in the store
    no_description = "No description generated."
    lottie_placeholder = "Lottie animation, visual description not applicable."

    # Emojis
    for emoji in guild.emojis:
        try:
            name_key = f":{emoji.name}:"
            emoji_url = str(emoji.url)
            mime_type = "image/gif" if emoji.animated else "image/png"

            # Skip emojis that already carry a real description for this URL.
            existing_emoji = await self.emoji_manager.get_emoji(name_key)
            if (
                existing_emoji
                and existing_emoji.get("url") == emoji_url
                and existing_emoji.get("description")
                and existing_emoji.get("description") != no_description
            ):
                continue

            print(f"Generating description for emoji: {name_key} in guild {guild.name}")
            description = await api.generate_image_description(self, emoji_url, emoji.name, "emoji", mime_type)
            changed = await self.emoji_manager.add_emoji(
                name_key, str(emoji.id), emoji.animated, guild.id, emoji_url, description or no_description
            )
            if changed:
                processed_count += 1
            await asyncio.sleep(1)  # Rate limiting
        except Exception as e:
            print(f"Error processing emoji {emoji.name} in guild {guild.name}: {e}")

    # Stickers
    # GIF stickers only exist in newer discord library versions, so look the
    # member up defensively instead of referencing it directly.
    gif_format = getattr(discord.StickerFormatType, "gif", None)
    for sticker in guild.stickers:
        try:
            name_key = f":{sticker.name}:"
            sticker_url = str(sticker.url)
            sticker_format = sticker.format

            if sticker_format == discord.StickerFormatType.png or sticker_format == discord.StickerFormatType.apng:
                mime_type = "image/png"  # APNG is sent as image/png as well
            elif gif_format is not None and sticker_format == gif_format:
                mime_type = "image/gif"
            else:
                mime_type = None  # not a raster image the describer can take

            # Description currently stored for this exact URL, if any.
            existing_sticker = await self.emoji_manager.get_sticker(name_key)
            stored_description = None
            if existing_sticker and existing_sticker.get("url") == sticker_url:
                stored_description = existing_sticker.get("description")

            if mime_type is None:
                is_lottie = sticker_format == discord.StickerFormatType.lottie
                if is_lottie:
                    placeholder = lottie_placeholder
                else:
                    placeholder = f"Unsupported format: {sticker.format}, visual description not applicable."
                if stored_description == placeholder:
                    continue  # already recorded; nothing to do on re-scans
                if not is_lottie:
                    print(f"Skipping sticker {sticker.name} due to unsupported format: {sticker.format}")
                changed = await self.emoji_manager.add_sticker(
                    name_key, str(sticker.id), guild.id, sticker_url, placeholder
                )
                if changed:
                    processed_count += 1
                # No AI call was made, so no rate-limit pause is needed.
                continue

            # Raster sticker: skip only when a real (non-placeholder) description exists.
            if (
                stored_description
                and stored_description not in (no_description, lottie_placeholder)
                and not stored_description.startswith("Unsupported format:")
            ):
                continue

            print(f"Generating description for sticker: {sticker.name} in guild {guild.name}")
            description = await api.generate_image_description(self, sticker_url, sticker.name, "sticker", mime_type)
            changed = await self.emoji_manager.add_sticker(
                name_key, str(sticker.id), guild.id, sticker_url, description or no_description
            )
            if changed:
                processed_count += 1
            await asyncio.sleep(1)  # Rate limiting
        except Exception as e:
            print(f"Error processing sticker {sticker.name} in guild {guild.name}: {e}")
    print(f"Finished processing {processed_count} new/updated assets for guild: {guild.name} ({guild.id})")
|
||||
|
||||
async def initial_emoji_sticker_scan(self):
    """Scans all guilds GURT is in on startup for emojis and stickers.

    One background task running ``_fetch_and_process_guild_assets`` is created
    per guild in ``self.bot.guilds``. All tasks are started immediately and
    this method returns without awaiting them, so the caller is not blocked.

    Each task is kept in ``self._guild_asset_tasks`` until it finishes: the
    event loop itself only holds weak references to tasks, so an otherwise
    unreferenced task could be garbage-collected before completing.
    """
    print("Starting initial scan of emojis and stickers for all guilds...")

    pending = getattr(self, "_guild_asset_tasks", None)
    if pending is None:
        pending = set()
        self._guild_asset_tasks = pending

    created = 0
    for guild in self.bot.guilds:
        task = asyncio.create_task(self._fetch_and_process_guild_assets(guild))
        pending.add(task)
        # Drop the strong reference once the task is done.
        task.add_done_callback(pending.discard)
        created += 1

    print(f"Created {created} tasks for initial emoji/sticker scan.")
|
||||
|
||||
|
||||
async def get_gurt_stats(self) -> Dict[str, Any]:
|
||||
"""Collects various internal stats for Gurt."""
|
||||
stats = {"config": {}, "runtime": {}, "memory": {}, "api_stats": {}, "tool_stats": {}}
|
||||
|
@ -7,7 +7,7 @@ DATA_FILE_PATH = "data/custom_emojis_stickers.json"
|
||||
class EmojiManager:
|
||||
def __init__(self, data_file: str = DATA_FILE_PATH):
|
||||
self.data_file = data_file
|
||||
# Adjusted type hint for self.data to accommodate guild_id
|
||||
# Adjusted type hint for self.data to accommodate guild_id, url, and description
|
||||
self.data: Dict[str, Dict[str, Dict[str, Any]]] = {"emojis": {}, "stickers": {}}
|
||||
self._load_data()
|
||||
|
||||
@ -22,15 +22,19 @@ class EmojiManager:
|
||||
self.data["emojis"] = {
|
||||
name: {
|
||||
"id": data.get("id"),
|
||||
"animated": data.get("animated", False), # Default animated to False
|
||||
"guild_id": data.get("guild_id") # Will be None if not present
|
||||
"animated": data.get("animated", False),
|
||||
"guild_id": data.get("guild_id"),
|
||||
"url": data.get("url"), # Load new field
|
||||
"description": data.get("description") # Load new field
|
||||
}
|
||||
for name, data in loaded_json.get("emojis", {}).items() if isinstance(data, dict)
|
||||
}
|
||||
self.data["stickers"] = {
|
||||
name: {
|
||||
"id": data.get("id"),
|
||||
"guild_id": data.get("guild_id") # Will be None if not present
|
||||
"guild_id": data.get("guild_id"),
|
||||
"url": data.get("url"), # Load new field
|
||||
"description": data.get("description") # Load new field
|
||||
}
|
||||
for name, data in loaded_json.get("stickers", {}).items() if isinstance(data, dict)
|
||||
}
|
||||
@ -63,14 +67,23 @@ class EmojiManager:
|
||||
print(f"Error saving emoji/sticker data: {e}")
|
||||
return False
|
||||
|
||||
async def add_emoji(self, name: str, emoji_id: str, is_animated: bool, guild_id: Optional[int], url: Optional[str] = None, description: Optional[str] = None) -> bool:
    """Adds or updates a custom emoji with its guild ID, URL, and description.

    ``url`` and ``description`` are optional so that callers written against
    the older four-argument form keep working. When either is omitted (None)
    and an entry for the same emoji ID already exists, the stored value is
    kept rather than overwritten with None.

    Args:
        name: Key under which the emoji is stored in ``self.data["emojis"]``.
        emoji_id: The emoji's ID as a string.
        is_animated: Whether the emoji is animated.
        guild_id: ID of the guild the emoji belongs to, if known.
        url: Image URL of the emoji, if known.
        description: Textual description of the emoji, if known.

    Returns:
        False when the stored entry already matches; otherwise the result of
        ``self._save_data()`` after writing the entry.
    """
    existing_data = self.data["emojis"].get(name)
    if existing_data is not None:
        if existing_data.get("id") == emoji_id:
            # Same emoji: an omitted field means "not provided", not "clear it".
            if url is None:
                url = existing_data.get("url")
            if description is None:
                description = existing_data.get("description")
        if (existing_data.get("id") == emoji_id and
                existing_data.get("guild_id") == guild_id and
                existing_data.get("animated") == is_animated and
                existing_data.get("url") == url and
                existing_data.get("description") == description):
            return False  # No change
    self.data["emojis"][name] = {
        "id": emoji_id,
        "animated": is_animated,
        "guild_id": guild_id,
        "url": url,
        "description": description,
    }
    return self._save_data()
|
||||
|
||||
async def remove_emoji(self, name: str) -> bool:
|
||||
@ -88,13 +101,21 @@ class EmojiManager:
|
||||
"""Gets a specific custom emoji by name."""
|
||||
return self.data["emojis"].get(name)
|
||||
|
||||
async def add_sticker(self, name: str, sticker_id: str, guild_id: Optional[int], url: Optional[str] = None, description: Optional[str] = None) -> bool:
    """Adds or updates a custom sticker with its guild ID, URL, and description.

    ``url`` and ``description`` are optional so that callers written against
    the older three-argument form keep working. When either is omitted (None)
    and an entry for the same sticker ID already exists, the stored value is
    kept rather than overwritten with None.

    Args:
        name: Key under which the sticker is stored in ``self.data["stickers"]``.
        sticker_id: The sticker's ID as a string.
        guild_id: ID of the guild the sticker belongs to, if known.
        url: Image URL of the sticker, if known.
        description: Textual description of the sticker, if known.

    Returns:
        False when the stored entry already matches; otherwise the result of
        ``self._save_data()`` after writing the entry.
    """
    existing_data = self.data["stickers"].get(name)
    if existing_data is not None:
        if existing_data.get("id") == sticker_id:
            # Same sticker: an omitted field means "not provided", not "clear it".
            if url is None:
                url = existing_data.get("url")
            if description is None:
                description = existing_data.get("description")
        if (existing_data.get("id") == sticker_id and
                existing_data.get("guild_id") == guild_id and
                existing_data.get("url") == url and
                existing_data.get("description") == description):
            return False  # No change
    self.data["stickers"][name] = {
        "id": sticker_id,
        "guild_id": guild_id,
        "url": url,
        "description": description,
    }
    return self._save_data()
|
||||
|
||||
async def remove_sticker(self, name: str) -> bool:
|
||||
|
@ -48,6 +48,8 @@ async def on_ready_listener(cog: 'GurtCog'):
|
||||
traceback.print_exc()
|
||||
|
||||
# --- Message history pre-loading removed ---
|
||||
# Call the initial emoji/sticker scan
|
||||
await cog.initial_emoji_sticker_scan()
|
||||
|
||||
|
||||
async def on_message_listener(cog: 'GurtCog', message: discord.Message):
|
||||
@ -634,3 +636,29 @@ async def on_reaction_remove_listener(cog: 'GurtCog', reaction: discord.Reaction
|
||||
if sentiment == "positive": cog.gurt_message_reactions[message_id]["positive"] = max(0, cog.gurt_message_reactions[message_id]["positive"] - 1)
|
||||
elif sentiment == "negative": cog.gurt_message_reactions[message_id]["negative"] = max(0, cog.gurt_message_reactions[message_id]["negative"] - 1)
|
||||
print(f"Reaction removed from Gurt msg ({message_id}). Sentiment: {sentiment}")
|
||||
|
||||
|
||||
# --- New Listener Functions for Guild Asset Updates ---
|
||||
|
||||
async def on_guild_join_listener(cog: 'GurtCog', guild: discord.Guild):
    """Listener function for on_guild_join.

    Schedules ``cog._fetch_and_process_guild_assets(guild)`` as a background
    task so the event handler returns immediately.

    Args:
        cog: The GurtCog instance that owns the asset-processing method.
        guild: The guild that was just joined.
    """
    print(f"Gurt joined a new guild: {guild.name} ({guild.id})")
    print(f"Processing emojis and stickers for new guild: {guild.name}")

    task = asyncio.create_task(cog._fetch_and_process_guild_assets(guild))

    # The event loop only keeps weak references to tasks; hold a strong one on
    # the cog until the task finishes so it cannot be garbage-collected mid-run.
    pending = getattr(cog, "_guild_asset_tasks", None)
    if pending is None:
        pending = set()
        cog._guild_asset_tasks = pending
    pending.add(task)
    task.add_done_callback(pending.discard)
|
||||
|
||||
async def on_guild_emojis_update_listener(cog: 'GurtCog', guild: discord.Guild, before: List[discord.Emoji], after: List[discord.Emoji]):
    """Listener function for on_guild_emojis_update.

    Handles both directions of a change:

    * Emojis whose name is present in ``before`` but not in ``after`` (deleted
      or renamed) are removed from ``cog.emoji_manager``, provided the stored
      entry has the same emoji ID. The ID check avoids deleting an entry that
      belongs to a same-named emoji from another guild.
    * The whole guild is then re-processed in a background task so added or
      renamed emojis get stored; that scan also covers stickers.

    Args:
        cog: The GurtCog instance.
        guild: The guild whose emojis changed.
        before: Emojis before the update.
        after: Emojis after the update.
    """
    print(f"Emojis updated in guild: {guild.name} ({guild.id}). Before: {len(before)}, After: {len(after)}")

    # Prune entries for names that no longer exist in this guild.
    # NOTE(review): assumes remove_emoji takes the same ":name:" key that
    # get_emoji/add_emoji use — confirm against EmojiManager.
    current_names = {emoji.name for emoji in after}
    for old_emoji in before:
        if old_emoji.name in current_names:
            continue
        name_key = f":{old_emoji.name}:"
        try:
            stored = await cog.emoji_manager.get_emoji(name_key)
            if stored and stored.get("id") == str(old_emoji.id):
                await cog.emoji_manager.remove_emoji(name_key)
        except Exception as e:
            print(f"Error processing emoji {old_emoji.name} in guild {guild.name}: {e}")

    print(f"Re-processing all emojis for guild: {guild.name}")
    task = asyncio.create_task(cog._fetch_and_process_guild_assets(guild))

    # The event loop only keeps weak references to tasks; hold a strong one on
    # the cog until the task finishes so it cannot be garbage-collected mid-run.
    pending = getattr(cog, "_guild_asset_tasks", None)
    if pending is None:
        pending = set()
        cog._guild_asset_tasks = pending
    pending.add(task)
    task.add_done_callback(pending.discard)
|
||||
|
||||
async def on_guild_stickers_update_listener(cog: 'GurtCog', guild: discord.Guild, before: List[discord.StickerItem], after: List[discord.StickerItem]):
    """Listener function for on_guild_stickers_update.

    Handles both directions of a change:

    * Stickers whose name is present in ``before`` but not in ``after``
      (deleted or renamed) are removed from ``cog.emoji_manager``, provided the
      stored entry has the same sticker ID. The ID check avoids deleting an
      entry that belongs to a same-named sticker from another guild.
    * The whole guild is then re-processed in a background task so added or
      renamed stickers get stored; that scan also covers emojis.

    NOTE(review): the discord library appears to pass guild sticker objects
    here rather than ``StickerItem`` — confirm and adjust the annotations.

    Args:
        cog: The GurtCog instance.
        guild: The guild whose stickers changed.
        before: Stickers before the update.
        after: Stickers after the update.
    """
    print(f"Stickers updated in guild: {guild.name} ({guild.id}). Before: {len(before)}, After: {len(after)}")

    # Prune entries for names that no longer exist in this guild.
    # NOTE(review): assumes remove_sticker takes the same ":name:" key that
    # get_sticker/add_sticker use — confirm against EmojiManager.
    current_names = {sticker.name for sticker in after}
    for old_sticker in before:
        if old_sticker.name in current_names:
            continue
        name_key = f":{old_sticker.name}:"
        try:
            stored = await cog.emoji_manager.get_sticker(name_key)
            if stored and stored.get("id") == str(old_sticker.id):
                await cog.emoji_manager.remove_sticker(name_key)
        except Exception as e:
            print(f"Error processing sticker {old_sticker.name} in guild {guild.name}: {e}")

    print(f"Re-processing all stickers (and emojis) for guild: {guild.name}")
    task = asyncio.create_task(cog._fetch_and_process_guild_assets(guild))

    # The event loop only keeps weak references to tasks; hold a strong one on
    # the cog until the task finishes so it cannot be garbage-collected mid-run.
    pending = getattr(cog, "_guild_asset_tasks", None)
    if pending is None:
        pending = set()
        cog._guild_asset_tasks = pending
    pending.add(task)
    task.add_done_callback(pending.discard)
|
||||
|
Loading…
x
Reference in New Issue
Block a user