From 36f3f80487785c726e329a625e6915b84b230239 Mon Sep 17 00:00:00 2001 From: Slipstream Date: Fri, 30 May 2025 22:34:27 -0600 Subject: [PATCH] feat: Implement dedicated voice text channel functionality and logging for voice interactions --- cogs/VoiceGatewayCog.py | 161 ++++++++++++++++++++++++++++++++++++---- gurt/config.py | 8 ++ gurt/listeners.py | 40 ++++++---- gurt/tools.py | 18 +++++ 4 files changed, 197 insertions(+), 30 deletions(-) diff --git a/cogs/VoiceGatewayCog.py b/cogs/VoiceGatewayCog.py index c647624..b01e638 100644 --- a/cogs/VoiceGatewayCog.py +++ b/cogs/VoiceGatewayCog.py @@ -7,6 +7,10 @@ import wave # For saving audio data import functools # Added for partial import subprocess # For audio conversion from discord.ext import voice_recv # For receiving voice +from typing import Optional # For type hinting + +# Gurt specific imports +from gurt import config as GurtConfig # Attempt to import STT and VAD libraries try: @@ -241,6 +245,7 @@ class VoiceGatewayCog(commands.Cog): def __init__(self, bot): self.bot = bot self.active_sinks = {} # guild_id: VoiceAudioSink + self.dedicated_voice_text_channels: dict[int, int] = {} # guild_id: channel_id self.whisper_model = None if whisper: try: @@ -253,6 +258,98 @@ class VoiceGatewayCog(commands.Cog): else: print("Whisper library not available. STT functionality will be disabled.") + async def _ensure_dedicated_voice_text_channel(self, guild: discord.Guild, voice_channel: discord.VoiceChannel) -> Optional[discord.TextChannel]: + if not GurtConfig.VOICE_DEDICATED_TEXT_CHANNEL_ENABLED: + return None + + existing_channel_id = self.dedicated_voice_text_channels.get(guild.id) + if existing_channel_id: + channel = guild.get_channel(existing_channel_id) + if channel and isinstance(channel, discord.TextChannel): + print(f"Found existing dedicated voice text channel: {channel.name} ({channel.id})") + return channel + else: + print(f"Dedicated voice text channel ID {existing_channel_id} for guild {guild.id} is invalid or not found. Will create a new one.") + del self.dedicated_voice_text_channels[guild.id] # Remove invalid ID + + # Create new channel + channel_name = GurtConfig.VOICE_DEDICATED_TEXT_CHANNEL_NAME_TEMPLATE.format( + voice_channel_name=voice_channel.name, + guild_name=guild.name + # Add more placeholders if needed + ) + # Sanitize channel name (Discord has restrictions) + channel_name = "".join(c for c in channel_name if c.isalnum() or c in ['-', '_', ' ']).strip() + channel_name = channel_name.replace(' ', '-').lower() + if not channel_name: # Fallback if template results in empty string + channel_name = "gurt-voice-chat" + + # Check if a channel with this name already exists (to avoid duplicates if bot restarted without proper cleanup) + for existing_guild_channel in guild.text_channels: + if existing_guild_channel.name == channel_name: + print(f"Found existing channel by name '{channel_name}' ({existing_guild_channel.id}). Reusing.") + self.dedicated_voice_text_channels[guild.id] = existing_guild_channel.id + # Optionally update topic and permissions if needed + try: + if existing_guild_channel.topic != GurtConfig.VOICE_DEDICATED_TEXT_CHANNEL_TOPIC: + await existing_guild_channel.edit(topic=GurtConfig.VOICE_DEDICATED_TEXT_CHANNEL_TOPIC) + # Send initial message if channel is empty or last message isn't the initial one + async for last_message in existing_guild_channel.history(limit=1): + if last_message.content != GurtConfig.VOICE_DEDICATED_TEXT_CHANNEL_INITIAL_MESSAGE: + await existing_guild_channel.send(GurtConfig.VOICE_DEDICATED_TEXT_CHANNEL_INITIAL_MESSAGE) + break # Only need the very last message + else: # No messages in channel + await existing_guild_channel.send(GurtConfig.VOICE_DEDICATED_TEXT_CHANNEL_INITIAL_MESSAGE) + + except discord.Forbidden: + print(f"Missing permissions to update reused dedicated channel {channel_name}") + except Exception as e_reuse: + print(f"Error updating reused dedicated channel {channel_name}: {e_reuse}") + return existing_guild_channel + + overwrites = { + guild.me: discord.PermissionOverwrite(read_messages=True, send_messages=True, manage_messages=True), # GURT needs to manage + guild.default_role: discord.PermissionOverwrite(read_messages=False, send_messages=False) # Private by default + # Consider adding server admins/mods with read/send permissions + } + # Add owner and admins with full perms to the channel + if guild.owner: + overwrites[guild.owner] = discord.PermissionOverwrite(read_messages=True, send_messages=True, manage_channels=True, manage_messages=True) + for role in guild.roles: + if role.permissions.administrator and not role.is_default(): # Check for admin roles + overwrites[role] = discord.PermissionOverwrite(read_messages=True, send_messages=True, manage_channels=True, manage_messages=True) + + + try: + print(f"Creating new dedicated voice text channel: {channel_name}") + new_channel = await guild.create_text_channel( + name=channel_name, + overwrites=overwrites, + topic=GurtConfig.VOICE_DEDICATED_TEXT_CHANNEL_TOPIC, + reason="GURT Dedicated Voice Chat Channel" + ) + self.dedicated_voice_text_channels[guild.id] = new_channel.id + if GurtConfig.VOICE_DEDICATED_TEXT_CHANNEL_INITIAL_MESSAGE: + await new_channel.send(GurtConfig.VOICE_DEDICATED_TEXT_CHANNEL_INITIAL_MESSAGE) + print(f"Created dedicated voice text channel: {new_channel.name} ({new_channel.id})") + return new_channel + except discord.Forbidden: + print(f"Forbidden: Could not create dedicated voice text channel '{channel_name}' in guild {guild.name}.") + return None + except Exception as e: + print(f"Error creating dedicated voice text channel '{channel_name}': {e}") + return None + + def get_dedicated_text_channel_for_guild(self, guild_id: int) -> Optional[discord.TextChannel]: + channel_id = self.dedicated_voice_text_channels.get(guild_id) + if channel_id: + guild = self.bot.get_guild(guild_id) + if guild: + channel = guild.get_channel(channel_id) + if isinstance(channel, discord.TextChannel): + return channel + return None + async def cog_load(self): print("VoiceGatewayCog loaded!") @@ -262,15 +359,32 @@ class VoiceGatewayCog(commands.Cog): for vc in list(self.bot.voice_clients): # Iterate over a copy guild_id = vc.guild.id if guild_id in self.active_sinks: - # Ensure vc is an instance of VoiceRecvClient or compatible for stop_listening if vc.is_connected() and hasattr(vc, 'is_listening') and vc.is_listening(): - # Check if stop_listening exists, VoiceRecvClient might have different API if hasattr(vc, 'stop_listening'): - vc.stop_listening() + vc.stop_listening() else: # Or equivalent for VoiceRecvClient - pass # May need specific cleanup for voice_recv + pass self.active_sinks[guild_id].cleanup() del self.active_sinks[guild_id] + + # Handle dedicated text channel cleanup on cog unload + if GurtConfig.VOICE_DEDICATED_TEXT_CHANNEL_ENABLED and GurtConfig.VOICE_DEDICATED_TEXT_CHANNEL_CLEANUP_ON_LEAVE: + dedicated_channel_id = self.dedicated_voice_text_channels.get(guild_id) + if dedicated_channel_id: + try: + channel_to_delete = vc.guild.get_channel(dedicated_channel_id) or await self.bot.fetch_channel(dedicated_channel_id) + if channel_to_delete: + print(f"Deleting dedicated voice text channel {channel_to_delete.name} ({channel_to_delete.id}) during cog unload.") + await channel_to_delete.delete(reason="GURT VoiceGatewayCog unload") + except discord.NotFound: + print(f"Dedicated voice text channel {dedicated_channel_id} not found for deletion during unload.") + except discord.Forbidden: + print(f"Forbidden: Could not delete dedicated voice text channel {dedicated_channel_id} during unload.") + except Exception as e: + print(f"Error deleting dedicated voice text channel {dedicated_channel_id} during unload: {e}") + if guild_id in self.dedicated_voice_text_channels: + del self.dedicated_voice_text_channels[guild_id] + if vc.is_connected(): await vc.disconnect(force=True) print("VoiceGatewayCog unloaded and disconnected from voice channels.") @@ -281,54 +395,53 @@ class VoiceGatewayCog(commands.Cog): return None, "Channel not provided." guild = channel.guild - voice_client = guild.voice_client # This will be VoiceRecvClient if already connected by this cog + voice_client = guild.voice_client if voice_client and voice_client.is_connected(): if voice_client.channel == channel: print(f"Already connected to {channel.name} in {guild.name}.") - # Ensure listening is active if already connected - # Check if it's a VoiceRecvClient instance if isinstance(voice_client, voice_recv.VoiceRecvClient): if guild.id not in self.active_sinks or not voice_client.is_listening(): self.start_listening_for_vc(voice_client) - else: # If it's a regular VoiceClient, we need to reconnect with VoiceRecvClient + # Ensure dedicated channel is set up even if already connected + await self._ensure_dedicated_voice_text_channel(guild, channel) + else: print(f"Reconnecting with VoiceRecvClient to {channel.name}.") await voice_client.disconnect(force=True) - try: # Reconnect with VoiceRecvClient + try: voice_client = await channel.connect(cls=voice_recv.VoiceRecvClient, timeout=10.0) print(f"Reconnected to {channel.name} in {guild.name} with VoiceRecvClient.") self.start_listening_for_vc(voice_client) + await self._ensure_dedicated_voice_text_channel(guild, channel) except asyncio.TimeoutError: return None, f"Timeout trying to reconnect to {channel.name} with VoiceRecvClient." except Exception as e: return None, f"Error reconnecting to {channel.name} with VoiceRecvClient: {str(e)}" - return voice_client, "Already connected to this channel." else: - # Handling move_to for VoiceRecvClient might need care. - # Simplest: disconnect and reconnect with VoiceRecvClient to the new channel. print(f"Moving to {channel.name} in {guild.name}. Reconnecting with VoiceRecvClient.") - await voice_client.disconnect(force=True) + await voice_client.disconnect(force=True) # This will trigger cleanup for old channel's dedicated text channel if configured try: voice_client = await channel.connect(cls=voice_recv.VoiceRecvClient, timeout=10.0) print(f"Moved and reconnected to {channel.name} in {guild.name} with VoiceRecvClient.") self.start_listening_for_vc(voice_client) + await self._ensure_dedicated_voice_text_channel(guild, channel) except asyncio.TimeoutError: return None, f"Timeout trying to move and connect to {channel.name}." except Exception as e: return None, f"Error moving and connecting to {channel.name}: {str(e)}" else: try: - # Connect using VoiceRecvClient voice_client = await channel.connect(cls=voice_recv.VoiceRecvClient, timeout=10.0) print(f"Connected to {channel.name} in {guild.name} with VoiceRecvClient.") self.start_listening_for_vc(voice_client) + await self._ensure_dedicated_voice_text_channel(guild, channel) except asyncio.TimeoutError: return None, f"Timeout trying to connect to {channel.name}." except Exception as e: return None, f"Error connecting to {channel.name}: {str(e)}" - if not voice_client: # Should not happen if connect succeeded + if not voice_client: return None, "Failed to establish voice client after connection." return voice_client, f"Successfully connected and listening in {channel.name}." @@ -364,6 +477,24 @@ class VoiceGatewayCog(commands.Cog): if guild_id in self.active_sinks: self.active_sinks[guild_id].cleanup() del self.active_sinks[guild_id] + + # Handle dedicated text channel cleanup + if GurtConfig.VOICE_DEDICATED_TEXT_CHANNEL_ENABLED and GurtConfig.VOICE_DEDICATED_TEXT_CHANNEL_CLEANUP_ON_LEAVE: + dedicated_channel_id = self.dedicated_voice_text_channels.get(guild_id) + if dedicated_channel_id: + try: + channel_to_delete = guild.get_channel(dedicated_channel_id) or await self.bot.fetch_channel(dedicated_channel_id) + if channel_to_delete: + print(f"Deleting dedicated voice text channel {channel_to_delete.name} ({channel_to_delete.id}).") + await channel_to_delete.delete(reason="GURT disconnected from voice channel") + except discord.NotFound: + print(f"Dedicated voice text channel {dedicated_channel_id} not found for deletion.") + except discord.Forbidden: + print(f"Forbidden: Could not delete dedicated voice text channel {dedicated_channel_id}.") + except Exception as e: + print(f"Error deleting dedicated voice text channel {dedicated_channel_id}: {e}") + if guild_id in self.dedicated_voice_text_channels: + del self.dedicated_voice_text_channels[guild_id] await voice_client.disconnect(force=True) print(f"Disconnected from voice in {guild.name}.") diff --git a/gurt/config.py b/gurt/config.py index bc04067..ef976bd 100644 --- a/gurt/config.py +++ b/gurt/config.py @@ -238,6 +238,14 @@ DOCKER_COMMAND_TIMEOUT = int(os.getenv("DOCKER_COMMAND_TIMEOUT", 10)) DOCKER_CPU_LIMIT = os.getenv("DOCKER_CPU_LIMIT", "0.5") DOCKER_MEM_LIMIT = os.getenv("DOCKER_MEM_LIMIT", "64m") +# --- Voice Configuration --- +VOICE_DEDICATED_TEXT_CHANNEL_ENABLED = os.getenv("VOICE_DEDICATED_TEXT_CHANNEL_ENABLED", "true").lower() == "true" +VOICE_DEDICATED_TEXT_CHANNEL_NAME_TEMPLATE = os.getenv("VOICE_DEDICATED_TEXT_CHANNEL_NAME_TEMPLATE", "🎙️gurt-voice-chat") +VOICE_DEDICATED_TEXT_CHANNEL_TOPIC = os.getenv("VOICE_DEDICATED_TEXT_CHANNEL_TOPIC", "GURT Voice Chat | Transcriptions & Text Interactions") +VOICE_DEDICATED_TEXT_CHANNEL_CLEANUP_ON_LEAVE = os.getenv("VOICE_DEDICATED_TEXT_CHANNEL_CLEANUP_ON_LEAVE", "false").lower() == "true" +VOICE_DEDICATED_TEXT_CHANNEL_INITIAL_MESSAGE = os.getenv("VOICE_DEDICATED_TEXT_CHANNEL_INITIAL_MESSAGE", "GURT is listening in voice. Transcriptions and text-based voice interactions will appear here. Type your messages here to talk to GURT in voice!") +VOICE_LOG_SPEECH_TO_DEDICATED_CHANNEL = os.getenv("VOICE_LOG_SPEECH_TO_DEDICATED_CHANNEL", "true").lower() == "true" + # --- Response Schema --- RESPONSE_SCHEMA = { "name": "gurt_response", diff --git a/gurt/listeners.py b/gurt/listeners.py index 32fb936..5f74248 100644 --- a/gurt/listeners.py +++ b/gurt/listeners.py @@ -731,7 +731,7 @@ async def on_voice_transcription_received_listener(cog: 'GurtCog', guild: discor """Listener for transcribed voice messages.""" from .api import get_ai_response # For processing the text from .utils import format_message, simulate_human_typing # For creating pseudo-message and sending response - from .config import IGNORED_CHANNEL_IDS # To respect ignored channels if applicable + from .config import IGNORED_CHANNEL_IDS, VOICE_DEDICATED_TEXT_CHANNEL_ENABLED # Import new config print(f"Voice transcription received from {user.name} ({user.id}) in {guild.name}: '{text}'") @@ -746,26 +746,36 @@ async def on_voice_transcription_received_listener(cog: 'GurtCog', guild: discor # Or, if GURT is in a voice channel, it might have an associated text channel. # This part needs careful consideration for the best UX. - # Try to find a suitable text channel in the guild. - # This logic might need to be more sophisticated, e.g. last active channel for the user. text_channel = None - if guild: - # Prefer system channel or a common channel name - if guild.system_channel and guild.system_channel.permissions_for(guild.me).send_messages: - text_channel = guild.system_channel + if VOICE_DEDICATED_TEXT_CHANNEL_ENABLED: + voice_gateway_cog = cog.bot.get_cog("VoiceGatewayCog") + if voice_gateway_cog: + text_channel = voice_gateway_cog.get_dedicated_text_channel_for_guild(guild.id) + if text_channel: + print(f"Using dedicated voice text channel: {text_channel.name} ({text_channel.id})") + else: + print(f"Dedicated voice text channel feature is ON, but no channel found for guild {guild.id}. Aborting voice transcription processing.") + return # Do not proceed if dedicated channel is expected but not found else: - for channel in guild.text_channels: - if channel.name.lower() in ["general", "chat", "lounge", "discussion"] and channel.permissions_for(guild.me).send_messages: - text_channel = channel - break - if not text_channel and guild.text_channels: # Fallback to first available text channel - text_channel = guild.text_channels[0] - + print("VoiceGatewayCog not found. Cannot get dedicated text channel. Aborting voice transcription processing.") + return + else: # Fallback to old behavior if dedicated channel feature is off + if guild: + if guild.system_channel and guild.system_channel.permissions_for(guild.me).send_messages: + text_channel = guild.system_channel + else: + for channel in guild.text_channels: + if channel.name.lower() in ["general", "chat", "lounge", "discussion"] and channel.permissions_for(guild.me).send_messages: + text_channel = channel + break + if not text_channel and guild.text_channels: + text_channel = guild.text_channels[0] + if not text_channel: print(f"Could not find a suitable text channel in guild {guild.name} for voice transcription context. Aborting.") return - # Check if this pseudo-channel context should be ignored + # Check if this pseudo-channel context should be ignored (applies to both dedicated and fallback) if text_channel.id in IGNORED_CHANNEL_IDS: print(f"Skipping voice transcription as target context channel {text_channel.name} ({text_channel.id}) is ignored.") return diff --git a/gurt/tools.py b/gurt/tools.py index 5110e19..d5889fc 100644 --- a/gurt/tools.py +++ b/gurt/tools.py @@ -3043,6 +3043,9 @@ async def speak_in_voice_channel(cog: commands.Cog, text_to_speak: str, tts_prov if not active_vc or not active_vc.is_connected(): return {"status": "error", "error": "GURT is not connected to a voice channel."} + # Import GurtConfig for voice channel settings + from .config import VOICE_DEDICATED_TEXT_CHANNEL_ENABLED, VOICE_LOG_SPEECH_TO_DEDICATED_CHANNEL + tts_cog = cog.bot.get_cog("TTSProviderCog") if not tts_cog: return {"status": "error", "error": "TTSProviderCog not loaded."} @@ -3080,6 +3083,21 @@ async def speak_in_voice_channel(cog: commands.Cog, text_to_speak: str, tts_prov play_success, play_message = await voice_gateway_cog.play_audio_file(active_vc, audio_file_path) if play_success: + # Log to dedicated text channel if enabled + if VOICE_DEDICATED_TEXT_CHANNEL_ENABLED and VOICE_LOG_SPEECH_TO_DEDICATED_CHANNEL: + if voice_gateway_cog: # Should exist if we got this far + dedicated_channel = voice_gateway_cog.get_dedicated_text_channel_for_guild(active_vc.guild.id) + if dedicated_channel: + try: + await dedicated_channel.send(f"GURT (Voice): {text_to_speak}") + print(f"Logged GURT's speech to dedicated channel {dedicated_channel.name}") + except Exception as e_log: + print(f"Error logging GURT's speech to dedicated channel {dedicated_channel.name}: {e_log}") + else: + print(f"Could not find dedicated text channel for guild {active_vc.guild.id} to log speech.") + else: # Should not happen + print("VoiceGatewayCog not found for logging speech to dedicated channel.") + return {"status": "success", "message": play_message, "text_spoken": text_to_speak, "provider_used": chosen_provider} else: # TTSProviderCog's cleanup should handle the audio_file_path if play fails