"""Voice gateway cog: receives Discord voice, segments speech with VAD, and
sends the segments to Google Cloud Speech-to-Text."""

import asyncio
import functools  # partial() for the executor call in process_audio_segment
import os
import subprocess  # FFmpeg invocation for audio conversion
import tempfile  # no longer used below; kept in case code outside this view relies on it
import wave  # no longer used below; kept in case code outside this view relies on it
from typing import Optional

import discord
from discord.ext import commands
from discord.ext import voice_recv  # For receiving voice

# Gurt specific imports
from gurt import config as GurtConfig

# Attempt to import STT and VAD libraries
try:
    from google.cloud import speech
except ImportError:
    print(
        "Google Cloud Speech library not found. Please install with 'pip install google-cloud-speech'"
    )
    speech = None

try:
    import webrtcvad
except ImportError:
    print(
        "webrtcvad library not found. Please install with 'pip install webrtc-voice-activity-detector'"
    )
    webrtcvad = None

# Options passed to discord.FFmpegPCMAudio for playback of local files.
FFMPEG_OPTIONS = {"options": "-vn"}

# Constants for audio processing
SAMPLE_RATE = 16000  # Hz; also sent as sample_rate_hertz in the STT request
CHANNELS = 1  # Mono
SAMPLE_WIDTH = 2  # 16-bit audio (2 bytes per sample)
VAD_MODE = 3  # VAD aggressiveness (0-3, 3 is most aggressive)
FRAME_DURATION_MS = 30  # Nominal frame duration in ms (VAD accepts 10, 20, or 30)
BYTES_PER_FRAME = (SAMPLE_RATE // 1000) * FRAME_DURATION_MS * CHANNELS * SAMPLE_WIDTH

# Silence detection parameters.
# NOTE(review): both thresholds are counted per VoiceAudioSink.write() call,
# i.e. per incoming packet, not per FRAME_DURATION_MS frame. If packets are
# 20 ms (typical for Discord voice - confirm), the effective durations are
# shorter than the nominal 30 ms arithmetic suggests.
SILENCE_THRESHOLD_FRAMES = 25  # Consecutive silent packets that end a segment
MAX_SPEECH_DURATION_S = 15  # Nominal max duration of a single speech segment
MAX_SPEECH_FRAMES = (MAX_SPEECH_DURATION_S * 1000) // FRAME_DURATION_MS


def _convert_audio_to_16khz_mono(raw_pcm_data_48k_stereo: bytes) -> bytes:
    """Convert raw 48 kHz stereo s16le PCM to 16 kHz mono s16le PCM via FFmpeg.

    The data is piped through FFmpeg's stdin/stdout, so no temporary files
    are created.

    Args:
        raw_pcm_data_48k_stereo: Raw signed 16-bit little-endian PCM,
            interpreted as 2 channels at 48000 Hz.

    Returns:
        The converted PCM bytes (CHANNELS channels at SAMPLE_RATE Hz), or
        ``b""`` if the input is empty, FFmpeg is missing, or FFmpeg fails.
        Failures are printed, never raised.
    """
    if not raw_pcm_data_48k_stereo:
        return b""

    command = [
        "ffmpeg",
        "-hide_banner",
        "-loglevel", "error",
        "-f", "s16le",  # Input format: signed 16-bit little-endian PCM
        "-ac", "2",  # Input channels: stereo
        "-ar", "48000",  # Input sample rate: 48kHz
        "-i", "pipe:0",
        "-ac", str(CHANNELS),  # Output channels
        "-ar", str(SAMPLE_RATE),  # Output sample rate
        "-f", "s16le",  # Output container: headerless raw PCM
        "-acodec", "pcm_s16le",  # Output sample format
        "pipe:1",
    ]
    try:
        process = subprocess.run(
            command,
            input=raw_pcm_data_48k_stereo,
            capture_output=True,
            check=False,
        )
    except FileNotFoundError:
        print(
            "FFmpeg command not found. Please ensure FFmpeg is installed and in your system's PATH."
        )
        return b""
    except Exception as e:
        print(f"Error during audio conversion: {e}")
        return b""

    if process.returncode != 0:
        print(
            f"FFmpeg error during audio conversion. Return code: {process.returncode}"
        )
        # stdout carries audio bytes in this mode, so only stderr is useful.
        print(f"FFmpeg stderr: {process.stderr.decode(errors='ignore')}")
        return b""
    return process.stdout


class VoiceAudioSink(voice_recv.AudioSink):
    """Audio sink that buffers each user's speech and hands segments to the cog.

    Per-user state lives in ``user_audio_data``:
    ``{user_id: {'buffer': bytearray, 'speaking': bool, 'silent_frames': int,
    'speech_frames': int, 'vad': webrtcvad.Vad}}``.
    """

    def __init__(self, cog_instance):
        """Create a sink bound to ``cog_instance`` (the owning VoiceGatewayCog)."""
        super().__init__()
        self.cog = cog_instance
        self.user_audio_data = {}
        if not webrtcvad:
            print(
                "VAD library not loaded. STT might be less efficient or not work as intended."
            )

    def wants_opus(self) -> bool:
        """Return False: this sink consumes PCM, not Opus-encoded audio."""
        return False

    def _submit_segment(self, user_id: int, audio: bytes, guild) -> None:
        """Schedule ``cog.process_audio_segment`` on the bot's event loop.

        Uses ``asyncio.run_coroutine_threadsafe`` because ``loop.create_task``
        is only safe on the loop's own thread.
        NOTE(review): discord-ext-voice-recv is understood to call ``write()``
        from its receive thread rather than the event loop - confirm against
        the library documentation. The thread-safe call works in either case.
        """
        coro = self.cog.process_audio_segment(user_id, audio, guild)
        try:
            asyncio.run_coroutine_threadsafe(coro, self.cog.bot.loop)
        except RuntimeError as e:
            # Typically the loop is already closed (shutdown); drop the segment
            # and close the coroutine so it is not reported as never awaited.
            coro.close()
            print(f"Could not schedule audio processing for User ID {user_id}: {e}")

    def _flush_segment(self, user_id: int, entry: dict, guild) -> None:
        """Submit the user's buffered audio and reset their segment state."""
        self._submit_segment(user_id, bytes(entry["buffer"]), guild)
        entry["buffer"].clear()
        entry["speaking"] = False
        entry["speech_frames"] = 0
        entry["silent_frames"] = 0

    def write(self, user: discord.User, voice_data_packet: voice_recv.VoiceData):
        """Handle one incoming voice packet for ``user``.

        The packet's PCM is converted to 16 kHz mono and classified by VAD.
        Speech is appended to the user's buffer; once speech has started,
        trailing silence is appended too. The buffer is flushed to
        ``process_audio_segment`` after SILENCE_THRESHOLD_FRAMES consecutive
        silent packets or MAX_SPEECH_FRAMES speech packets.

        Does nothing when webrtcvad is unavailable, the sink has no voice
        client, or ``user`` is falsy.
        """
        voice_client = self.voice_client
        if not webrtcvad or not voice_client or not user:
            return

        user_id = user.id
        entry = self.user_audio_data.get(user_id)
        if entry is None:
            entry = {
                "buffer": bytearray(),
                "speaking": False,
                "silent_frames": 0,
                "speech_frames": 0,
                "vad": webrtcvad.Vad(VAD_MODE),
            }
            self.user_audio_data[user_id] = entry

        # NOTE(review): this spawns one FFmpeg process per packet, blocking
        # the calling thread; an in-process resampler would be far cheaper.
        pcm_data = _convert_audio_to_16khz_mono(voice_data_packet.pcm)
        if not pcm_data:  # Conversion failed or produced nothing
            return

        try:
            is_speech = entry["vad"].is_speech(pcm_data, SAMPLE_RATE)
        except Exception:
            # webrtcvad rejects chunks that are not exactly 10/20/30 ms long;
            # treat such a chunk as speech rather than dropping audio.
            is_speech = True

        if is_speech:
            entry["buffer"].extend(pcm_data)
            entry["speaking"] = True
            entry["silent_frames"] = 0
            entry["speech_frames"] += 1
            if entry["speech_frames"] >= MAX_SPEECH_FRAMES:
                self._flush_segment(user_id, entry, voice_client.guild)
        elif entry["speaking"]:
            # Was speaking, now silence: keep the silent packet for context.
            entry["buffer"].extend(pcm_data)
            entry["silent_frames"] += 1
            if entry["silent_frames"] >= SILENCE_THRESHOLD_FRAMES:
                self._flush_segment(user_id, entry, voice_client.guild)
        # Silence while not speaking is ignored.

    def cleanup(self):
        """Submit any buffered audio for processing, then drop all user state."""
        print("VoiceAudioSink cleanup called.")
        voice_client = self.voice_client
        guild = voice_client.guild if voice_client else None
        for user_id, data_entry in list(self.user_audio_data.items()):
            if not data_entry["buffer"]:
                continue
            if guild:
                print(f"Processing remaining audio for User ID {user_id} on cleanup.")
                self._submit_segment(user_id, bytes(data_entry["buffer"]), guild)
            else:
                print(
                    f"Cannot process remaining audio for User ID {user_id}: voice_client or guild not available."
                )
        self.user_audio_data.clear()


class VoiceGatewayCog(commands.Cog):
    """Cog that joins voice channels, listens, and transcribes speech."""

    def __init__(self, bot):
        """Store the bot and try to create the Google Cloud Speech client."""
        self.bot = bot
        self.active_sinks = {}  # guild_id: VoiceAudioSink
        self.dedicated_voice_text_channels: dict[int, int] = {}  # guild_id: channel_id
        self.speech_client = None
        if not speech:
            print(
                "Google Cloud Speech library not available. STT functionality will be disabled."
            )
            return
        try:
            self.speech_client = speech.SpeechClient()
            print("Google Cloud Speech client initialized successfully.")
        except Exception as e:
            print(
                f"Error initializing Google Cloud Speech client: {e}. STT will not be available."
            )
            self.speech_client = None

    # ------------------------------------------------------------------
    # Dedicated text channel management
    # ------------------------------------------------------------------

    @staticmethod
    def _sanitize_channel_name(raw_name: str) -> str:
        """Reduce ``raw_name`` to lowercase alphanumerics, '-' and '_'.

        Spaces become '-'. Falls back to "gurt-voice-chat" if nothing is left.
        """
        kept = "".join(c for c in raw_name if c.isalnum() or c in "-_ ").strip()
        return kept.replace(" ", "-").lower() or "gurt-voice-chat"

    @staticmethod
    def _build_channel_overwrites(guild: discord.Guild) -> dict:
        """Build overwrites: hidden from @everyone; open to bot, owner, admin roles."""
        staff_access = dict(
            read_messages=True,
            send_messages=True,
            manage_channels=True,
            manage_messages=True,
        )
        overwrites = {
            guild.me: discord.PermissionOverwrite(
                read_messages=True, send_messages=True, manage_messages=True
            ),
            guild.default_role: discord.PermissionOverwrite(
                read_messages=False, send_messages=False
            ),
        }
        if guild.owner:
            overwrites[guild.owner] = discord.PermissionOverwrite(**staff_access)
        for role in guild.roles:
            if role.permissions.administrator and not role.is_default():
                overwrites[role] = discord.PermissionOverwrite(**staff_access)
        return overwrites

    async def _refresh_reused_channel(
        self, channel: discord.TextChannel, channel_name: str
    ) -> None:
        """Bring a reused channel's topic and initial message up to date.

        The initial message is sent only when one is configured and the
        channel is empty or its latest message differs from it. Failures are
        printed, not raised.
        """
        topic = GurtConfig.VOICE_DEDICATED_TEXT_CHANNEL_TOPIC
        initial_message = GurtConfig.VOICE_DEDICATED_TEXT_CHANNEL_INITIAL_MESSAGE
        try:
            if channel.topic != topic:
                await channel.edit(topic=topic)
            if initial_message:
                last_message = None
                async for last_message in channel.history(limit=1):
                    break  # Only the very last message matters
                if last_message is None or last_message.content != initial_message:
                    await channel.send(initial_message)
        except discord.Forbidden:
            print(
                f"Missing permissions to update reused dedicated channel {channel_name}"
            )
        except Exception as e_reuse:
            print(f"Error updating reused dedicated channel {channel_name}: {e_reuse}")

    async def _ensure_dedicated_voice_text_channel(
        self, guild: discord.Guild, voice_channel: discord.VoiceChannel
    ) -> Optional[discord.TextChannel]:
        """Return the guild's dedicated voice text channel, creating it if needed.

        Lookup order: the remembered channel id, then an existing text channel
        with the sanitized template name, then a newly created private channel.

        Returns:
            The channel, or None when the feature is disabled or creation fails.
        """
        if not GurtConfig.VOICE_DEDICATED_TEXT_CHANNEL_ENABLED:
            return None

        existing_channel_id = self.dedicated_voice_text_channels.get(guild.id)
        if existing_channel_id:
            channel = guild.get_channel(existing_channel_id)
            if isinstance(channel, discord.TextChannel):
                print(
                    f"Found existing dedicated voice text channel: {channel.name} ({channel.id})"
                )
                return channel
            print(
                f"Dedicated voice text channel ID {existing_channel_id} for guild {guild.id} is invalid or not found. Will create a new one."
            )
            del self.dedicated_voice_text_channels[guild.id]  # Remove invalid ID

        channel_name = self._sanitize_channel_name(
            GurtConfig.VOICE_DEDICATED_TEXT_CHANNEL_NAME_TEMPLATE.format(
                voice_channel_name=voice_channel.name,
                guild_name=guild.name,
            )
        )

        # Reuse a same-named channel (e.g. left over from a restart without cleanup).
        existing_guild_channel = next(
            (c for c in guild.text_channels if c.name == channel_name), None
        )
        if existing_guild_channel is not None:
            print(
                f"Found existing channel by name '{channel_name}' ({existing_guild_channel.id}). Reusing."
            )
            self.dedicated_voice_text_channels[guild.id] = existing_guild_channel.id
            await self._refresh_reused_channel(existing_guild_channel, channel_name)
            return existing_guild_channel

        try:
            print(f"Creating new dedicated voice text channel: {channel_name}")
            new_channel = await guild.create_text_channel(
                name=channel_name,
                overwrites=self._build_channel_overwrites(guild),
                topic=GurtConfig.VOICE_DEDICATED_TEXT_CHANNEL_TOPIC,
                reason="GURT Dedicated Voice Chat Channel",
            )
            self.dedicated_voice_text_channels[guild.id] = new_channel.id
            if GurtConfig.VOICE_DEDICATED_TEXT_CHANNEL_INITIAL_MESSAGE:
                await new_channel.send(
                    GurtConfig.VOICE_DEDICATED_TEXT_CHANNEL_INITIAL_MESSAGE
                )
            print(
                f"Created dedicated voice text channel: {new_channel.name} ({new_channel.id})"
            )
            return new_channel
        except discord.Forbidden:
            print(
                f"Forbidden: Could not create dedicated voice text channel '{channel_name}' in guild {guild.name}."
            )
            return None
        except Exception as e:
            print(f"Error creating dedicated voice text channel '{channel_name}': {e}")
            return None

    def get_dedicated_text_channel_for_guild(
        self, guild_id: int
    ) -> Optional[discord.TextChannel]:
        """Return the remembered dedicated text channel for ``guild_id``, if any."""
        channel_id = self.dedicated_voice_text_channels.get(guild_id)
        if not channel_id:
            return None
        guild = self.bot.get_guild(guild_id)
        if not guild:
            return None
        channel = guild.get_channel(channel_id)
        return channel if isinstance(channel, discord.TextChannel) else None

    async def _cleanup_dedicated_text_channel(
        self, guild: discord.Guild, *, unloading: bool = False
    ) -> None:
        """Delete the guild's dedicated text channel if cleanup-on-leave is enabled.

        The remembered channel id is forgotten whether or not deletion
        succeeds. ``unloading`` only selects the log wording and audit reason.
        """
        if not (
            GurtConfig.VOICE_DEDICATED_TEXT_CHANNEL_ENABLED
            and GurtConfig.VOICE_DEDICATED_TEXT_CHANNEL_CLEANUP_ON_LEAVE
        ):
            return
        dedicated_channel_id = self.dedicated_voice_text_channels.pop(guild.id, None)
        if not dedicated_channel_id:
            return

        suffix = " during unload" if unloading else ""
        deleting_suffix = " during cog unload" if unloading else ""
        reason = (
            "GURT VoiceGatewayCog unload"
            if unloading
            else "GURT disconnected from voice channel"
        )
        try:
            channel_to_delete = guild.get_channel(
                dedicated_channel_id
            ) or await self.bot.fetch_channel(dedicated_channel_id)
            if channel_to_delete:
                print(
                    f"Deleting dedicated voice text channel {channel_to_delete.name} ({channel_to_delete.id}){deleting_suffix}."
                )
                await channel_to_delete.delete(reason=reason)
        except discord.NotFound:
            print(
                f"Dedicated voice text channel {dedicated_channel_id} not found for deletion{suffix}."
            )
        except discord.Forbidden:
            print(
                f"Forbidden: Could not delete dedicated voice text channel {dedicated_channel_id}{suffix}."
            )
        except Exception as e:
            print(
                f"Error deleting dedicated voice text channel {dedicated_channel_id}{suffix}: {e}"
            )

    # ------------------------------------------------------------------
    # Cog lifecycle
    # ------------------------------------------------------------------

    async def cog_load(self):
        """Log that the cog has been loaded."""
        print("VoiceGatewayCog loaded!")

    @staticmethod
    def _stop_listening_if_active(voice_client) -> None:
        """Stop listening when the client supports it and is currently listening.

        Plain ``discord.VoiceClient`` objects lack the listening API, so both
        methods are looked up defensively.
        """
        is_listening = getattr(voice_client, "is_listening", None)
        stop_listening = getattr(voice_client, "stop_listening", None)
        if callable(is_listening) and callable(stop_listening) and is_listening():
            stop_listening()

    async def cog_unload(self):
        """Stop listening, flush sinks, clean up channels, and leave all voice channels."""
        print("Unloading VoiceGatewayCog...")
        for vc in list(self.bot.voice_clients):  # Copy: disconnecting mutates the list
            sink = self.active_sinks.pop(vc.guild.id, None)
            if sink is not None:
                if vc.is_connected():
                    self._stop_listening_if_active(vc)
                sink.cleanup()

            await self._cleanup_dedicated_text_channel(vc.guild, unloading=True)

            if vc.is_connected():
                await vc.disconnect(force=True)
        print("VoiceGatewayCog unloaded and disconnected from voice channels.")

    # ------------------------------------------------------------------
    # Connecting / listening
    # ------------------------------------------------------------------

    async def _connect_and_listen(
        self,
        channel: discord.VoiceChannel,
        *,
        success_log: str,
        timeout_message: str,
        error_prefix: str,
    ):
        """Connect with VoiceRecvClient, start listening, ensure the text channel.

        Returns:
            ``(voice_client, None)`` on success, or ``(None, message)`` when
            any step times out or raises.
        """
        try:
            voice_client = await channel.connect(
                cls=voice_recv.VoiceRecvClient, timeout=10.0
            )
            print(success_log)
            self.start_listening_for_vc(voice_client)
            await self._ensure_dedicated_voice_text_channel(channel.guild, channel)
        except asyncio.TimeoutError:
            return None, timeout_message
        except Exception as e:
            return None, f"{error_prefix}: {e}"
        return voice_client, None

    async def connect_to_voice(self, channel: discord.VoiceChannel):
        """Connect the bot to ``channel`` and start listening.

        Handles three situations: already in this channel (listening is
        ensured, or the client is replaced by a VoiceRecvClient), connected to
        another channel in the guild (disconnect, then connect), and not
        connected at all.

        Returns:
            ``(voice_client, message)`` on success, ``(None, message)`` on failure.
        """
        if not channel:
            return None, "Channel not provided."

        guild = channel.guild
        voice_client = guild.voice_client

        if voice_client and voice_client.is_connected():
            if voice_client.channel == channel:
                print(f"Already connected to {channel.name} in {guild.name}.")
                if isinstance(voice_client, voice_recv.VoiceRecvClient):
                    if (
                        guild.id not in self.active_sinks
                        or not voice_client.is_listening()
                    ):
                        self.start_listening_for_vc(voice_client)
                    # Ensure dedicated channel is set up even if already connected
                    await self._ensure_dedicated_voice_text_channel(guild, channel)
                    return voice_client, "Already connected to this channel."

                # Connected with a client that cannot receive audio: replace it.
                print(f"Reconnecting with VoiceRecvClient to {channel.name}.")
                await voice_client.disconnect(force=True)
                voice_client, error = await self._connect_and_listen(
                    channel,
                    success_log=f"Reconnected to {channel.name} in {guild.name} with VoiceRecvClient.",
                    timeout_message=f"Timeout trying to reconnect to {channel.name} with VoiceRecvClient.",
                    error_prefix=f"Error reconnecting to {channel.name} with VoiceRecvClient",
                )
                if error:
                    return None, error
                return voice_client, "Already connected to this channel."

            print(
                f"Moving to {channel.name} in {guild.name}. Reconnecting with VoiceRecvClient."
            )
            # This disconnects the client directly; it does not run the
            # dedicated text channel cleanup that disconnect_from_voice does.
            await voice_client.disconnect(force=True)
            voice_client, error = await self._connect_and_listen(
                channel,
                success_log=f"Moved and reconnected to {channel.name} in {guild.name} with VoiceRecvClient.",
                timeout_message=f"Timeout trying to move and connect to {channel.name}.",
                error_prefix=f"Error moving and connecting to {channel.name}",
            )
        else:
            voice_client, error = await self._connect_and_listen(
                channel,
                success_log=f"Connected to {channel.name} in {guild.name} with VoiceRecvClient.",
                timeout_message=f"Timeout trying to connect to {channel.name}.",
                error_prefix=f"Error connecting to {channel.name}",
            )

        if error:
            return None, error
        if not voice_client:
            return None, "Failed to establish voice client after connection."
        return voice_client, f"Successfully connected and listening in {channel.name}."

    def start_listening_for_vc(self, voice_client: discord.VoiceClient):
        """Start or restart listening on ``voice_client`` with a fresh sink.

        Any previous sink for the guild is stopped and flushed first.
        ``voice_client`` must provide the listening API (``is_listening``,
        ``stop_listening``, ``listen``) used below.
        """
        guild_id = voice_client.guild.id
        previous_sink = self.active_sinks.get(guild_id)
        if previous_sink is not None:
            if voice_client.is_listening():
                voice_client.stop_listening()
            previous_sink.cleanup()

        sink = VoiceAudioSink(self)
        self.active_sinks[guild_id] = sink

        if voice_client.is_listening():
            print(
                f"Already listening in {voice_client.channel.name} for guild {guild_id}"
            )
            return
        voice_client.listen(sink)
        print(f"Started listening in {voice_client.channel.name} for guild {guild_id}")

    async def disconnect_from_voice(self, guild: discord.Guild):
        """Disconnect from voice in ``guild``, flushing audio and cleaning up.

        Returns:
            ``(True, message)`` if a connected client was disconnected,
            otherwise ``(False, message)``.
        """
        voice_client = guild.voice_client
        if not (voice_client and voice_client.is_connected()):
            return False, "Not connected to voice in this guild."

        self._stop_listening_if_active(voice_client)

        sink = self.active_sinks.pop(guild.id, None)
        if sink is not None:
            sink.cleanup()

        await self._cleanup_dedicated_text_channel(guild)

        await voice_client.disconnect(force=True)
        print(f"Disconnected from voice in {guild.name}.")
        return True, f"Disconnected from voice in {guild.name}."

    # ------------------------------------------------------------------
    # Playback
    # ------------------------------------------------------------------

    async def play_audio_file(
        self, voice_client: discord.VoiceClient, audio_file_path: str
    ):
        """Play a local audio file, replacing anything currently playing.

        Returns:
            ``(True, message)`` once playback has started, else ``(False, message)``.
        """
        if not voice_client or not voice_client.is_connected():
            print("Error: Voice client not connected.")
            return False, "Voice client not connected."

        if not os.path.exists(audio_file_path):
            print(f"Error: Audio file not found at {audio_file_path}")
            return False, "Audio file not found."

        if voice_client.is_playing():
            voice_client.stop()  # Stop current audio if any

        try:
            audio_source = discord.FFmpegPCMAudio(audio_file_path, **FFMPEG_OPTIONS)
            voice_client.play(
                audio_source,
                after=lambda e: self.after_audio_playback(e, audio_file_path),
            )
        except Exception as e:
            print(
                f"Error creating/playing FFmpegPCMAudio source for {audio_file_path}: {e}"
            )
            return False, f"Error playing audio: {str(e)}"

        print(f"Playing audio: {audio_file_path}")
        return True, f"Playing {os.path.basename(audio_file_path)}"

    def after_audio_playback(self, error, audio_file_path):
        """Log the outcome of playback; the audio file itself is not deleted here."""
        if error:
            print(f"Error during audio playback for {audio_file_path}: {error}")
        else:
            print(f"Finished playing {audio_file_path}")

    # ------------------------------------------------------------------
    # Speech-to-text
    # ------------------------------------------------------------------

    async def process_audio_segment(
        self, user_id: int, audio_data: bytes, guild: discord.Guild
    ):
        """Transcribe ``audio_data`` with Google Cloud Speech-to-Text.

        ``audio_data`` is sent as LINEAR16 at SAMPLE_RATE Hz. A non-empty
        transcript is dispatched as the ``voice_transcription_received`` bot
        event with ``(guild, user, text)``. Errors are printed, not raised.
        """
        if not audio_data:
            print(
                f"process_audio_segment called for user {user_id} with empty audio_data."
            )
            return
        if not self.speech_client:
            return

        try:
            recognition_config = speech.RecognitionConfig(
                encoding=speech.RecognitionConfig.AudioEncoding.LINEAR16,
                sample_rate_hertz=SAMPLE_RATE,
                language_code="en-US",
                enable_automatic_punctuation=True,
                model="telephony",
            )
            recognition_audio = speech.RecognitionAudio(content=audio_data)

            # recognize() is a blocking network call; keep it off the event loop.
            response = await self.bot.loop.run_in_executor(
                None,  # Default ThreadPoolExecutor
                functools.partial(
                    self.speech_client.recognize,
                    config=recognition_config,
                    audio=recognition_audio,
                ),
            )

            transcribed_text = " ".join(
                result.alternatives[0].transcript
                for result in response.results
                if result.alternatives
            ).strip()
            if not transcribed_text:
                return

            user = guild.get_member(user_id) or await self.bot.fetch_user(user_id)
            print(
                f"Google STT for {user.name} ({user_id}) in {guild.name}: {transcribed_text}"
            )
            self.bot.dispatch(
                "voice_transcription_received", guild, user, transcribed_text
            )
        except Exception as e:
            print(
                f"Error processing audio segment with Google STT for user {user_id}: {e}"
            )


async def setup(bot: commands.Bot):
    """Add VoiceGatewayCog to ``bot`` if ``ffmpeg -version`` runs successfully.

    The check runs FFmpeg directly (no shell), so a missing binary raises
    FileNotFoundError and is reported by the dedicated handler below.
    """
    try:
        process = await asyncio.create_subprocess_exec(
            "ffmpeg",
            "-version",
            stdout=asyncio.subprocess.PIPE,
            stderr=asyncio.subprocess.PIPE,
        )
        stdout, stderr = await process.communicate()
        if process.returncode == 0:
            print("FFmpeg found. VoiceGatewayCog can be loaded.")
            await bot.add_cog(VoiceGatewayCog(bot))
            print("VoiceGatewayCog loaded successfully!")
        else:
            print(
                "FFmpeg not found or not working correctly. VoiceGatewayCog will not be loaded."
            )
            print(f"FFmpeg check stdout: {stdout.decode(errors='ignore')}")
            print(f"FFmpeg check stderr: {stderr.decode(errors='ignore')}")
    except FileNotFoundError:
        print(
            "FFmpeg command not found. VoiceGatewayCog will not be loaded. Please install FFmpeg and ensure it's in your system's PATH."
        )
    except Exception as e:
        print(
            f"An error occurred while checking for FFmpeg: {e}. VoiceGatewayCog will not be loaded."
        )