import discord
from discord.ext import commands
import asyncio
import os
import tempfile
import wave  # For saving audio data
import functools  # Added for partial
import subprocess  # For audio conversion
from discord.ext import voice_recv  # For receiving voice
from typing import Optional  # For type hinting

# Gurt specific imports
from gurt import config as GurtConfig

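# Settings read from gurt.config in this cog (all used by the dedicated-channel helpers below):
# VOICE_DEDICATED_TEXT_CHANNEL_ENABLED, VOICE_DEDICATED_TEXT_CHANNEL_NAME_TEMPLATE,
# VOICE_DEDICATED_TEXT_CHANNEL_TOPIC, VOICE_DEDICATED_TEXT_CHANNEL_INITIAL_MESSAGE,
# VOICE_DEDICATED_TEXT_CHANNEL_CLEANUP_ON_LEAVE.
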
# Attempt to import STT and VAD libraries
try:
    from google.cloud import speech
except ImportError:
    print(
        "Google Cloud Speech library not found. Please install with 'pip install google-cloud-speech'"
    )
    speech = None

try:
    import webrtcvad
except ImportError:
    print(
        "webrtcvad library not found. Please install with 'pip install webrtcvad'"
    )
    webrtcvad = None

# OpusDecoder is no longer needed as discord-ext-voice-recv provides PCM.

FFMPEG_OPTIONS = {
    # 'before_options': '-reconnect 1 -reconnect_streamed 1 -reconnect_delay_max 5', # Removed as these are for network streams and might cause issues with local files
    "options": "-vn"
}

# Constants for audio processing
SAMPLE_RATE = 16000  # 16kHz mono is what we send to Google Cloud Speech-to-Text (see process_audio_segment)
CHANNELS = 1  # Mono
SAMPLE_WIDTH = 2  # 16-bit audio (2 bytes per sample)
VAD_MODE = 3  # VAD aggressiveness (0-3, 3 is most aggressive)
FRAME_DURATION_MS = 30  # Duration of a frame in ms for VAD (10, 20, or 30)
BYTES_PER_FRAME = (SAMPLE_RATE // 1000) * FRAME_DURATION_MS * CHANNELS * SAMPLE_WIDTH
# OPUS constants removed as Opus decoding is no longer handled here.

# Silence detection parameters
SILENCE_THRESHOLD_FRAMES = 25  # Number of consecutive silent VAD frames to consider end of speech (e.g., 25 * 30ms = 750ms)
MAX_SPEECH_DURATION_S = 15  # Max duration of a single speech segment to process
MAX_SPEECH_FRAMES = (MAX_SPEECH_DURATION_S * 1000) // FRAME_DURATION_MS
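# For reference, with the values above: one 30ms VAD frame at 16kHz mono 16-bit PCM is
# (16000 // 1000) * 30 * 1 * 2 = 960 bytes, the silence threshold corresponds to
# 25 * 30ms = 750ms of quiet, and MAX_SPEECH_FRAMES = (15 * 1000) // 30 = 500 frames.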


# Helper function for audio conversion
def _convert_audio_to_16khz_mono(raw_pcm_data_48k_stereo: bytes) -> bytes:
    """
    Converts raw 48kHz stereo PCM data to 16kHz mono PCM data using FFmpeg.
    """
    input_temp_file = None
    output_temp_file = None
    converted_audio_data = b""

    try:
        with tempfile.NamedTemporaryFile(suffix=".raw", delete=False) as tmp_in:
            input_temp_file = tmp_in.name
            tmp_in.write(raw_pcm_data_48k_stereo)

        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp_out:
            output_temp_file = tmp_out.name

        command = [
            "ffmpeg",
            "-f",
            "s16le",  # Input format: signed 16-bit little-endian PCM
            "-ac",
            "2",  # Input channels: stereo
            "-ar",
            "48000",  # Input sample rate: 48kHz
            "-i",
            input_temp_file,
            "-ac",
            str(CHANNELS),  # Output channels (e.g., 1 for mono)
            "-ar",
            str(SAMPLE_RATE),  # Output sample rate (e.g., 16000)
            "-sample_fmt",
            "s16",  # Output sample format
            "-y",  # Overwrite output file if it exists
            output_temp_file,
        ]

        process = subprocess.run(command, capture_output=True, check=False)

        if process.returncode != 0:
            print(
                f"FFmpeg error during audio conversion. Return code: {process.returncode}"
            )
            print(f"FFmpeg stdout: {process.stdout.decode(errors='ignore')}")
            print(f"FFmpeg stderr: {process.stderr.decode(errors='ignore')}")
            return b""

        with open(output_temp_file, "rb") as f_out:
            with wave.open(f_out, "rb") as wf:
                if (
                    wf.getnchannels() == CHANNELS
                    and wf.getframerate() == SAMPLE_RATE
                    and wf.getsampwidth() == SAMPLE_WIDTH
                ):
                    converted_audio_data = wf.readframes(wf.getnframes())
                else:
                    print(
                        f"Warning: Converted WAV file format mismatch. Expected {CHANNELS}ch, {SAMPLE_RATE}Hz, {SAMPLE_WIDTH}bytes/sample."
                    )
                    print(
                        f"Got: {wf.getnchannels()}ch, {wf.getframerate()}Hz, {wf.getsampwidth()}bytes/sample."
                    )
                    return b""
    except FileNotFoundError:
        print(
            "FFmpeg command not found. Please ensure FFmpeg is installed and in your system's PATH."
        )
        return b""
    except Exception as e:
        print(f"Error during audio conversion: {e}")
        return b""
    finally:
        if input_temp_file and os.path.exists(input_temp_file):
            os.remove(input_temp_file)
        if output_temp_file and os.path.exists(output_temp_file):
            os.remove(output_temp_file)

    return converted_audio_data
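
# Note: the FFmpeg round-trip above spawns a subprocess for every incoming packet. A
# lighter in-process alternative (sketch only, not wired in; the stdlib `audioop`
# module is deprecated since Python 3.11 and removed in 3.13) could look like:
#
#   import audioop
#
#   def _convert_with_audioop(pcm_48k_stereo: bytes, state=None):
#       mono = audioop.tomono(pcm_48k_stereo, SAMPLE_WIDTH, 0.5, 0.5)
#       return audioop.ratecv(mono, SAMPLE_WIDTH, 1, 48000, SAMPLE_RATE, state)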


class VoiceAudioSink(voice_recv.AudioSink):  # Inherit from voice_recv.AudioSink
    def __init__(self, cog_instance):  # Removed voice_client parameter
        super().__init__()
        self.cog = cog_instance
        # self.voice_client is set by the library when listen() is called
        # user_audio_data now keyed by user_id, 'decoder' removed
        self.user_audio_data = (
            {}
        )  # {user_id: {'buffer': bytearray, 'speaking': False, 'silent_frames': 0, 'speech_frames': 0, 'vad': VAD_instance}}

        # OpusDecoder check removed
        if not webrtcvad:
            print(
                "VAD library not loaded. STT might be less efficient or not work as intended."
            )

    def wants_opus(self) -> bool:
        """
        Indicates whether the sink wants Opus-encoded audio (True) or PCM audio (False).
        Our sink processes PCM data, so we return False.
        """
        return False

    # Signature changed: user object directly, data is VoiceData
    def write(self, user: discord.User, voice_data_packet: voice_recv.VoiceData):
        if (
            not webrtcvad or not self.voice_client or not user
        ):  # OpusDecoder check removed, user check added
            return

        user_id = user.id  # Get user_id from the user object

        if user_id not in self.user_audio_data:
            self.user_audio_data[user_id] = {
                "buffer": bytearray(),
                "speaking": False,
                "silent_frames": 0,
                "speech_frames": 0,
                # 'decoder' removed
                "vad": webrtcvad.Vad(VAD_MODE) if webrtcvad else None,
            }

        entry = self.user_audio_data[user_id]

        # Extract PCM data from VoiceData packet
        raw_pcm_data_48k_stereo = voice_data_packet.pcm

        # Convert incoming 48kHz stereo PCM to 16kHz mono PCM
        pcm_data = _convert_audio_to_16khz_mono(raw_pcm_data_48k_stereo)
        if not pcm_data:  # Conversion failed or returned empty bytes
            # print(f"Audio conversion failed for user {user_id}. Skipping frame.")
            return

        # VAD processing expects frames of 10, 20, or 30 ms.
        # pcm_data is now 16kHz mono, hopefully in appropriate chunks from conversion.
        # We need to ensure it's split into VAD-compatible frame lengths if not already.
        # If pcm_data (now 16kHz mono) is a 20ms chunk, its length is 640 bytes.
        # A 10ms frame at 16kHz is 320 bytes. A 30ms frame is 960 bytes.

        # Ensure frame_length for VAD is correct (e.g. 20ms at 16kHz mono = 640 bytes)
        # This constant could be defined at class or module level.
        # For a 20ms frame, which is typical for voice packets:
        frame_length_for_vad_20ms = (SAMPLE_RATE // 1000) * 20 * CHANNELS * SAMPLE_WIDTH

        if (
            len(pcm_data) % frame_length_for_vad_20ms != 0 and len(pcm_data) > 0
        ):  # Check if it's a multiple, or handle if not.
            # This might happen if the converted chunk size isn't exactly what VAD expects per call.
            # For now, we'll try to process it. A more robust solution might buffer/segment pcm_data
            # into exact 10, 20, or 30ms chunks for VAD.
            # print(f"Warning: PCM data length {len(pcm_data)} after conversion is not an exact multiple of VAD frame size {frame_length_for_vad_20ms} for User {user_id}. Trying to process.")
            pass  # Continue, VAD might handle it or error.

        # Process VAD in chunks if pcm_data is longer than one VAD frame.
        # For simplicity, assume pcm_data is one processable chunk for now.
        # If pcm_data can be multiple VAD frames, iterate through it.
        # Current VAD logic processes the whole pcm_data chunk at once.
        # This is okay if pcm_data is already a single VAD frame (e.g. 20ms).

        if entry["vad"]:
            try:
                # Ensure pcm_data is a valid frame for VAD (e.g. 10, 20, 30 ms)
                # If pcm_data is, for example, 640 bytes (20ms at 16kHz mono), it's fine.
                if len(pcm_data) == frame_length_for_vad_20ms:  # Common case
                    is_speech = entry["vad"].is_speech(pcm_data, SAMPLE_RATE)
                elif (
                    len(pcm_data) > 0
                ):  # If not standard, but has data, try (might error)
                    # print(f"VAD processing for User {user_id} with non-standard PCM length {len(pcm_data)}. May error.")
                    # This path is risky if VAD is strict. For now, we assume it's handled or errors.
                    # A robust way: segment pcm_data into valid VAD frames.
                    # For now, assume the chunk from conversion is one such frame.
                    is_speech = entry["vad"].is_speech(
                        pcm_data, SAMPLE_RATE
                    )  # This might fail if len is not 10/20/30ms worth
                else:  # No data
                    is_speech = False

            except Exception as e:  # webrtcvad can raise errors on invalid frame length
                # print(f"VAD error for User {user_id} with PCM length {len(pcm_data)}: {e}. Defaulting to speech=True for this frame.")
                is_speech = True  # Fallback: if VAD fails, assume it's speech
        else:  # No VAD
            is_speech = True

        if is_speech:
            entry["buffer"].extend(pcm_data)
            entry["speaking"] = True
            entry["silent_frames"] = 0
            entry["speech_frames"] += 1
            if entry["speech_frames"] >= MAX_SPEECH_FRAMES:
                # print(f"Max speech frames reached for User {user_id}. Processing segment.")
                self.cog.bot.loop.create_task(
                    self.cog.process_audio_segment(
                        user_id, bytes(entry["buffer"]), self.voice_client.guild
                    )
                )
                entry["buffer"].clear()
                entry["speaking"] = False
                entry["speech_frames"] = 0
        elif entry["speaking"]:  # Was speaking, now silence
            entry["buffer"].extend(pcm_data)  # Add this last silent frame for context
            entry["silent_frames"] += 1
            if entry["silent_frames"] >= SILENCE_THRESHOLD_FRAMES:
                # print(f"Silence threshold reached for User {user_id}. Processing segment.")
                self.cog.bot.loop.create_task(
                    self.cog.process_audio_segment(
                        user_id, bytes(entry["buffer"]), self.voice_client.guild
                    )
                )
                entry["buffer"].clear()
                entry["speaking"] = False
                entry["speech_frames"] = 0
                entry["silent_frames"] = 0
        # If not is_speech and not entry['speaking'], do nothing (ignore silence)

    def cleanup(self):
        print("VoiceAudioSink cleanup called.")
        # Iterate over a copy of items if modifications occur, or handle user_id directly
        for user_id, data_entry in list(self.user_audio_data.items()):
            if data_entry["buffer"]:
                # user object is not directly available here, but process_audio_segment takes user_id
                # We need the guild, which should be available from self.voice_client
                if self.voice_client and self.voice_client.guild:
                    guild = self.voice_client.guild
                    print(
                        f"Processing remaining audio for User ID {user_id} on cleanup."
                    )
                    self.cog.bot.loop.create_task(
                        self.cog.process_audio_segment(
                            user_id, bytes(data_entry["buffer"]), guild
                        )
                    )
                else:
                    print(
                        f"Cannot process remaining audio for User ID {user_id}: voice_client or guild not available."
                    )
        self.user_audio_data.clear()


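# Sketch (assumption, not wired into the sink above): webrtcvad only accepts frames of
# exactly 10, 20, or 30ms, so converted chunks of other sizes would need re-framing
# before calling is_speech(), for example:
def _split_into_vad_frames(pcm: bytes, frame_bytes: int = BYTES_PER_FRAME):
    """Yield consecutive VAD-sized frames from pcm, dropping any trailing partial frame."""
    usable = len(pcm) - (len(pcm) % frame_bytes)
    for offset in range(0, usable, frame_bytes):
        yield pcm[offset : offset + frame_bytes]

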
class VoiceGatewayCog(commands.Cog):
    def __init__(self, bot):
        self.bot = bot
        self.active_sinks = {}  # guild_id: VoiceAudioSink
        self.dedicated_voice_text_channels: dict[int, int] = {}  # guild_id: channel_id
        self.speech_client = None
        if speech:
            try:
                self.speech_client = speech.SpeechClient()
                print("Google Cloud Speech client initialized successfully.")
            except Exception as e:
                print(
                    f"Error initializing Google Cloud Speech client: {e}. STT will not be available."
                )
                self.speech_client = None
        else:
            print(
                "Google Cloud Speech library not available. STT functionality will be disabled."
            )

    async def _ensure_dedicated_voice_text_channel(
        self, guild: discord.Guild, voice_channel: discord.VoiceChannel
    ) -> Optional[discord.TextChannel]:
        if not GurtConfig.VOICE_DEDICATED_TEXT_CHANNEL_ENABLED:
            return None

        existing_channel_id = self.dedicated_voice_text_channels.get(guild.id)
        if existing_channel_id:
            channel = guild.get_channel(existing_channel_id)
            if channel and isinstance(channel, discord.TextChannel):
                print(
                    f"Found existing dedicated voice text channel: {channel.name} ({channel.id})"
                )
                return channel
            else:
                print(
                    f"Dedicated voice text channel ID {existing_channel_id} for guild {guild.id} is invalid or not found. Will create a new one."
                )
                del self.dedicated_voice_text_channels[guild.id]  # Remove invalid ID

        # Create new channel
        channel_name = GurtConfig.VOICE_DEDICATED_TEXT_CHANNEL_NAME_TEMPLATE.format(
            voice_channel_name=voice_channel.name,
            guild_name=guild.name,
            # Add more placeholders if needed
        )
        # Sanitize channel name (Discord has restrictions)
        channel_name = "".join(
            c for c in channel_name if c.isalnum() or c in ["-", "_", " "]
        ).strip()
        channel_name = channel_name.replace(" ", "-").lower()
        if not channel_name:  # Fallback if template results in empty string
            channel_name = "gurt-voice-chat"

        # Check if a channel with this name already exists (to avoid duplicates if bot restarted without proper cleanup)
        for existing_guild_channel in guild.text_channels:
            if existing_guild_channel.name == channel_name:
                print(
                    f"Found existing channel by name '{channel_name}' ({existing_guild_channel.id}). Reusing."
                )
                self.dedicated_voice_text_channels[guild.id] = existing_guild_channel.id
                # Optionally update topic and permissions if needed
                try:
                    if (
                        existing_guild_channel.topic
                        != GurtConfig.VOICE_DEDICATED_TEXT_CHANNEL_TOPIC
                    ):
                        await existing_guild_channel.edit(
                            topic=GurtConfig.VOICE_DEDICATED_TEXT_CHANNEL_TOPIC
                        )
                    # Send initial message if channel is empty or last message isn't the initial one
                    async for last_message in existing_guild_channel.history(limit=1):
                        if (
                            last_message.content
                            != GurtConfig.VOICE_DEDICATED_TEXT_CHANNEL_INITIAL_MESSAGE
                        ):
                            await existing_guild_channel.send(
                                GurtConfig.VOICE_DEDICATED_TEXT_CHANNEL_INITIAL_MESSAGE
                            )
                        break  # Only need the very last message
                    else:  # No messages in channel
                        await existing_guild_channel.send(
                            GurtConfig.VOICE_DEDICATED_TEXT_CHANNEL_INITIAL_MESSAGE
                        )

                except discord.Forbidden:
                    print(
                        f"Missing permissions to update reused dedicated channel {channel_name}"
                    )
                except Exception as e_reuse:
                    print(
                        f"Error updating reused dedicated channel {channel_name}: {e_reuse}"
                    )
                return existing_guild_channel

        overwrites = {
            guild.me: discord.PermissionOverwrite(
                read_messages=True, send_messages=True, manage_messages=True
            ),  # GURT needs to manage
            guild.default_role: discord.PermissionOverwrite(
                read_messages=False, send_messages=False
            ),  # Private by default
            # Consider adding server admins/mods with read/send permissions
        }
        # Add owner and admins with full perms to the channel
        if guild.owner:
            overwrites[guild.owner] = discord.PermissionOverwrite(
                read_messages=True,
                send_messages=True,
                manage_channels=True,
                manage_messages=True,
            )
        for role in guild.roles:
            if (
                role.permissions.administrator and not role.is_default()
            ):  # Check for admin roles
                overwrites[role] = discord.PermissionOverwrite(
                    read_messages=True,
                    send_messages=True,
                    manage_channels=True,
                    manage_messages=True,
                )

        try:
            print(f"Creating new dedicated voice text channel: {channel_name}")
            new_channel = await guild.create_text_channel(
                name=channel_name,
                overwrites=overwrites,
                topic=GurtConfig.VOICE_DEDICATED_TEXT_CHANNEL_TOPIC,
                reason="GURT Dedicated Voice Chat Channel",
            )
            self.dedicated_voice_text_channels[guild.id] = new_channel.id
            if GurtConfig.VOICE_DEDICATED_TEXT_CHANNEL_INITIAL_MESSAGE:
                await new_channel.send(
                    GurtConfig.VOICE_DEDICATED_TEXT_CHANNEL_INITIAL_MESSAGE
                )
            print(
                f"Created dedicated voice text channel: {new_channel.name} ({new_channel.id})"
            )
            return new_channel
        except discord.Forbidden:
            print(
                f"Forbidden: Could not create dedicated voice text channel '{channel_name}' in guild {guild.name}."
            )
            return None
        except Exception as e:
            print(f"Error creating dedicated voice text channel '{channel_name}': {e}")
            return None

    def get_dedicated_text_channel_for_guild(
        self, guild_id: int
    ) -> Optional[discord.TextChannel]:
        channel_id = self.dedicated_voice_text_channels.get(guild_id)
        if channel_id:
            guild = self.bot.get_guild(guild_id)
            if guild:
                channel = guild.get_channel(channel_id)
                if isinstance(channel, discord.TextChannel):
                    return channel
        return None

    async def cog_load(self):
        print("VoiceGatewayCog loaded!")

    async def cog_unload(self):
        print("Unloading VoiceGatewayCog...")
        # Disconnect from all voice channels and clean up sinks
        for vc in list(self.bot.voice_clients):  # Iterate over a copy
            guild_id = vc.guild.id
            if guild_id in self.active_sinks:
                if (
                    vc.is_connected()
                    and hasattr(vc, "is_listening")
                    and vc.is_listening()
                ):
                    if hasattr(vc, "stop_listening"):
                        vc.stop_listening()
                    else:  # Or equivalent for VoiceRecvClient
                        pass
                self.active_sinks[guild_id].cleanup()
                del self.active_sinks[guild_id]

            # Handle dedicated text channel cleanup on cog unload
            if (
                GurtConfig.VOICE_DEDICATED_TEXT_CHANNEL_ENABLED
                and GurtConfig.VOICE_DEDICATED_TEXT_CHANNEL_CLEANUP_ON_LEAVE
            ):
                dedicated_channel_id = self.dedicated_voice_text_channels.get(guild_id)
                if dedicated_channel_id:
                    try:
                        channel_to_delete = vc.guild.get_channel(
                            dedicated_channel_id
                        ) or await self.bot.fetch_channel(dedicated_channel_id)
                        if channel_to_delete:
                            print(
                                f"Deleting dedicated voice text channel {channel_to_delete.name} ({channel_to_delete.id}) during cog unload."
                            )
                            await channel_to_delete.delete(
                                reason="GURT VoiceGatewayCog unload"
                            )
                    except discord.NotFound:
                        print(
                            f"Dedicated voice text channel {dedicated_channel_id} not found for deletion during unload."
                        )
                    except discord.Forbidden:
                        print(
                            f"Forbidden: Could not delete dedicated voice text channel {dedicated_channel_id} during unload."
                        )
                    except Exception as e:
                        print(
                            f"Error deleting dedicated voice text channel {dedicated_channel_id} during unload: {e}"
                        )
                    if guild_id in self.dedicated_voice_text_channels:
                        del self.dedicated_voice_text_channels[guild_id]

            if vc.is_connected():
                await vc.disconnect(force=True)
        print("VoiceGatewayCog unloaded and disconnected from voice channels.")

    async def connect_to_voice(self, channel: discord.VoiceChannel):
        """Connects the bot to a specified voice channel and starts listening."""
        if not channel:
            return None, "Channel not provided."

        guild = channel.guild
        voice_client = guild.voice_client

        if voice_client and voice_client.is_connected():
            if voice_client.channel == channel:
                print(f"Already connected to {channel.name} in {guild.name}.")
                if isinstance(voice_client, voice_recv.VoiceRecvClient):
                    if (
                        guild.id not in self.active_sinks
                        or not voice_client.is_listening()
                    ):
                        self.start_listening_for_vc(voice_client)
                    # Ensure dedicated channel is set up even if already connected
                    await self._ensure_dedicated_voice_text_channel(guild, channel)
                else:
                    print(f"Reconnecting with VoiceRecvClient to {channel.name}.")
                    await voice_client.disconnect(force=True)
                    try:
                        voice_client = await channel.connect(
                            cls=voice_recv.VoiceRecvClient, timeout=10.0
                        )
                        print(
                            f"Reconnected to {channel.name} in {guild.name} with VoiceRecvClient."
                        )
                        self.start_listening_for_vc(voice_client)
                        await self._ensure_dedicated_voice_text_channel(guild, channel)
                    except asyncio.TimeoutError:
                        return (
                            None,
                            f"Timeout trying to reconnect to {channel.name} with VoiceRecvClient.",
                        )
                    except Exception as e:
                        return (
                            None,
                            f"Error reconnecting to {channel.name} with VoiceRecvClient: {str(e)}",
                        )
                return voice_client, "Already connected to this channel."
            else:
                print(
                    f"Moving to {channel.name} in {guild.name}. Reconnecting with VoiceRecvClient."
                )
                await voice_client.disconnect(
                    force=True
                )  # This will trigger cleanup for old channel's dedicated text channel if configured
                try:
                    voice_client = await channel.connect(
                        cls=voice_recv.VoiceRecvClient, timeout=10.0
                    )
                    print(
                        f"Moved and reconnected to {channel.name} in {guild.name} with VoiceRecvClient."
                    )
                    self.start_listening_for_vc(voice_client)
                    await self._ensure_dedicated_voice_text_channel(guild, channel)
                except asyncio.TimeoutError:
                    return (
                        None,
                        f"Timeout trying to move and connect to {channel.name}.",
                    )
                except Exception as e:
                    return (
                        None,
                        f"Error moving and connecting to {channel.name}: {str(e)}",
                    )
        else:
            try:
                voice_client = await channel.connect(
                    cls=voice_recv.VoiceRecvClient, timeout=10.0
                )
                print(
                    f"Connected to {channel.name} in {guild.name} with VoiceRecvClient."
                )
                self.start_listening_for_vc(voice_client)
                await self._ensure_dedicated_voice_text_channel(guild, channel)
            except asyncio.TimeoutError:
                return None, f"Timeout trying to connect to {channel.name}."
            except Exception as e:
                return None, f"Error connecting to {channel.name}: {str(e)}"

        if not voice_client:
            return None, "Failed to establish voice client after connection."

        return voice_client, f"Successfully connected and listening in {channel.name}."
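
    # Illustrative usage only (assumption, not part of this cog): another cog or command
    # could hand control to connect_to_voice roughly like this:
    #
    #   @commands.command(name="joinvc")
    #   async def joinvc(self, ctx: commands.Context):
    #       gateway = ctx.bot.get_cog("VoiceGatewayCog")
    #       if gateway and ctx.author.voice and ctx.author.voice.channel:
    #           vc, msg = await gateway.connect_to_voice(ctx.author.voice.channel)
    #           await ctx.send(msg)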

    def start_listening_for_vc(self, voice_client: voice_recv.VoiceRecvClient):
        """Starts or restarts listening for a given voice client."""
        guild_id = voice_client.guild.id
        if guild_id in self.active_sinks:
            # If sink exists, ensure it's clean and listening is (re)started
            if voice_client.is_listening():
                voice_client.stop_listening()  # Stop previous listening if any
            self.active_sinks[guild_id].cleanup()  # Clean old state
            # Re-initialize or ensure the sink is fresh for the current VC
            self.active_sinks[guild_id] = VoiceAudioSink(self)
        else:
            self.active_sinks[guild_id] = VoiceAudioSink(self)

        if not voice_client.is_listening():
            voice_client.listen(self.active_sinks[guild_id])
            print(
                f"Started listening in {voice_client.channel.name} for guild {guild_id}"
            )
        else:
            print(
                f"Already listening in {voice_client.channel.name} for guild {guild_id}"
            )

    async def disconnect_from_voice(self, guild: discord.Guild):
        """Disconnects the bot from the voice channel in the given guild."""
        voice_client = guild.voice_client
        if voice_client and voice_client.is_connected():
            if hasattr(voice_client, "is_listening") and voice_client.is_listening():
                voice_client.stop_listening()

            guild_id = guild.id
            if guild_id in self.active_sinks:
                self.active_sinks[guild_id].cleanup()
                del self.active_sinks[guild_id]

            # Handle dedicated text channel cleanup
            if (
                GurtConfig.VOICE_DEDICATED_TEXT_CHANNEL_ENABLED
                and GurtConfig.VOICE_DEDICATED_TEXT_CHANNEL_CLEANUP_ON_LEAVE
            ):
                dedicated_channel_id = self.dedicated_voice_text_channels.get(guild_id)
                if dedicated_channel_id:
                    try:
                        channel_to_delete = guild.get_channel(
                            dedicated_channel_id
                        ) or await self.bot.fetch_channel(dedicated_channel_id)
                        if channel_to_delete:
                            print(
                                f"Deleting dedicated voice text channel {channel_to_delete.name} ({channel_to_delete.id})."
                            )
                            await channel_to_delete.delete(
                                reason="GURT disconnected from voice channel"
                            )
                    except discord.NotFound:
                        print(
                            f"Dedicated voice text channel {dedicated_channel_id} not found for deletion."
                        )
                    except discord.Forbidden:
                        print(
                            f"Forbidden: Could not delete dedicated voice text channel {dedicated_channel_id}."
                        )
                    except Exception as e:
                        print(
                            f"Error deleting dedicated voice text channel {dedicated_channel_id}: {e}"
                        )
                    if guild_id in self.dedicated_voice_text_channels:
                        del self.dedicated_voice_text_channels[guild_id]

            await voice_client.disconnect(force=True)
            print(f"Disconnected from voice in {guild.name}.")
            return True, f"Disconnected from voice in {guild.name}."
        return False, "Not connected to voice in this guild."

    async def play_audio_file(
        self, voice_client: discord.VoiceClient, audio_file_path: str
    ):
        """Plays an audio file in the voice channel."""
        if not voice_client or not voice_client.is_connected():
            print("Error: Voice client not connected.")
            return False, "Voice client not connected."

        if not os.path.exists(audio_file_path):
            print(f"Error: Audio file not found at {audio_file_path}")
            return False, "Audio file not found."

        if voice_client.is_playing():
            voice_client.stop()  # Stop current audio if any

        try:
            audio_source = discord.FFmpegPCMAudio(audio_file_path, **FFMPEG_OPTIONS)
            voice_client.play(
                audio_source,
                after=lambda e: self.after_audio_playback(e, audio_file_path),
            )
            print(f"Playing audio: {audio_file_path}")
            return True, f"Playing {os.path.basename(audio_file_path)}"
        except Exception as e:
            print(
                f"Error creating/playing FFmpegPCMAudio source for {audio_file_path}: {e}"
            )
            return False, f"Error playing audio: {str(e)}"

    def after_audio_playback(self, error, audio_file_path):
        if error:
            print(f"Error during audio playback for {audio_file_path}: {error}")
        else:
            print(f"Finished playing {audio_file_path}")
        # TTSProviderCog's cleanup will handle deleting the file.

    # Removed start_listening_pipeline as the sink now handles more logic directly or via tasks.

    async def process_audio_segment(
        self, user_id: int, audio_data: bytes, guild: discord.Guild
    ):
        """Processes a segment of audio data using Google Cloud Speech-to-Text."""
        if not self.speech_client or not audio_data:
            if not audio_data:
                print(
                    f"process_audio_segment called for user {user_id} with empty audio_data."
                )
            return

        try:
            recognition_config = speech.RecognitionConfig(
                encoding=speech.RecognitionConfig.AudioEncoding.LINEAR16,
                sample_rate_hertz=SAMPLE_RATE,  # Defined as 16000
                language_code="en-US",
                enable_automatic_punctuation=True,
                model="telephony",  # Telephony model; remove this line to fall back to the default model
            )
            recognition_audio = speech.RecognitionAudio(content=audio_data)

            # Run in executor as it's a network call that can be blocking
            response = await self.bot.loop.run_in_executor(
                None,  # Default ThreadPoolExecutor
                functools.partial(
                    self.speech_client.recognize,
                    config=recognition_config,
                    audio=recognition_audio,
                ),
            )

            transcribed_text = ""
            for result in response.results:
                if result.alternatives:
                    transcribed_text += result.alternatives[0].transcript + " "

            transcribed_text = transcribed_text.strip()

            if transcribed_text:
                user = guild.get_member(user_id) or await self.bot.fetch_user(user_id)
                print(
                    f"Google STT for {user.name} ({user_id}) in {guild.name}: {transcribed_text}"
                )
                self.bot.dispatch(
                    "voice_transcription_received", guild, user, transcribed_text
                )

        except Exception as e:
            print(
                f"Error processing audio segment with Google STT for user {user_id}: {e}"
            )
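
    # Illustrative only (assumption): consumers of the "voice_transcription_received"
    # event dispatched above would listen for it in their own cog, e.g.:
    #
    #   @commands.Cog.listener()
    #   async def on_voice_transcription_received(self, guild, user, text):
    #       gateway = self.bot.get_cog("VoiceGatewayCog")
    #       channel = gateway.get_dedicated_text_channel_for_guild(guild.id) if gateway else None
    #       if channel:
    #           await channel.send(f"{user.display_name}: {text}")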


async def setup(bot: commands.Bot):
    # Check for FFmpeg before adding cog
    try:
        # Try running ffmpeg -version to check if it's installed and in PATH
        process = await asyncio.create_subprocess_shell(
            "ffmpeg -version",
            stdout=asyncio.subprocess.PIPE,
            stderr=asyncio.subprocess.PIPE,
        )
        stdout, stderr = await process.communicate()
        if process.returncode == 0:
            print("FFmpeg found. VoiceGatewayCog can be loaded.")
            await bot.add_cog(VoiceGatewayCog(bot))
            print("VoiceGatewayCog loaded successfully!")
        else:
            print(
                "FFmpeg not found or not working correctly. VoiceGatewayCog will not be loaded."
            )
            print(f"FFmpeg check stdout: {stdout.decode(errors='ignore')}")
            print(f"FFmpeg check stderr: {stderr.decode(errors='ignore')}")

    except FileNotFoundError:
        print(
            "FFmpeg command not found. VoiceGatewayCog will not be loaded. Please install FFmpeg and ensure it's in your system's PATH."
        )
    except Exception as e:
        print(
            f"An error occurred while checking for FFmpeg: {e}. VoiceGatewayCog will not be loaded."
        )