# discordbot/cogs/tts_provider_cog.py

import discord
from discord.ext import commands
from discord import app_commands
import os
import asyncio
import tempfile
import sys
import importlib.util
from google.cloud import texttospeech

class TTSProviderCog(commands.Cog):
    def __init__(self, bot):
        """Keep a reference to the bot, purge stale audio, and start the cleanup loop."""
        self.bot = bot
        print("TTSProviderCog initialized!")

        # One sweep right away, before the hourly loop takes over.
        self.cleanup_old_files()

        # The task handle is stored so that cog_unload can cancel it later.
        event_loop = self.bot.loop
        self.cleanup_task = event_loop.create_task(self.periodic_cleanup())

    async def periodic_cleanup(self):
        """Run cleanup_old_files once an hour for as long as the bot is open."""
        interval_seconds = 3600  # one hour between sweeps
        while True:
            if self.bot.is_closed():
                break
            # Sleep first: __init__ already performed the initial sweep.
            await asyncio.sleep(interval_seconds)
            self.cleanup_old_files()

    def cog_unload(self):
        """Cancel the background cleanup task, if one was started."""
        # getattr with a default covers a cog whose __init__ never got as far
        # as creating the task.
        pending_task = getattr(self, 'cleanup_task', None)
        if pending_task:
            pending_task.cancel()

    def cleanup_old_files(self):
        """Delete generated TTS audio older than one hour from ``./SOUND``.

        The directory is created when missing. Both ``tts_*.mp3`` files and the
        ``tts_*.wav`` files left behind by the espeak fallback are considered.
        Each file is handled independently, so one file that cannot be removed
        does not stop the rest of the sweep. Progress and errors are printed;
        nothing is returned and no exception escapes this method.
        """
        import glob
        import time

        max_age_seconds = 3600  # 1 hour

        try:
            # Create the SOUND directory if it doesn't exist
            os.makedirs("./SOUND", exist_ok=True)

            cutoff = time.time() - max_age_seconds

            # "tts_*" already matches the "tts_direct_*" and "tts_test_*" names,
            # so listing those patterns separately would queue the same file
            # more than once. A set keeps every path unique regardless.
            candidates = set()
            for pattern in ("./SOUND/tts_*.mp3", "./SOUND/tts_*.wav"):
                candidates.update(glob.glob(pattern))

            removed_count = 0
            for path in sorted(candidates):
                try:
                    if os.path.getmtime(path) >= cutoff:
                        continue  # still fresh, leave it alone
                    os.remove(path)
                except FileNotFoundError:
                    # Already gone (e.g. removed by a concurrent sweep).
                    continue
                except OSError as e:
                    print(f"Error removing old TTS file {path}: {e}")
                    continue
                removed_count += 1
                print(f"Cleaned up old TTS file: {path}")

            print(f"Cleaned up {removed_count} old TTS files")
        except Exception as e:
            # Best effort: housekeeping must never take the cog down.
            print(f"Error during cleanup: {e}")

    async def generate_tts_directly(self, provider, text, output_file=None):
        """Generate TTS audio in-process (no helper script) and report the outcome.

        Args:
            provider: One of "gtts", "pyttsx3", "coqui", "espeak" or
                "google_cloud_tts".
            text: The text to synthesize.
            output_file: Destination path. When omitted, a unique
                ``./SOUND/tts_direct_<hex>.mp3`` name is generated.

        Returns:
            ``(True, path)`` on success, where ``path`` is the file that was
            written (for espeak this is a ``.wav`` when MP3 conversion fails),
            or ``(False, message)`` describing why synthesis failed.
        """
        # Create a unique output file if none is provided
        if output_file is None:
            import uuid
            output_file = f"./SOUND/tts_direct_{uuid.uuid4().hex}.mp3"

        # Create output directory if it doesn't exist
        os.makedirs("./SOUND", exist_ok=True)

        if provider == "pyttsx3":
            # NOTE(review): pyttsx3 drivers are believed to be sensitive to the
            # thread they are initialised on, so this engine is deliberately
            # left on the calling thread - confirm before moving it.
            return self._generate_tts_blocking(provider, text, output_file)

        # The remaining engines are synchronous (network calls, model
        # inference, subprocesses). Running them in the default executor keeps
        # the event loop free while they work.
        loop = asyncio.get_running_loop()
        return await loop.run_in_executor(
            None, self._generate_tts_blocking, provider, text, output_file
        )

    def _generate_tts_blocking(self, provider, text, output_file):
        """Synchronously synthesize ``text`` with ``provider`` into ``output_file``.

        Returns the same ``(success, path_or_message)`` tuple as
        generate_tts_directly.
        """
        if provider == "gtts":
            # Check if gtts is available
            if importlib.util.find_spec("gtts") is None:
                return False, "Google TTS (gtts) is not installed. Run: pip install gtts"

            try:
                from gtts import gTTS
                tts = gTTS(text=text, lang='en')
                tts.save(output_file)
                return True, output_file
            except Exception as e:
                return False, f"Error with Google TTS: {str(e)}"

        if provider == "pyttsx3":
            # Check if pyttsx3 is available
            if importlib.util.find_spec("pyttsx3") is None:
                return False, "pyttsx3 is not installed. Run: pip install pyttsx3"

            try:
                import pyttsx3
                engine = pyttsx3.init()
                engine.save_to_file(text, output_file)
                engine.runAndWait()
                return True, output_file
            except Exception as e:
                return False, f"Error with pyttsx3: {str(e)}"

        if provider == "coqui":
            # Check if TTS is available
            if importlib.util.find_spec("TTS") is None:
                return False, "Coqui TTS is not installed. Run: pip install TTS"

            try:
                from TTS.api import TTS
                tts = TTS("tts_models/en/ljspeech/tacotron2-DDC")
                tts.tts_to_file(text=text, file_path=output_file)
                return True, output_file
            except Exception as e:
                return False, f"Error with Coqui TTS: {str(e)}"

        if provider == "espeak":
            import shutil
            import subprocess

            try:
                # shutil.which searches PATH on every platform without
                # depending on an external `where`/`which` executable.
                if shutil.which("espeak-ng") is None:
                    return False, "espeak-ng is not installed or not in PATH. Install espeak-ng and make sure it's in your PATH."

                # espeak-ng writes WAV. Swap only a trailing ".mp3" so that the
                # same substring elsewhere in the path is left untouched.
                wants_mp3 = output_file.endswith(".mp3")
                if wants_mp3:
                    wav_file = output_file[:-len(".mp3")] + ".wav"
                else:
                    wav_file = output_file

                # The text goes through stdin rather than argv, so input that
                # starts with "-" cannot be mistaken for a command-line option.
                process = subprocess.run(
                    ["espeak-ng", "--stdin", "-w", wav_file],
                    input=text,
                    capture_output=True,
                    text=True,
                    encoding="utf-8",
                    errors="replace",
                )

                if process.returncode != 0:
                    return False, f"Error running espeak-ng: {process.stderr}"

                # Convert WAV to MP3 if needed
                if wants_mp3:
                    try:
                        # Try to use pydub for conversion
                        from pydub import AudioSegment
                        sound = AudioSegment.from_wav(wav_file)
                        sound.export(output_file, format="mp3")
                        # Remove the temporary WAV file
                        os.remove(wav_file)
                    except Exception as e:
                        # If pydub fails, just use the WAV file
                        print(f"Warning: Could not convert WAV to MP3: {e}")
                        output_file = wav_file

                return True, output_file
            except Exception as e:
                return False, f"Error with espeak-ng: {str(e)}"

        if provider == "google_cloud_tts":
            # Check if google-cloud-texttospeech is available
            if importlib.util.find_spec("google.cloud.texttospeech") is None:
                return False, "Google Cloud TTS library is not installed. Run: pip install google-cloud-texttospeech"

            try:
                client = texttospeech.TextToSpeechClient()  # Assumes GOOGLE_APPLICATION_CREDENTIALS is set

                input_text = texttospeech.SynthesisInput(text=text)

                # Specify the voice model
                voice = texttospeech.VoiceSelectionParams(
                    language_code="en-US",
                    name="en-US-Chirp3-HD-Autonoe"
                )

                # Specify audio configuration (MP3 output)
                audio_config = texttospeech.AudioConfig(
                    audio_encoding=texttospeech.AudioEncoding.MP3
                )

                response = client.synthesize_speech(
                    request={"input": input_text, "voice": voice, "audio_config": audio_config}
                )

                # The response's audio_content is binary. Write it to the output file.
                with open(output_file, "wb") as out:
                    out.write(response.audio_content)
                return True, output_file
            except Exception as e:
                error_message = f"Error with Google Cloud TTS: {str(e)}"
                lowered = str(e).lower()
                if "quota" in lowered:
                    error_message += " This might be a quota issue with your Google Cloud project."
                elif "credentials" in lowered:
                    error_message += " Please ensure GOOGLE_APPLICATION_CREDENTIALS environment variable is set correctly."
                return False, error_message

        return False, f"Unknown TTS provider: {provider}"

    @app_commands.command(name="ttsprovider", description="Test different TTS providers")
    @app_commands.describe(
        provider="Select the TTS provider to use",
        text="Text to be spoken"
    )
    @app_commands.choices(provider=[
        app_commands.Choice(name="Google TTS (Online)", value="gtts"),
        app_commands.Choice(name="pyttsx3 (Offline)", value="pyttsx3"),
        app_commands.Choice(name="Coqui TTS (AI Voice)", value="coqui"),
        app_commands.Choice(name="eSpeak-NG (Offline)", value="espeak"),
        app_commands.Choice(name="Google Cloud TTS (Chirp HD)", value="google_cloud_tts")
    ])
    async def ttsprovider_slash(self, interaction: discord.Interaction,
                               provider: str,
                               text: str = "This is a test of text to speech"):
        """Test different TTS providers"""
        # Flow:
        #   1. Run a diagnostic script in a separate interpreter. It prints
        #      environment details and tries to synthesize `text` with
        #      `provider` into ./SOUND/tts_test_<hex>.mp3.
        #   2. If that produced audio, send it.
        #   3. Otherwise retry in-process via generate_tts_directly.
        #   4. If both attempts fail, send an error report plus the script's
        #      captured output.
        await interaction.response.defer(thinking=True)

        # The script is a constant: provider and text are handed over as
        # command-line arguments instead of being pasted into the source, so
        # user input can never be executed as Python code.
        script_content = """
import importlib.util
import sys
import os
import traceback

# Print Python version and path for debugging
print(f"Python version: {sys.version}")
print(f"Python executable: {sys.executable}")
print(f"Current working directory: {os.getcwd()}")

# Check for TTS libraries
try:
    import pkg_resources
    installed_packages = [pkg.key for pkg in pkg_resources.working_set]
    print(f"Installed packages: {installed_packages}")
except Exception as e:
    print(f"Error getting installed packages: {e}")

# Check for specific TTS libraries
try:
    GTTS_AVAILABLE = importlib.util.find_spec("gtts") is not None
    print(f"GTTS_AVAILABLE: {GTTS_AVAILABLE}")
    if GTTS_AVAILABLE:
        import gtts
        print(f"gtts version: {gtts.__version__}")
except Exception as e:
    print(f"Error checking gtts: {e}")
    GTTS_AVAILABLE = False

try:
    PYTTSX3_AVAILABLE = importlib.util.find_spec("pyttsx3") is not None
    print(f"PYTTSX3_AVAILABLE: {PYTTSX3_AVAILABLE}")
    if PYTTSX3_AVAILABLE:
        import pyttsx3
        print("pyttsx3 imported successfully")
except Exception as e:
    print(f"Error checking pyttsx3: {e}")
    PYTTSX3_AVAILABLE = False

try:
    COQUI_AVAILABLE = importlib.util.find_spec("TTS") is not None
    print(f"COQUI_AVAILABLE: {COQUI_AVAILABLE}")
    if COQUI_AVAILABLE:
        import TTS
        print(f"TTS version: {TTS.__version__}")
except Exception as e:
    print(f"Error checking TTS: {e}")
    COQUI_AVAILABLE = False

# Check for espeak-ng
try:
    import shutil
    import subprocess
    # shutil.which searches PATH without needing an external where/which binary
    ESPEAK_AVAILABLE = shutil.which("espeak-ng") is not None
    print(f"ESPEAK_AVAILABLE: {ESPEAK_AVAILABLE}")
    if ESPEAK_AVAILABLE:
        # Try to get version
        version_result = subprocess.run(["espeak-ng", "--version"], capture_output=True, text=True)
        if version_result.returncode == 0:
            print(f"espeak-ng version: {version_result.stdout.strip()}")
        else:
            print("espeak-ng found but couldn't get version")
except Exception as e:
    print(f"Error checking espeak-ng: {e}")
    ESPEAK_AVAILABLE = False

try:
    GCLOUD_TTS_AVAILABLE = importlib.util.find_spec("google.cloud.texttospeech") is not None
    print(f"GCLOUD_TTS_AVAILABLE: {GCLOUD_TTS_AVAILABLE}")
    if GCLOUD_TTS_AVAILABLE:
        import google.cloud.texttospeech
        print(f"google-cloud-texttospeech version: {google.cloud.texttospeech.__version__}")
except Exception as e:
    print(f"Error checking google.cloud.texttospeech: {e}")
    GCLOUD_TTS_AVAILABLE = False

def generate_tts_audio(provider, text, output_file):
    print(f"Testing TTS provider: {provider}")
    print(f"Text: {text}")
    print(f"Output file: {output_file}")

    if provider == "gtts" and GTTS_AVAILABLE:
        try:
            from gtts import gTTS
            tts = gTTS(text=text, lang='en')
            tts.save(output_file)
            print(f"Google TTS audio saved to {output_file}")
            return True
        except Exception as e:
            print(f"Error with Google TTS: {e}")
            traceback.print_exc()
            return False
    elif provider == "pyttsx3" and PYTTSX3_AVAILABLE:
        try:
            import pyttsx3
            engine = pyttsx3.init()
            engine.save_to_file(text, output_file)
            engine.runAndWait()
            print(f"pyttsx3 audio saved to {output_file}")
            return True
        except Exception as e:
            print(f"Error with pyttsx3: {e}")
            traceback.print_exc()
            return False
    elif provider == "coqui" and COQUI_AVAILABLE:
        try:
            from TTS.api import TTS
            tts = TTS("tts_models/en/ljspeech/tacotron2-DDC")
            tts.tts_to_file(text=text, file_path=output_file)
            print(f"Coqui TTS audio saved to {output_file}")
            return True
        except Exception as e:
            print(f"Error with Coqui TTS: {e}")
            traceback.print_exc()
            return False
    elif provider == "espeak" and ESPEAK_AVAILABLE:
        try:
            # Create a WAV file first
            wav_file = output_file.replace(".mp3", ".wav")

            # Feed the text through stdin so that text starting with "-" is
            # never parsed as a command-line option
            cmd = ["espeak-ng", "--stdin", "-w", wav_file]
            process = subprocess.run(
                cmd, input=text, capture_output=True, text=True,
                encoding="utf-8", errors="replace"
            )

            if process.returncode != 0:
                print(f"Error running espeak-ng: {process.stderr}")
                traceback.print_exc()
                return False

            # Convert WAV to MP3 if needed
            if output_file.endswith(".mp3"):
                try:
                    # Try to use pydub for conversion
                    from pydub import AudioSegment
                    sound = AudioSegment.from_wav(wav_file)
                    sound.export(output_file, format="mp3")
                    # Remove the temporary WAV file
                    os.remove(wav_file)
                    print(f"espeak-ng audio saved to {output_file}")
                except Exception as e:
                    # If pydub fails, just use the WAV file
                    print(f"Warning: Could not convert WAV to MP3: {e}")
                    print(f"Using WAV file instead: {wav_file}")
                    output_file = wav_file
            else:
                # If the output file doesn't end with .mp3, we're already using the WAV file
                output_file = wav_file
                print(f"espeak-ng audio saved to {output_file}")

            return True
        except Exception as e:
            print(f"Error with espeak-ng: {e}")
            traceback.print_exc()
            return False
    elif provider == "google_cloud_tts" and GCLOUD_TTS_AVAILABLE:
        try:
            from google.cloud import texttospeech as gcloud_tts
            client = gcloud_tts.TextToSpeechClient()
            input_text = gcloud_tts.SynthesisInput(text=text)
            voice = gcloud_tts.VoiceSelectionParams(
                language_code="en-US",
                name="en-US-Chirp3-HD-Autonoe"
            )
            audio_config = gcloud_tts.AudioConfig(
                audio_encoding=gcloud_tts.AudioEncoding.MP3
            )
            response = client.synthesize_speech(
                request={"input": input_text, "voice": voice, "audio_config": audio_config}
            )
            with open(output_file, "wb") as out:
                out.write(response.audio_content)
            print(f"Google Cloud TTS audio saved to {output_file}")
            return True
        except Exception as e:
            print(f"Error with Google Cloud TTS: {e}")
            traceback.print_exc()
            return False
    else:
        print(f"TTS provider {provider} not available.")
        return False

# Provider and text are supplied by the caller as command-line arguments
provider_arg = sys.argv[1]
text_arg = sys.argv[2]

# Create output directory if it doesn't exist
os.makedirs("./SOUND", exist_ok=True)

# Generate a unique filename
import uuid
unique_id = uuid.uuid4().hex
output_file = f"./SOUND/tts_test_{unique_id}.mp3"
print(f"Using output file: {output_file}")

# Generate TTS audio
try:
    success = generate_tts_audio(provider_arg, text_arg, output_file)
    print(f"TTS generation {'' if success else 'un'}successful")
except Exception as e:
    print(f"Unexpected error: {e}")
    traceback.print_exc()
    success = False

# Verify file exists and has content
if os.path.exists(output_file):
    file_size = os.path.getsize(output_file)
    print(f"Output file exists, size: {file_size} bytes")
else:
    print("Output file does not exist")
"""

        # Each invocation gets its own script file, so concurrent commands
        # cannot overwrite one another's script.
        script_fd, script_path = tempfile.mkstemp(prefix="tts_test_", suffix=".py")
        try:
            with os.fdopen(script_fd, "w", encoding="utf8") as f:
                f.write(script_content)

            # Run the script, passing the user input as plain arguments
            process = await asyncio.create_subprocess_exec(
                sys.executable, script_path, provider, text,
                stdout=asyncio.subprocess.PIPE,
                stderr=asyncio.subprocess.PIPE
            )

            # Wait for the process to complete
            stdout, stderr = await process.communicate()
        finally:
            # Best-effort removal; a leftover temp file is harmless.
            try:
                os.remove(script_path)
            except OSError:
                pass

        # Get the output regardless of return code. Undecodable bytes are
        # replaced so that odd console encodings cannot abort the command.
        stdout_text = stdout.decode(errors="replace") if stdout else ""
        stderr_text = stderr.decode(errors="replace") if stderr else ""

        # Combine stdout and stderr for complete output
        full_output = f"STDOUT:\n{stdout_text}\n\nSTDERR:\n{stderr_text}"

        # Extract the output filename from the stdout
        output_filename = None
        for line in stdout_text.split('\n'):
            if line.startswith("Using output file:"):
                output_filename = line.split(":", 1)[1].strip()
                break

        # If we couldn't find the filename in the output, use a default pattern to search
        if not output_filename:
            # Look for any tts_test_*.mp3 files created in the last minute
            import glob
            import time
            recent_cutoff = time.time() - 60
            tts_files = [
                candidate for candidate in glob.glob("./SOUND/tts_test_*.mp3")
                if os.path.exists(candidate) and os.path.getmtime(candidate) > recent_cutoff
            ]

            if tts_files:
                # Use the most recently created file
                output_filename = max(tts_files, key=os.path.getmtime)
            else:
                # Fallback to the old filename pattern
                output_filename = "./SOUND/tts_test.mp3"

        def has_audio(path):
            """True when `path` exists and is not empty."""
            return os.path.exists(path) and os.path.getsize(path) > 0

        # When the script's espeak branch cannot convert to MP3 it leaves the
        # audio next to the announced name with a .wav extension.
        if not has_audio(output_filename) and output_filename.endswith(".mp3"):
            wav_sibling = output_filename[:-len(".mp3")] + ".wav"
            if has_audio(wav_sibling):
                output_filename = wav_sibling

        # Discord rejects messages longer than 2000 characters, so long input
        # is echoed back in shortened form.
        max_preview_chars = 1500
        if len(text) > max_preview_chars:
            text_preview = text[:max_preview_chars] + "..."
        else:
            text_preview = text

        # Check if the TTS file was generated
        if has_audio(output_filename):
            # Success! Send the audio file
            await interaction.followup.send(
                f"✅ Successfully tested TTS provider: {provider}\nText: {text_preview}\nFile: {os.path.basename(output_filename)}",
                file=discord.File(output_filename)
            )
            return

        # Failed to generate audio with subprocess, try direct method as fallback
        await interaction.followup.send(f"Subprocess method failed. Trying direct TTS generation with {provider}...")

        # Try the direct method
        success, result = await self.generate_tts_directly(provider, text)

        if success and os.path.exists(result) and os.path.getsize(result) > 0:
            # Direct method succeeded!
            await interaction.followup.send(
                f"✅ Successfully generated TTS audio with {provider} (direct method)\nText: {text_preview}",
                file=discord.File(result)
            )
            return

        # Both methods failed, send detailed error information
        error_message = f"❌ Failed to generate TTS audio with provider: {provider}\n\n"

        # Check if the process failed
        if process.returncode != 0:
            error_message += f"Process returned error code: {process.returncode}\n\n"

        # Add direct method error
        if not success:
            error_message += f"Direct method error: {result}\n\n"

        # Create a summary of the most important information
        error_summary = "Error Summary:\n"

        # Names and messages exactly as the diagnostic script prints them.
        availability_flags = {
            "gtts": "GTTS_AVAILABLE",
            "pyttsx3": "PYTTSX3_AVAILABLE",
            "coqui": "COQUI_AVAILABLE",
            "espeak": "ESPEAK_AVAILABLE",
            "google_cloud_tts": "GCLOUD_TTS_AVAILABLE",
        }
        error_markers = {
            "gtts": ("Error with Google TTS",),
            "pyttsx3": ("Error with pyttsx3",),
            "coqui": ("Error with Coqui TTS",),
            "espeak": ("Error with espeak-ng", "Error running espeak-ng"),
            "google_cloud_tts": ("Error with Google Cloud TTS",),
        }

        flag_name = availability_flags.get(provider)
        if flag_name and f"{flag_name}: False" in full_output:
            error_summary += f"- The {provider} library is not available or not properly installed\n"

        markers = error_markers.get(provider, ())
        # Extract the first line carrying this provider's error message
        error_line = next(
            (line for line in full_output.split('\n')
             if any(marker in line for marker in markers)),
            ""
        )
        if error_line:
            error_summary += f"- {error_line}\n"

        # Add the error summary to the message
        error_message += error_summary + "\n"

        # Add instructions for fixing the issue
        error_message += "To fix this issue, try:\n"
        error_message += "1. Make sure the required packages are installed:\n"

        if provider == "gtts":
            error_message += "   - Run: pip install gtts\n"
        elif provider == "pyttsx3":
            error_message += "   - Run: pip install pyttsx3\n"
            error_message += "   - On Linux, you may need additional packages: sudo apt-get install espeak\n"
        elif provider == "coqui":
            error_message += "   - Run: pip install TTS\n"
            error_message += "   - This may require additional dependencies based on your system\n"
        elif provider == "espeak":
            error_message += "   - Install espeak-ng (https://github.com/espeak-ng/espeak-ng/releases) and make sure it's in your PATH\n"
        elif provider == "google_cloud_tts":
            error_message += "   - Run: pip install google-cloud-texttospeech\n"
            error_message += "   - Ensure GOOGLE_APPLICATION_CREDENTIALS environment variable is set correctly.\n"

        error_message += "2. Restart the bot after installing the packages\n"

        # Add a note about the full output
        error_message += "\nFull diagnostic output is available but may be too long to display here."

        # Keep the report inside Discord's message limit; the complete
        # diagnostics follow separately below.
        message_limit = 2000
        if len(error_message) > message_limit:
            error_message = error_message[:message_limit - 20].rstrip() + "\n[truncated]"

        # Send the error message
        await interaction.followup.send(error_message)

        # If the output is not too long, send it as a separate message
        if len(full_output) <= 1900:  # Discord message limit is 2000 characters
            await interaction.followup.send(f"```\n{full_output}\n```")
        else:
            # Attach the log from memory; no shared temp file is involved.
            import io
            log_buffer = io.BytesIO(full_output.encode("utf8"))
            await interaction.followup.send(
                "Detailed error log:",
                file=discord.File(log_buffer, filename="tts_error_log.txt")
            )

    @commands.command(name="ttscheck")
    async def tts_check(self, ctx):
        """Check if TTS libraries are installed and working."""
        # Sends two messages to ctx: a short "checking" notice, then a report
        # with one status line per TTS backend followed by install hints.
        await ctx.send("Checking TTS libraries...")

        def module_status(module_name, installed_label=None):
            """Describe whether `module_name` can be imported.

            Returns "Not installed", "Error importing: ...", `installed_label`
            when given, or otherwise the module's __version__
            ("Unknown version" if it has none).
            """
            try:
                spec = importlib.util.find_spec(module_name)
            except (ImportError, ValueError):
                # find_spec imports the parent packages of a dotted name and
                # raises when one of them is missing.
                spec = None
            if spec is None:
                return "Not installed"

            try:
                module = importlib.import_module(module_name)
            except Exception as e:
                return f"Error importing: {str(e)}"

            if installed_label is not None:
                return installed_label
            return getattr(module, "__version__", "Unknown version")

        # Python packages, probed in the order they appear in the report
        gtts_version = module_status("gtts")
        pyttsx3_version = module_status("pyttsx3", "Installed (no version info available)")
        coqui_version = module_status("TTS")

        # Check for espeak-ng
        espeak_version = "Not installed"
        try:
            import shutil
            import subprocess

            # shutil.which searches PATH on every platform without relying on
            # an external `where`/`which` executable.
            if shutil.which("espeak-ng") is not None:
                # Try to get version
                version_result = subprocess.run(["espeak-ng", "--version"], capture_output=True, text=True)
                if version_result.returncode == 0:
                    espeak_version = version_result.stdout.strip()
                else:
                    espeak_version = "Installed (version unknown)"
        except Exception as e:
            espeak_version = f"Error checking: {str(e)}"

        # Check for Google Cloud TTS
        gcloud_tts_version = module_status("google.cloud.texttospeech")

        # Create a report
        report_lines = [
            "**TTS Libraries Status:**\n",
            f"- Google TTS (gtts): {gtts_version}\n",
            f"- pyttsx3: {pyttsx3_version}\n",
            f"- Coqui TTS: {coqui_version}\n",
            f"- eSpeak-NG: {espeak_version}\n",
            f"- Google Cloud TTS: {gcloud_tts_version}\n\n",
            # Installation instructions
            "**Installation Instructions:**\n",
            "- Google TTS: `pip install gtts`\n",
            "- pyttsx3: `pip install pyttsx3`\n",
            "- Coqui TTS: `pip install TTS`\n",
            "- eSpeak-NG: Install from https://github.com/espeak-ng/espeak-ng/releases\n",
            "- Google Cloud TTS: `pip install google-cloud-texttospeech` (ensure `GOOGLE_APPLICATION_CREDENTIALS` is set)\n\n",
            "After installing, restart the bot for the changes to take effect.",
        ]

        await ctx.send("".join(report_lines))

async def setup(bot: commands.Bot):
    """Extension entry point: build a TTSProviderCog and add it to ``bot``."""
    print("Loading TTSProviderCog...")
    cog = TTSProviderCog(bot)
    await bot.add_cog(cog)
    print("TTSProviderCog loaded successfully!")